blob: 6c2a1d73ebae49500e9dff0809bca1592a5f58bd [file] [log] [blame]
"""Tests to ensure that the lxml tree builder generates good trees."""
2
3import re
4import warnings
5
# Probe for lxml once at import time. The two flags below feed the skipIf
# decorators in this module, so the file still imports when lxml is absent.
try:
    import lxml.etree
except ImportError:
    LXML_PRESENT = False
    # Placeholder that compares below the (2, 3, 5, 0) threshold used by
    # the version-gated doctype test.
    LXML_VERSION = (0,)
else:
    LXML_PRESENT = True
    LXML_VERSION = lxml.etree.LXML_VERSION
13
14if LXML_PRESENT:
15 from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
16
17from bs4 import (
18 BeautifulSoup,
19 BeautifulStoneSoup,
20 )
21from bs4.element import Comment, Doctype, SoupStrainer
22from bs4.testing import skipIf
23from bs4.tests import test_htmlparser
24from bs4.testing import (
25 HTMLTreeBuilderSmokeTest,
26 XMLTreeBuilderSmokeTest,
27 SoupTest,
28 skipIf,
29)
30
@skipIf(
    not LXML_PRESENT,
    "lxml seems not to be present, not testing its tree builder.")
class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
    """See ``HTMLTreeBuilderSmokeTest``."""

    @property
    def default_builder(self):
        """Return a new ``LXMLTreeBuilder`` instance."""
        return LXMLTreeBuilder()

    def test_out_of_range_entity(self):
        """Numeric character references with huge values should be dropped.

        Each input wraps one oversized decimal or hexadecimal reference
        between "foo" and "bar"; the expected markup is "<p>foobar</p>".
        """
        self.assertSoupEquals(
            "<p>foo&#10000000000000;bar</p>", "<p>foobar</p>")
        self.assertSoupEquals(
            "<p>foo&#x10000000000000;bar</p>", "<p>foobar</p>")
        self.assertSoupEquals(
            "<p>foo&#1000000000;bar</p>", "<p>foobar</p>")

    # In lxml < 2.3.5, an empty doctype causes a segfault. Skip this
    # test if an old version of lxml is installed.
    @skipIf(
        not LXML_PRESENT or LXML_VERSION < (2,3,5,0),
        "Skipping doctype test for old version of lxml to avoid segfault.")
    def test_empty_doctype(self):
        """An empty ``<!DOCTYPE>`` should yield a doctype that strips to ""."""
        soup = self.soup("<!DOCTYPE>")
        doctype = soup.contents[0]
        self.assertEqual("", doctype.strip())

    def test_beautifulstonesoup_is_xml_parser(self):
        """The deprecated ``BeautifulStoneSoup`` should build XML and warn."""
        # Make sure that the deprecated BSS class uses an xml builder
        # if one is installed.
        with warnings.catch_warnings(record=True) as w:
            # Force every warning to be recorded; otherwise a filter or an
            # earlier trigger elsewhere could suppress the deprecation
            # warning and leave the recorded list empty.
            warnings.simplefilter("always")
            soup = BeautifulStoneSoup("<b />")
        self.assertEqual("<b/>", str(soup.b))
        # Search all recorded warnings rather than assuming the deprecation
        # notice is the first one captured.
        messages = [str(item.message) for item in w]
        self.assertTrue(
            any("BeautifulStoneSoup class is deprecated" in text
                for text in messages),
            messages)
67
@skipIf(
    not LXML_PRESENT,
    "lxml seems not to be present, not testing its XML tree builder.")
class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest):
    """See ``XMLTreeBuilderSmokeTest``."""

    @property
    def default_builder(self):
        """Return a new ``LXMLTreeBuilderForXML`` instance."""
        return LXMLTreeBuilderForXML()