blob: 6b6cdd07cb77bdb02a0f5ba3b7d7e5cb98d8b088 [file] [log] [blame]
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001"""Tests to ensure that the lxml tree builder generates good trees."""
2
Patrick Williamsc124f4f2015-09-15 14:41:29 -05003import warnings
4
# lxml is an optional dependency: record whether it can be imported and
# which version is installed, so the tests below can be skipped cleanly
# instead of failing at import time.
try:
    import lxml.etree
    LXML_PRESENT = True
    LXML_VERSION = lxml.etree.LXML_VERSION
except ImportError:
    LXML_PRESENT = False
    # Placeholder version; it compares lower than the (2, 3, 5, 0)
    # threshold used by the version-gated doctype test.
    LXML_VERSION = (0,)
12
13if LXML_PRESENT:
14 from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
15
Andrew Geissler82c905d2020-04-13 13:39:40 -050016from bs4 import BeautifulStoneSoup
Patrick Williamsc124f4f2015-09-15 14:41:29 -050017from bs4.testing import skipIf
Patrick Williamsc124f4f2015-09-15 14:41:29 -050018from bs4.testing import (
19 HTMLTreeBuilderSmokeTest,
20 XMLTreeBuilderSmokeTest,
21 SoupTest,
22 skipIf,
23)
24
@skipIf(
    not LXML_PRESENT,
    "lxml seems not to be present, not testing its tree builder.")
class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
    """Smoke tests for lxml's HTML tree builder.

    See ``HTMLTreeBuilderSmokeTest`` for the inherited tests; the methods
    defined here add lxml-specific checks.
    """

    @property
    def default_builder(self):
        """Return a new ``LXMLTreeBuilder`` instance."""
        return LXMLTreeBuilder()

    def test_out_of_range_entity(self):
        # Numeric character references with out-of-range values (decimal
        # or hexadecimal) are expected to be dropped from the output.
        self.assertSoupEquals(
            "<p>foo&#10000000000000;bar</p>", "<p>foobar</p>")
        self.assertSoupEquals(
            "<p>foo&#x10000000000000;bar</p>", "<p>foobar</p>")
        self.assertSoupEquals(
            "<p>foo&#1000000000;bar</p>", "<p>foobar</p>")

    # In lxml < 2.3.5, an empty doctype causes a segfault. Skip this
    # test if an old version of lxml is installed.

    @skipIf(
        not LXML_PRESENT or LXML_VERSION < (2,3,5,0),
        "Skipping doctype test for old version of lxml to avoid segfault.")
    def test_empty_doctype(self):
        soup = self.soup("<!DOCTYPE>")
        doctype = soup.contents[0]
        self.assertEqual("", doctype.strip())

    def test_beautifulstonesoup_is_xml_parser(self):
        # Make sure that the deprecated BSS class uses an xml builder
        # if one is installed.
        with warnings.catch_warnings(record=True) as w:
            # Force every warning to be recorded. Without this, the
            # active warning filters may suppress the deprecation
            # warning, leaving ``w`` empty.
            warnings.simplefilter("always")
            soup = BeautifulStoneSoup("<b />")
        self.assertEqual("<b/>", str(soup.b))
        # Fail with a readable message rather than an IndexError when no
        # warning was captured.
        self.assertTrue(w, "BeautifulStoneSoup did not emit a warning")
        self.assertIn(
            "BeautifulStoneSoup class is deprecated", str(w[0].message))
61
@skipIf(
    not LXML_PRESENT,
    "lxml seems not to be present, not testing its XML tree builder.")
class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest):
    """Smoke tests for lxml's XML tree builder.

    See ``XMLTreeBuilderSmokeTest`` for the inherited tests.
    """

    @property
    def default_builder(self):
        """Return a new ``LXMLTreeBuilderForXML`` instance."""
        return LXMLTreeBuilderForXML()