blob: 92ad10fb044b3d4c68f2b09c09f88b53659e842a [file] [log] [blame]
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001"""Tests of the builder registry."""
2
3import unittest
4
5from bs4 import BeautifulSoup
6from bs4.builder import (
7 builder_registry as registry,
8 HTMLParserTreeBuilder,
9 TreeBuilderRegistry,
10)
11
12try:
13 from bs4.builder import HTML5TreeBuilder
14 HTML5LIB_PRESENT = True
15except ImportError:
16 HTML5LIB_PRESENT = False
17
18try:
19 from bs4.builder import (
20 LXMLTreeBuilderForXML,
21 LXMLTreeBuilder,
22 )
23 LXML_PRESENT = True
24except ImportError:
25 LXML_PRESENT = False
26
27
class BuiltInRegistryTest(unittest.TestCase):
    """Check lookups against the shared registry and its default builders.

    The lxml and html5lib builders are optional imports, so every
    expectation that names one of them is guarded by LXML_PRESENT or
    HTML5LIB_PRESENT.
    """

    def test_combination(self):
        # Two features at a time. Both lxml expectations share one guard.
        if LXML_PRESENT:
            fast_html = registry.lookup('fast', 'html')
            self.assertEqual(fast_html, LXMLTreeBuilder)
            permissive_xml = registry.lookup('permissive', 'xml')
            self.assertEqual(permissive_xml, LXMLTreeBuilderForXML)

        # HTMLParserTreeBuilder is imported unconditionally, so this
        # expectation is checked no matter which extras are installed.
        strict_html = registry.lookup('strict', 'html')
        self.assertEqual(strict_html, HTMLParserTreeBuilder)

        if HTML5LIB_PRESENT:
            html5lib_html = registry.lookup('html5lib', 'html')
            self.assertEqual(html5lib_html, HTML5TreeBuilder)

    def test_lookup_by_markup_type(self):
        # With lxml available it is expected for both markup types.
        if LXML_PRESENT:
            self.assertEqual(registry.lookup('html'), LXMLTreeBuilder)
            self.assertEqual(registry.lookup('xml'), LXMLTreeBuilderForXML)
            return

        # NOTE(review): the html5lib / html.parser expectations below are
        # read as part of the no-lxml branch; applied unconditionally they
        # would contradict the lxml expectation for 'html' above.
        self.assertEqual(registry.lookup('xml'), None)
        if HTML5LIB_PRESENT:
            expected_html = HTML5TreeBuilder
        else:
            expected_html = HTMLParserTreeBuilder
        self.assertEqual(registry.lookup('html'), expected_html)

    def test_named_library(self):
        # Collect (lookup arguments, expected builder) pairs, then check
        # them in the order they were added.
        expectations = []
        if LXML_PRESENT:
            expectations.append((('lxml', 'xml'), LXMLTreeBuilderForXML))
            expectations.append((('lxml', 'html'), LXMLTreeBuilder))
        if HTML5LIB_PRESENT:
            expectations.append((('html5lib',), HTML5TreeBuilder))
        expectations.append((('html.parser',), HTMLParserTreeBuilder))

        for lookup_args, expected_builder in expectations:
            self.assertEqual(registry.lookup(*lookup_args), expected_builder)

    def test_beautifulsoup_constructor_does_lookup(self):
        # A single feature string is accepted.
        BeautifulSoup("", features="html")
        # So is a list of feature strings.
        BeautifulSoup("", features=["html", "fast"])

        # A feature that no builder can satisfy is reported as a
        # ValueError.
        with self.assertRaises(ValueError):
            BeautifulSoup("", features="no-such-feature")
79
class RegistryTest(unittest.TestCase):
    """Exercise TreeBuilderRegistry on its own, using throwaway builders."""

    def setUp(self):
        """Give every test a fresh, empty registry."""
        self.registry = TreeBuilderRegistry()

    def builder_for_features(self, *feature_list):
        """Create a stub builder class, register it, and return it.

        The class is named after the features it advertises and exposes
        them through a ``features`` attribute (the tuple of arguments).
        """
        class_name = 'Builder_' + '_'.join(feature_list)
        attributes = {'features': feature_list}
        builder_cls = type(class_name, (object,), attributes)

        self.registry.register(builder_cls)
        return builder_cls

    def test_register_with_no_features(self):
        featureless = self.builder_for_features()

        # A builder that advertises nothing cannot be found by feature.
        self.assertEqual(self.registry.lookup('foo'), None)

        # A lookup with no features does return it, as long as it is the
        # only builder registered.
        self.assertEqual(self.registry.lookup(), featureless)

    def test_register_with_features_makes_lookup_succeed(self):
        registered = self.builder_for_features('foo', 'bar')
        for feature in ('foo', 'bar'):
            self.assertEqual(self.registry.lookup(feature), registered)

    def test_lookup_fails_when_no_builder_implements_feature(self):
        self.builder_for_features('foo', 'bar')
        self.assertEqual(self.registry.lookup('baz'), None)

    def test_lookup_gets_most_recent_registration_when_no_feature_specified(self):
        self.builder_for_features('foo')
        newest = self.builder_for_features('bar')
        self.assertEqual(self.registry.lookup(), newest)

    def test_lookup_fails_when_no_tree_builders_registered(self):
        self.assertEqual(self.registry.lookup(), None)

    def test_lookup_gets_most_recent_builder_supporting_all_features(self):
        # Registration order matters here, so each builder gets a name
        # describing its features and when it was registered.
        foo_only_early = self.builder_for_features('foo')
        bar_only_early = self.builder_for_features('bar')
        both_plus_baz = self.builder_for_features('foo', 'bar', 'baz')
        both_plus_quux = self.builder_for_features('foo', 'bar', 'quux')
        bar_only_late = self.builder_for_features('bar')
        foo_only_late = self.builder_for_features('foo')

        # Two builders offer both 'foo' and 'bar'; the one that also
        # offers 'quux' was registered after the other.
        found = self.registry.lookup('foo', 'bar')
        self.assertEqual(found, both_plus_quux)

        # Exactly one builder offers 'foo', 'bar', and 'baz' together.
        found = self.registry.lookup('foo', 'bar', 'baz')
        self.assertEqual(found, both_plus_baz)

    def test_lookup_fails_when_cannot_reconcile_requested_features(self):
        # 'bar' and 'baz' are each offered, but never by the same builder.
        self.builder_for_features('foo', 'bar')
        self.builder_for_features('foo', 'baz')
        self.assertEqual(self.registry.lookup('bar', 'baz'), None)