Blame - import-layers/yocto-poky/bitbake/lib/bs4/testing.py - mdmillerii/openbmc

blob: fd4495ac58c92f21a29d24c520738d55a908e0a2 [file] [log] [blame]

Patrick Williams	c124f4f	2015-09-15 14:41:29 -0500	[diff] [blame]	1	"""Helper classes for tests."""
				2
				3	import copy
				4	import functools
				5	import unittest
				6	from unittest import TestCase
				7	from bs4 import BeautifulSoup
				8	from bs4.element import (
				9	CharsetMetaAttributeValue,
				10	Comment,
				11	ContentMetaAttributeValue,
				12	Doctype,
				13	SoupStrainer,
				14	)
				15
				16	from bs4.builder import HTMLParserTreeBuilder
# Builder class used by SoupTest whenever a test does not pass its own
# ``builder``; SoupTest.default_builder instantiates it on each access.
default_builder = HTMLParserTreeBuilder
				18
				19
				20	class SoupTest(unittest.TestCase):
				21
				22	@property
				23	def default_builder(self):
				24	return default_builder()
				25
				26	def soup(self, markup, **kwargs):
				27	"""Build a Beautiful Soup object from markup."""
				28	builder = kwargs.pop('builder', self.default_builder)
				29	return BeautifulSoup(markup, builder=builder, **kwargs)
				30
				31	def document_for(self, markup):
				32	"""Turn an HTML fragment into a document.
				33
				34	The details depend on the builder.
				35	"""
				36	return self.default_builder.test_fragment_to_document(markup)
				37
				38	def assertSoupEquals(self, to_parse, compare_parsed_to=None):
				39	builder = self.default_builder
				40	obj = BeautifulSoup(to_parse, builder=builder)
				41	if compare_parsed_to is None:
				42	compare_parsed_to = to_parse
				43
				44	self.assertEqual(obj.decode(), self.document_for(compare_parsed_to))
				45
				46
				47	class HTMLTreeBuilderSmokeTest(object):
				48
				49	"""A basic test of a treebuilder's competence.
				50
				51	Any HTML treebuilder, present or future, should be able to pass
				52	these tests. With invalid markup, there's room for interpretation,
				53	and different parsers can handle it differently. But with the
				54	markup in these tests, there's not much room for interpretation.
				55	"""
				56
				57	def assertDoctypeHandled(self, doctype_fragment):
				58	"""Assert that a given doctype string is handled correctly."""
				59	doctype_str, soup = self._document_with_doctype(doctype_fragment)
				60
				61	# Make sure a Doctype object was created.
				62	doctype = soup.contents[0]
				63	self.assertEqual(doctype.__class__, Doctype)
				64	self.assertEqual(doctype, doctype_fragment)
				65	self.assertEqual(str(soup)[:len(doctype_str)], doctype_str)
				66
				67	# Make sure that the doctype was correctly associated with the
				68	# parse tree and that the rest of the document parsed.
				69	self.assertEqual(soup.p.contents[0], 'foo')
				70
				71	def _document_with_doctype(self, doctype_fragment):
				72	"""Generate and parse a document with the given doctype."""
				73	doctype = '<!DOCTYPE %s>' % doctype_fragment
				74	markup = doctype + '\n<p>foo</p>'
				75	soup = self.soup(markup)
				76	return doctype, soup
				77
				78	def test_normal_doctypes(self):
				79	"""Make sure normal, everyday HTML doctypes are handled correctly."""
				80	self.assertDoctypeHandled("html")
				81	self.assertDoctypeHandled(
				82	'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"')
				83
				84	def test_empty_doctype(self):
				85	soup = self.soup("<!DOCTYPE>")
				86	doctype = soup.contents[0]
				87	self.assertEqual("", doctype.strip())
				88
				89	def test_public_doctype_with_url(self):
				90	doctype = 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"'
				91	self.assertDoctypeHandled(doctype)
				92
				93	def test_system_doctype(self):
				94	self.assertDoctypeHandled('foo SYSTEM "http://www.example.com/"')
				95
				96	def test_namespaced_system_doctype(self):
				97	# We can handle a namespaced doctype with a system ID.
				98	self.assertDoctypeHandled('xsl:stylesheet SYSTEM "htmlent.dtd"')
				99
				100	def test_namespaced_public_doctype(self):
				101	# Test a namespaced doctype with a public id.
				102	self.assertDoctypeHandled('xsl:stylesheet PUBLIC "htmlent.dtd"')
				103
				104	def test_real_xhtml_document(self):
				105	"""A real XHTML document should come out more or less the same as it went in."""
				106	markup = b"""<?xml version="1.0" encoding="utf-8"?>
				107	<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
				108	<html xmlns="http://www.w3.org/1999/xhtml">
				109	<head><title>Hello.</title></head>
				110	<body>Goodbye.</body>
				111	</html>"""
				112	soup = self.soup(markup)
				113	self.assertEqual(
				114	soup.encode("utf-8").replace(b"\n", b""),
				115	markup.replace(b"\n", b""))
				116
				117	def test_deepcopy(self):
				118	"""Make sure you can copy the tree builder.
				119
				120	This is important because the builder is part of a
				121	BeautifulSoup object, and we want to be able to copy that.
				122	"""
				123	copy.deepcopy(self.default_builder)
				124
				125	def test_p_tag_is_never_empty_element(self):
				126	"""A <p> tag is never designated as an empty-element tag.
				127
				128	Even if the markup shows it as an empty-element tag, it
				129	shouldn't be presented that way.
				130	"""
				131	soup = self.soup("<p/>")
				132	self.assertFalse(soup.p.is_empty_element)
				133	self.assertEqual(str(soup.p), "<p></p>")
				134
				135	def test_unclosed_tags_get_closed(self):
				136	"""A tag that's not closed by the end of the document should be closed.
				137
				138	This applies to all tags except empty-element tags.
				139	"""
				140	self.assertSoupEquals("<p>", "<p></p>")
				141	self.assertSoupEquals("<b>", "<b></b>")
				142
				143	self.assertSoupEquals("<br>", "<br/>")
				144
				145	def test_br_is_always_empty_element_tag(self):
				146	"""A <br> tag is designated as an empty-element tag.
				147
				148	Some parsers treat <br></br> as one <br/> tag, some parsers as
				149	two tags, but it should always be an empty-element tag.
				150	"""
				151	soup = self.soup("<br></br>")
				152	self.assertTrue(soup.br.is_empty_element)
				153	self.assertEqual(str(soup.br), "<br/>")
				154
				155	def test_nested_formatting_elements(self):
				156	self.assertSoupEquals("<em><em></em></em>")
				157
				158	def test_comment(self):
				159	# Comments are represented as Comment objects.
				160	markup = "<p>foo<!--foobar-->baz</p>"
				161	self.assertSoupEquals(markup)
				162
				163	soup = self.soup(markup)
				164	comment = soup.find(text="foobar")
				165	self.assertEqual(comment.__class__, Comment)
				166
				167	# The comment is properly integrated into the tree.
				168	foo = soup.find(text="foo")
				169	self.assertEqual(comment, foo.next_element)
				170	baz = soup.find(text="baz")
				171	self.assertEqual(comment, baz.previous_element)
				172
				173	def test_preserved_whitespace_in_pre_and_textarea(self):
				174	"""Whitespace must be preserved in <pre> and <textarea> tags."""
				175	self.assertSoupEquals("<pre> </pre>")
				176	self.assertSoupEquals("<textarea> woo </textarea>")
				177
				178	def test_nested_inline_elements(self):
				179	"""Inline elements can be nested indefinitely."""
				180	b_tag = "<b>Inside a B tag</b>"
				181	self.assertSoupEquals(b_tag)
				182
				183	nested_b_tag = "<p>A <i>nested <b>tag</b></i></p>"
				184	self.assertSoupEquals(nested_b_tag)
				185
				186	double_nested_b_tag = "<p>A <a>doubly <i>nested <b>tag</b></i></a></p>"
				187	self.assertSoupEquals(nested_b_tag)
				188
				189	def test_nested_block_level_elements(self):
				190	"""Block elements can be nested."""
				191	soup = self.soup('<blockquote><p><b>Foo</b></p></blockquote>')
				192	blockquote = soup.blockquote
				193	self.assertEqual(blockquote.p.b.string, 'Foo')
				194	self.assertEqual(blockquote.b.string, 'Foo')
				195
				196	def test_correctly_nested_tables(self):
				197	"""One table can go inside another one."""
				198	markup = ('<table id="1">'
				199	'<tr>'
				200	"<td>Here's another table:"
				201	'<table id="2">'
				202	'<tr><td>foo</td></tr>'
				203	'</table></td>')
				204
				205	self.assertSoupEquals(
				206	markup,
				207	'<table id="1"><tr><td>Here\'s another table:'
				208	'<table id="2"><tr><td>foo</td></tr></table>'
				209	'</td></tr></table>')
				210
				211	self.assertSoupEquals(
				212	"<table><thead><tr><td>Foo</td></tr></thead>"
				213	"<tbody><tr><td>Bar</td></tr></tbody>"
				214	"<tfoot><tr><td>Baz</td></tr></tfoot></table>")
				215
				216	def test_deeply_nested_multivalued_attribute(self):
				217	# html5lib can set the attributes of the same tag many times
				218	# as it rearranges the tree. This has caused problems with
				219	# multivalued attributes.
				220	markup = '<table><div><div class="css"></div></div></table>'
				221	soup = self.soup(markup)
				222	self.assertEqual(["css"], soup.div.div['class'])
				223
				224	def test_angle_brackets_in_attribute_values_are_escaped(self):
				225	self.assertSoupEquals('<a b="<a>"></a>', '<a b="<a>"></a>')
				226
				227	def test_entities_in_attributes_converted_to_unicode(self):
				228	expect = u'<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>'
				229	self.assertSoupEquals('<p id="piñata"></p>', expect)
				230	self.assertSoupEquals('<p id="piñata"></p>', expect)
				231	self.assertSoupEquals('<p id="pi&#Xf1;ata"></p>', expect)
				232	self.assertSoupEquals('<p id="piñata"></p>', expect)
				233
				234	def test_entities_in_text_converted_to_unicode(self):
				235	expect = u'<p>pi\N{LATIN SMALL LETTER N WITH TILDE}ata</p>'
				236	self.assertSoupEquals("<p>piñata</p>", expect)
				237	self.assertSoupEquals("<p>piñata</p>", expect)
				238	self.assertSoupEquals("<p>pi&#Xf1;ata</p>", expect)
				239	self.assertSoupEquals("<p>piñata</p>", expect)
				240
				241	def test_quot_entity_converted_to_quotation_mark(self):
				242	self.assertSoupEquals("<p>I said "good day!"</p>",
				243	'<p>I said "good day!"</p>')
				244
				245	def test_out_of_range_entity(self):
				246	expect = u"\N{REPLACEMENT CHARACTER}"
				247	self.assertSoupEquals("&#10000000000000;", expect)
				248	self.assertSoupEquals("&#x10000000000000;", expect)
				249	self.assertSoupEquals("&#1000000000;", expect)
				250
				251	def test_multipart_strings(self):
				252	"Mostly to prevent a recurrence of a bug in the html5lib treebuilder."
				253	soup = self.soup("<html><h2>\nfoo</h2><p></p></html>")
				254	self.assertEqual("p", soup.h2.string.next_element.name)
				255	self.assertEqual("p", soup.p.name)
				256
				257	def test_basic_namespaces(self):
				258	"""Parsers don't need to understand namespaces, but at the
				259	very least they should not choke on namespaces or lose
				260	data."""
				261
				262	markup = b'<html xmlns="http://www.w3.org/1999/xhtml" xmlns:mathml="http://www.w3.org/1998/Math/MathML" xmlns:svg="http://www.w3.org/2000/svg"><head></head><body><mathml:msqrt>4</mathml:msqrt><b svg:fill="red"></b></body></html>'
				263	soup = self.soup(markup)
				264	self.assertEqual(markup, soup.encode())
				265	html = soup.html
				266	self.assertEqual('http://www.w3.org/1999/xhtml', soup.html['xmlns'])
				267	self.assertEqual(
				268	'http://www.w3.org/1998/Math/MathML', soup.html['xmlns:mathml'])
				269	self.assertEqual(
				270	'http://www.w3.org/2000/svg', soup.html['xmlns:svg'])
				271
				272	def test_multivalued_attribute_value_becomes_list(self):
				273	markup = b'<a class="foo bar">'
				274	soup = self.soup(markup)
				275	self.assertEqual(['foo', 'bar'], soup.a['class'])
				276
				277	#
				278	# Generally speaking, tests below this point are more tests of
				279	# Beautiful Soup than tests of the tree builders. But parsers are
				280	# weird, so we run these tests separately for every tree builder
				281	# to detect any differences between them.
				282	#
				283
				284	def test_can_parse_unicode_document(self):
				285	# A seemingly innocuous document... but it's in Unicode! And
				286	# it contains characters that can't be represented in the
				287	# encoding found in the declaration! The horror!
				288	markup = u'<html><head><meta encoding="euc-jp"></head><body>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</body>'
				289	soup = self.soup(markup)
				290	self.assertEqual(u'Sacr\xe9 bleu!', soup.body.string)
				291
				292	def test_soupstrainer(self):
				293	"""Parsers should be able to work with SoupStrainers."""
				294	strainer = SoupStrainer("b")
				295	soup = self.soup("A <b>bold</b> <meta/> <i>statement</i>",
				296	parse_only=strainer)
				297	self.assertEqual(soup.decode(), "<b>bold</b>")
				298
				299	def test_single_quote_attribute_values_become_double_quotes(self):
				300	self.assertSoupEquals("<foo attr='bar'></foo>",
				301	'<foo attr="bar"></foo>')
				302
				303	def test_attribute_values_with_nested_quotes_are_left_alone(self):
				304	text = """<foo attr='bar "brawls" happen'>a</foo>"""
				305	self.assertSoupEquals(text)
				306
				307	def test_attribute_values_with_double_nested_quotes_get_quoted(self):
				308	text = """<foo attr='bar "brawls" happen'>a</foo>"""
				309	soup = self.soup(text)
				310	soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"'
				311	self.assertSoupEquals(
				312	soup.foo.decode(),
				313	"""<foo attr="Brawls happen at "Bob\'s Bar"">a</foo>""")
				314
				315	def test_ampersand_in_attribute_value_gets_escaped(self):
				316	self.assertSoupEquals('<this is="really messed up & stuff"></this>',
				317	'<this is="really messed up & stuff"></this>')
				318
				319	self.assertSoupEquals(
				320	'<a href="http://example.org?a=1&b=2;3">foo</a>',
				321	'<a href="http://example.org?a=1&b=2;3">foo</a>')
				322
				323	def test_escaped_ampersand_in_attribute_value_is_left_alone(self):
				324	self.assertSoupEquals('<a href="http://example.org?a=1&b=2;3"></a>')
				325
				326	def test_entities_in_strings_converted_during_parsing(self):
				327	# Both XML and HTML entities are converted to Unicode characters
				328	# during parsing.
				329	text = "<p><<sacré bleu!>></p>"
				330	expected = u"<p><<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></p>"
				331	self.assertSoupEquals(text, expected)
				332
				333	def test_smart_quotes_converted_on_the_way_in(self):
				334	# Microsoft smart quotes are converted to Unicode characters during
				335	# parsing.
				336	quote = b"<p>\x91Foo\x92</p>"
				337	soup = self.soup(quote)
				338	self.assertEqual(
				339	soup.p.string,
				340	u"\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}")
				341
				342	def test_non_breaking_spaces_converted_on_the_way_in(self):
				343	soup = self.soup("<a>  </a>")
				344	self.assertEqual(soup.a.string, u"\N{NO-BREAK SPACE}" * 2)
				345
				346	def test_entities_converted_on_the_way_out(self):
				347	text = "<p><<sacré bleu!>></p>"
				348	expected = u"<p><<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></p>".encode("utf-8")
				349	soup = self.soup(text)
				350	self.assertEqual(soup.p.encode("utf-8"), expected)
				351
				352	def test_real_iso_latin_document(self):
				353	# Smoke test of interrelated functionality, using an
				354	# easy-to-understand document.
				355
				356	# Here it is in Unicode. Note that it claims to be in ISO-Latin-1.
				357	unicode_html = u'<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>'
				358
				359	# That's because we're going to encode it into ISO-Latin-1, and use
				360	# that to test.
				361	iso_latin_html = unicode_html.encode("iso-8859-1")
				362
				363	# Parse the ISO-Latin-1 HTML.
				364	soup = self.soup(iso_latin_html)
				365	# Encode it to UTF-8.
				366	result = soup.encode("utf-8")
				367
				368	# What do we expect the result to look like? Well, it would
				369	# look like unicode_html, except that the META tag would say
				370	# UTF-8 instead of ISO-Latin-1.
				371	expected = unicode_html.replace("ISO-Latin-1", "utf-8")
				372
				373	# And, of course, it would be in UTF-8, not Unicode.
				374	expected = expected.encode("utf-8")
				375
				376	# Ta-da!
				377	self.assertEqual(result, expected)
				378
				379	def test_real_shift_jis_document(self):
				380	# Smoke test to make sure the parser can handle a document in
				381	# Shift-JIS encoding, without choking.
				382	shift_jis_html = (
				383	b'<html><head></head><body><pre>'
				384	b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
				385	b'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
				386	b'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B'
				387	b'</pre></body></html>')
				388	unicode_html = shift_jis_html.decode("shift-jis")
				389	soup = self.soup(unicode_html)
				390
				391	# Make sure the parse tree is correctly encoded to various
				392	# encodings.
				393	self.assertEqual(soup.encode("utf-8"), unicode_html.encode("utf-8"))
				394	self.assertEqual(soup.encode("euc_jp"), unicode_html.encode("euc_jp"))
				395
				396	def test_real_hebrew_document(self):
				397	# A real-world test to make sure we can convert ISO-8859-9 (a
				398	# Hebrew encoding) to UTF-8.
				399	hebrew_document = b'<html><head><title>Hebrew (ISO 8859-8) in Visual Directionality</title></head><body><h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\xed\xe5\xec\xf9</body></html>'
				400	soup = self.soup(
				401	hebrew_document, from_encoding="iso8859-8")
				402	self.assertEqual(soup.original_encoding, 'iso8859-8')
				403	self.assertEqual(
				404	soup.encode('utf-8'),
				405	hebrew_document.decode("iso8859-8").encode("utf-8"))
				406
				407	def test_meta_tag_reflects_current_encoding(self):
				408	# Here's the <meta> tag saying that a document is
				409	# encoded in Shift-JIS.
				410	meta_tag = ('<meta content="text/html; charset=x-sjis" '
				411	'http-equiv="Content-type"/>')
				412
				413	# Here's a document incorporating that meta tag.
				414	shift_jis_html = (
				415	'<html><head>\n%s\n'
				416	'<meta http-equiv="Content-language" content="ja"/>'
				417	'</head><body>Shift-JIS markup goes here.') % meta_tag
				418	soup = self.soup(shift_jis_html)
				419
				420	# Parse the document, and the charset is seemingly unaffected.
				421	parsed_meta = soup.find('meta', {'http-equiv': 'Content-type'})
				422	content = parsed_meta['content']
				423	self.assertEqual('text/html; charset=x-sjis', content)
				424
				425	# But that value is actually a ContentMetaAttributeValue object.
				426	self.assertTrue(isinstance(content, ContentMetaAttributeValue))
				427
				428	# And it will take on a value that reflects its current
				429	# encoding.
				430	self.assertEqual('text/html; charset=utf8', content.encode("utf8"))
				431
				432	# For the rest of the story, see TestSubstitutions in
				433	# test_tree.py.
				434
				435	def test_html5_style_meta_tag_reflects_current_encoding(self):
				436	# Here's the <meta> tag saying that a document is
				437	# encoded in Shift-JIS.
				438	meta_tag = ('<meta id="encoding" charset="x-sjis" />')
				439
				440	# Here's a document incorporating that meta tag.
				441	shift_jis_html = (
				442	'<html><head>\n%s\n'
				443	'<meta http-equiv="Content-language" content="ja"/>'
				444	'</head><body>Shift-JIS markup goes here.') % meta_tag
				445	soup = self.soup(shift_jis_html)
				446
				447	# Parse the document, and the charset is seemingly unaffected.
				448	parsed_meta = soup.find('meta', id="encoding")
				449	charset = parsed_meta['charset']
				450	self.assertEqual('x-sjis', charset)
				451
				452	# But that value is actually a CharsetMetaAttributeValue object.
				453	self.assertTrue(isinstance(charset, CharsetMetaAttributeValue))
				454
				455	# And it will take on a value that reflects its current
				456	# encoding.
				457	self.assertEqual('utf8', charset.encode("utf8"))
				458
				459	def test_tag_with_no_attributes_can_have_attributes_added(self):
				460	data = self.soup("<a>text</a>")
				461	data.a['foo'] = 'bar'
				462	self.assertEqual('<a foo="bar">text</a>', data.a.decode())
				463
				464	class XMLTreeBuilderSmokeTest(object):
				465
				466	def test_docstring_generated(self):
				467	soup = self.soup("<root/>")
				468	self.assertEqual(
				469	soup.encode(), b'<?xml version="1.0" encoding="utf-8"?>\n<root/>')
				470
				471	def test_real_xhtml_document(self):
				472	"""A real XHTML document should come out exactly the same as it went in."""
				473	markup = b"""<?xml version="1.0" encoding="utf-8"?>
				474	<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
				475	<html xmlns="http://www.w3.org/1999/xhtml">
				476	<head><title>Hello.</title></head>
				477	<body>Goodbye.</body>
				478	</html>"""
				479	soup = self.soup(markup)
				480	self.assertEqual(
				481	soup.encode("utf-8"), markup)
				482
				483	def test_formatter_processes_script_tag_for_xml_documents(self):
				484	doc = """
				485	<script type="text/javascript">
				486	</script>
				487	"""
				488	soup = BeautifulSoup(doc, "xml")
				489	# lxml would have stripped this while parsing, but we can add
				490	# it later.
				491	soup.script.string = 'console.log("< < hey > > ");'
				492	encoded = soup.encode()
				493	self.assertTrue(b"< < hey > >" in encoded)
				494
				495	def test_can_parse_unicode_document(self):
				496	markup = u'<?xml version="1.0" encoding="euc-jp"><root>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</root>'
				497	soup = self.soup(markup)
				498	self.assertEqual(u'Sacr\xe9 bleu!', soup.root.string)
				499
				500	def test_popping_namespaced_tag(self):
				501	markup = '<rss xmlns:dc="foo"><dc:creator>b</dc:creator><dc:date>2012-07-02T20:33:42Z</dc:date><dc:rights>c</dc:rights><image>d</image></rss>'
				502	soup = self.soup(markup)
				503	self.assertEqual(
				504	unicode(soup.rss), markup)
				505
				506	def test_docstring_includes_correct_encoding(self):
				507	soup = self.soup("<root/>")
				508	self.assertEqual(
				509	soup.encode("latin1"),
				510	b'<?xml version="1.0" encoding="latin1"?>\n<root/>')
				511
				512	def test_large_xml_document(self):
				513	"""A large XML document should come out the same as it went in."""
				514	markup = (b'<?xml version="1.0" encoding="utf-8"?>\n<root>'
				515	+ b'0' * (2**12)
				516	+ b'</root>')
				517	soup = self.soup(markup)
				518	self.assertEqual(soup.encode("utf-8"), markup)
				519
				520
				521	def test_tags_are_empty_element_if_and_only_if_they_are_empty(self):
				522	self.assertSoupEquals("<p>", "<p/>")
				523	self.assertSoupEquals("<p>foo</p>")
				524
				525	def test_namespaces_are_preserved(self):
				526	markup = '<root xmlns:a="http://example.com/" xmlns:b="http://example.net/"><a:foo>This tag is in the a namespace</a:foo><b:foo>This tag is in the b namespace</b:foo></root>'
				527	soup = self.soup(markup)
				528	root = soup.root
				529	self.assertEqual("http://example.com/", root['xmlns:a'])
				530	self.assertEqual("http://example.net/", root['xmlns:b'])
				531
				532	def test_closing_namespaced_tag(self):
				533	markup = '<p xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:date>20010504</dc:date></p>'
				534	soup = self.soup(markup)
				535	self.assertEqual(unicode(soup.p), markup)
				536
				537	def test_namespaced_attributes(self):
				538	markup = '<foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><bar xsi:schemaLocation="http://www.example.com"/></foo>'
				539	soup = self.soup(markup)
				540	self.assertEqual(unicode(soup.foo), markup)
				541
				542	def test_namespaced_attributes_xml_namespace(self):
				543	markup = '<foo xml:lang="fr">bar</foo>'
				544	soup = self.soup(markup)
				545	self.assertEqual(unicode(soup.foo), markup)
				546
				547	class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest):
				548	"""Smoke test for a tree builder that supports HTML5."""
				549
				550	def test_real_xhtml_document(self):
				551	# Since XHTML is not HTML5, HTML5 parsers are not tested to handle
				552	# XHTML documents in any particular way.
				553	pass
				554
				555	def test_html_tags_have_namespace(self):
				556	markup = "<a>"
				557	soup = self.soup(markup)
				558	self.assertEqual("http://www.w3.org/1999/xhtml", soup.a.namespace)
				559
				560	def test_svg_tags_have_namespace(self):
				561	markup = '<svg><circle/></svg>'
				562	soup = self.soup(markup)
				563	namespace = "http://www.w3.org/2000/svg"
				564	self.assertEqual(namespace, soup.svg.namespace)
				565	self.assertEqual(namespace, soup.circle.namespace)
				566
				567
				568	def test_mathml_tags_have_namespace(self):
				569	markup = '<math><msqrt>5</msqrt></math>'
				570	soup = self.soup(markup)
				571	namespace = 'http://www.w3.org/1998/Math/MathML'
				572	self.assertEqual(namespace, soup.math.namespace)
				573	self.assertEqual(namespace, soup.msqrt.namespace)
				574
				575	def test_xml_declaration_becomes_comment(self):
				576	markup = '<?xml version="1.0" encoding="utf-8"?><html></html>'
				577	soup = self.soup(markup)
				578	self.assertTrue(isinstance(soup.contents[0], Comment))
				579	self.assertEqual(soup.contents[0], '?xml version="1.0" encoding="utf-8"?')
				580	self.assertEqual("html", soup.contents[0].next_element.name)
				581
				582	def skipIf(condition, reason):
				583	def nothing(test, args, *kwargs):
				584	return None
				585
				586	def decorator(test_item):
				587	if condition:
				588	return nothing
				589	else:
				590	return test_item
				591
				592	return decorator