Blame - yocto-poky/bitbake/lib/bs4/tests/test_tree.py - stefanberger/openbmc

blob: f8515c0ea1c06056471949c1fdf5a2dbb5dd12fd [file] [log] [blame]

Patrick Williams	c124f4f	2015-09-15 14:41:29 -0500	[diff] [blame]	1	# -- coding: utf-8 --
				2	"""Tests for Beautiful Soup's tree traversal methods.
				3
				4	The tree traversal methods are the main advantage of using Beautiful
				5	Soup over just using a parser.
				6
				7	Different parsers will build different Beautiful Soup trees given the
				8	same markup, but all Beautiful Soup trees can be traversed with the
				9	methods tested here.
				10	"""
				11
				12	import copy
				13	import pickle
				14	import re
				15	import warnings
				16	from bs4 import BeautifulSoup
				17	from bs4.builder import (
				18	builder_registry,
				19	HTMLParserTreeBuilder,
				20	)
				21	from bs4.element import (
				22	CData,
				23	Comment,
				24	Doctype,
				25	NavigableString,
				26	SoupStrainer,
				27	Tag,
				28	)
				29	from bs4.testing import (
				30	SoupTest,
				31	skipIf,
				32	)
				33
				34	XML_BUILDER_PRESENT = (builder_registry.lookup("xml") is not None)
				35	LXML_PRESENT = (builder_registry.lookup("lxml") is not None)
				36
				37	class TreeTest(SoupTest):
				38
				39	def assertSelects(self, tags, should_match):
				40	"""Make sure that the given tags have the correct text.
				41
				42	This is used in tests that define a bunch of tags, each
				43	containing a single string, and then select certain strings by
				44	some mechanism.
				45	"""
				46	self.assertEqual([tag.string for tag in tags], should_match)
				47
				48	def assertSelectsIDs(self, tags, should_match):
				49	"""Make sure that the given tags have the correct IDs.
				50
				51	This is used in tests that define a bunch of tags, each
				52	containing a single string, and then select certain strings by
				53	some mechanism.
				54	"""
				55	self.assertEqual([tag['id'] for tag in tags], should_match)
				56
				57
				58	class TestFind(TreeTest):
				59	"""Basic tests of the find() method.
				60
				61	find() just calls find_all() with limit=1, so it's not tested all
				62	that thouroughly here.
				63	"""
				64
				65	def test_find_tag(self):
				66	soup = self.soup("<a>1</a><b>2</b><a>3</a><b>4</b>")
				67	self.assertEqual(soup.find("b").string, "2")
				68
				69	def test_unicode_text_find(self):
				70	soup = self.soup(u'<h1>Räksmörgås</h1>')
				71	self.assertEqual(soup.find(text=u'Räksmörgås'), u'Räksmörgås')
				72
				73	def test_find_everything(self):
				74	"""Test an optimization that finds all tags."""
				75	soup = self.soup("<a>foo</a><b>bar</b>")
				76	self.assertEqual(2, len(soup.find_all()))
				77
				78	def test_find_everything_with_name(self):
				79	"""Test an optimization that finds all tags with a given name."""
				80	soup = self.soup("<a>foo</a><b>bar</b><a>baz</a>")
				81	self.assertEqual(2, len(soup.find_all('a')))
				82
				83	class TestFindAll(TreeTest):
				84	"""Basic tests of the find_all() method."""
				85
				86	def test_find_all_text_nodes(self):
				87	"""You can search the tree for text nodes."""
				88	soup = self.soup("<html>Foo<b>bar</b>\xbb</html>")
				89	# Exact match.
				90	self.assertEqual(soup.find_all(text="bar"), [u"bar"])
				91	# Match any of a number of strings.
				92	self.assertEqual(
				93	soup.find_all(text=["Foo", "bar"]), [u"Foo", u"bar"])
				94	# Match a regular expression.
				95	self.assertEqual(soup.find_all(text=re.compile('.*')),
				96	[u"Foo", u"bar", u'\xbb'])
				97	# Match anything.
				98	self.assertEqual(soup.find_all(text=True),
				99	[u"Foo", u"bar", u'\xbb'])
				100
				101	def test_find_all_limit(self):
				102	"""You can limit the number of items returned by find_all."""
				103	soup = self.soup("<a>1</a><a>2</a><a>3</a><a>4</a><a>5</a>")
				104	self.assertSelects(soup.find_all('a', limit=3), ["1", "2", "3"])
				105	self.assertSelects(soup.find_all('a', limit=1), ["1"])
				106	self.assertSelects(
				107	soup.find_all('a', limit=10), ["1", "2", "3", "4", "5"])
				108
				109	# A limit of 0 means no limit.
				110	self.assertSelects(
				111	soup.find_all('a', limit=0), ["1", "2", "3", "4", "5"])
				112
				113	def test_calling_a_tag_is_calling_findall(self):
				114	soup = self.soup("<a>1</a><b>2<a id='foo'>3</a></b>")
				115	self.assertSelects(soup('a', limit=1), ["1"])
				116	self.assertSelects(soup.b(id="foo"), ["3"])
				117
				118	def test_find_all_with_self_referential_data_structure_does_not_cause_infinite_recursion(self):
				119	soup = self.soup("<a></a>")
				120	# Create a self-referential list.
				121	l = []
				122	l.append(l)
				123
				124	# Without special code in _normalize_search_value, this would cause infinite
				125	# recursion.
				126	self.assertEqual([], soup.find_all(l))
				127
				128	def test_find_all_resultset(self):
				129	"""All find_all calls return a ResultSet"""
				130	soup = self.soup("<a></a>")
				131	result = soup.find_all("a")
				132	self.assertTrue(hasattr(result, "source"))
				133
				134	result = soup.find_all(True)
				135	self.assertTrue(hasattr(result, "source"))
				136
				137	result = soup.find_all(text="foo")
				138	self.assertTrue(hasattr(result, "source"))
				139
				140
				141	class TestFindAllBasicNamespaces(TreeTest):
				142
				143	def test_find_by_namespaced_name(self):
				144	soup = self.soup('<mathml:msqrt>4</mathml:msqrt><a svg:fill="red">')
				145	self.assertEqual("4", soup.find("mathml:msqrt").string)
				146	self.assertEqual("a", soup.find(attrs= { "svg:fill" : "red" }).name)
				147
				148
				149	class TestFindAllByName(TreeTest):
				150	"""Test ways of finding tags by tag name."""
				151
				152	def setUp(self):
				153	super(TreeTest, self).setUp()
				154	self.tree = self.soup("""<a>First tag.</a>
				155	<b>Second tag.</b>
				156	<c>Third <a>Nested tag.</a> tag.</c>""")
				157
				158	def test_find_all_by_tag_name(self):
				159	# Find all the <a> tags.
				160	self.assertSelects(
				161	self.tree.find_all('a'), ['First tag.', 'Nested tag.'])
				162
				163	def test_find_all_by_name_and_text(self):
				164	self.assertSelects(
				165	self.tree.find_all('a', text='First tag.'), ['First tag.'])
				166
				167	self.assertSelects(
				168	self.tree.find_all('a', text=True), ['First tag.', 'Nested tag.'])
				169
				170	self.assertSelects(
				171	self.tree.find_all('a', text=re.compile("tag")),
				172	['First tag.', 'Nested tag.'])
				173
				174
				175	def test_find_all_on_non_root_element(self):
				176	# You can call find_all on any node, not just the root.
				177	self.assertSelects(self.tree.c.find_all('a'), ['Nested tag.'])
				178
				179	def test_calling_element_invokes_find_all(self):
				180	self.assertSelects(self.tree('a'), ['First tag.', 'Nested tag.'])
				181
				182	def test_find_all_by_tag_strainer(self):
				183	self.assertSelects(
				184	self.tree.find_all(SoupStrainer('a')),
				185	['First tag.', 'Nested tag.'])
				186
				187	def test_find_all_by_tag_names(self):
				188	self.assertSelects(
				189	self.tree.find_all(['a', 'b']),
				190	['First tag.', 'Second tag.', 'Nested tag.'])
				191
				192	def test_find_all_by_tag_dict(self):
				193	self.assertSelects(
				194	self.tree.find_all({'a' : True, 'b' : True}),
				195	['First tag.', 'Second tag.', 'Nested tag.'])
				196
				197	def test_find_all_by_tag_re(self):
				198	self.assertSelects(
				199	self.tree.find_all(re.compile('^[ab]$')),
				200	['First tag.', 'Second tag.', 'Nested tag.'])
				201
				202	def test_find_all_with_tags_matching_method(self):
				203	# You can define an oracle method that determines whether
				204	# a tag matches the search.
				205	def id_matches_name(tag):
				206	return tag.name == tag.get('id')
				207
				208	tree = self.soup("""<a id="a">Match 1.</a>
				209	<a id="1">Does not match.</a>
				210	<b id="b">Match 2.</a>""")
				211
				212	self.assertSelects(
				213	tree.find_all(id_matches_name), ["Match 1.", "Match 2."])
				214
				215
				216	class TestFindAllByAttribute(TreeTest):
				217
				218	def test_find_all_by_attribute_name(self):
				219	# You can pass in keyword arguments to find_all to search by
				220	# attribute.
				221	tree = self.soup("""
				222	<a id="first">Matching a.</a>
				223	<a id="second">
				224	Non-matching <b id="first">Matching b.</b>a.
				225	</a>""")
				226	self.assertSelects(tree.find_all(id='first'),
				227	["Matching a.", "Matching b."])
				228
				229	def test_find_all_by_utf8_attribute_value(self):
				230	peace = u"םולש".encode("utf8")
				231	data = u'<a title="םולש"></a>'.encode("utf8")
				232	soup = self.soup(data)
				233	self.assertEqual([soup.a], soup.find_all(title=peace))
				234	self.assertEqual([soup.a], soup.find_all(title=peace.decode("utf8")))
				235	self.assertEqual([soup.a], soup.find_all(title=[peace, "something else"]))
				236
				237	def test_find_all_by_attribute_dict(self):
				238	# You can pass in a dictionary as the argument 'attrs'. This
				239	# lets you search for attributes like 'name' (a fixed argument
				240	# to find_all) and 'class' (a reserved word in Python.)
				241	tree = self.soup("""
				242	<a name="name1" class="class1">Name match.</a>
				243	<a name="name2" class="class2">Class match.</a>
				244	<a name="name3" class="class3">Non-match.</a>
				245	<name1>A tag called 'name1'.</name1>
				246	""")
				247
				248	# This doesn't do what you want.
				249	self.assertSelects(tree.find_all(name='name1'),
				250	["A tag called 'name1'."])
				251	# This does what you want.
				252	self.assertSelects(tree.find_all(attrs={'name' : 'name1'}),
				253	["Name match."])
				254
				255	self.assertSelects(tree.find_all(attrs={'class' : 'class2'}),
				256	["Class match."])
				257
				258	def test_find_all_by_class(self):
				259	tree = self.soup("""
				260	<a class="1">Class 1.</a>
				261	<a class="2">Class 2.</a>
				262	<b class="1">Class 1.</b>
				263	<c class="3 4">Class 3 and 4.</c>
				264	""")
				265
				266	# Passing in the class_ keyword argument will search against
				267	# the 'class' attribute.
				268	self.assertSelects(tree.find_all('a', class_='1'), ['Class 1.'])
				269	self.assertSelects(tree.find_all('c', class_='3'), ['Class 3 and 4.'])
				270	self.assertSelects(tree.find_all('c', class_='4'), ['Class 3 and 4.'])
				271
				272	# Passing in a string to 'attrs' will also search the CSS class.
				273	self.assertSelects(tree.find_all('a', '1'), ['Class 1.'])
				274	self.assertSelects(tree.find_all(attrs='1'), ['Class 1.', 'Class 1.'])
				275	self.assertSelects(tree.find_all('c', '3'), ['Class 3 and 4.'])
				276	self.assertSelects(tree.find_all('c', '4'), ['Class 3 and 4.'])
				277
				278	def test_find_by_class_when_multiple_classes_present(self):
				279	tree = self.soup("<gar class='foo bar'>Found it</gar>")
				280
				281	f = tree.find_all("gar", class_=re.compile("o"))
				282	self.assertSelects(f, ["Found it"])
				283
				284	f = tree.find_all("gar", class_=re.compile("a"))
				285	self.assertSelects(f, ["Found it"])
				286
				287	# Since the class is not the string "foo bar", but the two
				288	# strings "foo" and "bar", this will not find anything.
				289	f = tree.find_all("gar", class_=re.compile("o b"))
				290	self.assertSelects(f, [])
				291
				292	def test_find_all_with_non_dictionary_for_attrs_finds_by_class(self):
				293	soup = self.soup("<a class='bar'>Found it</a>")
				294
				295	self.assertSelects(soup.find_all("a", re.compile("ba")), ["Found it"])
				296
				297	def big_attribute_value(value):
				298	return len(value) > 3
				299
				300	self.assertSelects(soup.find_all("a", big_attribute_value), [])
				301
				302	def small_attribute_value(value):
				303	return len(value) <= 3
				304
				305	self.assertSelects(
				306	soup.find_all("a", small_attribute_value), ["Found it"])
				307
				308	def test_find_all_with_string_for_attrs_finds_multiple_classes(self):
				309	soup = self.soup('<a class="foo bar"></a><a class="foo"></a>')
				310	a, a2 = soup.find_all("a")
				311	self.assertEqual([a, a2], soup.find_all("a", "foo"))
				312	self.assertEqual([a], soup.find_all("a", "bar"))
				313
				314	# If you specify the class as a string that contains a
				315	# space, only that specific value will be found.
				316	self.assertEqual([a], soup.find_all("a", class_="foo bar"))
				317	self.assertEqual([a], soup.find_all("a", "foo bar"))
				318	self.assertEqual([], soup.find_all("a", "bar foo"))
				319
				320	def test_find_all_by_attribute_soupstrainer(self):
				321	tree = self.soup("""
				322	<a id="first">Match.</a>
				323	<a id="second">Non-match.</a>""")
				324
				325	strainer = SoupStrainer(attrs={'id' : 'first'})
				326	self.assertSelects(tree.find_all(strainer), ['Match.'])
				327
				328	def test_find_all_with_missing_atribute(self):
				329	# You can pass in None as the value of an attribute to find_all.
				330	# This will match tags that do not have that attribute set.
				331	tree = self.soup("""<a id="1">ID present.</a>
				332	<a>No ID present.</a>
				333	<a id="">ID is empty.</a>""")
				334	self.assertSelects(tree.find_all('a', id=None), ["No ID present."])
				335
				336	def test_find_all_with_defined_attribute(self):
				337	# You can pass in None as the value of an attribute to find_all.
				338	# This will match tags that have that attribute set to any value.
				339	tree = self.soup("""<a id="1">ID present.</a>
				340	<a>No ID present.</a>
				341	<a id="">ID is empty.</a>""")
				342	self.assertSelects(
				343	tree.find_all(id=True), ["ID present.", "ID is empty."])
				344
				345	def test_find_all_with_numeric_attribute(self):
				346	# If you search for a number, it's treated as a string.
				347	tree = self.soup("""<a id=1>Unquoted attribute.</a>
				348	<a id="1">Quoted attribute.</a>""")
				349
				350	expected = ["Unquoted attribute.", "Quoted attribute."]
				351	self.assertSelects(tree.find_all(id=1), expected)
				352	self.assertSelects(tree.find_all(id="1"), expected)
				353
				354	def test_find_all_with_list_attribute_values(self):
				355	# You can pass a list of attribute values instead of just one,
				356	# and you'll get tags that match any of the values.
				357	tree = self.soup("""<a id="1">1</a>
				358	<a id="2">2</a>
				359	<a id="3">3</a>
				360	<a>No ID.</a>""")
				361	self.assertSelects(tree.find_all(id=["1", "3", "4"]),
				362	["1", "3"])
				363
				364	def test_find_all_with_regular_expression_attribute_value(self):
				365	# You can pass a regular expression as an attribute value, and
				366	# you'll get tags whose values for that attribute match the
				367	# regular expression.
				368	tree = self.soup("""<a id="a">One a.</a>
				369	<a id="aa">Two as.</a>
				370	<a id="ab">Mixed as and bs.</a>
				371	<a id="b">One b.</a>
				372	<a>No ID.</a>""")
				373
				374	self.assertSelects(tree.find_all(id=re.compile("^a+$")),
				375	["One a.", "Two as."])
				376
				377	def test_find_by_name_and_containing_string(self):
				378	soup = self.soup("<b>foo</b><b>bar</b><a>foo</a>")
				379	a = soup.a
				380
				381	self.assertEqual([a], soup.find_all("a", text="foo"))
				382	self.assertEqual([], soup.find_all("a", text="bar"))
				383	self.assertEqual([], soup.find_all("a", text="bar"))
				384
				385	def test_find_by_name_and_containing_string_when_string_is_buried(self):
				386	soup = self.soup("<a>foo</a><a><b><c>foo</c></b></a>")
				387	self.assertEqual(soup.find_all("a"), soup.find_all("a", text="foo"))
				388
				389	def test_find_by_attribute_and_containing_string(self):
				390	soup = self.soup('<b id="1">foo</b><a id="2">foo</a>')
				391	a = soup.a
				392
				393	self.assertEqual([a], soup.find_all(id=2, text="foo"))
				394	self.assertEqual([], soup.find_all(id=1, text="bar"))
				395
				396
				397
				398
				399	class TestIndex(TreeTest):
				400	"""Test Tag.index"""
				401	def test_index(self):
				402	tree = self.soup("""<div>
				403	<a>Identical</a>
				404	<b>Not identical</b>
				405	<a>Identical</a>
				406
				407	<c><d>Identical with child</d></c>
				408	<b>Also not identical</b>
				409	<c><d>Identical with child</d></c>
				410	</div>""")
				411	div = tree.div
				412	for i, element in enumerate(div.contents):
				413	self.assertEqual(i, div.index(element))
				414	self.assertRaises(ValueError, tree.index, 1)
				415
				416
				417	class TestParentOperations(TreeTest):
				418	"""Test navigation and searching through an element's parents."""
				419
				420	def setUp(self):
				421	super(TestParentOperations, self).setUp()
				422	self.tree = self.soup('''<ul id="empty"></ul>
				423	<ul id="top">
				424	<ul id="middle">
				425	<ul id="bottom">
				426	<b>Start here</b>
				427	</ul>
				428	</ul>''')
				429	self.start = self.tree.b
				430
				431
				432	def test_parent(self):
				433	self.assertEqual(self.start.parent['id'], 'bottom')
				434	self.assertEqual(self.start.parent.parent['id'], 'middle')
				435	self.assertEqual(self.start.parent.parent.parent['id'], 'top')
				436
				437	def test_parent_of_top_tag_is_soup_object(self):
				438	top_tag = self.tree.contents[0]
				439	self.assertEqual(top_tag.parent, self.tree)
				440
				441	def test_soup_object_has_no_parent(self):
				442	self.assertEqual(None, self.tree.parent)
				443
				444	def test_find_parents(self):
				445	self.assertSelectsIDs(
				446	self.start.find_parents('ul'), ['bottom', 'middle', 'top'])
				447	self.assertSelectsIDs(
				448	self.start.find_parents('ul', id="middle"), ['middle'])
				449
				450	def test_find_parent(self):
				451	self.assertEqual(self.start.find_parent('ul')['id'], 'bottom')
				452	self.assertEqual(self.start.find_parent('ul', id='top')['id'], 'top')
				453
				454	def test_parent_of_text_element(self):
				455	text = self.tree.find(text="Start here")
				456	self.assertEqual(text.parent.name, 'b')
				457
				458	def test_text_element_find_parent(self):
				459	text = self.tree.find(text="Start here")
				460	self.assertEqual(text.find_parent('ul')['id'], 'bottom')
				461
				462	def test_parent_generator(self):
				463	parents = [parent['id'] for parent in self.start.parents
				464	if parent is not None and 'id' in parent.attrs]
				465	self.assertEqual(parents, ['bottom', 'middle', 'top'])
				466
				467
				468	class ProximityTest(TreeTest):
				469
				470	def setUp(self):
				471	super(TreeTest, self).setUp()
				472	self.tree = self.soup(
				473	'<html id="start"><head></head><body><b id="1">One</b><b id="2">Two</b><b id="3">Three</b></body></html>')
				474
				475
				476	class TestNextOperations(ProximityTest):
				477
				478	def setUp(self):
				479	super(TestNextOperations, self).setUp()
				480	self.start = self.tree.b
				481
				482	def test_next(self):
				483	self.assertEqual(self.start.next_element, "One")
				484	self.assertEqual(self.start.next_element.next_element['id'], "2")
				485
				486	def test_next_of_last_item_is_none(self):
				487	last = self.tree.find(text="Three")
				488	self.assertEqual(last.next_element, None)
				489
				490	def test_next_of_root_is_none(self):
				491	# The document root is outside the next/previous chain.
				492	self.assertEqual(self.tree.next_element, None)
				493
				494	def test_find_all_next(self):
				495	self.assertSelects(self.start.find_all_next('b'), ["Two", "Three"])
				496	self.start.find_all_next(id=3)
				497	self.assertSelects(self.start.find_all_next(id=3), ["Three"])
				498
				499	def test_find_next(self):
				500	self.assertEqual(self.start.find_next('b')['id'], '2')
				501	self.assertEqual(self.start.find_next(text="Three"), "Three")
				502
				503	def test_find_next_for_text_element(self):
				504	text = self.tree.find(text="One")
				505	self.assertEqual(text.find_next("b").string, "Two")
				506	self.assertSelects(text.find_all_next("b"), ["Two", "Three"])
				507
				508	def test_next_generator(self):
				509	start = self.tree.find(text="Two")
				510	successors = [node for node in start.next_elements]
				511	# There are two successors: the final <b> tag and its text contents.
				512	tag, contents = successors
				513	self.assertEqual(tag['id'], '3')
				514	self.assertEqual(contents, "Three")
				515
				516	class TestPreviousOperations(ProximityTest):
				517
				518	def setUp(self):
				519	super(TestPreviousOperations, self).setUp()
				520	self.end = self.tree.find(text="Three")
				521
				522	def test_previous(self):
				523	self.assertEqual(self.end.previous_element['id'], "3")
				524	self.assertEqual(self.end.previous_element.previous_element, "Two")
				525
				526	def test_previous_of_first_item_is_none(self):
				527	first = self.tree.find('html')
				528	self.assertEqual(first.previous_element, None)
				529
				530	def test_previous_of_root_is_none(self):
				531	# The document root is outside the next/previous chain.
				532	# XXX This is broken!
				533	#self.assertEqual(self.tree.previous_element, None)
				534	pass
				535
				536	def test_find_all_previous(self):
				537	# The <b> tag containing the "Three" node is the predecessor
				538	# of the "Three" node itself, which is why "Three" shows up
				539	# here.
				540	self.assertSelects(
				541	self.end.find_all_previous('b'), ["Three", "Two", "One"])
				542	self.assertSelects(self.end.find_all_previous(id=1), ["One"])
				543
				544	def test_find_previous(self):
				545	self.assertEqual(self.end.find_previous('b')['id'], '3')
				546	self.assertEqual(self.end.find_previous(text="One"), "One")
				547
				548	def test_find_previous_for_text_element(self):
				549	text = self.tree.find(text="Three")
				550	self.assertEqual(text.find_previous("b").string, "Three")
				551	self.assertSelects(
				552	text.find_all_previous("b"), ["Three", "Two", "One"])
				553
				554	def test_previous_generator(self):
				555	start = self.tree.find(text="One")
				556	predecessors = [node for node in start.previous_elements]
				557
				558	# There are four predecessors: the <b> tag containing "One"
				559	# the <body> tag, the <head> tag, and the <html> tag.
				560	b, body, head, html = predecessors
				561	self.assertEqual(b['id'], '1')
				562	self.assertEqual(body.name, "body")
				563	self.assertEqual(head.name, "head")
				564	self.assertEqual(html.name, "html")
				565
				566
				567	class SiblingTest(TreeTest):
				568
				569	def setUp(self):
				570	super(SiblingTest, self).setUp()
				571	markup = '''<html>
				572	<span id="1">
				573	<span id="1.1"></span>
				574	</span>
				575	<span id="2">
				576	<span id="2.1"></span>
				577	</span>
				578	<span id="3">
				579	<span id="3.1"></span>
				580	</span>
				581	<span id="4"></span>
				582	</html>'''
				583	# All that whitespace looks good but makes the tests more
				584	# difficult. Get rid of it.
				585	markup = re.compile("\n\s*").sub("", markup)
				586	self.tree = self.soup(markup)
				587
				588
				589	class TestNextSibling(SiblingTest):
				590
				591	def setUp(self):
				592	super(TestNextSibling, self).setUp()
				593	self.start = self.tree.find(id="1")
				594
				595	def test_next_sibling_of_root_is_none(self):
				596	self.assertEqual(self.tree.next_sibling, None)
				597
				598	def test_next_sibling(self):
				599	self.assertEqual(self.start.next_sibling['id'], '2')
				600	self.assertEqual(self.start.next_sibling.next_sibling['id'], '3')
				601
				602	# Note the difference between next_sibling and next_element.
				603	self.assertEqual(self.start.next_element['id'], '1.1')
				604
				605	def test_next_sibling_may_not_exist(self):
				606	self.assertEqual(self.tree.html.next_sibling, None)
				607
				608	nested_span = self.tree.find(id="1.1")
				609	self.assertEqual(nested_span.next_sibling, None)
				610
				611	last_span = self.tree.find(id="4")
				612	self.assertEqual(last_span.next_sibling, None)
				613
				614	def test_find_next_sibling(self):
				615	self.assertEqual(self.start.find_next_sibling('span')['id'], '2')
				616
				617	def test_next_siblings(self):
				618	self.assertSelectsIDs(self.start.find_next_siblings("span"),
				619	['2', '3', '4'])
				620
				621	self.assertSelectsIDs(self.start.find_next_siblings(id='3'), ['3'])
				622
				623	def test_next_sibling_for_text_element(self):
				624	soup = self.soup("Foo<b>bar</b>baz")
				625	start = soup.find(text="Foo")
				626	self.assertEqual(start.next_sibling.name, 'b')
				627	self.assertEqual(start.next_sibling.next_sibling, 'baz')
				628
				629	self.assertSelects(start.find_next_siblings('b'), ['bar'])
				630	self.assertEqual(start.find_next_sibling(text="baz"), "baz")
				631	self.assertEqual(start.find_next_sibling(text="nonesuch"), None)
				632
				633
				634	class TestPreviousSibling(SiblingTest):
				635
				636	def setUp(self):
				637	super(TestPreviousSibling, self).setUp()
				638	self.end = self.tree.find(id="4")
				639
				640	def test_previous_sibling_of_root_is_none(self):
				641	self.assertEqual(self.tree.previous_sibling, None)
				642
				643	def test_previous_sibling(self):
				644	self.assertEqual(self.end.previous_sibling['id'], '3')
				645	self.assertEqual(self.end.previous_sibling.previous_sibling['id'], '2')
				646
				647	# Note the difference between previous_sibling and previous_element.
				648	self.assertEqual(self.end.previous_element['id'], '3.1')
				649
				650	def test_previous_sibling_may_not_exist(self):
				651	self.assertEqual(self.tree.html.previous_sibling, None)
				652
				653	nested_span = self.tree.find(id="1.1")
				654	self.assertEqual(nested_span.previous_sibling, None)
				655
				656	first_span = self.tree.find(id="1")
				657	self.assertEqual(first_span.previous_sibling, None)
				658
				659	def test_find_previous_sibling(self):
				660	self.assertEqual(self.end.find_previous_sibling('span')['id'], '3')
				661
				662	def test_previous_siblings(self):
				663	self.assertSelectsIDs(self.end.find_previous_siblings("span"),
				664	['3', '2', '1'])
				665
				666	self.assertSelectsIDs(self.end.find_previous_siblings(id='1'), ['1'])
				667
				668	def test_previous_sibling_for_text_element(self):
				669	soup = self.soup("Foo<b>bar</b>baz")
				670	start = soup.find(text="baz")
				671	self.assertEqual(start.previous_sibling.name, 'b')
				672	self.assertEqual(start.previous_sibling.previous_sibling, 'Foo')
				673
				674	self.assertSelects(start.find_previous_siblings('b'), ['bar'])
				675	self.assertEqual(start.find_previous_sibling(text="Foo"), "Foo")
				676	self.assertEqual(start.find_previous_sibling(text="nonesuch"), None)
				677
				678
				679	class TestTagCreation(SoupTest):
				680	"""Test the ability to create new tags."""
				681	def test_new_tag(self):
				682	soup = self.soup("")
				683	new_tag = soup.new_tag("foo", bar="baz")
				684	self.assertTrue(isinstance(new_tag, Tag))
				685	self.assertEqual("foo", new_tag.name)
				686	self.assertEqual(dict(bar="baz"), new_tag.attrs)
				687	self.assertEqual(None, new_tag.parent)
				688
				689	def test_tag_inherits_self_closing_rules_from_builder(self):
				690	if XML_BUILDER_PRESENT:
				691	xml_soup = BeautifulSoup("", "xml")
				692	xml_br = xml_soup.new_tag("br")
				693	xml_p = xml_soup.new_tag("p")
				694
				695	# Both the <br> and <p> tag are empty-element, just because
				696	# they have no contents.
				697	self.assertEqual(b"<br/>", xml_br.encode())
				698	self.assertEqual(b"<p/>", xml_p.encode())
				699
				700	html_soup = BeautifulSoup("", "html")
				701	html_br = html_soup.new_tag("br")
				702	html_p = html_soup.new_tag("p")
				703
				704	# The HTML builder users HTML's rules about which tags are
				705	# empty-element tags, and the new tags reflect these rules.
				706	self.assertEqual(b"<br/>", html_br.encode())
				707	self.assertEqual(b"<p></p>", html_p.encode())
				708
				709	def test_new_string_creates_navigablestring(self):
				710	soup = self.soup("")
				711	s = soup.new_string("foo")
				712	self.assertEqual("foo", s)
				713	self.assertTrue(isinstance(s, NavigableString))
				714
				715	def test_new_string_can_create_navigablestring_subclass(self):
				716	soup = self.soup("")
				717	s = soup.new_string("foo", Comment)
				718	self.assertEqual("foo", s)
				719	self.assertTrue(isinstance(s, Comment))
				720
				721	class TestTreeModification(SoupTest):
				722
    def test_attribute_modification(self):
        soup = self.soup('<a id="1"></a>')

        # Overwrite an existing attribute.
        soup.a['id'] = 2
        expected = self.document_for('<a id="2"></a>')
        self.assertEqual(soup.decode(), expected)

        # Remove it again.
        del soup.a['id']
        expected = self.document_for('<a></a>')
        self.assertEqual(soup.decode(), expected)

        # Add a brand new attribute.
        soup.a['id2'] = 'foo'
        expected = self.document_for('<a id2="foo"></a>')
        self.assertEqual(soup.decode(), expected)
				731
    def test_new_tag_creation(self):
        builder = builder_registry.lookup('html')()
        soup = self.soup("<body></body>", builder=builder)

        # Build two detached tags directly with the Tag constructor.
        link = Tag(soup, builder, 'a')
        ordered_list = Tag(soup, builder, 'ol')
        link['href'] = 'http://foo.com/'

        # Insert them into <body> in order.
        soup.body.insert(0, link)
        soup.body.insert(1, ordered_list)

        rendered = soup.body.encode()
        self.assertEqual(
            rendered,
            b'<body><a href="http://foo.com/"></a><ol></ol></body>')
				743
    def test_append_to_contents_moves_tag(self):
        doc = (
            '<p id="1">Don\'t leave me <b>here</b>.</p>\n'
            '<p id="2">Don\'t leave!</p>'
        )
        soup = self.soup(doc)
        second_para = soup.find(id='2')
        bold = soup.b

        # Move the <b> tag to the end of the second paragraph.
        soup.find(id='2').append(soup.b)

        # The <b> tag is now a child of the second paragraph.
        self.assertEqual(bold.parent, second_para)

        expected = self.document_for(
            '<p id="1">Don\'t leave me .</p>\n'
            '<p id="2">Don\'t leave!<b>here</b></p>')
        self.assertEqual(soup.decode(), expected)
				761
    def test_replace_with_returns_thing_that_was_replaced(self):
        soup = self.soup("<a></a><b><c></c></b>")
        original_a = soup.a
        # replace_with() hands back the element that was swapped out.
        returned = original_a.replace_with(soup.c)
        self.assertEqual(original_a, returned)
				768
    def test_unwrap_returns_thing_that_was_replaced(self):
        soup = self.soup("<a><b></b><c></c></a>")
        original_a = soup.a
        # unwrap() hands back the tag that was removed.
        returned = original_a.unwrap()
        self.assertEqual(original_a, returned)
				775
    def test_replace_tag_with_itself(self):
        markup = "<a><b></b><c>Foo<d></d></c></a><a><e></e></a>"
        soup = self.soup(markup)
        same_tag = soup.c
        soup.c.replace_with(same_tag)
        # Replacing a tag with itself must leave the document unchanged.
        self.assertEqual(soup.decode(), self.document_for(markup))
				782
    def test_replace_tag_with_its_parent_raises_exception(self):
        soup = self.soup("<a><b></b></a>")
        child_replace = soup.b.replace_with
        # Swapping a tag for its own parent is rejected with ValueError.
        self.assertRaises(ValueError, child_replace, soup.a)
				787
    def test_insert_tag_into_itself_raises_exception(self):
        soup = self.soup("<a><b></b></a>")
        insert_into_a = soup.a.insert
        # A tag cannot be inserted into its own contents.
        self.assertRaises(ValueError, insert_into_a, 0, soup.a)
				792
    def test_replace_with_maintains_next_element_throughout(self):
        soup = self.soup('<p><a>one</a><b>three</b></p>')
        a = soup.a
        # Make it so the <a> tag has two text children.
        a.insert(1, "two")

        # Now replace each one with the empty string, using the same
        # replace_with() spelling as the other tests in this class.
        left, right = a.contents
        left.replace_with('')
        right.replace_with('')

        # The <b> tag is still connected to the tree.
        self.assertEqual("three", soup.b.string)
				807
    def test_replace_final_node(self):
        soup = self.soup("<b>Argh!</b>")
        old_text = soup.find(text="Argh!")
        old_text.replace_with("Hooray!")

        replacement = soup.find(text="Hooray!")
        bold = soup.b

        # The replacement is linked to the <b> tag in both directions and
        # is the last element in the document.
        self.assertEqual(replacement.previous_element, bold)
        self.assertEqual(replacement.parent, bold)
        self.assertEqual(replacement.previous_element.next_element, replacement)
        self.assertEqual(replacement.next_element, None)
				817
    def test_consecutive_text_nodes(self):
        # A builder should never create two consecutive text nodes,
        # but if you insert one next to another, Beautiful Soup will
        # handle it correctly.
        soup = self.soup("<a><b>Argh!</b><c></c></a>")
        soup.b.insert(1, "Hooray!")

        expected = self.document_for("<a><b>Argh!Hooray!</b><c></c></a>")
        self.assertEqual(soup.decode(), expected)

        inserted = soup.find(text="Hooray!")

        # Element chain: "Argh!" <-> "Hooray!"
        self.assertEqual(inserted.previous_element, "Argh!")
        self.assertEqual(inserted.previous_element.next_element, inserted)

        # Sibling chain: "Argh!" <-> "Hooray!"
        self.assertEqual(inserted.previous_sibling, "Argh!")
        self.assertEqual(inserted.previous_sibling.next_sibling, inserted)

        # Nothing follows inside <b>; the next element is the <c> tag.
        self.assertEqual(inserted.next_sibling, None)
        self.assertEqual(inserted.next_element, soup.c)
				838
    def test_insert_string(self):
        """Plain strings inserted into a tag become linked tree nodes."""
        tree = self.soup("<a></a>")
        # Each insert goes to position 0, so the later one ends up first.
        for text in ("bar", "foo"):
            tree.a.insert(0, text)

        children = tree.a.contents
        self.assertEqual(["foo", "bar"], children)
        # The first string has a next_element, i.e. it is part of the tree.
        self.assertEqual(tree.a.contents[0].next_element, "bar")
				847
    def test_insert_tag(self):
        """A new tag inserted between siblings is wired into every link."""
        tree_builder = self.default_builder
        tree = self.soup(
            "<a><b>Find</b><c>lady!</c><d></d></a>", builder=tree_builder)
        inserted = Tag(tree, tree_builder, 'magictag')
        inserted.insert(0, "the")
        tree.a.insert(1, inserted)

        rendered = tree.decode()
        self.assertEqual(
            rendered,
            self.document_for(
                "<a><b>Find</b><magictag>the</magictag><c>lady!</c><d></d></a>"))

        # <b> and the new tag are siblings.
        before = tree.b
        self.assertEqual(before.next_sibling, inserted)
        self.assertEqual(inserted.previous_sibling, before)

        # The text inside <b> leads into the new tag in document order.
        find_text = before.find(text="Find")
        self.assertEqual(find_text.next_element, inserted)
        self.assertEqual(inserted.previous_element, find_text)

        # The new tag and <c> are siblings.
        after = tree.c
        self.assertEqual(inserted.next_sibling, after)
        self.assertEqual(after.previous_sibling, inserted)

        # The text inside the new tag leads into <c> in document order.
        the_text = inserted.find(text="the")
        self.assertEqual(the_text.parent, inserted)
        self.assertEqual(the_text.next_element, after)
        self.assertEqual(after.previous_element, the_text)
				877
    def test_append_child_thats_already_at_the_end(self):
        """Appending a tag that is already the last child changes nothing."""
        markup = "<a><b></b></a>"
        tree = self.soup(markup)
        tree.a.append(tree.b)
        self.assertEqual(markup, tree.decode())
				883
    def test_move_tag_to_beginning_of_parent(self):
        """Inserting an existing last child at index 0 moves it to the front."""
        tree = self.soup("<a><b></b><c></c><d></d></a>")
        tree.a.insert(0, tree.d)
        self.assertEqual("<a><d></d><b></b><c></c></a>", tree.decode())
				889
    def test_insert_works_on_empty_element_tag(self):
        """Content may be inserted into an empty-element tag such as <br/>.

        Most HTML parsers would not let such markup through, but since
        we cannot know what a given parser allows, this is permitted for
        now.
        """
        tree = self.soup("<br/>")
        tree.br.insert(1, "Contents")
        rendered = str(tree.br)
        self.assertEqual(rendered, "<br>Contents</br>")
				898
    def test_insert_before(self):
        """insert_before() places strings and existing tags ahead of a node."""
        tree = self.soup("<a>foo</a><b>bar</b>")

        # New strings land immediately before the target tag.
        tree.b.insert_before("BAZ")
        tree.a.insert_before("QUUX")
        with_strings = tree.decode()
        self.assertEqual(
            with_strings, self.document_for("QUUX<a>foo</a>BAZ<b>bar</b>"))

        # An existing tag is moved rather than copied.
        tree.a.insert_before(tree.b)
        after_move = tree.decode()
        self.assertEqual(
            after_move, self.document_for("QUUX<b>bar</b><a>foo</a>BAZ"))
				909
    def test_insert_after(self):
        """insert_after() places strings and existing tags behind a node."""
        tree = self.soup("<a>foo</a><b>bar</b>")

        # New strings land immediately after the target tag.
        tree.b.insert_after("BAZ")
        tree.a.insert_after("QUUX")
        with_strings = tree.decode()
        self.assertEqual(
            with_strings, self.document_for("<a>foo</a>QUUX<b>bar</b>BAZ"))

        # An existing tag is moved rather than copied.
        tree.b.insert_after(tree.a)
        after_move = tree.decode()
        self.assertEqual(
            after_move, self.document_for("QUUX<b>bar</b><a>foo</a>BAZ"))
				919
    def test_insert_after_raises_exception_if_after_has_no_meaning(self):
        """insert_after() is rejected where "after" cannot be defined."""
        tree = self.soup("")
        new_tag = tree.new_tag("a")
        loose_string = tree.new_string("")

        # (expected exception, node whose insert_after is called)
        cases = (
            (ValueError, loose_string),
            (NotImplementedError, tree),
            (ValueError, new_tag),
        )
        for expected_error, node in cases:
            self.assertRaises(expected_error, node.insert_after, new_tag)
				927
    def test_insert_before_raises_notimplementederror_if_before_has_no_meaning(self):
        """insert_before() is rejected where "before" cannot be defined."""
        tree = self.soup("")
        new_tag = tree.new_tag("a")
        loose_string = tree.new_string("")

        # (expected exception, node whose insert_before is called)
        cases = (
            (ValueError, loose_string),
            (NotImplementedError, tree),
            (ValueError, new_tag),
        )
        for expected_error, node in cases:
            self.assertRaises(expected_error, node.insert_before, new_tag)
				935
    def test_replace_with(self):
        """replace_with() moves an existing tag into the replaced tag's slot."""
        soup = self.soup(
            "<p>There's <b>no</b> business like <b>show</b> business</p>")
        no, show = soup.find_all('b')
        show.replace_with(no)
        # Moving the first <b> away leaves "There's " and " business like "
        # side by side, so two spaces appear between the words.
        self.assertEqual(
            soup.decode(),
            self.document_for(
                "<p>There's  business like <b>no</b> business</p>"))

        # The replaced tag is detached; the moved tag sits in its place.
        self.assertEqual(show.parent, None)
        self.assertEqual(no.parent, soup.p)
        self.assertEqual(no.next_element, "no")
        self.assertEqual(no.next_sibling, " business")
				950
    def test_replace_first_child(self):
        """Replacing the first child with its sibling leaves only the sibling."""
        tree = self.soup("<a><b></b><c></c></a>")
        tree.b.replace_with(tree.c)
        self.assertEqual("<a><c></c></a>", tree.decode())
				956
    def test_replace_last_child(self):
        """Replacing the last child with its sibling leaves only the sibling."""
        tree = self.soup("<a><b></b><c></c></a>")
        tree.c.replace_with(tree.b)
        self.assertEqual("<a><b></b></a>", tree.decode())
				962
    def test_nested_tag_replace_with(self):
        """Replace a nested tag with a tag taken from elsewhere in the tree.

        Checks the rendered document, that the removed tag is fully
        detached, that the moved tag is linked in at its new position,
        and that the place it was taken from has been closed up.
        """
        soup = self.soup(
            """<a>We<b>reserve<c>the</c><d>right</d></b></a><e>to<f>refuse</f><g>service</g></e>""")

        # Replace the entire <b> tag and its contents ("reserve the
        # right") with the <f> tag ("refuse").
        remove_tag = soup.b
        move_tag = soup.f
        remove_tag.replace_with(move_tag)

        self.assertEqual(
            soup.decode(), self.document_for(
                "<a>We<f>refuse</f></a><e>to<g>service</g></e>"))

        # The <b> tag is now an orphan.
        self.assertEqual(remove_tag.parent, None)
        # "right" is the last string inside <b>, so nothing may follow it.
        self.assertEqual(remove_tag.find(text="right").next_element, None)
        self.assertEqual(remove_tag.previous_element, None)
        self.assertEqual(remove_tag.next_sibling, None)
        self.assertEqual(remove_tag.previous_sibling, None)

        # The <f> tag is now connected to the <a> tag.
        self.assertEqual(move_tag.parent, soup.a)
        self.assertEqual(move_tag.previous_element, "We")
        # <f> -> "refuse" -> <e>: the moved subtree leads into the rest
        # of the document.
        self.assertEqual(move_tag.next_element.next_element, soup.e)
        self.assertEqual(move_tag.next_sibling, None)

        # The gap where the <f> tag used to be has been mended, and
        # the word "to" is now connected to the <g> tag.
        to_text = soup.find(text="to")
        g_tag = soup.g
        self.assertEqual(to_text.next_element, g_tag)
        self.assertEqual(to_text.next_sibling, g_tag)
        self.assertEqual(g_tag.previous_element, to_text)
        self.assertEqual(g_tag.previous_sibling, to_text)
				998
				999	def test_unwrap(self):
				1000	tree = self.soup("""
				1001	<p>Unneeded <em>formatting</em> is unneeded</p>
				1002	""")
				1003	tree.em.unwrap()
				1004	self.assertEqual(tree.em, None)
				1005	self.assertEqual(tree.p.text, "Unneeded formatting is unneeded")
				1006
    def test_wrap(self):
        """wrap() encloses a string in a new tag and returns that tag."""
        tree = self.soup("I wish I was bold.")
        wrapper = tree.string.wrap(tree.new_tag("b"))
        self.assertEqual(wrapper.decode(), "<b>I wish I was bold.</b>")
        rendered = tree.decode()
        self.assertEqual(
            rendered, self.document_for("<b>I wish I was bold.</b>"))
				1013
    def test_wrap_extracts_tag_from_elsewhere(self):
        """Wrapping with a tag already in the tree moves that tag."""
        tree = self.soup("<b></b>I wish I was bold.")
        trailing_text = tree.b.next_sibling
        trailing_text.wrap(tree.b)
        rendered = tree.decode()
        self.assertEqual(
            rendered, self.document_for("<b>I wish I was bold.</b>"))
				1019
    def test_wrap_puts_new_contents_at_the_end(self):
        """Wrapping with a non-empty tag appends to its existing contents."""
        tree = self.soup("<b>I like being bold.</b>I wish I was bold.")
        trailing_text = tree.b.next_sibling
        trailing_text.wrap(tree.b)
        self.assertEqual(2, len(tree.b.contents))
        rendered = tree.decode()
        self.assertEqual(
            rendered,
            self.document_for("<b>I like being bold.I wish I was bold.</b>"))
				1027
    def test_extract(self):
        """extract() removes a tag from the tree and closes the gap."""
        soup = self.soup(
            '<html><body>Some content. <div id="nav">Nav crap</div> More content.</body></html>')

        self.assertEqual(len(soup.body.contents), 3)
        extracted = soup.find(id="nav").extract()

        # The strings on either side of the removed <div> each keep their
        # own space ("Some content. " and " More content."), so the
        # rendered body contains two consecutive spaces.
        self.assertEqual(
            soup.decode(), "<html><body>Some content.  More content.</body></html>")
        self.assertEqual(extracted.decode(), '<div id="nav">Nav crap</div>')

        # The extracted tag is now an orphan.
        self.assertEqual(len(soup.body.contents), 2)
        self.assertEqual(extracted.parent, None)
        self.assertEqual(extracted.previous_element, None)
        # <div> -> "Nav crap" -> nothing.
        self.assertEqual(extracted.next_element.next_element, None)

        # The gap where the extracted tag used to be has been mended.
        content_1 = soup.find(text="Some content. ")
        content_2 = soup.find(text=" More content.")
        self.assertEqual(content_1.next_element, content_2)
        self.assertEqual(content_1.next_sibling, content_2)
        self.assertEqual(content_2.previous_element, content_1)
        self.assertEqual(content_2.previous_sibling, content_1)
				1052
    def test_extract_distinguishes_between_identical_strings(self):
        """extract() removes the exact node, not merely an equal string."""
        tree = self.soup("<a>foo</a><b>bar</b>")
        original_foo = tree.a.string
        original_bar = tree.b.string
        extra_foo = tree.new_string("foo")
        extra_bar = tree.new_string("bar")
        tree.a.append(extra_foo)
        tree.b.append(extra_bar)

        # Each tag now holds two equal strings.  Take out the first
        # "foo" and the second "bar".
        original_foo.extract()
        extra_bar.extract()
        self.assertEqual(extra_foo, tree.a.string)
        self.assertEqual(extra_bar, tree.b.string)
				1069
    def test_clear(self):
        """Tag.clear()"""
        tree = self.soup("<p><a>String <em>Italicized</em></a> and another</p>")

        # Default mode: the removed child still has a contents attribute.
        link = tree.a
        tree.p.clear()
        self.assertEqual(len(tree.p.contents), 0)
        self.assertTrue(hasattr(link, "contents"))

        # decompose=True: the removed <em> ends up with no contents.
        emphasis = link.em
        link.clear(decompose=True)
        self.assertEqual(0, len(emphasis.contents))
				1083
    def test_string_set(self):
        """Tag.string = 'string'"""
        tree = self.soup("<a></a> <b><c></c></b>")
        # Assigning to an empty tag gives it a single string child.
        tree.a.string = "foo"
        self.assertEqual(tree.a.contents, ["foo"])
        # Assigning to a tag with a child tag replaces that child.
        tree.b.string = "bar"
        self.assertEqual(tree.b.contents, ["bar"])
				1091
    def test_string_set_does_not_affect_original_string(self):
        """Assigning another tag's .string does not remove it from that tag."""
        tree = self.soup("<a><b>foo</b><c>bar</c>")
        tree.b.string = tree.c.string
        encoded = tree.a.encode()
        self.assertEqual(encoded, b"<a><b>bar</b><c>bar</c></a>")
				1096
    def test_set_string_preserves_class_of_string(self):
        """A CData assigned to .string is still a CData when read back."""
        tree = self.soup("<a></a>")
        tree.a.string = CData("foo")
        is_cdata = isinstance(tree.a.string, CData)
        self.assertTrue(is_cdata)
				1102
				1103	class TestElementObjects(SoupTest):
				1104	"""Test various features of element objects."""
				1105
				1106	def test_len(self):
				1107	"""The length of an element is its number of children."""
				1108	soup = self.soup("<top>1<b>2</b>3</top>")
				1109
				1110	# The BeautifulSoup object itself contains one element: the
				1111	# <top> tag.
				1112	self.assertEqual(len(soup.contents), 1)
				1113	self.assertEqual(len(soup), 1)
				1114
				1115	# The <top> tag contains three elements: the text node "1", the
				1116	# <b> tag, and the text node "3".
				1117	self.assertEqual(len(soup.top), 3)
				1118	self.assertEqual(len(soup.top.contents), 3)
				1119
				1120	def test_member_access_invokes_find(self):
				1121	"""Accessing a Python member .foo invokes find('foo')"""
				1122	soup = self.soup('<b><i></i></b>')
				1123	self.assertEqual(soup.b, soup.find('b'))
				1124	self.assertEqual(soup.b.i, soup.find('b').find('i'))
				1125	self.assertEqual(soup.a, None)
				1126
				1127	def test_deprecated_member_access(self):
				1128	soup = self.soup('<b><i></i></b>')
				1129	with warnings.catch_warnings(record=True) as w:
				1130	tag = soup.bTag
				1131	self.assertEqual(soup.b, tag)
				1132	self.assertEqual(
				1133	'.bTag is deprecated, use .find("b") instead.',
				1134	str(w[0].message))
				1135
				1136	def test_has_attr(self):
				1137	"""has_attr() checks for the presence of an attribute.
				1138
				1139	Please note note: has_attr() is different from
				1140	__in__. has_attr() checks the tag's attributes and __in__
				1141	checks the tag's chidlren.
				1142	"""
				1143	soup = self.soup("<foo attr='bar'>")
				1144	self.assertTrue(soup.foo.has_attr('attr'))
				1145	self.assertFalse(soup.foo.has_attr('attr2'))
				1146
				1147
				1148	def test_attributes_come_out_in_alphabetical_order(self):
				1149	markup = '<b a="1" z="5" m="3" f="2" y="4"></b>'
				1150	self.assertSoupEquals(markup, '<b a="1" f="2" m="3" y="4" z="5"></b>')
				1151
				1152	def test_string(self):
				1153	# A tag that contains only a text node makes that node
				1154	# available as .string.
				1155	soup = self.soup("<b>foo</b>")
				1156	self.assertEqual(soup.b.string, 'foo')
				1157
				1158	def test_empty_tag_has_no_string(self):
				1159	# A tag with no children has no .stirng.
				1160	soup = self.soup("<b></b>")
				1161	self.assertEqual(soup.b.string, None)
				1162
				1163	def test_tag_with_multiple_children_has_no_string(self):
				1164	# A tag with no children has no .string.
				1165	soup = self.soup("<a>foo<b></b><b></b></b>")
				1166	self.assertEqual(soup.b.string, None)
				1167
				1168	soup = self.soup("<a>foo<b></b>bar</b>")
				1169	self.assertEqual(soup.b.string, None)
				1170
				1171	# Even if all the children are strings, due to trickery,
				1172	# it won't work--but this would be a good optimization.
				1173	soup = self.soup("<a>foo</b>")
				1174	soup.a.insert(1, "bar")
				1175	self.assertEqual(soup.a.string, None)
				1176
				1177	def test_tag_with_recursive_string_has_string(self):
				1178	# A tag with a single child which has a .string inherits that
				1179	# .string.
				1180	soup = self.soup("<a><b>foo</b></a>")
				1181	self.assertEqual(soup.a.string, "foo")
				1182	self.assertEqual(soup.string, "foo")
				1183
				1184	def test_lack_of_string(self):
				1185	"""Only a tag containing a single text node has a .string."""
				1186	soup = self.soup("<b>f<i>e</i>o</b>")
				1187	self.assertFalse(soup.b.string)
				1188
				1189	soup = self.soup("<b></b>")
				1190	self.assertFalse(soup.b.string)
				1191
				1192	def test_all_text(self):
				1193	"""Tag.text and Tag.get_text(sep=u"") -> all child text, concatenated"""
				1194	soup = self.soup("<a>a<b>r</b> <r> t </r></a>")
				1195	self.assertEqual(soup.a.text, "ar t ")
				1196	self.assertEqual(soup.a.get_text(strip=True), "art")
				1197	self.assertEqual(soup.a.get_text(","), "a,r, , t ")
				1198	self.assertEqual(soup.a.get_text(",", strip=True), "a,r,t")
				1199
				1200	def test_get_text_ignores_comments(self):
				1201	soup = self.soup("foo<!--IGNORE-->bar")
				1202	self.assertEqual(soup.get_text(), "foobar")
				1203
				1204	self.assertEqual(
				1205	soup.get_text(types=(NavigableString, Comment)), "fooIGNOREbar")
				1206	self.assertEqual(
				1207	soup.get_text(types=None), "fooIGNOREbar")
				1208
				1209	def test_all_strings_ignores_comments(self):
				1210	soup = self.soup("foo<!--IGNORE-->bar")
				1211	self.assertEqual(['foo', 'bar'], list(soup.strings))
				1212
				1213	class TestCDAtaListAttributes(SoupTest):
				1214
				1215	"""Testing cdata-list attributes like 'class'.
				1216	"""
				1217	def test_single_value_becomes_list(self):
				1218	soup = self.soup("<a class='foo'>")
				1219	self.assertEqual(["foo"],soup.a['class'])
				1220
				1221	def test_multiple_values_becomes_list(self):
				1222	soup = self.soup("<a class='foo bar'>")
				1223	self.assertEqual(["foo", "bar"], soup.a['class'])
				1224
				1225	def test_multiple_values_separated_by_weird_whitespace(self):
				1226	soup = self.soup("<a class='foo\tbar\nbaz'>")
				1227	self.assertEqual(["foo", "bar", "baz"],soup.a['class'])
				1228
				1229	def test_attributes_joined_into_string_on_output(self):
				1230	soup = self.soup("<a class='foo\tbar'>")
				1231	self.assertEqual(b'<a class="foo bar"></a>', soup.a.encode())
				1232
				1233	def test_accept_charset(self):
				1234	soup = self.soup('<form accept-charset="ISO-8859-1 UTF-8">')
				1235	self.assertEqual(['ISO-8859-1', 'UTF-8'], soup.form['accept-charset'])
				1236
				1237	def test_cdata_attribute_applying_only_to_one_tag(self):
				1238	data = '<a accept-charset="ISO-8859-1 UTF-8"></a>'
				1239	soup = self.soup(data)
				1240	# We saw in another test that accept-charset is a cdata-list
				1241	# attribute for the <form> tag. But it's not a cdata-list
				1242	# attribute for any other tag.
				1243	self.assertEqual('ISO-8859-1 UTF-8', soup.a['accept-charset'])
				1244
				1245	def test_string_has_immutable_name_property(self):
				1246	string = self.soup("s").string
				1247	self.assertEqual(None, string.name)
				1248	def t():
				1249	string.name = 'foo'
				1250	self.assertRaises(AttributeError, t)
				1251
				1252	class TestPersistence(SoupTest):
				1253	"Testing features like pickle and deepcopy."
				1254
				1255	def setUp(self):
				1256	super(TestPersistence, self).setUp()
				1257	self.page = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
				1258	"http://www.w3.org/TR/REC-html40/transitional.dtd">
				1259	<html>
				1260	<head>
				1261	<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
				1262	<title>Beautiful Soup: We called him Tortoise because he taught us.</title>
				1263	<link rev="made" href="mailto:leonardr@segfault.org">
				1264	<meta name="Description" content="Beautiful Soup: an HTML parser optimized for screen-scraping.">
				1265	<meta name="generator" content="Markov Approximation 1.4 (module: leonardr)">
				1266	<meta name="author" content="Leonard Richardson">
				1267	</head>
				1268	<body>
				1269	<a href="foo">foo</a>
				1270	<a href="foo"><b>bar</b></a>
				1271	</body>
				1272	</html>"""
				1273	self.tree = self.soup(self.page)
				1274
				1275	def test_pickle_and_unpickle_identity(self):
				1276	# Pickling a tree, then unpickling it, yields a tree identical
				1277	# to the original.
				1278	dumped = pickle.dumps(self.tree, 2)
				1279	loaded = pickle.loads(dumped)
				1280	self.assertEqual(loaded.__class__, BeautifulSoup)
				1281	self.assertEqual(loaded.decode(), self.tree.decode())
				1282
				1283	def test_deepcopy_identity(self):
				1284	# Making a deepcopy of a tree yields an identical tree.
				1285	copied = copy.deepcopy(self.tree)
				1286	self.assertEqual(copied.decode(), self.tree.decode())
				1287
				1288	def test_unicode_pickle(self):
				1289	# A tree containing Unicode characters can be pickled.
				1290	html = u"<b>\N{SNOWMAN}</b>"
				1291	soup = self.soup(html)
				1292	dumped = pickle.dumps(soup, pickle.HIGHEST_PROTOCOL)
				1293	loaded = pickle.loads(dumped)
				1294	self.assertEqual(loaded.decode(), soup.decode())
				1295
				1296
				1297	class TestSubstitutions(SoupTest):
				1298
				1299	def test_default_formatter_is_minimal(self):
				1300	markup = u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"
				1301	soup = self.soup(markup)
				1302	decoded = soup.decode(formatter="minimal")
				1303	# The < is converted back into < but the e-with-acute is left alone.
				1304	self.assertEqual(
				1305	decoded,
				1306	self.document_for(
				1307	u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"))
				1308
				1309	def test_formatter_html(self):
				1310	markup = u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"
				1311	soup = self.soup(markup)
				1312	decoded = soup.decode(formatter="html")
				1313	self.assertEqual(
				1314	decoded,
				1315	self.document_for("<b><<Sacré bleu!>></b>"))
				1316
				1317	def test_formatter_minimal(self):
				1318	markup = u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"
				1319	soup = self.soup(markup)
				1320	decoded = soup.decode(formatter="minimal")
				1321	# The < is converted back into < but the e-with-acute is left alone.
				1322	self.assertEqual(
				1323	decoded,
				1324	self.document_for(
				1325	u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"))
				1326
				1327	def test_formatter_null(self):
				1328	markup = u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"
				1329	soup = self.soup(markup)
				1330	decoded = soup.decode(formatter=None)
				1331	# Neither the angle brackets nor the e-with-acute are converted.
				1332	# This is not valid HTML, but it's what the user wanted.
				1333	self.assertEqual(decoded,
				1334	self.document_for(u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"))
				1335
				1336	def test_formatter_custom(self):
				1337	markup = u"<b><foo></b><b>bar</b>"
				1338	soup = self.soup(markup)
				1339	decoded = soup.decode(formatter = lambda x: x.upper())
				1340	# Instead of normal entity conversion code, the custom
				1341	# callable is called on every string.
				1342	self.assertEqual(
				1343	decoded,
				1344	self.document_for(u"<b><FOO></b><b>BAR</b>"))
				1345
				1346	def test_formatter_is_run_on_attribute_values(self):
				1347	markup = u'<a href="http://a.com?a=b&c=é">e</a>'
				1348	soup = self.soup(markup)
				1349	a = soup.a
				1350
				1351	expect_minimal = u'<a href="http://a.com?a=b&c=é">e</a>'
				1352
				1353	self.assertEqual(expect_minimal, a.decode())
				1354	self.assertEqual(expect_minimal, a.decode(formatter="minimal"))
				1355
				1356	expect_html = u'<a href="http://a.com?a=b&c=é">e</a>'
				1357	self.assertEqual(expect_html, a.decode(formatter="html"))
				1358
				1359	self.assertEqual(markup, a.decode(formatter=None))
				1360	expect_upper = u'<a href="HTTP://A.COM?A=B&C=É">E</a>'
				1361	self.assertEqual(expect_upper, a.decode(formatter=lambda x: x.upper()))
				1362
				1363	def test_formatter_skips_script_tag_for_html_documents(self):
				1364	doc = """
				1365	<script type="text/javascript">
				1366	console.log("< < hey > > ");
				1367	</script>
				1368	"""
				1369	encoded = BeautifulSoup(doc).encode()
				1370	self.assertTrue(b"< < hey > >" in encoded)
				1371
				1372	def test_formatter_skips_style_tag_for_html_documents(self):
				1373	doc = """
				1374	<style type="text/css">
				1375	console.log("< < hey > > ");
				1376	</style>
				1377	"""
				1378	encoded = BeautifulSoup(doc).encode()
				1379	self.assertTrue(b"< < hey > >" in encoded)
				1380
				1381	def test_prettify_leaves_preformatted_text_alone(self):
				1382	soup = self.soup("<div> foo <pre> \tbar\n \n </pre> baz ")
				1383	# Everything outside the <pre> tag is reformatted, but everything
				1384	# inside is left alone.
				1385	self.assertEqual(
				1386	u'<div>\n foo\n <pre> \tbar\n \n </pre>\n baz\n</div>',
				1387	soup.div.prettify())
				1388
				1389	def test_prettify_accepts_formatter(self):
				1390	soup = BeautifulSoup("<html><body>foo</body></html>")
				1391	pretty = soup.prettify(formatter = lambda x: x.upper())
				1392	self.assertTrue("FOO" in pretty)
				1393
				1394	def test_prettify_outputs_unicode_by_default(self):
				1395	soup = self.soup("<a></a>")
				1396	self.assertEqual(unicode, type(soup.prettify()))
				1397
				1398	def test_prettify_can_encode_data(self):
				1399	soup = self.soup("<a></a>")
				1400	self.assertEqual(bytes, type(soup.prettify("utf-8")))
				1401
				1402	def test_html_entity_substitution_off_by_default(self):
				1403	markup = u"<b>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</b>"
				1404	soup = self.soup(markup)
				1405	encoded = soup.b.encode("utf-8")
				1406	self.assertEqual(encoded, markup.encode('utf-8'))
				1407
				1408	def test_encoding_substitution(self):
				1409	# Here's the <meta> tag saying that a document is
				1410	# encoded in Shift-JIS.
				1411	meta_tag = ('<meta content="text/html; charset=x-sjis" '
				1412	'http-equiv="Content-type"/>')
				1413	soup = self.soup(meta_tag)
				1414
				1415	# Parse the document, and the charset apprears unchanged.
				1416	self.assertEqual(soup.meta['content'], 'text/html; charset=x-sjis')
				1417
				1418	# Encode the document into some encoding, and the encoding is
				1419	# substituted into the meta tag.
				1420	utf_8 = soup.encode("utf-8")
				1421	self.assertTrue(b"charset=utf-8" in utf_8)
				1422
				1423	euc_jp = soup.encode("euc_jp")
				1424	self.assertTrue(b"charset=euc_jp" in euc_jp)
				1425
				1426	shift_jis = soup.encode("shift-jis")
				1427	self.assertTrue(b"charset=shift-jis" in shift_jis)
				1428
				1429	utf_16_u = soup.encode("utf-16").decode("utf-16")
				1430	self.assertTrue("charset=utf-16" in utf_16_u)
				1431
				1432	def test_encoding_substitution_doesnt_happen_if_tag_is_strained(self):
				1433	markup = ('<head><meta content="text/html; charset=x-sjis" '
				1434	'http-equiv="Content-type"/></head><pre>foo</pre>')
				1435
				1436	# Beautiful Soup used to try to rewrite the meta tag even if the
				1437	# meta tag got filtered out by the strainer. This test makes
				1438	# sure that doesn't happen.
				1439	strainer = SoupStrainer('pre')
				1440	soup = self.soup(markup, parse_only=strainer)
				1441	self.assertEqual(soup.contents[0].name, 'pre')
				1442
				1443	class TestEncoding(SoupTest):
				1444	"""Test the ability to encode objects into strings."""
				1445
				1446	def test_unicode_string_can_be_encoded(self):
				1447	html = u"<b>\N{SNOWMAN}</b>"
				1448	soup = self.soup(html)
				1449	self.assertEqual(soup.b.string.encode("utf-8"),
				1450	u"\N{SNOWMAN}".encode("utf-8"))
				1451
				1452	def test_tag_containing_unicode_string_can_be_encoded(self):
				1453	html = u"<b>\N{SNOWMAN}</b>"
				1454	soup = self.soup(html)
				1455	self.assertEqual(
				1456	soup.b.encode("utf-8"), html.encode("utf-8"))
				1457
				1458	def test_encoding_substitutes_unrecognized_characters_by_default(self):
				1459	html = u"<b>\N{SNOWMAN}</b>"
				1460	soup = self.soup(html)
				1461	self.assertEqual(soup.b.encode("ascii"), b"<b>☃</b>")
				1462
				1463	def test_encoding_can_be_made_strict(self):
				1464	html = u"<b>\N{SNOWMAN}</b>"
				1465	soup = self.soup(html)
				1466	self.assertRaises(
				1467	UnicodeEncodeError, soup.encode, "ascii", errors="strict")
				1468
				1469	def test_decode_contents(self):
				1470	html = u"<b>\N{SNOWMAN}</b>"
				1471	soup = self.soup(html)
				1472	self.assertEqual(u"\N{SNOWMAN}", soup.b.decode_contents())
				1473
				1474	def test_encode_contents(self):
				1475	html = u"<b>\N{SNOWMAN}</b>"
				1476	soup = self.soup(html)
				1477	self.assertEqual(
				1478	u"\N{SNOWMAN}".encode("utf8"), soup.b.encode_contents(
				1479	encoding="utf8"))
				1480
				1481	def test_deprecated_renderContents(self):
				1482	html = u"<b>\N{SNOWMAN}</b>"
				1483	soup = self.soup(html)
				1484	self.assertEqual(
				1485	u"\N{SNOWMAN}".encode("utf8"), soup.b.renderContents())
				1486
				1487	class TestNavigableStringSubclasses(SoupTest):
				1488
				1489	def test_cdata(self):
				1490	# None of the current builders turn CDATA sections into CData
				1491	# objects, but you can create them manually.
				1492	soup = self.soup("")
				1493	cdata = CData("foo")
				1494	soup.insert(1, cdata)
				1495	self.assertEqual(str(soup), "<![CDATA[foo]]>")
				1496	self.assertEqual(soup.find(text="foo"), "foo")
				1497	self.assertEqual(soup.contents[0], "foo")
				1498
				1499	def test_cdata_is_never_formatted(self):
				1500	"""Text inside a CData object is passed into the formatter.
				1501
				1502	But the return value is ignored.
				1503	"""
				1504
				1505	self.count = 0
				1506	def increment(*args):
				1507	self.count += 1
				1508	return "BITTER FAILURE"
				1509
				1510	soup = self.soup("")
				1511	cdata = CData("<><><>")
				1512	soup.insert(1, cdata)
				1513	self.assertEqual(
				1514	b"<![CDATA[<><><>]]>", soup.encode(formatter=increment))
				1515	self.assertEqual(1, self.count)
				1516
				1517	def test_doctype_ends_in_newline(self):
				1518	# Unlike other NavigableString subclasses, a DOCTYPE always ends
				1519	# in a newline.
				1520	doctype = Doctype("foo")
				1521	soup = self.soup("")
				1522	soup.insert(1, doctype)
				1523	self.assertEqual(soup.encode(), b"<!DOCTYPE foo>\n")
				1524
				1525
				1526	class TestSoupSelector(TreeTest):
				1527
				1528	HTML = """
				1529	<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
				1530	"http://www.w3.org/TR/html4/strict.dtd">
				1531	<html>
				1532	<head>
				1533	<title>The title</title>
				1534	<link rel="stylesheet" href="blah.css" type="text/css" id="l1">
				1535	</head>
				1536	<body>
				1537
				1538	<div id="main" class="fancy">
				1539	<div id="inner">
				1540	<h1 id="header1">An H1</h1>
				1541	<p>Some text</p>
				1542	<p class="onep" id="p1">Some more text</p>
				1543	<h2 id="header2">An H2</h2>
				1544	<p class="class1 class2 class3" id="pmulti">Another</p>
				1545	<a href="http://bob.example.org/" rel="friend met" id="bob">Bob</a>
				1546	<h2 id="header3">Another H2</h2>
				1547	<a id="me" href="http://simonwillison.net/" rel="me">me</a>
				1548	<span class="s1">
				1549	<a href="#" id="s1a1">span1a1</a>
				1550	<a href="#" id="s1a2">span1a2 <span id="s1a2s1">test</span></a>
				1551	<span class="span2">
				1552	<a href="#" id="s2a1">span2a1</a>
				1553	</span>
				1554	<span class="span3"></span>
				1555	</span>
				1556	</div>
				1557	<p lang="en" id="lang-en">English</p>
				1558	<p lang="en-gb" id="lang-en-gb">English UK</p>
				1559	<p lang="en-us" id="lang-en-us">English US</p>
				1560	<p lang="fr" id="lang-fr">French</p>
				1561	</div>
				1562
				1563	<div id="footer">
				1564	</div>
				1565	"""
				1566
    def setUp(self):
        """Parse the class-level HTML fixture once per test.

        Note that this stores a BeautifulSoup object in self.soup, so
        within this class self.soup is a parsed document to call
        .select() on, not something to be called with markup.
        """
        self.soup = BeautifulSoup(self.HTML)
				1569
    def assertSelects(self, selector, expected_ids):
        """Assert that a CSS selector matches exactly the given element IDs.

        selector is passed to self.soup.select(); expected_ids is an
        iterable of the 'id' attribute values the matched elements must
        have.  Order is ignored: both sides are compared sorted.  The
        caller's expected_ids is left untouched (sorted copies are used
        instead of sorting it in place).
        """
        found_ids = sorted(el['id'] for el in self.soup.select(selector))
        wanted_ids = sorted(expected_ids)
        self.assertEqual(
            wanted_ids, found_ids,
            "Selector %s, expected [%s], got [%s]" % (
                selector, ', '.join(wanted_ids), ', '.join(found_ids)
            )
        )

    # Singular spelling of the same helper.
    assertSelect = assertSelects
				1581
    def assertSelectMultiple(self, *tests):
        """Run assertSelect for each (selector, expected_ids) pair given."""
        for case in tests:
            css, ids = case
            self.assertSelect(css, ids)
				1585
    def test_one_tag_one(self):
        """A tag-name selector with a single match returns just that tag."""
        matches = self.soup.select('title')
        self.assertEqual(len(matches), 1)
        title = matches[0]
        self.assertEqual(title.name, 'title')
        self.assertEqual(title.contents, [u'The title'])
				1591
    def test_one_tag_many(self):
        """A tag-name selector returns every tag of that name."""
        matches = self.soup.select('div')
        self.assertEqual(len(matches), 3)
        for match in matches:
            self.assertEqual(match.name, 'div')
				1597
    def test_tag_in_tag_one(self):
        """'div div' matches only the <div> nested inside another <div>."""
        self.assertSelects('div div', ['inner'])
				1601
    def test_tag_in_tag_many(self):
        """Descendant selectors match <div> tags at any depth."""
        selectors = ('html div', 'html body div', 'body div')
        for css in selectors:
            self.assertSelects(css, ['main', 'inner', 'footer'])
				1605
				1606	def test_tag_no_match(self):
				1607	self.assertEqual(len(self.soup.select('del')), 0)
				1608
				1609	def test_invalid_tag(self):
				1610	self.assertRaises(ValueError, self.soup.select, 'tag%t')
				1611
				1612	def test_header_tags(self):
				1613	self.assertSelectMultiple(
				1614	('h1', ['header1']),
				1615	('h2', ['header2', 'header3']),
				1616	)
				1617
    def test_class_one(self):
        """'.onep' selects a single <p>, however the selector is qualified."""
        class_selectors = ('.onep', 'p.onep', 'html p.onep')
        for selector in class_selectors:
            matches = self.soup.select(selector)
            self.assertEqual(len(matches), 1)
            paragraph = matches[0]
            self.assertEqual(paragraph.name, 'p')
            self.assertEqual(paragraph['class'], ['onep'])
				1624
				1625	def test_class_mismatched_tag(self):
				1626	els = self.soup.select('div.onep')
				1627	self.assertEqual(len(els), 0)
				1628
				1629	def test_one_id(self):
				1630	for selector in ('div#inner', '#inner', 'div div#inner'):
				1631	self.assertSelects(selector, ['inner'])
				1632
				1633	def test_bad_id(self):
				1634	els = self.soup.select('#doesnotexist')
				1635	self.assertEqual(len(els), 0)
				1636
    def test_items_in_id(self):
        """'div#inner p' selects three <p> tags; only the second is 'onep'."""
        paragraphs = self.soup.select('div#inner p')
        self.assertEqual(len(paragraphs), 3)
        for paragraph in paragraphs:
            self.assertEqual(paragraph.name, 'p')
        first, second = paragraphs[0], paragraphs[1]
        self.assertEqual(second['class'], ['onep'])
        self.assertFalse(first.has_attr('class'))
				1644
				1645	def test_a_bunch_of_emptys(self):
				1646	for selector in ('div#main del', 'div#main div.oops', 'div div#main'):
				1647	self.assertEqual(len(self.soup.select(selector)), 0)
				1648
				1649	def test_multi_class_support(self):
				1650	for selector in ('.class1', 'p.class1', '.class2', 'p.class2',
				1651	'.class3', 'p.class3', 'html p.class2', 'div#inner .class2'):
				1652	self.assertSelects(selector, ['pmulti'])
				1653
				1654	def test_multi_class_selection(self):
				1655	for selector in ('.class1.class3', '.class3.class2',
				1656	'.class1.class2.class3'):
				1657	self.assertSelects(selector, ['pmulti'])
				1658
				1659	def test_child_selector(self):
				1660	self.assertSelects('.s1 > a', ['s1a1', 's1a2'])
				1661	self.assertSelects('.s1 > a span', ['s1a2s1'])
				1662
				1663	def test_child_selector_id(self):
				1664	self.assertSelects('.s1 > a#s1a2 span', ['s1a2s1'])
				1665
				1666	def test_attribute_equals(self):
				1667	self.assertSelectMultiple(
				1668	('p[class="onep"]', ['p1']),
				1669	('p[id="p1"]', ['p1']),
				1670	('[class="onep"]', ['p1']),
				1671	('[id="p1"]', ['p1']),
				1672	('link[rel="stylesheet"]', ['l1']),
				1673	('link[type="text/css"]', ['l1']),
				1674	('link[href="blah.css"]', ['l1']),
				1675	('link[href="no-blah.css"]', []),
				1676	('[rel="stylesheet"]', ['l1']),
				1677	('[type="text/css"]', ['l1']),
				1678	('[href="blah.css"]', ['l1']),
				1679	('[href="no-blah.css"]', []),
				1680	('p[href="no-blah.css"]', []),
				1681	('[href="no-blah.css"]', []),
				1682	)
				1683
				1684	def test_attribute_tilde(self):
				1685	self.assertSelectMultiple(
				1686	('p[class~="class1"]', ['pmulti']),
				1687	('p[class~="class2"]', ['pmulti']),
				1688	('p[class~="class3"]', ['pmulti']),
				1689	('[class~="class1"]', ['pmulti']),
				1690	('[class~="class2"]', ['pmulti']),
				1691	('[class~="class3"]', ['pmulti']),
				1692	('a[rel~="friend"]', ['bob']),
				1693	('a[rel~="met"]', ['bob']),
				1694	('[rel~="friend"]', ['bob']),
				1695	('[rel~="met"]', ['bob']),
				1696	)
				1697
				1698	def test_attribute_startswith(self):
				1699	self.assertSelectMultiple(
				1700	('[rel^="style"]', ['l1']),
				1701	('link[rel^="style"]', ['l1']),
				1702	('notlink[rel^="notstyle"]', []),
				1703	('[rel^="notstyle"]', []),
				1704	('link[rel^="notstyle"]', []),
				1705	('link[href^="bla"]', ['l1']),
				1706	('a[href^="http://"]', ['bob', 'me']),
				1707	('[href^="http://"]', ['bob', 'me']),
				1708	('[id^="p"]', ['pmulti', 'p1']),
				1709	('[id^="m"]', ['me', 'main']),
				1710	('div[id^="m"]', ['main']),
				1711	('a[id^="m"]', ['me']),
				1712	)
				1713
				1714	def test_attribute_endswith(self):
				1715	self.assertSelectMultiple(
				1716	('[href$=".css"]', ['l1']),
				1717	('link[href$=".css"]', ['l1']),
				1718	('link[id$="1"]', ['l1']),
				1719	('[id$="1"]', ['l1', 'p1', 'header1', 's1a1', 's2a1', 's1a2s1']),
				1720	('div[id$="1"]', []),
				1721	('[id$="noending"]', []),
				1722	)
				1723
				1724	def test_attribute_contains(self):
				1725	self.assertSelectMultiple(
				1726	# From test_attribute_startswith
				1727	('[rel*="style"]', ['l1']),
				1728	('link[rel*="style"]', ['l1']),
				1729	('notlink[rel*="notstyle"]', []),
				1730	('[rel*="notstyle"]', []),
				1731	('link[rel*="notstyle"]', []),
				1732	('link[href*="bla"]', ['l1']),
				1733	('a[href*="http://"]', ['bob', 'me']),
				1734	('[href*="http://"]', ['bob', 'me']),
				1735	('[id*="p"]', ['pmulti', 'p1']),
				1736	('div[id*="m"]', ['main']),
				1737	('a[id*="m"]', ['me']),
				1738	# From test_attribute_endswith
				1739	('[href*=".css"]', ['l1']),
				1740	('link[href*=".css"]', ['l1']),
				1741	('link[id*="1"]', ['l1']),
				1742	('[id*="1"]', ['l1', 'p1', 'header1', 's1a1', 's1a2', 's2a1', 's1a2s1']),
				1743	('div[id*="1"]', []),
				1744	('[id*="noending"]', []),
				1745	# New for this test
				1746	('[href*="."]', ['bob', 'me', 'l1']),
				1747	('a[href*="."]', ['bob', 'me']),
				1748	('link[href*="."]', ['l1']),
				1749	('div[id*="n"]', ['main', 'inner']),
				1750	('div[id*="nn"]', ['inner']),
				1751	)
				1752
				1753	def test_attribute_exact_or_hypen(self):
				1754	self.assertSelectMultiple(
				1755	('p[lang\|="en"]', ['lang-en', 'lang-en-gb', 'lang-en-us']),
				1756	('[lang\|="en"]', ['lang-en', 'lang-en-gb', 'lang-en-us']),
				1757	('p[lang\|="fr"]', ['lang-fr']),
				1758	('p[lang\|="gb"]', []),
				1759	)
				1760
				1761	def test_attribute_exists(self):
				1762	self.assertSelectMultiple(
				1763	('[rel]', ['l1', 'bob', 'me']),
				1764	('link[rel]', ['l1']),
				1765	('a[rel]', ['bob', 'me']),
				1766	('[lang]', ['lang-en', 'lang-en-gb', 'lang-en-us', 'lang-fr']),
				1767	('p[class]', ['p1', 'pmulti']),
				1768	('[blah]', []),
				1769	('p[blah]', []),
				1770	)
				1771
    def test_nth_of_type(self):
        """Exercise :nth-of-type() on the paragraphs inside div#inner."""
        # Positions that exist: each selects one paragraph with known text.
        single_matches = (
            ('div#inner p:nth-of-type(1)', u'Some text'),  # first paragraph
            ('div#inner p:nth-of-type(3)', u'Another'),    # third paragraph
        )
        for selector, text in single_matches:
            found = self.soup.select(selector)
            self.assertEqual(len(found), 1)
            self.assertEqual(found[0].string, text)

        # There is no fourth paragraph, so nothing is selected.
        missing = self.soup.select('div#inner p:nth-of-type(4)')
        self.assertEqual(len(missing), 0)

        # Zero is an invalid position and must be rejected.
        self.assertRaises(
            ValueError, self.soup.select, 'div p:nth-of-type(0)')
				1790
    def test_nth_of_type_direct_descendant(self):
        """:nth-of-type() also works after the '>' child combinator."""
        matches = self.soup.select('div#inner > p:nth-of-type(1)')
        self.assertEqual(len(matches), 1)
        first_paragraph = matches[0]
        self.assertEqual(first_paragraph.string, u'Some text')
				1795
				1796	def test_id_child_selector_nth_of_type(self):
				1797	self.assertSelects('#inner > p:nth-of-type(2)', ['p1'])
				1798
				1799	def test_select_on_element(self):
				1800	# Other tests operate on the tree; this operates on an element
				1801	# within the tree.
				1802	inner = self.soup.find("div", id="main")
				1803	selected = inner.select("div")
				1804	# The <div id="inner"> tag was selected. The <div id="footer">
				1805	# tag was not.
				1806	self.assertSelectsIDs(selected, ['inner'])
				1807
				1808	def test_overspecified_child_id(self):
				1809	self.assertSelects(".fancy #inner", ['inner'])
				1810	self.assertSelects(".normal #inner", [])
				1811
				1812	def test_adjacent_sibling_selector(self):
				1813	self.assertSelects('#p1 + h2', ['header2'])
				1814	self.assertSelects('#p1 + h2 + p', ['pmulti'])
				1815	self.assertSelects('#p1 + #header2 + .class1', ['pmulti'])
				1816	self.assertEqual([], self.soup.select('#p1 + p'))
				1817
				1818	def test_general_sibling_selector(self):
				1819	self.assertSelects('#p1 ~ h2', ['header2', 'header3'])
				1820	self.assertSelects('#p1 ~ #header2', ['header2'])
				1821	self.assertSelects('#p1 ~ h2 + a', ['me'])
				1822	self.assertSelects('#p1 ~ h2 + [rel="me"]', ['me'])
				1823	self.assertEqual([], self.soup.select('#inner ~ h2'))
				1824
				1825	def test_dangling_combinator(self):
				1826	self.assertRaises(ValueError, self.soup.select, 'h1 >')
				1827
				1828	def test_sibling_combinator_wont_select_same_tag_twice(self):
				1829	self.assertSelects('p[lang] ~ p', ['lang-en-gb', 'lang-en-us', 'lang-fr'])