"""Tests to ensure that the html5lib tree builder generates good trees.""" import pytest import warnings from bs4 import BeautifulSoup from bs4.element import SoupStrainer from . import ( HTML5LIB_PRESENT, HTML5TreeBuilderSmokeTest, SoupTest, ) @pytest.mark.skipif( not HTML5LIB_PRESENT, reason="html5lib seems not to be present, not testing its tree builder." ) class TestHTML5LibBuilder(SoupTest, HTML5TreeBuilderSmokeTest): """See ``HTML5TreeBuilderSmokeTest``.""" @property def default_builder(self): from bs4.builder import HTML5TreeBuilder return HTML5TreeBuilder def test_soupstrainer(self): # The html5lib tree builder does not support SoupStrainers. strainer = SoupStrainer("b") markup = "
A bold statement.
" with warnings.catch_warnings(record=True) as w: soup = BeautifulSoup(markup, "html5lib", parse_only=strainer) assert soup.decode() == self.document_for(markup) [warning] = w assert warning.filename == __file__ assert "the html5lib tree builder doesn't support parse_only" in str(warning.message) def test_correctly_nested_tables(self): """html5lib inserts tags where other parsers don't.""" markup = ('Here's another table:"
'
| ')
self.assert_soup(
markup,
'
Here\'s another table:'
'
|
Foo |
Bar |
Baz |
foo
''' soup = self.soup(markup) # Verify that we can reach thetag; this means the tree is connected. assert b"
foo
" == soup.p.encode() def test_reparented_markup(self): markup = 'foo
\n' soup = self.soup(markup) assert "foo
\n" == soup.body.decode() assert 2 == len(soup.find_all('p')) def test_reparented_markup_ends_with_whitespace(self): markup = 'foo
\n\n' soup = self.soup(markup) assert "foo
\n\n" == soup.body.decode() assert 2 == len(soup.find_all('p')) def test_reparented_markup_containing_identical_whitespace_nodes(self): """Verify that we keep the two whitespace nodes in this document distinct when reparenting the adjacent