You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
36 lines
1.1 KiB
36 lines
1.1 KiB
6 years ago
|
"""
|
||
|
HTML parsing library based on the `WHATWG HTML specification
|
||
|
<https://whatwg.org/html>`_. The parser is designed to be compatible with
|
||
|
existing HTML found in the wild and implements well-defined error recovery that
|
||
|
is largely compatible with modern desktop web browsers.
|
||
|
|
||
|
Example usage::
|
||
|
|
||
|
import html5lib
|
||
|
with open("my_document.html", "rb") as f:
|
||
|
tree = html5lib.parse(f)
|
||
|
|
||
|
For convenience, this module re-exports the following names:
|
||
|
|
||
|
* :func:`~.html5parser.parse`
|
||
|
* :func:`~.html5parser.parseFragment`
|
||
|
* :class:`~.html5parser.HTMLParser`
|
||
|
* :func:`~.treebuilders.getTreeBuilder`
|
||
|
* :func:`~.treewalkers.getTreeWalker`
|
||
|
* :func:`~.serializer.serialize`
|
||
|
"""
|
||
|
|
||
|
from __future__ import absolute_import, division, unicode_literals

# Pull the main entry points up to package level so they can be used directly
# from the package (e.g. ``html5lib.parse``) without importing the submodules.
from .html5parser import HTMLParser, parse, parseFragment
from .treebuilders import getTreeBuilder
from .treewalkers import getTreeWalker
from .serializer import serialize

# Names exposed by ``from <package> import *``; mirrors the imports above.
__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
           "getTreeWalker", "serialize"]

# This assignment has to stay at the top level of the module: setup.py parses
# this file to pick it up (see setup.py for how it is parsed).
#: Distribution version number.
__version__ = "1.0.1"