diff --git a/libs/lxml/ElementInclude.py b/libs/lxml/ElementInclude.py deleted file mode 100644 index 8badf8b44..000000000 --- a/libs/lxml/ElementInclude.py +++ /dev/null @@ -1,219 +0,0 @@ -# -# ElementTree -# $Id: ElementInclude.py 1862 2004-06-18 07:31:02Z Fredrik $ -# -# limited xinclude support for element trees -# -# history: -# 2003-08-15 fl created -# 2003-11-14 fl fixed default loader -# -# Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved. -# -# fredrik@pythonware.com -# http://www.pythonware.com -# -# -------------------------------------------------------------------- -# The ElementTree toolkit is -# -# Copyright (c) 1999-2004 by Fredrik Lundh -# -# By obtaining, using, and/or copying this software and/or its -# associated documentation, you agree that you have read, understood, -# and will comply with the following terms and conditions: -# -# Permission to use, copy, modify, and distribute this software and -# its associated documentation for any purpose and without fee is -# hereby granted, provided that the above copyright notice appears in -# all copies, and that both that copyright notice and this permission -# notice appear in supporting documentation, and that the name of -# Secret Labs AB or the author not be used in advertising or publicity -# pertaining to distribution of the software without specific, written -# prior permission. -# -# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD -# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- -# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR -# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY -# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS -# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE -# OF THIS SOFTWARE. 
-# -------------------------------------------------------------------- - -""" -Limited XInclude support for the ElementTree package. - -While lxml.etree has full support for XInclude (see -`etree.ElementTree.xinclude()`), this module provides a simpler, pure -Python, ElementTree compatible implementation that supports a simple -form of custom URL resolvers. -""" - -from lxml import etree -try: - from urlparse import urljoin - from urllib2 import urlopen -except ImportError: - # Python 3 - from urllib.parse import urljoin - from urllib.request import urlopen - -XINCLUDE = "{http://www.w3.org/2001/XInclude}" - -XINCLUDE_INCLUDE = XINCLUDE + "include" -XINCLUDE_FALLBACK = XINCLUDE + "fallback" -XINCLUDE_ITER_TAG = XINCLUDE + "*" - -## -# Fatal include error. - -class FatalIncludeError(etree.LxmlSyntaxError): - pass - -## -# ET compatible default loader. -# This loader reads an included resource from disk. -# -# @param href Resource reference. -# @param parse Parse mode. Either "xml" or "text". -# @param encoding Optional text encoding. -# @return The expanded resource. If the parse mode is "xml", this -# is an ElementTree instance. If the parse mode is "text", this -# is a Unicode string. If the loader fails, it can return None -# or raise an IOError exception. -# @throws IOError If the loader fails to load the resource. 
def default_loader(href, parse, encoding=None):
    # The resource is always opened in binary mode: in "xml" mode the
    # parser consumes the byte stream directly, in any other mode the
    # bytes are decoded here (UTF-8 unless an encoding is given).
    source = open(href, 'rb')
    try:
        if parse == "xml":
            return etree.parse(source).getroot()
        return source.read().decode(encoding or 'utf-8')
    finally:
        # Close the handle even when parsing, reading or decoding raises;
        # previously it was only closed on the success path.
        source.close()

##
# Default loader used by lxml.etree - handles custom resolvers properly
#
# @param href Resource reference (file path or URL).
# @param parse Parse mode.  "xml" parses the resource with the given
#     parser; any other value loads it as text.
# @param encoding Optional text encoding, defaults to UTF-8.
# @param parser Optional parser passed on to etree.parse() in "xml" mode.
# @return The root element in "xml" mode, otherwise the decoded text.

def _lxml_default_loader(href, parse, encoding=None, parser=None):
    if parse == "xml":
        return etree.parse(href, parser).getroot()

    # Anything containing "://" is fetched through urlopen(); everything
    # else is treated as a local file path.
    if "://" in href:
        stream = urlopen(href)
    else:
        stream = open(href, 'rb')
    try:
        # try/finally instead of "with": Python 2 urllib2 responses are
        # not context managers.
        raw = stream.read()
    finally:
        stream.close()
    return raw.decode(encoding or 'utf-8')

##
# Wrapper for ET compatibility - drops the parser
#
# @param loader An ElementTree style loader taking (href, parse, encoding).
# @return A callable that also accepts (and ignores) a "parser" argument.

def _wrap_et_loader(loader):
    def load(href, parse, encoding=None, parser=None):
        return loader(href, parse, encoding)
    return load


##
# Expand XInclude directives.
#
# @param elem Root element.
# @param loader Optional resource loader.  If omitted, it defaults
#     to {@link default_loader}.  If given, it should be a callable
#     that implements the same interface as default_loader.
# @param base_url The base URL of the original file, to resolve
#     relative include file references.
# @throws FatalIncludeError If the function fails to include a given
#     resource, or if the tree contains malformed XInclude elements.
# @throws IOError If the function fails to load a given resource.
-# @returns the node or its replacement if it was an XInclude node - -def include(elem, loader=None, base_url=None): - if base_url is None: - if hasattr(elem, 'getroot'): - tree = elem - elem = elem.getroot() - else: - tree = elem.getroottree() - if hasattr(tree, 'docinfo'): - base_url = tree.docinfo.URL - elif hasattr(elem, 'getroot'): - elem = elem.getroot() - _include(elem, loader, base_url=base_url) - -def _include(elem, loader=None, _parent_hrefs=None, base_url=None): - if loader is not None: - load_include = _wrap_et_loader(loader) - else: - load_include = _lxml_default_loader - - if _parent_hrefs is None: - _parent_hrefs = set() - - parser = elem.getroottree().parser - - include_elements = list( - elem.iter(XINCLUDE_ITER_TAG)) - - for e in include_elements: - if e.tag == XINCLUDE_INCLUDE: - # process xinclude directive - href = urljoin(base_url, e.get("href")) - parse = e.get("parse", "xml") - parent = e.getparent() - if parse == "xml": - if href in _parent_hrefs: - raise FatalIncludeError( - "recursive include of %r detected" % href - ) - _parent_hrefs.add(href) - node = load_include(href, parse, parser=parser) - if node is None: - raise FatalIncludeError( - "cannot load %r as %r" % (href, parse) - ) - node = _include(node, loader, _parent_hrefs) - if e.tail: - node.tail = (node.tail or "") + e.tail - if parent is None: - return node # replaced the root node! - parent.replace(e, node) - elif parse == "text": - text = load_include(href, parse, encoding=e.get("encoding")) - if text is None: - raise FatalIncludeError( - "cannot load %r as %r" % (href, parse) - ) - predecessor = e.getprevious() - if predecessor is not None: - predecessor.tail = (predecessor.tail or "") + text - elif parent is None: - return text # replaced the root node! 
- else: - parent.text = (parent.text or "") + text + (e.tail or "") - parent.remove(e) - else: - raise FatalIncludeError( - "unknown parse type in xi:include tag (%r)" % parse - ) - elif e.tag == XINCLUDE_FALLBACK: - parent = e.getparent() - if parent is not None and parent.tag != XINCLUDE_INCLUDE: - raise FatalIncludeError( - "xi:fallback tag must be child of xi:include (%r)" % e.tag - ) - else: - raise FatalIncludeError( - "Invalid element found in XInclude namespace (%r)" % e.tag - ) - return elem diff --git a/libs/lxml/__init__.py b/libs/lxml/__init__.py deleted file mode 100644 index 07cbe3a26..000000000 --- a/libs/lxml/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -# this is a package - -def get_include(): - """ - Returns a list of header include paths (for lxml itself, libxml2 - and libxslt) needed to compile C code against lxml if it was built - with statically linked libraries. - """ - import os - lxml_path = __path__[0] - include_path = os.path.join(lxml_path, 'includes') - includes = [include_path, lxml_path] - - for name in os.listdir(include_path): - path = os.path.join(include_path, name) - if os.path.isdir(path): - includes.append(path) - - return includes - diff --git a/libs/lxml/_elementpath.py b/libs/lxml/_elementpath.py deleted file mode 100644 index 5462df6cb..000000000 --- a/libs/lxml/_elementpath.py +++ /dev/null @@ -1,337 +0,0 @@ -# cython: language_level=2 - -# -# ElementTree -# $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $ -# -# limited xpath support for element trees -# -# history: -# 2003-05-23 fl created -# 2003-05-28 fl added support for // etc -# 2003-08-27 fl fixed parsing of periods in element names -# 2007-09-10 fl new selection engine -# 2007-09-12 fl fixed parent selector -# 2007-09-13 fl added iterfind; changed findall to return a list -# 2007-11-30 fl added namespaces support -# 2009-10-30 fl added child element value filter -# -# Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved. 
-# -# fredrik@pythonware.com -# http://www.pythonware.com -# -# -------------------------------------------------------------------- -# The ElementTree toolkit is -# -# Copyright (c) 1999-2009 by Fredrik Lundh -# -# By obtaining, using, and/or copying this software and/or its -# associated documentation, you agree that you have read, understood, -# and will comply with the following terms and conditions: -# -# Permission to use, copy, modify, and distribute this software and -# its associated documentation for any purpose and without fee is -# hereby granted, provided that the above copyright notice appears in -# all copies, and that both that copyright notice and this permission -# notice appear in supporting documentation, and that the name of -# Secret Labs AB or the author not be used in advertising or publicity -# pertaining to distribution of the software without specific, written -# prior permission. -# -# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD -# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- -# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR -# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY -# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS -# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE -# OF THIS SOFTWARE. -# -------------------------------------------------------------------- - -## -# Implementation module for XPath support. There's usually no reason -# to import this module directly; the ElementTree does this for -# you, if needed. 
-## - -from __future__ import absolute_import - -import re - -xpath_tokenizer_re = re.compile( - "(" - "'[^']*'|\"[^\"]*\"|" - "::|" - "//?|" - r"\.\.|" - r"\(\)|" - r"[/.*:\[\]\(\)@=])|" - r"((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|" - r"\s+" - ) - -def xpath_tokenizer(pattern, namespaces=None): - default_namespace = namespaces.get(None) if namespaces else None - for token in xpath_tokenizer_re.findall(pattern): - tag = token[1] - if tag and tag[0] != "{": - if ":" in tag: - prefix, uri = tag.split(":", 1) - try: - if not namespaces: - raise KeyError - yield token[0], "{%s}%s" % (namespaces[prefix], uri) - except KeyError: - raise SyntaxError("prefix %r not found in prefix map" % prefix) - elif default_namespace: - yield token[0], "{%s}%s" % (default_namespace, tag) - else: - yield token - else: - yield token - - -def prepare_child(next, token): - tag = token[1] - def select(result): - for elem in result: - for e in elem.iterchildren(tag): - yield e - return select - -def prepare_star(next, token): - def select(result): - for elem in result: - for e in elem.iterchildren('*'): - yield e - return select - -def prepare_self(next, token): - def select(result): - return result - return select - -def prepare_descendant(next, token): - token = next() - if token[0] == "*": - tag = "*" - elif not token[0]: - tag = token[1] - else: - raise SyntaxError("invalid descendant") - def select(result): - for elem in result: - for e in elem.iterdescendants(tag): - yield e - return select - -def prepare_parent(next, token): - def select(result): - for elem in result: - parent = elem.getparent() - if parent is not None: - yield parent - return select - -def prepare_predicate(next, token): - # FIXME: replace with real parser!!! 
refs: - # http://effbot.org/zone/simple-iterator-parser.htm - # http://javascript.crockford.com/tdop/tdop.html - signature = '' - predicate = [] - while 1: - token = next() - if token[0] == "]": - break - if token == ('', ''): - # ignore whitespace - continue - if token[0] and token[0][:1] in "'\"": - token = "'", token[0][1:-1] - signature += token[0] or "-" - predicate.append(token[1]) - - # use signature to determine predicate type - if signature == "@-": - # [@attribute] predicate - key = predicate[1] - def select(result): - for elem in result: - if elem.get(key) is not None: - yield elem - return select - if signature == "@-='": - # [@attribute='value'] - key = predicate[1] - value = predicate[-1] - def select(result): - for elem in result: - if elem.get(key) == value: - yield elem - return select - if signature == "-" and not re.match(r"-?\d+$", predicate[0]): - # [tag] - tag = predicate[0] - def select(result): - for elem in result: - for _ in elem.iterchildren(tag): - yield elem - break - return select - if signature == ".='" or (signature == "-='" and not re.match(r"-?\d+$", predicate[0])): - # [.='value'] or [tag='value'] - tag = predicate[0] - value = predicate[-1] - if tag: - def select(result): - for elem in result: - for e in elem.iterchildren(tag): - if "".join(e.itertext()) == value: - yield elem - break - else: - def select(result): - for elem in result: - if "".join(elem.itertext()) == value: - yield elem - return select - if signature == "-" or signature == "-()" or signature == "-()-": - # [index] or [last()] or [last()-index] - if signature == "-": - # [index] - index = int(predicate[0]) - 1 - if index < 0: - if index == -1: - raise SyntaxError( - "indices in path predicates are 1-based, not 0-based") - else: - raise SyntaxError("path index >= 1 expected") - else: - if predicate[0] != "last": - raise SyntaxError("unsupported function") - if signature == "-()-": - try: - index = int(predicate[2]) - 1 - except ValueError: - raise 
SyntaxError("unsupported expression") - else: - index = -1 - def select(result): - for elem in result: - parent = elem.getparent() - if parent is None: - continue - try: - # FIXME: what if the selector is "*" ? - elems = list(parent.iterchildren(elem.tag)) - if elems[index] is elem: - yield elem - except IndexError: - pass - return select - raise SyntaxError("invalid predicate") - -ops = { - "": prepare_child, - "*": prepare_star, - ".": prepare_self, - "..": prepare_parent, - "//": prepare_descendant, - "[": prepare_predicate, -} - - -# -------------------------------------------------------------------- - -_cache = {} - - -def _build_path_iterator(path, namespaces): - """compile selector pattern""" - if path[-1:] == "/": - path += "*" # implicit all (FIXME: keep this?) - - cache_key = (path,) - if namespaces: - if '' in namespaces: - raise ValueError("empty namespace prefix must be passed as None, not the empty string") - if None in namespaces: - cache_key += (namespaces[None],) + tuple(sorted( - item for item in namespaces.items() if item[0] is not None)) - else: - cache_key += tuple(sorted(namespaces.items())) - - try: - return _cache[cache_key] - except KeyError: - pass - if len(_cache) > 100: - _cache.clear() - - if path[:1] == "/": - raise SyntaxError("cannot use absolute path on element") - stream = iter(xpath_tokenizer(path, namespaces)) - try: - _next = stream.next - except AttributeError: - # Python 3 - _next = stream.__next__ - try: - token = _next() - except StopIteration: - raise SyntaxError("empty path expression") - selector = [] - while 1: - try: - selector.append(ops[token[0]](_next, token)) - except StopIteration: - raise SyntaxError("invalid path") - try: - token = _next() - if token[0] == "/": - token = _next() - except StopIteration: - break - _cache[cache_key] = selector - return selector - - -## -# Iterate over the matching nodes - -def iterfind(elem, path, namespaces=None): - selector = _build_path_iterator(path, namespaces) - result = 
iter((elem,)) - for select in selector: - result = select(result) - return result - - -## -# Find first matching object. - -def find(elem, path, namespaces=None): - it = iterfind(elem, path, namespaces) - try: - return next(it) - except StopIteration: - return None - - -## -# Find all matching objects. - -def findall(elem, path, namespaces=None): - return list(iterfind(elem, path, namespaces)) - - -## -# Find text for first matching object. - -def findtext(elem, path, default=None, namespaces=None): - el = find(elem, path, namespaces) - if el is None: - return default - else: - return el.text or '' diff --git a/libs/lxml/_elementpath.pyd b/libs/lxml/_elementpath.pyd deleted file mode 100644 index fdd9e6b25..000000000 Binary files a/libs/lxml/_elementpath.pyd and /dev/null differ diff --git a/libs/lxml/builder.py b/libs/lxml/builder.py deleted file mode 100644 index a28884567..000000000 --- a/libs/lxml/builder.py +++ /dev/null @@ -1,239 +0,0 @@ -# cython: language_level=2 - -# -# Element generator factory by Fredrik Lundh. 
-# -# Source: -# http://online.effbot.org/2006_11_01_archive.htm#et-builder -# http://effbot.python-hosting.com/file/stuff/sandbox/elementlib/builder.py -# -# -------------------------------------------------------------------- -# The ElementTree toolkit is -# -# Copyright (c) 1999-2004 by Fredrik Lundh -# -# By obtaining, using, and/or copying this software and/or its -# associated documentation, you agree that you have read, understood, -# and will comply with the following terms and conditions: -# -# Permission to use, copy, modify, and distribute this software and -# its associated documentation for any purpose and without fee is -# hereby granted, provided that the above copyright notice appears in -# all copies, and that both that copyright notice and this permission -# notice appear in supporting documentation, and that the name of -# Secret Labs AB or the author not be used in advertising or publicity -# pertaining to distribution of the software without specific, written -# prior permission. -# -# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD -# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- -# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR -# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY -# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS -# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE -# OF THIS SOFTWARE. -# -------------------------------------------------------------------- - -""" -The ``E`` Element factory for generating XML documents. -""" - -from __future__ import absolute_import - -import lxml.etree as ET - -from functools import partial - -try: - basestring -except NameError: - basestring = str - -try: - unicode -except NameError: - unicode = str - - -class ElementMaker(object): - """Element generator factory. 
- - Unlike the ordinary Element factory, the E factory allows you to pass in - more than just a tag and some optional attributes; you can also pass in - text and other elements. The text is added as either text or tail - attributes, and elements are inserted at the right spot. Some small - examples:: - - >>> from lxml import etree as ET - >>> from lxml.builder import E - - >>> ET.tostring(E("tag")) - '<tag/>' - >>> ET.tostring(E("tag", "text")) - '<tag>text</tag>' - >>> ET.tostring(E("tag", "text", key="value")) - '<tag key="value">text</tag>' - >>> ET.tostring(E("tag", E("subtag", "text"), "tail")) - '<tag><subtag>text</subtag>tail</tag>' - - For simple tags, the factory also allows you to write ``E.tag(...)`` instead - of ``E('tag', ...)``:: - - >>> ET.tostring(E.tag()) - '<tag/>' - >>> ET.tostring(E.tag("text")) - '<tag>text</tag>' - >>> ET.tostring(E.tag(E.subtag("text"), "tail")) - '<tag><subtag>text</subtag>tail</tag>' - - Here's a somewhat larger example; this shows how to generate HTML - documents, using a mix of prepared factory functions for inline elements, - nested ``E.tag`` calls, and embedded XHTML fragments:: - - # some common inline elements - A = E.a - I = E.i - B = E.b - - def CLASS(v): - # helper function, 'class' is a reserved word - return {'class': v} - - page = ( - E.html( - E.head( - E.title("This is a sample document") - ), - E.body( - E.h1("Hello!", CLASS("title")), - E.p("This is a paragraph with ", B("bold"), " text in it!"), - E.p("This is another paragraph, with a ", - A("link", href="http://www.python.org"), "."), - E.p("Here are some reserved characters: <spam&egg>."), - ET.XML("<p>And finally, here is an embedded XHTML fragment.</p>"), - ) - ) - ) - - print ET.tostring(page) - - Here's a prettyprinted version of the output from the above script:: - - <html> - <head> - <title>This is a sample document</title> - </head> - <body> - <h1 class="title">Hello!</h1> - <p>This is a paragraph with <b>bold</b> text in it!</p> - <p>This is another paragraph, with <a href="http://www.python.org">link</a>.</p> - <p>Here are some reserved characters: &lt;spam&amp;egg&gt;.</p> - <p>And finally, here is an embedded XHTML fragment.</p>
- - - - For namespace support, you can pass a namespace map (``nsmap``) - and/or a specific target ``namespace`` to the ElementMaker class:: - - >>> E = ElementMaker(namespace="http://my.ns/") - >>> print(ET.tostring( E.test )) - - - >>> E = ElementMaker(namespace="http://my.ns/", nsmap={'p':'http://my.ns/'}) - >>> print(ET.tostring( E.test )) - - """ - - def __init__(self, typemap=None, - namespace=None, nsmap=None, makeelement=None): - if namespace is not None: - self._namespace = '{' + namespace + '}' - else: - self._namespace = None - - if nsmap: - self._nsmap = dict(nsmap) - else: - self._nsmap = None - - if makeelement is not None: - assert callable(makeelement) - self._makeelement = makeelement - else: - self._makeelement = ET.Element - - # initialize type map for this element factory - - if typemap: - typemap = dict(typemap) - else: - typemap = {} - - def add_text(elem, item): - try: - elem[-1].tail = (elem[-1].tail or "") + item - except IndexError: - elem.text = (elem.text or "") + item - - def add_cdata(elem, cdata): - if elem.text: - raise ValueError("Can't add a CDATA section. 
Element already has some text: %r" % elem.text) - elem.text = cdata - - if str not in typemap: - typemap[str] = add_text - if unicode not in typemap: - typemap[unicode] = add_text - if ET.CDATA not in typemap: - typemap[ET.CDATA] = add_cdata - - def add_dict(elem, item): - attrib = elem.attrib - for k, v in item.items(): - if isinstance(v, basestring): - attrib[k] = v - else: - attrib[k] = typemap[type(v)](None, v) - if dict not in typemap: - typemap[dict] = add_dict - - self._typemap = typemap - - def __call__(self, tag, *children, **attrib): - typemap = self._typemap - - if self._namespace is not None and tag[0] != '{': - tag = self._namespace + tag - elem = self._makeelement(tag, nsmap=self._nsmap) - if attrib: - typemap[dict](elem, attrib) - - for item in children: - if callable(item): - item = item() - t = typemap.get(type(item)) - if t is None: - if ET.iselement(item): - elem.append(item) - continue - for basetype in type(item).__mro__: - # See if the typemap knows of any of this type's bases. - t = typemap.get(basetype) - if t is not None: - break - else: - raise TypeError("bad argument type: %s(%r)" % - (type(item).__name__, item)) - v = t(elem, item) - if v: - typemap.get(type(v))(elem, v) - - return elem - - def __getattr__(self, tag): - return partial(self, tag) - - -# create factory object -E = ElementMaker() diff --git a/libs/lxml/builder.pyd b/libs/lxml/builder.pyd deleted file mode 100644 index b20e66481..000000000 Binary files a/libs/lxml/builder.pyd and /dev/null differ diff --git a/libs/lxml/cssselect.py b/libs/lxml/cssselect.py deleted file mode 100644 index 586a1427c..000000000 --- a/libs/lxml/cssselect.py +++ /dev/null @@ -1,102 +0,0 @@ -"""CSS Selectors based on XPath. - -This module supports selecting XML/HTML tags based on CSS selectors. -See the `CSSSelector` class for details. - -This is a thin wrapper around cssselect 0.7 or later. -""" - -from __future__ import absolute_import - -from . 
import etree -try: - import cssselect as external_cssselect -except ImportError: - raise ImportError( - 'cssselect does not seem to be installed. ' - 'See http://packages.python.org/cssselect/') - - -SelectorSyntaxError = external_cssselect.SelectorSyntaxError -ExpressionError = external_cssselect.ExpressionError -SelectorError = external_cssselect.SelectorError - - -__all__ = ['SelectorSyntaxError', 'ExpressionError', 'SelectorError', - 'CSSSelector'] - - -class LxmlTranslator(external_cssselect.GenericTranslator): - """ - A custom CSS selector to XPath translator with lxml-specific extensions. - """ - def xpath_contains_function(self, xpath, function): - # Defined there, removed in later drafts: - # http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors - if function.argument_types() not in (['STRING'], ['IDENT']): - raise ExpressionError( - "Expected a single string or ident for :contains(), got %r" - % function.arguments) - value = function.arguments[0].value - return xpath.add_condition( - 'contains(__lxml_internal_css:lower-case(string(.)), %s)' - % self.xpath_literal(value.lower())) - - -class LxmlHTMLTranslator(LxmlTranslator, external_cssselect.HTMLTranslator): - """ - lxml extensions + HTML support. - """ - - -def _make_lower_case(context, s): - return s.lower() - -ns = etree.FunctionNamespace('http://codespeak.net/lxml/css/') -ns.prefix = '__lxml_internal_css' -ns['lower-case'] = _make_lower_case - - -class CSSSelector(etree.XPath): - """A CSS selector. - - Usage:: - - >>> from lxml import etree, cssselect - >>> select = cssselect.CSSSelector("a tag > child") - - >>> root = etree.XML("TEXT") - >>> [ el.tag for el in select(root) ] - ['child'] - - To use CSS namespaces, you need to pass a prefix-to-namespace - mapping as ``namespaces`` keyword argument:: - - >>> rdfns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' - >>> select_ns = cssselect.CSSSelector('root > rdf|Description', - ... 
namespaces={'rdf': rdfns}) - - >>> rdf = etree.XML(( - ... '' - ... 'blah' - ... '') % rdfns) - >>> [(el.tag, el.text) for el in select_ns(rdf)] - [('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description', 'blah')] - - """ - def __init__(self, css, namespaces=None, translator='xml'): - if translator == 'xml': - translator = LxmlTranslator() - elif translator == 'html': - translator = LxmlHTMLTranslator() - elif translator == 'xhtml': - translator = LxmlHTMLTranslator(xhtml=True) - path = translator.css_to_xpath(css) - etree.XPath.__init__(self, path, namespaces=namespaces) - self.css = css - - def __repr__(self): - return '<%s %s for %r>' % ( - self.__class__.__name__, - hex(abs(id(self)))[2:], - self.css) diff --git a/libs/lxml/doctestcompare.py b/libs/lxml/doctestcompare.py deleted file mode 100644 index 1b0daa49a..000000000 --- a/libs/lxml/doctestcompare.py +++ /dev/null @@ -1,507 +0,0 @@ -""" -lxml-based doctest output comparison. - -Note: normally, you should just import the `lxml.usedoctest` and -`lxml.html.usedoctest` modules from within a doctest, instead of this -one:: - - >>> import lxml.usedoctest # for XML output - - >>> import lxml.html.usedoctest # for HTML output - -To use this module directly, you must call ``lxmldoctest.install()``, -which will cause doctest to use this in all subsequent calls. - -This changes the way output is checked and comparisons are made for -XML or HTML-like content. - -XML or HTML content is noticed because the example starts with ``<`` -(it's HTML if it starts with ```` or include an ``any`` -attribute in the tag. An ``any`` tag matches any tag, while the -attribute matches any and all attributes. - -When a match fails, the reformatted example and gotten text is -displayed (indented), and a rough diff-like output is given. Anything -marked with ``+`` is in the output but wasn't supposed to be, and -similarly ``-`` means its in the example but wasn't in the output. 
- -You can disable parsing on one line with ``# doctest:+NOPARSE_MARKUP`` -""" - -from lxml import etree -import sys -import re -import doctest -try: - from html import escape as html_escape -except ImportError: - from cgi import escape as html_escape - -__all__ = ['PARSE_HTML', 'PARSE_XML', 'NOPARSE_MARKUP', 'LXMLOutputChecker', - 'LHTMLOutputChecker', 'install', 'temp_install'] - -try: - _basestring = basestring -except NameError: - _basestring = (str, bytes) - -_IS_PYTHON_3 = sys.version_info[0] >= 3 - -PARSE_HTML = doctest.register_optionflag('PARSE_HTML') -PARSE_XML = doctest.register_optionflag('PARSE_XML') -NOPARSE_MARKUP = doctest.register_optionflag('NOPARSE_MARKUP') - -OutputChecker = doctest.OutputChecker - -def strip(v): - if v is None: - return None - else: - return v.strip() - -def norm_whitespace(v): - return _norm_whitespace_re.sub(' ', v) - -_html_parser = etree.HTMLParser(recover=False, remove_blank_text=True) - -def html_fromstring(html): - return etree.fromstring(html, _html_parser) - -# We use this to distinguish repr()s from elements: -_repr_re = re.compile(r'^<[^>]+ (at|object) ') -_norm_whitespace_re = re.compile(r'[ \t\n][ \t\n]+') - -class LXMLOutputChecker(OutputChecker): - - empty_tags = ( - 'param', 'img', 'area', 'br', 'basefont', 'input', - 'base', 'meta', 'link', 'col') - - def get_default_parser(self): - return etree.XML - - def check_output(self, want, got, optionflags): - alt_self = getattr(self, '_temp_override_self', None) - if alt_self is not None: - super_method = self._temp_call_super_check_output - self = alt_self - else: - super_method = OutputChecker.check_output - parser = self.get_parser(want, got, optionflags) - if not parser: - return super_method( - self, want, got, optionflags) - try: - want_doc = parser(want) - except etree.XMLSyntaxError: - return False - try: - got_doc = parser(got) - except etree.XMLSyntaxError: - return False - return self.compare_docs(want_doc, got_doc) - - def get_parser(self, want, got, 
optionflags): - parser = None - if NOPARSE_MARKUP & optionflags: - return None - if PARSE_HTML & optionflags: - parser = html_fromstring - elif PARSE_XML & optionflags: - parser = etree.XML - elif (want.strip().lower().startswith('' % el.tag - return '<%s %s>' % (el.tag, ' '.join(attrs)) - - def format_end_tag(self, el): - if isinstance(el, etree.CommentBase): - # FIXME: probably PIs should be handled specially too? - return '-->' - return '' % el.tag - - def collect_diff(self, want, got, html, indent): - parts = [] - if not len(want) and not len(got): - parts.append(' '*indent) - parts.append(self.collect_diff_tag(want, got)) - if not self.html_empty_tag(got, html): - parts.append(self.collect_diff_text(want.text, got.text)) - parts.append(self.collect_diff_end_tag(want, got)) - parts.append(self.collect_diff_text(want.tail, got.tail)) - parts.append('\n') - return ''.join(parts) - parts.append(' '*indent) - parts.append(self.collect_diff_tag(want, got)) - parts.append('\n') - if strip(want.text) or strip(got.text): - parts.append(' '*indent) - parts.append(self.collect_diff_text(want.text, got.text)) - parts.append('\n') - want_children = list(want) - got_children = list(got) - while want_children or got_children: - if not want_children: - parts.append(self.format_doc(got_children.pop(0), html, indent+2, '+')) - continue - if not got_children: - parts.append(self.format_doc(want_children.pop(0), html, indent+2, '-')) - continue - parts.append(self.collect_diff( - want_children.pop(0), got_children.pop(0), html, indent+2)) - parts.append(' '*indent) - parts.append(self.collect_diff_end_tag(want, got)) - parts.append('\n') - if strip(want.tail) or strip(got.tail): - parts.append(' '*indent) - parts.append(self.collect_diff_text(want.tail, got.tail)) - parts.append('\n') - return ''.join(parts) - - def collect_diff_tag(self, want, got): - if not self.tag_compare(want.tag, got.tag): - tag = '%s (got: %s)' % (want.tag, got.tag) - else: - tag = got.tag - attrs = [] - 
any = want.tag == 'any' or 'any' in want.attrib - for name, value in sorted(got.attrib.items()): - if name not in want.attrib and not any: - attrs.append('+%s="%s"' % (name, self.format_text(value, False))) - else: - if name in want.attrib: - text = self.collect_diff_text(want.attrib[name], value, False) - else: - text = self.format_text(value, False) - attrs.append('%s="%s"' % (name, text)) - if not any: - for name, value in sorted(want.attrib.items()): - if name in got.attrib: - continue - attrs.append('-%s="%s"' % (name, self.format_text(value, False))) - if attrs: - tag = '<%s %s>' % (tag, ' '.join(attrs)) - else: - tag = '<%s>' % tag - return tag - - def collect_diff_end_tag(self, want, got): - if want.tag != got.tag: - tag = '%s (got: %s)' % (want.tag, got.tag) - else: - tag = got.tag - return '' % tag - - def collect_diff_text(self, want, got, strip=True): - if self.text_compare(want, got, strip): - if not got: - return '' - return self.format_text(got, strip) - text = '%s (got: %s)' % (want, got) - return self.format_text(text, strip) - -class LHTMLOutputChecker(LXMLOutputChecker): - def get_default_parser(self): - return html_fromstring - -def install(html=False): - """ - Install doctestcompare for all future doctests. - - If html is true, then by default the HTML parser will be used; - otherwise the XML parser is used. - """ - if html: - doctest.OutputChecker = LHTMLOutputChecker - else: - doctest.OutputChecker = LXMLOutputChecker - -def temp_install(html=False, del_module=None): - """ - Use this *inside* a doctest to enable this checker for this - doctest only. - - If html is true, then by default the HTML parser will be used; - otherwise the XML parser is used. 
- """ - if html: - Checker = LHTMLOutputChecker - else: - Checker = LXMLOutputChecker - frame = _find_doctest_frame() - dt_self = frame.f_locals['self'] - checker = Checker() - old_checker = dt_self._checker - dt_self._checker = checker - # The unfortunate thing is that there is a local variable 'check' - # in the function that runs the doctests, that is a bound method - # into the output checker. We have to update that. We can't - # modify the frame, so we have to modify the object in place. The - # only way to do this is to actually change the func_code - # attribute of the method. We change it, and then wait for - # __record_outcome to be run, which signals the end of the __run - # method, at which point we restore the previous check_output - # implementation. - if _IS_PYTHON_3: - check_func = frame.f_locals['check'].__func__ - checker_check_func = checker.check_output.__func__ - else: - check_func = frame.f_locals['check'].im_func - checker_check_func = checker.check_output.im_func - # Because we can't patch up func_globals, this is the only global - # in check_output that we care about: - doctest.etree = etree - _RestoreChecker(dt_self, old_checker, checker, - check_func, checker_check_func, - del_module) - -class _RestoreChecker(object): - def __init__(self, dt_self, old_checker, new_checker, check_func, clone_func, - del_module): - self.dt_self = dt_self - self.checker = old_checker - self.checker._temp_call_super_check_output = self.call_super - self.checker._temp_override_self = new_checker - self.check_func = check_func - self.clone_func = clone_func - self.del_module = del_module - self.install_clone() - self.install_dt_self() - def install_clone(self): - if _IS_PYTHON_3: - self.func_code = self.check_func.__code__ - self.func_globals = self.check_func.__globals__ - self.check_func.__code__ = self.clone_func.__code__ - else: - self.func_code = self.check_func.func_code - self.func_globals = self.check_func.func_globals - self.check_func.func_code = 
self.clone_func.func_code - def uninstall_clone(self): - if _IS_PYTHON_3: - self.check_func.__code__ = self.func_code - else: - self.check_func.func_code = self.func_code - def install_dt_self(self): - self.prev_func = self.dt_self._DocTestRunner__record_outcome - self.dt_self._DocTestRunner__record_outcome = self - def uninstall_dt_self(self): - self.dt_self._DocTestRunner__record_outcome = self.prev_func - def uninstall_module(self): - if self.del_module: - import sys - del sys.modules[self.del_module] - if '.' in self.del_module: - package, module = self.del_module.rsplit('.', 1) - package_mod = sys.modules[package] - delattr(package_mod, module) - def __call__(self, *args, **kw): - self.uninstall_clone() - self.uninstall_dt_self() - del self.checker._temp_override_self - del self.checker._temp_call_super_check_output - result = self.prev_func(*args, **kw) - self.uninstall_module() - return result - def call_super(self, *args, **kw): - self.uninstall_clone() - try: - return self.check_func(*args, **kw) - finally: - self.install_clone() - -def _find_doctest_frame(): - import sys - frame = sys._getframe(1) - while frame: - l = frame.f_locals - if 'BOOM' in l: - # Sign of doctest - return frame - frame = frame.f_back - raise LookupError( - "Could not find doctest (only use this function *inside* a doctest)") - -__test__ = { - 'basic': ''' - >>> temp_install() - >>> print """stuff""" - ... 
- >>> print """""" - - - - >>> print """blahblahblah""" # doctest: +NOPARSE_MARKUP, +ELLIPSIS - ...foo /> - '''} - -if __name__ == '__main__': - import doctest - doctest.testmod() - - diff --git a/libs/lxml/etree.h b/libs/lxml/etree.h deleted file mode 100644 index dcf739840..000000000 --- a/libs/lxml/etree.h +++ /dev/null @@ -1,223 +0,0 @@ -/* Generated by Cython 0.29.2 */ - -#ifndef __PYX_HAVE__lxml__etree -#define __PYX_HAVE__lxml__etree - -struct LxmlDocument; -struct LxmlElement; -struct LxmlElementTree; -struct LxmlElementTagMatcher; -struct LxmlElementIterator; -struct LxmlElementBase; -struct LxmlElementClassLookup; -struct LxmlFallbackElementClassLookup; - -/* "lxml/etree.pyx":318 - * - * # type of a function that steps from node to node - * ctypedef public xmlNode* (*_node_to_node_function)(xmlNode*) # <<<<<<<<<<<<<< - * - * - */ -typedef xmlNode *(*_node_to_node_function)(xmlNode *); - -/* "lxml/etree.pyx":334 - * @cython.final - * @cython.freelist(8) - * cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]: # <<<<<<<<<<<<<< - * u"""Internal base class to reference a libxml document. - * - */ -struct LxmlDocument { - PyObject_HEAD - struct __pyx_vtabstruct_4lxml_5etree__Document *__pyx_vtab; - int _ns_counter; - PyObject *_prefix_tail; - xmlDoc *_c_doc; - struct __pyx_obj_4lxml_5etree__BaseParser *_parser; -}; - -/* "lxml/etree.pyx":683 - * - * @cython.no_gc_clear - * cdef public class _Element [ type LxmlElementType, object LxmlElement ]: # <<<<<<<<<<<<<< - * u"""Element class. 
- * - */ -struct LxmlElement { - PyObject_HEAD - struct LxmlDocument *_doc; - xmlNode *_c_node; - PyObject *_tag; -}; - -/* "lxml/etree.pyx":1847 - * - * - * cdef public class _ElementTree [ type LxmlElementTreeType, # <<<<<<<<<<<<<< - * object LxmlElementTree ]: - * cdef _Document _doc - */ -struct LxmlElementTree { - PyObject_HEAD - struct __pyx_vtabstruct_4lxml_5etree__ElementTree *__pyx_vtab; - struct LxmlDocument *_doc; - struct LxmlElement *_context_node; -}; - -/* "lxml/etree.pyx":2574 - * - * - * cdef public class _ElementTagMatcher [ object LxmlElementTagMatcher, # <<<<<<<<<<<<<< - * type LxmlElementTagMatcherType ]: - * """ - */ -struct LxmlElementTagMatcher { - PyObject_HEAD - struct __pyx_vtabstruct_4lxml_5etree__ElementTagMatcher *__pyx_vtab; - PyObject *_pystrings; - int _node_type; - char *_href; - char *_name; -}; - -/* "lxml/etree.pyx":2605 - * self._name = NULL - * - * cdef public class _ElementIterator(_ElementTagMatcher) [ # <<<<<<<<<<<<<< - * object LxmlElementIterator, type LxmlElementIteratorType ]: - * """ - */ -struct LxmlElementIterator { - struct LxmlElementTagMatcher __pyx_base; - struct LxmlElement *_node; - _node_to_node_function _next_element; -}; - -/* "src/lxml/classlookup.pxi":6 - * # Custom Element classes - * - * cdef public class ElementBase(_Element) [ type LxmlElementBaseType, # <<<<<<<<<<<<<< - * object LxmlElementBase ]: - * u"""ElementBase(*children, attrib=None, nsmap=None, **_extra) - */ -struct LxmlElementBase { - struct LxmlElement __pyx_base; -}; - -/* "src/lxml/classlookup.pxi":210 - * # Element class lookup - * - * ctypedef public object (*_element_class_lookup_function)(object, _Document, xmlNode*) # <<<<<<<<<<<<<< - * - * # class to store element class lookup functions - */ -typedef PyObject *(*_element_class_lookup_function)(PyObject *, struct LxmlDocument *, xmlNode *); - -/* "src/lxml/classlookup.pxi":213 - * - * # class to store element class lookup functions - * cdef public class ElementClassLookup [ type 
LxmlElementClassLookupType, # <<<<<<<<<<<<<< - * object LxmlElementClassLookup ]: - * u"""ElementClassLookup(self) - */ -struct LxmlElementClassLookup { - PyObject_HEAD - _element_class_lookup_function _lookup_function; -}; - -/* "src/lxml/classlookup.pxi":221 - * - * - * cdef public class FallbackElementClassLookup(ElementClassLookup) \ # <<<<<<<<<<<<<< - * [ type LxmlFallbackElementClassLookupType, - * object LxmlFallbackElementClassLookup ]: - */ -struct LxmlFallbackElementClassLookup { - struct LxmlElementClassLookup __pyx_base; - struct __pyx_vtabstruct_4lxml_5etree_FallbackElementClassLookup *__pyx_vtab; - struct LxmlElementClassLookup *fallback; - _element_class_lookup_function _fallback_function; -}; - -#ifndef __PYX_HAVE_API__lxml__etree - -#ifndef __PYX_EXTERN_C - #ifdef __cplusplus - #define __PYX_EXTERN_C extern "C" - #else - #define __PYX_EXTERN_C extern - #endif -#endif - -#ifndef DL_IMPORT - #define DL_IMPORT(_T) _T -#endif - -__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlDocumentType; -__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementType; -__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementTreeType; -__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementTagMatcherType; -__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementIteratorType; -__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementBaseType; -__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementClassLookupType; -__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlFallbackElementClassLookupType; - -__PYX_EXTERN_C struct LxmlElement *deepcopyNodeToDocument(struct LxmlDocument *, xmlNode *); -__PYX_EXTERN_C struct LxmlElementTree *elementTreeFactory(struct LxmlElement *); -__PYX_EXTERN_C struct LxmlElementTree *newElementTree(struct LxmlElement *, PyObject *); -__PYX_EXTERN_C struct LxmlElementTree *adoptExternalDocument(xmlDoc *, PyObject *, int); -__PYX_EXTERN_C struct LxmlElement *elementFactory(struct LxmlDocument *, xmlNode *); -__PYX_EXTERN_C struct LxmlElement *makeElement(PyObject *, struct LxmlDocument *, 
PyObject *, PyObject *, PyObject *, PyObject *, PyObject *); -__PYX_EXTERN_C struct LxmlElement *makeSubElement(struct LxmlElement *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *); -__PYX_EXTERN_C void setElementClassLookupFunction(_element_class_lookup_function, PyObject *); -__PYX_EXTERN_C PyObject *lookupDefaultElementClass(PyObject *, PyObject *, xmlNode *); -__PYX_EXTERN_C PyObject *lookupNamespaceElementClass(PyObject *, PyObject *, xmlNode *); -__PYX_EXTERN_C PyObject *callLookupFallback(struct LxmlFallbackElementClassLookup *, struct LxmlDocument *, xmlNode *); -__PYX_EXTERN_C int tagMatches(xmlNode *, const xmlChar *, const xmlChar *); -__PYX_EXTERN_C struct LxmlDocument *documentOrRaise(PyObject *); -__PYX_EXTERN_C struct LxmlElement *rootNodeOrRaise(PyObject *); -__PYX_EXTERN_C int hasText(xmlNode *); -__PYX_EXTERN_C int hasTail(xmlNode *); -__PYX_EXTERN_C PyObject *textOf(xmlNode *); -__PYX_EXTERN_C PyObject *tailOf(xmlNode *); -__PYX_EXTERN_C int setNodeText(xmlNode *, PyObject *); -__PYX_EXTERN_C int setTailText(xmlNode *, PyObject *); -__PYX_EXTERN_C PyObject *attributeValue(xmlNode *, xmlAttr *); -__PYX_EXTERN_C PyObject *attributeValueFromNsName(xmlNode *, const xmlChar *, const xmlChar *); -__PYX_EXTERN_C PyObject *getAttributeValue(struct LxmlElement *, PyObject *, PyObject *); -__PYX_EXTERN_C PyObject *iterattributes(struct LxmlElement *, int); -__PYX_EXTERN_C PyObject *collectAttributes(xmlNode *, int); -__PYX_EXTERN_C int setAttributeValue(struct LxmlElement *, PyObject *, PyObject *); -__PYX_EXTERN_C int delAttribute(struct LxmlElement *, PyObject *); -__PYX_EXTERN_C int delAttributeFromNsName(xmlNode *, const xmlChar *, const xmlChar *); -__PYX_EXTERN_C int hasChild(xmlNode *); -__PYX_EXTERN_C xmlNode *findChild(xmlNode *, Py_ssize_t); -__PYX_EXTERN_C xmlNode *findChildForwards(xmlNode *, Py_ssize_t); -__PYX_EXTERN_C xmlNode *findChildBackwards(xmlNode *, Py_ssize_t); -__PYX_EXTERN_C xmlNode *nextElement(xmlNode *); 
-__PYX_EXTERN_C xmlNode *previousElement(xmlNode *); -__PYX_EXTERN_C void appendChild(struct LxmlElement *, struct LxmlElement *); -__PYX_EXTERN_C int appendChildToElement(struct LxmlElement *, struct LxmlElement *); -__PYX_EXTERN_C PyObject *pyunicode(const xmlChar *); -__PYX_EXTERN_C PyObject *utf8(PyObject *); -__PYX_EXTERN_C PyObject *getNsTag(PyObject *); -__PYX_EXTERN_C PyObject *getNsTagWithEmptyNs(PyObject *); -__PYX_EXTERN_C PyObject *namespacedName(xmlNode *); -__PYX_EXTERN_C PyObject *namespacedNameFromNsName(const xmlChar *, const xmlChar *); -__PYX_EXTERN_C void iteratorStoreNext(struct LxmlElementIterator *, struct LxmlElement *); -__PYX_EXTERN_C void initTagMatch(struct LxmlElementTagMatcher *, PyObject *); -__PYX_EXTERN_C xmlNs *findOrBuildNodeNsPrefix(struct LxmlDocument *, xmlNode *, const xmlChar *, const xmlChar *); - -#endif /* !__PYX_HAVE_API__lxml__etree */ - -/* WARNING: the interface of the module init function changed in CPython 3.5. */ -/* It now returns a PyModuleDef instance instead of a PyModule instance. 
*/ - -#if PY_MAJOR_VERSION < 3 -PyMODINIT_FUNC initetree(void); -#else -PyMODINIT_FUNC PyInit_etree(void); -#endif - -#endif /* !__PYX_HAVE__lxml__etree */ diff --git a/libs/lxml/etree.pyd b/libs/lxml/etree.pyd deleted file mode 100644 index a03f974e6..000000000 Binary files a/libs/lxml/etree.pyd and /dev/null differ diff --git a/libs/lxml/etree_api.h b/libs/lxml/etree_api.h deleted file mode 100644 index 912f48c36..000000000 --- a/libs/lxml/etree_api.h +++ /dev/null @@ -1,219 +0,0 @@ -/* Generated by Cython 0.29.2 */ - -#ifndef __PYX_HAVE_API__lxml__etree -#define __PYX_HAVE_API__lxml__etree -#ifdef __MINGW64__ -#define MS_WIN64 -#endif -#include "Python.h" -#include "etree.h" - -static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_deepcopyNodeToDocument)(struct LxmlDocument *, xmlNode *) = 0; -#define deepcopyNodeToDocument __pyx_api_f_4lxml_5etree_deepcopyNodeToDocument -static struct LxmlElementTree *(*__pyx_api_f_4lxml_5etree_elementTreeFactory)(struct LxmlElement *) = 0; -#define elementTreeFactory __pyx_api_f_4lxml_5etree_elementTreeFactory -static struct LxmlElementTree *(*__pyx_api_f_4lxml_5etree_newElementTree)(struct LxmlElement *, PyObject *) = 0; -#define newElementTree __pyx_api_f_4lxml_5etree_newElementTree -static struct LxmlElementTree *(*__pyx_api_f_4lxml_5etree_adoptExternalDocument)(xmlDoc *, PyObject *, int) = 0; -#define adoptExternalDocument __pyx_api_f_4lxml_5etree_adoptExternalDocument -static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_elementFactory)(struct LxmlDocument *, xmlNode *) = 0; -#define elementFactory __pyx_api_f_4lxml_5etree_elementFactory -static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_makeElement)(PyObject *, struct LxmlDocument *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *) = 0; -#define makeElement __pyx_api_f_4lxml_5etree_makeElement -static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_makeSubElement)(struct LxmlElement *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *) = 0; 
-#define makeSubElement __pyx_api_f_4lxml_5etree_makeSubElement -static void (*__pyx_api_f_4lxml_5etree_setElementClassLookupFunction)(_element_class_lookup_function, PyObject *) = 0; -#define setElementClassLookupFunction __pyx_api_f_4lxml_5etree_setElementClassLookupFunction -static PyObject *(*__pyx_api_f_4lxml_5etree_lookupDefaultElementClass)(PyObject *, PyObject *, xmlNode *) = 0; -#define lookupDefaultElementClass __pyx_api_f_4lxml_5etree_lookupDefaultElementClass -static PyObject *(*__pyx_api_f_4lxml_5etree_lookupNamespaceElementClass)(PyObject *, PyObject *, xmlNode *) = 0; -#define lookupNamespaceElementClass __pyx_api_f_4lxml_5etree_lookupNamespaceElementClass -static PyObject *(*__pyx_api_f_4lxml_5etree_callLookupFallback)(struct LxmlFallbackElementClassLookup *, struct LxmlDocument *, xmlNode *) = 0; -#define callLookupFallback __pyx_api_f_4lxml_5etree_callLookupFallback -static int (*__pyx_api_f_4lxml_5etree_tagMatches)(xmlNode *, const xmlChar *, const xmlChar *) = 0; -#define tagMatches __pyx_api_f_4lxml_5etree_tagMatches -static struct LxmlDocument *(*__pyx_api_f_4lxml_5etree_documentOrRaise)(PyObject *) = 0; -#define documentOrRaise __pyx_api_f_4lxml_5etree_documentOrRaise -static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_rootNodeOrRaise)(PyObject *) = 0; -#define rootNodeOrRaise __pyx_api_f_4lxml_5etree_rootNodeOrRaise -static int (*__pyx_api_f_4lxml_5etree_hasText)(xmlNode *) = 0; -#define hasText __pyx_api_f_4lxml_5etree_hasText -static int (*__pyx_api_f_4lxml_5etree_hasTail)(xmlNode *) = 0; -#define hasTail __pyx_api_f_4lxml_5etree_hasTail -static PyObject *(*__pyx_api_f_4lxml_5etree_textOf)(xmlNode *) = 0; -#define textOf __pyx_api_f_4lxml_5etree_textOf -static PyObject *(*__pyx_api_f_4lxml_5etree_tailOf)(xmlNode *) = 0; -#define tailOf __pyx_api_f_4lxml_5etree_tailOf -static int (*__pyx_api_f_4lxml_5etree_setNodeText)(xmlNode *, PyObject *) = 0; -#define setNodeText __pyx_api_f_4lxml_5etree_setNodeText -static int 
(*__pyx_api_f_4lxml_5etree_setTailText)(xmlNode *, PyObject *) = 0; -#define setTailText __pyx_api_f_4lxml_5etree_setTailText -static PyObject *(*__pyx_api_f_4lxml_5etree_attributeValue)(xmlNode *, xmlAttr *) = 0; -#define attributeValue __pyx_api_f_4lxml_5etree_attributeValue -static PyObject *(*__pyx_api_f_4lxml_5etree_attributeValueFromNsName)(xmlNode *, const xmlChar *, const xmlChar *) = 0; -#define attributeValueFromNsName __pyx_api_f_4lxml_5etree_attributeValueFromNsName -static PyObject *(*__pyx_api_f_4lxml_5etree_getAttributeValue)(struct LxmlElement *, PyObject *, PyObject *) = 0; -#define getAttributeValue __pyx_api_f_4lxml_5etree_getAttributeValue -static PyObject *(*__pyx_api_f_4lxml_5etree_iterattributes)(struct LxmlElement *, int) = 0; -#define iterattributes __pyx_api_f_4lxml_5etree_iterattributes -static PyObject *(*__pyx_api_f_4lxml_5etree_collectAttributes)(xmlNode *, int) = 0; -#define collectAttributes __pyx_api_f_4lxml_5etree_collectAttributes -static int (*__pyx_api_f_4lxml_5etree_setAttributeValue)(struct LxmlElement *, PyObject *, PyObject *) = 0; -#define setAttributeValue __pyx_api_f_4lxml_5etree_setAttributeValue -static int (*__pyx_api_f_4lxml_5etree_delAttribute)(struct LxmlElement *, PyObject *) = 0; -#define delAttribute __pyx_api_f_4lxml_5etree_delAttribute -static int (*__pyx_api_f_4lxml_5etree_delAttributeFromNsName)(xmlNode *, const xmlChar *, const xmlChar *) = 0; -#define delAttributeFromNsName __pyx_api_f_4lxml_5etree_delAttributeFromNsName -static int (*__pyx_api_f_4lxml_5etree_hasChild)(xmlNode *) = 0; -#define hasChild __pyx_api_f_4lxml_5etree_hasChild -static xmlNode *(*__pyx_api_f_4lxml_5etree_findChild)(xmlNode *, Py_ssize_t) = 0; -#define findChild __pyx_api_f_4lxml_5etree_findChild -static xmlNode *(*__pyx_api_f_4lxml_5etree_findChildForwards)(xmlNode *, Py_ssize_t) = 0; -#define findChildForwards __pyx_api_f_4lxml_5etree_findChildForwards -static xmlNode *(*__pyx_api_f_4lxml_5etree_findChildBackwards)(xmlNode *, 
Py_ssize_t) = 0; -#define findChildBackwards __pyx_api_f_4lxml_5etree_findChildBackwards -static xmlNode *(*__pyx_api_f_4lxml_5etree_nextElement)(xmlNode *) = 0; -#define nextElement __pyx_api_f_4lxml_5etree_nextElement -static xmlNode *(*__pyx_api_f_4lxml_5etree_previousElement)(xmlNode *) = 0; -#define previousElement __pyx_api_f_4lxml_5etree_previousElement -static void (*__pyx_api_f_4lxml_5etree_appendChild)(struct LxmlElement *, struct LxmlElement *) = 0; -#define appendChild __pyx_api_f_4lxml_5etree_appendChild -static int (*__pyx_api_f_4lxml_5etree_appendChildToElement)(struct LxmlElement *, struct LxmlElement *) = 0; -#define appendChildToElement __pyx_api_f_4lxml_5etree_appendChildToElement -static PyObject *(*__pyx_api_f_4lxml_5etree_pyunicode)(const xmlChar *) = 0; -#define pyunicode __pyx_api_f_4lxml_5etree_pyunicode -static PyObject *(*__pyx_api_f_4lxml_5etree_utf8)(PyObject *) = 0; -#define utf8 __pyx_api_f_4lxml_5etree_utf8 -static PyObject *(*__pyx_api_f_4lxml_5etree_getNsTag)(PyObject *) = 0; -#define getNsTag __pyx_api_f_4lxml_5etree_getNsTag -static PyObject *(*__pyx_api_f_4lxml_5etree_getNsTagWithEmptyNs)(PyObject *) = 0; -#define getNsTagWithEmptyNs __pyx_api_f_4lxml_5etree_getNsTagWithEmptyNs -static PyObject *(*__pyx_api_f_4lxml_5etree_namespacedName)(xmlNode *) = 0; -#define namespacedName __pyx_api_f_4lxml_5etree_namespacedName -static PyObject *(*__pyx_api_f_4lxml_5etree_namespacedNameFromNsName)(const xmlChar *, const xmlChar *) = 0; -#define namespacedNameFromNsName __pyx_api_f_4lxml_5etree_namespacedNameFromNsName -static void (*__pyx_api_f_4lxml_5etree_iteratorStoreNext)(struct LxmlElementIterator *, struct LxmlElement *) = 0; -#define iteratorStoreNext __pyx_api_f_4lxml_5etree_iteratorStoreNext -static void (*__pyx_api_f_4lxml_5etree_initTagMatch)(struct LxmlElementTagMatcher *, PyObject *) = 0; -#define initTagMatch __pyx_api_f_4lxml_5etree_initTagMatch -static xmlNs *(*__pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix)(struct 
LxmlDocument *, xmlNode *, const xmlChar *, const xmlChar *) = 0; -#define findOrBuildNodeNsPrefix __pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix -#if !defined(__Pyx_PyIdentifier_FromString) -#if PY_MAJOR_VERSION < 3 - #define __Pyx_PyIdentifier_FromString(s) PyString_FromString(s) -#else - #define __Pyx_PyIdentifier_FromString(s) PyUnicode_FromString(s) -#endif -#endif - -#ifndef __PYX_HAVE_RT_ImportFunction -#define __PYX_HAVE_RT_ImportFunction -static int __Pyx_ImportFunction(PyObject *module, const char *funcname, void (**f)(void), const char *sig) { - PyObject *d = 0; - PyObject *cobj = 0; - union { - void (*fp)(void); - void *p; - } tmp; - d = PyObject_GetAttrString(module, (char *)"__pyx_capi__"); - if (!d) - goto bad; - cobj = PyDict_GetItemString(d, funcname); - if (!cobj) { - PyErr_Format(PyExc_ImportError, - "%.200s does not export expected C function %.200s", - PyModule_GetName(module), funcname); - goto bad; - } -#if PY_VERSION_HEX >= 0x02070000 - if (!PyCapsule_IsValid(cobj, sig)) { - PyErr_Format(PyExc_TypeError, - "C function %.200s.%.200s has wrong signature (expected %.500s, got %.500s)", - PyModule_GetName(module), funcname, sig, PyCapsule_GetName(cobj)); - goto bad; - } - tmp.p = PyCapsule_GetPointer(cobj, sig); -#else - {const char *desc, *s1, *s2; - desc = (const char *)PyCObject_GetDesc(cobj); - if (!desc) - goto bad; - s1 = desc; s2 = sig; - while (*s1 != '\0' && *s1 == *s2) { s1++; s2++; } - if (*s1 != *s2) { - PyErr_Format(PyExc_TypeError, - "C function %.200s.%.200s has wrong signature (expected %.500s, got %.500s)", - PyModule_GetName(module), funcname, sig, desc); - goto bad; - } - tmp.p = PyCObject_AsVoidPtr(cobj);} -#endif - *f = tmp.fp; - if (!(*f)) - goto bad; - Py_DECREF(d); - return 0; -bad: - Py_XDECREF(d); - return -1; -} -#endif - - -static int import_lxml__etree(void) { - PyObject *module = 0; - module = PyImport_ImportModule("lxml.etree"); - if (!module) goto bad; - if (__Pyx_ImportFunction(module, "deepcopyNodeToDocument", 
(void (**)(void))&__pyx_api_f_4lxml_5etree_deepcopyNodeToDocument, "struct LxmlElement *(struct LxmlDocument *, xmlNode *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "elementTreeFactory", (void (**)(void))&__pyx_api_f_4lxml_5etree_elementTreeFactory, "struct LxmlElementTree *(struct LxmlElement *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "newElementTree", (void (**)(void))&__pyx_api_f_4lxml_5etree_newElementTree, "struct LxmlElementTree *(struct LxmlElement *, PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "adoptExternalDocument", (void (**)(void))&__pyx_api_f_4lxml_5etree_adoptExternalDocument, "struct LxmlElementTree *(xmlDoc *, PyObject *, int)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "elementFactory", (void (**)(void))&__pyx_api_f_4lxml_5etree_elementFactory, "struct LxmlElement *(struct LxmlDocument *, xmlNode *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "makeElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_makeElement, "struct LxmlElement *(PyObject *, struct LxmlDocument *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "makeSubElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_makeSubElement, "struct LxmlElement *(struct LxmlElement *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "setElementClassLookupFunction", (void (**)(void))&__pyx_api_f_4lxml_5etree_setElementClassLookupFunction, "void (_element_class_lookup_function, PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "lookupDefaultElementClass", (void (**)(void))&__pyx_api_f_4lxml_5etree_lookupDefaultElementClass, "PyObject *(PyObject *, PyObject *, xmlNode *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "lookupNamespaceElementClass", (void (**)(void))&__pyx_api_f_4lxml_5etree_lookupNamespaceElementClass, "PyObject *(PyObject *, PyObject *, xmlNode *)") < 0) goto bad; - if 
(__Pyx_ImportFunction(module, "callLookupFallback", (void (**)(void))&__pyx_api_f_4lxml_5etree_callLookupFallback, "PyObject *(struct LxmlFallbackElementClassLookup *, struct LxmlDocument *, xmlNode *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "tagMatches", (void (**)(void))&__pyx_api_f_4lxml_5etree_tagMatches, "int (xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "documentOrRaise", (void (**)(void))&__pyx_api_f_4lxml_5etree_documentOrRaise, "struct LxmlDocument *(PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "rootNodeOrRaise", (void (**)(void))&__pyx_api_f_4lxml_5etree_rootNodeOrRaise, "struct LxmlElement *(PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "hasText", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasText, "int (xmlNode *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "hasTail", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasTail, "int (xmlNode *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "textOf", (void (**)(void))&__pyx_api_f_4lxml_5etree_textOf, "PyObject *(xmlNode *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "tailOf", (void (**)(void))&__pyx_api_f_4lxml_5etree_tailOf, "PyObject *(xmlNode *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "setNodeText", (void (**)(void))&__pyx_api_f_4lxml_5etree_setNodeText, "int (xmlNode *, PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "setTailText", (void (**)(void))&__pyx_api_f_4lxml_5etree_setTailText, "int (xmlNode *, PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "attributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_attributeValue, "PyObject *(xmlNode *, xmlAttr *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "attributeValueFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_attributeValueFromNsName, "PyObject *(xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "getAttributeValue", (void 
(**)(void))&__pyx_api_f_4lxml_5etree_getAttributeValue, "PyObject *(struct LxmlElement *, PyObject *, PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "iterattributes", (void (**)(void))&__pyx_api_f_4lxml_5etree_iterattributes, "PyObject *(struct LxmlElement *, int)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "collectAttributes", (void (**)(void))&__pyx_api_f_4lxml_5etree_collectAttributes, "PyObject *(xmlNode *, int)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "setAttributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_setAttributeValue, "int (struct LxmlElement *, PyObject *, PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "delAttribute", (void (**)(void))&__pyx_api_f_4lxml_5etree_delAttribute, "int (struct LxmlElement *, PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "delAttributeFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_delAttributeFromNsName, "int (xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "hasChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasChild, "int (xmlNode *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "findChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChild, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "findChildForwards", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChildForwards, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "findChildBackwards", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChildBackwards, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "nextElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_nextElement, "xmlNode *(xmlNode *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "previousElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_previousElement, "xmlNode *(xmlNode *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "appendChild", (void 
(**)(void))&__pyx_api_f_4lxml_5etree_appendChild, "void (struct LxmlElement *, struct LxmlElement *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "appendChildToElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_appendChildToElement, "int (struct LxmlElement *, struct LxmlElement *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "pyunicode", (void (**)(void))&__pyx_api_f_4lxml_5etree_pyunicode, "PyObject *(const xmlChar *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "utf8", (void (**)(void))&__pyx_api_f_4lxml_5etree_utf8, "PyObject *(PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "getNsTag", (void (**)(void))&__pyx_api_f_4lxml_5etree_getNsTag, "PyObject *(PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "getNsTagWithEmptyNs", (void (**)(void))&__pyx_api_f_4lxml_5etree_getNsTagWithEmptyNs, "PyObject *(PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "namespacedName", (void (**)(void))&__pyx_api_f_4lxml_5etree_namespacedName, "PyObject *(xmlNode *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "namespacedNameFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_namespacedNameFromNsName, "PyObject *(const xmlChar *, const xmlChar *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "iteratorStoreNext", (void (**)(void))&__pyx_api_f_4lxml_5etree_iteratorStoreNext, "void (struct LxmlElementIterator *, struct LxmlElement *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "initTagMatch", (void (**)(void))&__pyx_api_f_4lxml_5etree_initTagMatch, "void (struct LxmlElementTagMatcher *, PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "findOrBuildNodeNsPrefix", (void (**)(void))&__pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix, "xmlNs *(struct LxmlDocument *, xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad; - Py_DECREF(module); module = 0; - return 0; - bad: - Py_XDECREF(module); - return -1; -} - -#endif /* !__PYX_HAVE_API__lxml__etree */ diff --git 
a/libs/lxml/html/ElementSoup.py b/libs/lxml/html/ElementSoup.py deleted file mode 100644 index 8e4fde13c..000000000 --- a/libs/lxml/html/ElementSoup.py +++ /dev/null @@ -1,10 +0,0 @@ -__doc__ = """Legacy interface to the BeautifulSoup HTML parser. -""" - -__all__ = ["parse", "convert_tree"] - -from soupparser import convert_tree, parse as _parse - -def parse(file, beautifulsoup=None, makeelement=None): - root = _parse(file, beautifulsoup=beautifulsoup, makeelement=makeelement) - return root.getroot() diff --git a/libs/lxml/html/__init__.py b/libs/lxml/html/__init__.py deleted file mode 100644 index 5751f7097..000000000 --- a/libs/lxml/html/__init__.py +++ /dev/null @@ -1,1926 +0,0 @@ -# Copyright (c) 2004 Ian Bicking. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in -# the documentation and/or other materials provided with the -# distribution. -# -# 3. Neither the name of Ian Bicking nor the names of its contributors may -# be used to endorse or promote products derived from this software -# without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL IAN BICKING OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -"""The ``lxml.html`` tool set for HTML handling. -""" - -from __future__ import absolute_import - -__all__ = [ - 'document_fromstring', 'fragment_fromstring', 'fragments_fromstring', 'fromstring', - 'tostring', 'Element', 'defs', 'open_in_browser', 'submit_form', - 'find_rel_links', 'find_class', 'make_links_absolute', - 'resolve_base_href', 'iterlinks', 'rewrite_links', 'open_in_browser', 'parse'] - - -import copy -import sys -import re -from functools import partial - -try: - from collections.abc import MutableMapping, MutableSet -except ImportError: - from collections import MutableMapping, MutableSet - -from .. import etree -from . 
import defs -from ._setmixin import SetMixin - -try: - from urlparse import urljoin -except ImportError: - # Python 3 - from urllib.parse import urljoin - -try: - unicode -except NameError: - # Python 3 - unicode = str -try: - basestring -except NameError: - # Python 3 - basestring = (str, bytes) - - -def __fix_docstring(s): - if not s: - return s - if sys.version_info[0] >= 3: - sub = re.compile(r"^(\s*)u'", re.M).sub - else: - sub = re.compile(r"^(\s*)b'", re.M).sub - return sub(r"\1'", s) - - -XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml" - -_rel_links_xpath = etree.XPath("descendant-or-self::a[@rel]|descendant-or-self::x:a[@rel]", - namespaces={'x':XHTML_NAMESPACE}) -_options_xpath = etree.XPath("descendant-or-self::option|descendant-or-self::x:option", - namespaces={'x':XHTML_NAMESPACE}) -_forms_xpath = etree.XPath("descendant-or-self::form|descendant-or-self::x:form", - namespaces={'x':XHTML_NAMESPACE}) -#_class_xpath = etree.XPath(r"descendant-or-self::*[regexp:match(@class, concat('\b', $class_name, '\b'))]", {'regexp': 'http://exslt.org/regular-expressions'}) -_class_xpath = etree.XPath("descendant-or-self::*[@class and contains(concat(' ', normalize-space(@class), ' '), concat(' ', $class_name, ' '))]") -_id_xpath = etree.XPath("descendant-or-self::*[@id=$id]") -_collect_string_content = etree.XPath("string()") -_iter_css_urls = re.compile(r'url\(('+'["][^"]*["]|'+"['][^']*[']|"+r'[^)]*)\)', re.I).finditer -_iter_css_imports = re.compile(r'@import "(.*?)"').finditer -_label_xpath = etree.XPath("//label[@for=$id]|//x:label[@for=$id]", - namespaces={'x':XHTML_NAMESPACE}) -_archive_re = re.compile(r'[^ ]+') -_parse_meta_refresh_url = re.compile( - r'[^;=]*;\s*(?:url\s*=\s*)?(?P.*)$', re.I).search - - -def _unquote_match(s, pos): - if s[:1] == '"' and s[-1:] == '"' or s[:1] == "'" and s[-1:] == "'": - return s[1:-1], pos+1 - else: - return s,pos - - -def _transform_result(typ, result): - """Convert the result back into the input type. 
- """ - if issubclass(typ, bytes): - return tostring(result, encoding='utf-8') - elif issubclass(typ, unicode): - return tostring(result, encoding='unicode') - else: - return result - - -def _nons(tag): - if isinstance(tag, basestring): - if tag[0] == '{' and tag[1:len(XHTML_NAMESPACE)+1] == XHTML_NAMESPACE: - return tag.split('}')[-1] - return tag - - -class Classes(MutableSet): - """Provides access to an element's class attribute as a set-like collection. - Usage:: - - >>> el = fromstring('') - >>> classes = el.classes # or: classes = Classes(el.attrib) - >>> classes |= ['block', 'paragraph'] - >>> el.get('class') - 'hidden large block paragraph' - >>> classes.toggle('hidden') - False - >>> el.get('class') - 'large block paragraph' - >>> classes -= ('some', 'classes', 'block') - >>> el.get('class') - 'large paragraph' - """ - def __init__(self, attributes): - self._attributes = attributes - self._get_class_value = partial(attributes.get, 'class', '') - - def add(self, value): - """ - Add a class. - - This has no effect if the class is already present. - """ - if not value or re.search(r'\s', value): - raise ValueError("Invalid class name: %r" % value) - classes = self._get_class_value().split() - if value in classes: - return - classes.append(value) - self._attributes['class'] = ' '.join(classes) - - def discard(self, value): - """ - Remove a class if it is currently present. - - If the class is not present, do nothing. - """ - if not value or re.search(r'\s', value): - raise ValueError("Invalid class name: %r" % value) - classes = [name for name in self._get_class_value().split() - if name != value] - if classes: - self._attributes['class'] = ' '.join(classes) - elif 'class' in self._attributes: - del self._attributes['class'] - - def remove(self, value): - """ - Remove a class; it must currently be present. - - If the class is not present, raise a KeyError. 
- """ - if not value or re.search(r'\s', value): - raise ValueError("Invalid class name: %r" % value) - super(Classes, self).remove(value) - - def __contains__(self, name): - classes = self._get_class_value() - return name in classes and name in classes.split() - - def __iter__(self): - return iter(self._get_class_value().split()) - - def __len__(self): - return len(self._get_class_value().split()) - - # non-standard methods - - def update(self, values): - """ - Add all names from 'values'. - """ - classes = self._get_class_value().split() - extended = False - for value in values: - if value not in classes: - classes.append(value) - extended = True - if extended: - self._attributes['class'] = ' '.join(classes) - - def toggle(self, value): - """ - Add a class name if it isn't there yet, or remove it if it exists. - - Returns true if the class was added (and is now enabled) and - false if it was removed (and is now disabled). - """ - if not value or re.search(r'\s', value): - raise ValueError("Invalid class name: %r" % value) - classes = self._get_class_value().split() - try: - classes.remove(value) - enabled = False - except ValueError: - classes.append(value) - enabled = True - if classes: - self._attributes['class'] = ' '.join(classes) - else: - del self._attributes['class'] - return enabled - - -class HtmlMixin(object): - - def set(self, key, value=None): - """set(self, key, value=None) - - Sets an element attribute. If no value is provided, or if the value is None, - creates a 'boolean' attribute without value, e.g. "
" - for ``form.set('novalidate')``. - """ - super(HtmlElement, self).set(key, value) - - @property - def classes(self): - """ - A set-like wrapper around the 'class' attribute. - """ - return Classes(self.attrib) - - @classes.setter - def classes(self, classes): - assert isinstance(classes, Classes) # only allow "el.classes |= ..." etc. - value = classes._get_class_value() - if value: - self.set('class', value) - elif self.get('class') is not None: - del self.attrib['class'] - - @property - def base_url(self): - """ - Returns the base URL, given when the page was parsed. - - Use with ``urlparse.urljoin(el.base_url, href)`` to get - absolute URLs. - """ - return self.getroottree().docinfo.URL - - @property - def forms(self): - """ - Return a list of all the forms - """ - return _forms_xpath(self) - - @property - def body(self): - """ - Return the element. Can be called from a child element - to get the document's head. - """ - return self.xpath('//body|//x:body', namespaces={'x':XHTML_NAMESPACE})[0] - - @property - def head(self): - """ - Returns the element. Can be called from a child - element to get the document's head. - """ - return self.xpath('//head|//x:head', namespaces={'x':XHTML_NAMESPACE})[0] - - @property - def label(self): - """ - Get or set any