diff --git a/bazarr/get_subtitle.py b/bazarr/get_subtitle.py
index ea79bcb58..7d5a53b73 100644
--- a/bazarr/get_subtitle.py
+++ b/bazarr/get_subtitle.py
@@ -15,7 +15,7 @@ import subliminal
import subliminal_patch
from datetime import datetime, timedelta
from subzero.language import Language
-from subzero.video import parse_video
+from subzero.video import parse_video, refine_video
from subliminal import region, score as subliminal_scores, \
list_subtitles
from subliminal_patch.core import SZAsyncProviderPool, download_best_subtitles, save_subtitles, download_subtitles
@@ -63,6 +63,7 @@ def get_video(path, title, sceneName, use_scenename, providers=None, media_type=
video.used_scene_name = dont_use_actual_file
video.original_name = original_name
video.original_path = original_path
+ refine_video(video)
return video
except:
diff --git a/libs/lxml/ElementInclude.py b/libs/lxml/ElementInclude.py
new file mode 100644
index 000000000..8badf8b44
--- /dev/null
+++ b/libs/lxml/ElementInclude.py
@@ -0,0 +1,219 @@
+#
+# ElementTree
+# $Id: ElementInclude.py 1862 2004-06-18 07:31:02Z Fredrik $
+#
+# limited xinclude support for element trees
+#
+# history:
+# 2003-08-15 fl created
+# 2003-11-14 fl fixed default loader
+#
+# Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved.
+#
+# fredrik@pythonware.com
+# http://www.pythonware.com
+#
+# --------------------------------------------------------------------
+# The ElementTree toolkit is
+#
+# Copyright (c) 1999-2004 by Fredrik Lundh
+#
+# By obtaining, using, and/or copying this software and/or its
+# associated documentation, you agree that you have read, understood,
+# and will comply with the following terms and conditions:
+#
+# Permission to use, copy, modify, and distribute this software and
+# its associated documentation for any purpose and without fee is
+# hereby granted, provided that the above copyright notice appears in
+# all copies, and that both that copyright notice and this permission
+# notice appear in supporting documentation, and that the name of
+# Secret Labs AB or the author not be used in advertising or publicity
+# pertaining to distribution of the software without specific, written
+# prior permission.
+#
+# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
+# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
+# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
+# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
+# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+# OF THIS SOFTWARE.
+# --------------------------------------------------------------------
+
+"""
+Limited XInclude support for the ElementTree package.
+
+While lxml.etree has full support for XInclude (see
+`etree.ElementTree.xinclude()`), this module provides a simpler, pure
+Python, ElementTree compatible implementation that supports a simple
+form of custom URL resolvers.
+"""
+
+from lxml import etree
+try:
+ from urlparse import urljoin
+ from urllib2 import urlopen
+except ImportError:
+ # Python 3
+ from urllib.parse import urljoin
+ from urllib.request import urlopen
+
+# Namespace prefix, in "{uri}" form, shared by all XInclude element tags.
+XINCLUDE = "{http://www.w3.org/2001/XInclude}"
+
+# Fully qualified tag names checked while expanding includes.
+XINCLUDE_INCLUDE = XINCLUDE + "include"
+XINCLUDE_FALLBACK = XINCLUDE + "fallback"
+# Wildcard tag passed to elem.iter() to visit every element in the namespace.
+XINCLUDE_ITER_TAG = XINCLUDE + "*"
+
+##
+# Fatal include error.
+
+class FatalIncludeError(etree.LxmlSyntaxError):
+ """Raised by ``_include`` for malformed or unresolvable XInclude directives.
+
+ Cases raised in this module: recursive include, loader returning None,
+ unknown ``parse`` value, misplaced ``xi:fallback`` and unknown elements
+ in the XInclude namespace.
+ """
+ pass
+
+##
+# ET compatible default loader.
+# This loader reads an included resource from disk.
+#
+# @param href Resource reference.
+# @param parse Parse mode. Either "xml" or "text".
+# @param encoding Optional text encoding.
+# @return The expanded resource. If the parse mode is "xml", this
+# is an ElementTree instance. If the parse mode is "text", this
+# is a Unicode string. If the loader fails, it can return None
+# or raise an IOError exception.
+# @throws IOError If the loader fails to load the resource.
+
+def default_loader(href, parse, encoding=None):
+    """Load an included resource from the local file system.
+
+    ElementTree-compatible loader: ``href`` is opened as a file path in
+    binary mode.
+
+    :param href: path of the resource to load.
+    :param parse: ``"xml"`` to parse the file and return its root element;
+        any other value reads the file as text.
+    :param encoding: text encoding for non-XML content; UTF-8 when falsy.
+    :return: the root element for ``"xml"``, otherwise the decoded text.
+    :raises IOError: if the file cannot be opened.
+    """
+    # The context manager releases the handle even when parsing or reading
+    # raises, not only on the success path.
+    with open(href, 'rb') as f:
+        if parse == "xml":
+            return etree.parse(f).getroot()
+        data = f.read()
+    return data.decode(encoding or 'utf-8')
+
+##
+# Default loader used by lxml.etree - handles custom resolvers properly
+#
+
+def _lxml_default_loader(href, parse, encoding=None, parser=None):
+    """Load an included resource, delegating XML parsing to ``parser``.
+
+    :param href: file path or URL of the resource.
+    :param parse: ``"xml"`` to parse ``href`` with ``etree.parse`` and
+        return the root element; any other value loads text.
+    :param encoding: text encoding for non-XML content; UTF-8 when falsy.
+    :param parser: parser handed to ``etree.parse`` for XML includes.
+    :return: the root element for ``"xml"``, otherwise the decoded text.
+    """
+    if parse == "xml":
+        return etree.parse(href, parser).getroot()
+
+    # Anything containing "://" is fetched with urlopen(); everything else
+    # is treated as a local file path.
+    f = urlopen(href) if "://" in href else open(href, 'rb')
+    try:
+        data = f.read()
+    finally:
+        # try/finally rather than ``with``: the urllib2 fallback's response
+        # objects are not context managers.  Runs when read() raises, too.
+        f.close()
+    return data.decode(encoding or 'utf-8')
+
+##
+# Wrapper for ET compatibility - drops the parser
+
+def _wrap_et_loader(loader):
+    """Adapt an ElementTree-style loader to the internal loader signature.
+
+    The returned callable accepts the extra ``parser`` keyword and discards
+    it, forwarding only ``href``, ``parse`` and ``encoding`` (positionally)
+    to ``loader``.
+    """
+    def load(href, parse, encoding=None, parser=None):
+        # ``parser`` is dropped on purpose: ET loaders do not take one.
+        loaded = loader(href, parse, encoding)
+        return loaded
+    return load
+
+
+##
+# Expand XInclude directives.
+#
+# @param elem Root element.
+# @param loader Optional resource loader. If omitted, it defaults
+# to {@link default_loader}. If given, it should be a callable
+# that implements the same interface as default_loader.
+# @param base_url The base URL of the original file, to resolve
+# relative include file references.
+# @throws FatalIncludeError If the function fails to include a given
+# resource, or if the tree contains malformed XInclude elements.
+# @throws IOError If the function fails to load a given resource.
+# @returns None. The tree is modified in place; a replacement produced for
+# an XInclude root element is not handed back to the caller.
+
+def include(elem, loader=None, base_url=None):
+ """Expand the XInclude directives found below ``elem``.
+
+ ``elem`` may be an element or a tree object (anything exposing
+ ``getroot``).  ``loader`` and ``base_url`` are forwarded to ``_include``.
+ The function has no ``return`` statement, so it returns ``None``; the
+ value returned by ``_include`` is discarded.
+ """
+ if base_url is None:
+ if hasattr(elem, 'getroot'):
+ tree = elem
+ elem = elem.getroot()
+ else:
+ tree = elem.getroottree()
+ # No explicit base URL: use the URL recorded for the document, if any.
+ if hasattr(tree, 'docinfo'):
+ base_url = tree.docinfo.URL
+ elif hasattr(elem, 'getroot'):
+ elem = elem.getroot()
+ _include(elem, loader, base_url=base_url)
+
+def _include(elem, loader=None, _parent_hrefs=None, base_url=None):
+ """Expand XInclude elements below ``elem`` and return the resulting node.
+
+ ``loader`` is an optional ET-style loader; when omitted the lxml default
+ loader is used.  ``_parent_hrefs`` is the set of hrefs already loaded as
+ XML, shared with recursive calls to detect include cycles.  ``base_url``
+ is joined with each ``href`` attribute.
+
+ Returns ``elem``, or the loaded node/text when the processed include
+ element has no parent.  Raises ``FatalIncludeError`` for recursive
+ includes, failed loads, unknown ``parse`` values, misplaced
+ ``xi:fallback`` elements and unknown elements in the XInclude namespace.
+ """
+ if loader is not None:
+ load_include = _wrap_et_loader(loader)
+ else:
+ load_include = _lxml_default_loader
+
+ if _parent_hrefs is None:
+ _parent_hrefs = set()
+
+ parser = elem.getroottree().parser
+
+ # Materialise the matches first: the loop below mutates the tree.
+ include_elements = list(
+ elem.iter(XINCLUDE_ITER_TAG))
+
+ for e in include_elements:
+ if e.tag == XINCLUDE_INCLUDE:
+ # process xinclude directive
+ href = urljoin(base_url, e.get("href"))
+ parse = e.get("parse", "xml")
+ parent = e.getparent()
+ if parse == "xml":
+ if href in _parent_hrefs:
+ raise FatalIncludeError(
+ "recursive include of %r detected" % href
+ )
+ # NOTE(review): hrefs are only ever added to this set, never removed,
+ # so including the same resource twice (not nested) looks like it
+ # would also be reported as recursive - confirm intended.
+ _parent_hrefs.add(href)
+ node = load_include(href, parse, parser=parser)
+ if node is None:
+ raise FatalIncludeError(
+ "cannot load %r as %r" % (href, parse)
+ )
+ # Expand includes inside the loaded document; base_url is not passed on.
+ node = _include(node, loader, _parent_hrefs)
+ if e.tail:
+ node.tail = (node.tail or "") + e.tail
+ if parent is None:
+ return node # replaced the root node!
+ parent.replace(e, node)
+ elif parse == "text":
+ text = load_include(href, parse, encoding=e.get("encoding"))
+ if text is None:
+ raise FatalIncludeError(
+ "cannot load %r as %r" % (href, parse)
+ )
+ # Splice the text after the previous sibling, or into the parent's
+ # text when the include element is the first child.
+ predecessor = e.getprevious()
+ if predecessor is not None:
+ predecessor.tail = (predecessor.tail or "") + text
+ elif parent is None:
+ return text # replaced the root node!
+ else:
+ parent.text = (parent.text or "") + text + (e.tail or "")
+ parent.remove(e)
+ else:
+ raise FatalIncludeError(
+ "unknown parse type in xi:include tag (%r)" % parse
+ )
+ elif e.tag == XINCLUDE_FALLBACK:
+ # Fallback elements are only validated here, never expanded.
+ parent = e.getparent()
+ if parent is not None and parent.tag != XINCLUDE_INCLUDE:
+ raise FatalIncludeError(
+ "xi:fallback tag must be child of xi:include (%r)" % e.tag
+ )
+ else:
+ raise FatalIncludeError(
+ "Invalid element found in XInclude namespace (%r)" % e.tag
+ )
+ return elem
diff --git a/libs/lxml/__init__.py b/libs/lxml/__init__.py
new file mode 100644
index 000000000..07cbe3a26
--- /dev/null
+++ b/libs/lxml/__init__.py
@@ -0,0 +1,20 @@
+# this is a package
+
+def get_include():
+    """
+    Return the header include paths (for lxml itself, libxml2 and libxslt)
+    needed to compile C code against lxml when it was built with
+    statically linked libraries.
+
+    The list holds the package's ``includes`` directory, the package
+    directory itself, then every sub-directory of ``includes`` in
+    ``os.listdir`` order.
+    """
+    import os
+
+    package_dir = __path__[0]
+    headers_dir = os.path.join(package_dir, 'includes')
+    candidates = [os.path.join(headers_dir, entry)
+                  for entry in os.listdir(headers_dir)]
+    return [headers_dir, package_dir] + [
+        candidate for candidate in candidates if os.path.isdir(candidate)]
+
diff --git a/libs/lxml/_elementpath.py b/libs/lxml/_elementpath.py
new file mode 100644
index 000000000..5462df6cb
--- /dev/null
+++ b/libs/lxml/_elementpath.py
@@ -0,0 +1,337 @@
+# cython: language_level=2
+
+#
+# ElementTree
+# $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $
+#
+# limited xpath support for element trees
+#
+# history:
+# 2003-05-23 fl created
+# 2003-05-28 fl added support for // etc
+# 2003-08-27 fl fixed parsing of periods in element names
+# 2007-09-10 fl new selection engine
+# 2007-09-12 fl fixed parent selector
+# 2007-09-13 fl added iterfind; changed findall to return a list
+# 2007-11-30 fl added namespaces support
+# 2009-10-30 fl added child element value filter
+#
+# Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved.
+#
+# fredrik@pythonware.com
+# http://www.pythonware.com
+#
+# --------------------------------------------------------------------
+# The ElementTree toolkit is
+#
+# Copyright (c) 1999-2009 by Fredrik Lundh
+#
+# By obtaining, using, and/or copying this software and/or its
+# associated documentation, you agree that you have read, understood,
+# and will comply with the following terms and conditions:
+#
+# Permission to use, copy, modify, and distribute this software and
+# its associated documentation for any purpose and without fee is
+# hereby granted, provided that the above copyright notice appears in
+# all copies, and that both that copyright notice and this permission
+# notice appear in supporting documentation, and that the name of
+# Secret Labs AB or the author not be used in advertising or publicity
+# pertaining to distribution of the software without specific, written
+# prior permission.
+#
+# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
+# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
+# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
+# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
+# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+# OF THIS SOFTWARE.
+# --------------------------------------------------------------------
+
+##
+# Implementation module for XPath support. There's usually no reason
+# to import this module directly; the ElementTree does this for
+# you, if needed.
+##
+
+from __future__ import absolute_import
+
+import re
+
+# Tokenizer for the supported path syntax.  findall() yields 2-tuples:
+# group 1 holds quoted strings and operators, group 2 holds names
+# (optionally prefixed with "{uri}"); whitespace matches neither group and
+# therefore shows up as ('', '').
+xpath_tokenizer_re = re.compile(
+ "("
+ "'[^']*'|\"[^\"]*\"|"
+ "::|"
+ "//?|"
+ r"\.\.|"
+ r"\(\)|"
+ r"[/.*:\[\]\(\)@=])|"
+ r"((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|"
+ r"\s+"
+ )
+
+def xpath_tokenizer(pattern, namespaces=None):
+ """Yield ``(operator, name)`` tokens for ``pattern``.
+
+ Names of the form ``prefix:local`` are rewritten to ``{uri}local`` using
+ ``namespaces``; unprefixed names get the default namespace (the entry
+ stored under the ``None`` key) when one is set.  Names already starting
+ with ``{`` and non-name tokens are passed through unchanged.
+
+ Raises ``SyntaxError`` when a prefix is not found in ``namespaces``.
+ """
+ default_namespace = namespaces.get(None) if namespaces else None
+ for token in xpath_tokenizer_re.findall(pattern):
+ tag = token[1]
+ if tag and tag[0] != "{":
+ if ":" in tag:
+ prefix, uri = tag.split(":", 1)
+ try:
+ # A missing/empty namespace map is reported like an unknown prefix.
+ if not namespaces:
+ raise KeyError
+ yield token[0], "{%s}%s" % (namespaces[prefix], uri)
+ except KeyError:
+ raise SyntaxError("prefix %r not found in prefix map" % prefix)
+ elif default_namespace:
+ yield token[0], "{%s}%s" % (default_namespace, tag)
+ else:
+ yield token
+ else:
+ yield token
+
+
+def prepare_child(next, token):
+    """Build a selector yielding the direct children named ``token[1]``."""
+    child_tag = token[1]
+
+    def select(result):
+        for parent in result:
+            for child in parent.iterchildren(child_tag):
+                yield child
+
+    return select
+
+def prepare_star(next, token):
+    """Build a selector yielding every direct child element (``*``)."""
+    def select(result):
+        for parent in result:
+            for child in parent.iterchildren('*'):
+                yield child
+
+    return select
+
+def prepare_self(next, token):
+    """Build the selector for ``.``: it hands back its input untouched."""
+    def select(matches):
+        return matches
+
+    return select
+
+def prepare_descendant(next, token):
+    """Build a selector for ``//name`` or ``//*``.
+
+    Consumes the token following ``//`` via ``next()``.  Raises
+    ``SyntaxError`` when that token is an operator other than ``*``.
+    """
+    following = next()
+    if following[0] == "*":
+        tag = "*"
+    elif following[0]:
+        raise SyntaxError("invalid descendant")
+    else:
+        tag = following[1]
+
+    def select(result):
+        for ancestor in result:
+            for descendant in ancestor.iterdescendants(tag):
+                yield descendant
+
+    return select
+
+def prepare_parent(next, token):
+    """Build the selector for ``..``: yields each element's parent.
+
+    Elements whose ``getparent()`` returns ``None`` contribute nothing.
+    """
+    def select(result):
+        for child in result:
+            owner = child.getparent()
+            if owner is None:
+                continue
+            yield owner
+
+    return select
+
+def prepare_predicate(next, token):
+ """Build a selector for a ``[...]`` predicate.
+
+ Tokens are consumed via ``next()`` up to the closing ``]``.  Each token
+ adds one character to ``signature`` (its operator, ``'`` for a quoted
+ string, or ``-`` for a name) and its text to ``predicate``; the
+ signature then selects the predicate type:
+
+ * ``@-``               [@attribute]
+ * ``@-='``             [@attribute='value']
+ * ``-`` (non-numeric)  [tag]
+ * ``.='`` / ``-='``    [.='value'] / [tag='value']
+ * ``-`` (numeric), ``-()``, ``-()-``  [index], [last()], [last()-index]
+
+ Raises ``SyntaxError`` for any other signature, for indices below 1 and
+ for functions other than ``last``.
+ """
+ # FIXME: replace with real parser!!! refs:
+ # http://effbot.org/zone/simple-iterator-parser.htm
+ # http://javascript.crockford.com/tdop/tdop.html
+ signature = ''
+ predicate = []
+ while 1:
+ token = next()
+ if token[0] == "]":
+ break
+ if token == ('', ''):
+ # ignore whitespace
+ continue
+ # Quoted literal: record it as "'" and strip the surrounding quotes.
+ if token[0] and token[0][:1] in "'\"":
+ token = "'", token[0][1:-1]
+ signature += token[0] or "-"
+ predicate.append(token[1])
+
+ # use signature to determine predicate type
+ if signature == "@-":
+ # [@attribute] predicate
+ key = predicate[1]
+ def select(result):
+ for elem in result:
+ if elem.get(key) is not None:
+ yield elem
+ return select
+ if signature == "@-='":
+ # [@attribute='value']
+ key = predicate[1]
+ value = predicate[-1]
+ def select(result):
+ for elem in result:
+ if elem.get(key) == value:
+ yield elem
+ return select
+ if signature == "-" and not re.match(r"-?\d+$", predicate[0]):
+ # [tag]
+ tag = predicate[0]
+ def select(result):
+ for elem in result:
+ # Yield elem once as soon as one matching child exists.
+ for _ in elem.iterchildren(tag):
+ yield elem
+ break
+ return select
+ if signature == ".='" or (signature == "-='" and not re.match(r"-?\d+$", predicate[0])):
+ # [.='value'] or [tag='value']
+ tag = predicate[0]
+ value = predicate[-1]
+ # For "." the recorded name is empty, so the element's own text is tested.
+ if tag:
+ def select(result):
+ for elem in result:
+ for e in elem.iterchildren(tag):
+ if "".join(e.itertext()) == value:
+ yield elem
+ break
+ else:
+ def select(result):
+ for elem in result:
+ if "".join(elem.itertext()) == value:
+ yield elem
+ return select
+ if signature == "-" or signature == "-()" or signature == "-()-":
+ # [index] or [last()] or [last()-index]
+ if signature == "-":
+ # [index]
+ index = int(predicate[0]) - 1
+ if index < 0:
+ if index == -1:
+ raise SyntaxError(
+ "indices in path predicates are 1-based, not 0-based")
+ else:
+ raise SyntaxError("path index >= 1 expected")
+ else:
+ if predicate[0] != "last":
+ raise SyntaxError("unsupported function")
+ if signature == "-()-":
+ try:
+ index = int(predicate[2]) - 1
+ except ValueError:
+ raise SyntaxError("unsupported expression")
+ else:
+ index = -1
+ def select(result):
+ for elem in result:
+ parent = elem.getparent()
+ if parent is None:
+ continue
+ try:
+ # FIXME: what if the selector is "*" ?
+ # Position is counted among same-tag siblings only.
+ elems = list(parent.iterchildren(elem.tag))
+ if elems[index] is elem:
+ yield elem
+ except IndexError:
+ pass
+ return select
+ raise SyntaxError("invalid predicate")
+
+# Dispatch table used by _build_path_iterator: the first item of a token
+# (empty for plain names) picks the factory that builds its selector.
+ops = {
+ "": prepare_child,
+ "*": prepare_star,
+ ".": prepare_self,
+ "..": prepare_parent,
+ "//": prepare_descendant,
+ "[": prepare_predicate,
+}
+
+
+# --------------------------------------------------------------------
+
+# Compiled selector lists keyed by (path, namespace items...); cleared
+# wholesale by _build_path_iterator once it holds more than 100 entries.
+_cache = {}
+
+
+def _build_path_iterator(path, namespaces):
+ """compile selector pattern
+
+ Returns the list of selector callables for ``path``, reusing a cached
+ list when the same path/namespace combination was compiled before.
+
+ Raises ``ValueError`` if ``namespaces`` contains the empty-string prefix
+ and ``SyntaxError`` for absolute, empty or otherwise invalid paths.
+ """
+ if path[-1:] == "/":
+ path += "*" # implicit all (FIXME: keep this?)
+
+ # The cache key combines the path with the namespace mapping, if any.
+ cache_key = (path,)
+ if namespaces:
+ if '' in namespaces:
+ raise ValueError("empty namespace prefix must be passed as None, not the empty string")
+ if None in namespaces:
+ # Keep the None key out of sorted(): it is prepended separately.
+ cache_key += (namespaces[None],) + tuple(sorted(
+ item for item in namespaces.items() if item[0] is not None))
+ else:
+ cache_key += tuple(sorted(namespaces.items()))
+
+ try:
+ return _cache[cache_key]
+ except KeyError:
+ pass
+ # Crude size bound: drop everything once the cache exceeds 100 entries.
+ if len(_cache) > 100:
+ _cache.clear()
+
+ if path[:1] == "/":
+ raise SyntaxError("cannot use absolute path on element")
+ stream = iter(xpath_tokenizer(path, namespaces))
+ try:
+ _next = stream.next
+ except AttributeError:
+ # Python 3
+ _next = stream.__next__
+ try:
+ token = _next()
+ except StopIteration:
+ raise SyntaxError("empty path expression")
+ selector = []
+ while 1:
+ try:
+ # The factory may pull further tokens from the stream via _next.
+ selector.append(ops[token[0]](_next, token))
+ except StopIteration:
+ raise SyntaxError("invalid path")
+ try:
+ token = _next()
+ # A single "/" only separates steps; skip to the next token.
+ if token[0] == "/":
+ token = _next()
+ except StopIteration:
+ break
+ _cache[cache_key] = selector
+ return selector
+
+
+##
+# Iterate over the matching nodes
+
+def iterfind(elem, path, namespaces=None):
+    """Return an iterator over the elements matching ``path`` from ``elem``.
+
+    Each compiled selector step is applied to the output of the previous
+    one, starting from a one-item iterator holding ``elem``.
+    """
+    steps = _build_path_iterator(path, namespaces)
+    matches = iter((elem,))
+    for step in steps:
+        matches = step(matches)
+    return matches
+
+
+##
+# Find first matching object.
+
+def find(elem, path, namespaces=None):
+    """Return the first element matching ``path``, or ``None`` if none does."""
+    return next(iterfind(elem, path, namespaces), None)
+
+
+##
+# Find all matching objects.
+
+def findall(elem, path, namespaces=None):
+    """Return all elements matching ``path`` as a list."""
+    matches = iterfind(elem, path, namespaces)
+    return list(matches)
+
+
+##
+# Find text for first matching object.
+
+def findtext(elem, path, default=None, namespaces=None):
+    """Return the text of the first element matching ``path``.
+
+    Returns ``default`` when nothing matches, and ``''`` when the match
+    has no (or empty) text.
+    """
+    match = find(elem, path, namespaces)
+    return default if match is None else (match.text or '')
diff --git a/libs/lxml/_elementpath.pyd b/libs/lxml/_elementpath.pyd
new file mode 100644
index 000000000..fdd9e6b25
Binary files /dev/null and b/libs/lxml/_elementpath.pyd differ
diff --git a/libs/lxml/builder.py b/libs/lxml/builder.py
new file mode 100644
index 000000000..a28884567
--- /dev/null
+++ b/libs/lxml/builder.py
@@ -0,0 +1,239 @@
+# cython: language_level=2
+
+#
+# Element generator factory by Fredrik Lundh.
+#
+# Source:
+# http://online.effbot.org/2006_11_01_archive.htm#et-builder
+# http://effbot.python-hosting.com/file/stuff/sandbox/elementlib/builder.py
+#
+# --------------------------------------------------------------------
+# The ElementTree toolkit is
+#
+# Copyright (c) 1999-2004 by Fredrik Lundh
+#
+# By obtaining, using, and/or copying this software and/or its
+# associated documentation, you agree that you have read, understood,
+# and will comply with the following terms and conditions:
+#
+# Permission to use, copy, modify, and distribute this software and
+# its associated documentation for any purpose and without fee is
+# hereby granted, provided that the above copyright notice appears in
+# all copies, and that both that copyright notice and this permission
+# notice appear in supporting documentation, and that the name of
+# Secret Labs AB or the author not be used in advertising or publicity
+# pertaining to distribution of the software without specific, written
+# prior permission.
+#
+# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
+# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
+# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
+# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
+# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+# OF THIS SOFTWARE.
+# --------------------------------------------------------------------
+
+"""
+The ``E`` Element factory for generating XML documents.
+"""
+
+from __future__ import absolute_import
+
+import lxml.etree as ET
+
+from functools import partial
+
+try:
+ basestring
+except NameError:
+ basestring = str
+
+try:
+ unicode
+except NameError:
+ unicode = str
+
+
+class ElementMaker(object):
+ """Element generator factory.
+
+ Unlike the ordinary Element factory, the E factory allows you to pass in
+ more than just a tag and some optional attributes; you can also pass in
+ text and other elements. The text is added as either text or tail
+ attributes, and elements are inserted at the right spot. Some small
+ examples::
+
+ >>> from lxml import etree as ET
+ >>> from lxml.builder import E
+
+ >>> ET.tostring(E("tag"))
+ '<tag/>'
+ >>> ET.tostring(E("tag", "text"))
+ '<tag>text</tag>'
+ >>> ET.tostring(E("tag", "text", key="value"))
+ '<tag key="value">text</tag>'
+ >>> ET.tostring(E("tag", E("subtag", "text"), "tail"))
+ '<tag><subtag>text</subtag>tail</tag>'
+
+ For simple tags, the factory also allows you to write ``E.tag(...)`` instead
+ of ``E('tag', ...)``::
+
+ >>> ET.tostring(E.tag())
+ '<tag/>'
+ >>> ET.tostring(E.tag("text"))
+ '<tag>text</tag>'
+ >>> ET.tostring(E.tag(E.subtag("text"), "tail"))
+ '<tag><subtag>text</subtag>tail</tag>'
+
+ Here's a somewhat larger example; this shows how to generate HTML
+ documents, using a mix of prepared factory functions for inline elements,
+ nested ``E.tag`` calls, and embedded XHTML fragments::
+
+ # some common inline elements
+ A = E.a
+ I = E.i
+ B = E.b
+
+ def CLASS(v):
+ # helper function, 'class' is a reserved word
+ return {'class': v}
+
+ page = (
+ E.html(
+ E.head(
+ E.title("This is a sample document")
+ ),
+ E.body(
+ E.h1("Hello!", CLASS("title")),
+ E.p("This is a paragraph with ", B("bold"), " text in it!"),
+ E.p("This is another paragraph, with a ",
+ A("link", href="http://www.python.org"), "."),
+ E.p("Here are some reserved characters: <spam&egg>."),
+ ET.XML("<p>And finally, here is an embedded XHTML fragment.</p>"),
+ )
+ )
+ )
+
+ print ET.tostring(page)
+
+ Here's a prettyprinted version of the output from the above script::
+
+ <html>
+   <head>
+     <title>This is a sample document</title>
+   </head>
+   <body>
+     <h1 class="title">Hello!</h1>
+     <p>This is a paragraph with <b>bold</b> text in it!</p>
+     <p>This is another paragraph, with a <a href="http://www.python.org">link</a>.</p>
+     <p>Here are some reserved characters: &lt;spam&amp;egg&gt;.</p>
+     <p>And finally, here is an embedded XHTML fragment.</p>
+   </body>
+ </html>
+
+ For namespace support, you can pass a namespace map (``nsmap``)
+ and/or a specific target ``namespace`` to the ElementMaker class::
+
+ >>> E = ElementMaker(namespace="http://my.ns/")
+ >>> print(ET.tostring( E.test ))
+ <test xmlns="http://my.ns/"/>
+
+ >>> E = ElementMaker(namespace="http://my.ns/", nsmap={'p':'http://my.ns/'})
+ >>> print(ET.tostring( E.test ))
+ <p:test xmlns:p="http://my.ns/"/>
+ """
+
+ def __init__(self, typemap=None,
+ namespace=None, nsmap=None, makeelement=None):
+ """Configure the factory.
+
+ ``typemap`` maps Python types to handler callables ``(elem, item)``
+ and is copied; handlers for ``str``, ``unicode``, ``ET.CDATA`` and
+ ``dict`` are added unless already present.  ``namespace`` is stored
+ in ``{uri}`` form and prefixed to unqualified tags.  ``nsmap`` is
+ copied and passed to the element factory.  ``makeelement`` replaces
+ ``ET.Element`` as the element factory when given.
+ """
+ if namespace is not None:
+ self._namespace = '{' + namespace + '}'
+ else:
+ self._namespace = None
+
+ if nsmap:
+ self._nsmap = dict(nsmap)
+ else:
+ self._nsmap = None
+
+ if makeelement is not None:
+ assert callable(makeelement)
+ self._makeelement = makeelement
+ else:
+ self._makeelement = ET.Element
+
+ # initialize type map for this element factory
+
+ if typemap:
+ typemap = dict(typemap)
+ else:
+ typemap = {}
+
+ def add_text(elem, item):
+ # Text goes to the last child's tail, or to elem.text if no child yet.
+ try:
+ elem[-1].tail = (elem[-1].tail or "") + item
+ except IndexError:
+ elem.text = (elem.text or "") + item
+
+ def add_cdata(elem, cdata):
+ if elem.text:
+ raise ValueError("Can't add a CDATA section. Element already has some text: %r" % elem.text)
+ elem.text = cdata
+
+ if str not in typemap:
+ typemap[str] = add_text
+ if unicode not in typemap:
+ typemap[unicode] = add_text
+ if ET.CDATA not in typemap:
+ typemap[ET.CDATA] = add_cdata
+
+ def add_dict(elem, item):
+ attrib = elem.attrib
+ for k, v in item.items():
+ if isinstance(v, basestring):
+ attrib[k] = v
+ else:
+ # Non-string values are converted by their typemap handler,
+ # called with None in place of the element.
+ attrib[k] = typemap[type(v)](None, v)
+ if dict not in typemap:
+ typemap[dict] = add_dict
+
+ self._typemap = typemap
+
+ def __call__(self, tag, *children, **attrib):
+ """Create element ``tag`` with the given attributes and children.
+
+ Callable children are called first and their result is used.  Each
+ child is dispatched through the typemap by exact type, then (for
+ non-elements) by the types in its MRO; elements are appended.
+ Raises ``TypeError`` when no handler is found.
+ """
+ typemap = self._typemap
+
+ if self._namespace is not None and tag[0] != '{':
+ tag = self._namespace + tag
+ elem = self._makeelement(tag, nsmap=self._nsmap)
+ if attrib:
+ typemap[dict](elem, attrib)
+
+ for item in children:
+ if callable(item):
+ item = item()
+ t = typemap.get(type(item))
+ if t is None:
+ if ET.iselement(item):
+ elem.append(item)
+ continue
+ for basetype in type(item).__mro__:
+ # See if the typemap knows of any of this type's bases.
+ t = typemap.get(basetype)
+ if t is not None:
+ break
+ else:
+ raise TypeError("bad argument type: %s(%r)" %
+ (type(item).__name__, item))
+ v = t(elem, item)
+ # A truthy handler result is dispatched again by its own type.
+ if v:
+ typemap.get(type(v))(elem, v)
+
+ return elem
+
+ def __getattr__(self, tag):
+ """Return a factory bound to ``tag`` so ``E.tag(...)`` equals ``E('tag', ...)``."""
+ return partial(self, tag)
+
+
+# create factory object
+E = ElementMaker()
diff --git a/libs/lxml/builder.pyd b/libs/lxml/builder.pyd
new file mode 100644
index 000000000..b20e66481
Binary files /dev/null and b/libs/lxml/builder.pyd differ
diff --git a/libs/lxml/cssselect.py b/libs/lxml/cssselect.py
new file mode 100644
index 000000000..586a1427c
--- /dev/null
+++ b/libs/lxml/cssselect.py
@@ -0,0 +1,102 @@
+"""CSS Selectors based on XPath.
+
+This module supports selecting XML/HTML tags based on CSS selectors.
+See the `CSSSelector` class for details.
+
+This is a thin wrapper around cssselect 0.7 or later.
+"""
+
+from __future__ import absolute_import
+
+from . import etree
+try:
+ import cssselect as external_cssselect
+except ImportError:
+ raise ImportError(
+ 'cssselect does not seem to be installed. '
+ 'See http://packages.python.org/cssselect/')
+
+
+SelectorSyntaxError = external_cssselect.SelectorSyntaxError
+ExpressionError = external_cssselect.ExpressionError
+SelectorError = external_cssselect.SelectorError
+
+
+__all__ = ['SelectorSyntaxError', 'ExpressionError', 'SelectorError',
+ 'CSSSelector']
+
+
+class LxmlTranslator(external_cssselect.GenericTranslator):
+    """
+    A custom CSS selector to XPath translator with lxml-specific extensions.
+    """
+
+    def xpath_contains_function(self, xpath, function):
+        """Translate ``:contains(...)`` into a lower-cased substring test.
+
+        Raises ``ExpressionError`` unless the function has exactly one
+        STRING or IDENT argument.
+        """
+        # Defined there, removed in later drafts:
+        # http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors
+        accepted = (['STRING'], ['IDENT'])
+        if function.argument_types() not in accepted:
+            raise ExpressionError(
+                "Expected a single string or ident for :contains(), got %r"
+                % function.arguments)
+        needle = function.arguments[0].value
+        condition = (
+            'contains(__lxml_internal_css:lower-case(string(.)), %s)'
+            % self.xpath_literal(needle.lower()))
+        return xpath.add_condition(condition)
+
+
+class LxmlHTMLTranslator(LxmlTranslator, external_cssselect.HTMLTranslator):
+ """
+ lxml extensions + HTML support.
+
+ Combines ``LxmlTranslator`` with cssselect's ``HTMLTranslator``; it adds
+ no members of its own.
+ """
+
+
+def _make_lower_case(context, s):
+    """Return ``s`` lower-cased; ``context`` is accepted but not used."""
+    lowered = s.lower()
+    return lowered
+
+# Register _make_lower_case as the XPath function "lower-case" under the
+# prefix that xpath_contains_function emits in its generated expressions.
+ns = etree.FunctionNamespace('http://codespeak.net/lxml/css/')
+ns.prefix = '__lxml_internal_css'
+ns['lower-case'] = _make_lower_case
+
+
+class CSSSelector(etree.XPath):
+ """A CSS selector.
+
+ Usage::
+
+ >>> from lxml import etree, cssselect
+ >>> select = cssselect.CSSSelector("a tag > child")
+
+ >>> root = etree.XML("<a><tag><child>TEXT</child></tag></a>")
+ >>> [ el.tag for el in select(root) ]
+ ['child']
+
+ To use CSS namespaces, you need to pass a prefix-to-namespace
+ mapping as ``namespaces`` keyword argument::
+
+ >>> rdfns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
+ >>> select_ns = cssselect.CSSSelector('root > rdf|Description',
+ ... namespaces={'rdf': rdfns})
+
+ >>> rdf = etree.XML((
+ ... '<root xmlns:rdf="%s">'
+ ... '<rdf:Description>blah</rdf:Description>'
+ ... '</root>') % rdfns)
+ >>> [(el.tag, el.text) for el in select_ns(rdf)]
+ [('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description', 'blah')]
+
+ """
+ def __init__(self, css, namespaces=None, translator='xml'):
+ """Compile ``css`` to XPath and initialise the ``etree.XPath`` base.
+
+ ``translator`` may be ``'xml'``, ``'html'`` or ``'xhtml'``; any other
+ value is used as-is and must provide ``css_to_xpath``.  The original
+ selector string is kept on ``self.css``.
+ """
+ if translator == 'xml':
+ translator = LxmlTranslator()
+ elif translator == 'html':
+ translator = LxmlHTMLTranslator()
+ elif translator == 'xhtml':
+ translator = LxmlHTMLTranslator(xhtml=True)
+ path = translator.css_to_xpath(css)
+ etree.XPath.__init__(self, path, namespaces=namespaces)
+ self.css = css
+
+ def __repr__(self):
+ # Format: <ClassName hex-id for 'css'>; [2:] drops the "0x" prefix.
+ return '<%s %s for %r>' % (
+ self.__class__.__name__,
+ hex(abs(id(self)))[2:],
+ self.css)
diff --git a/libs/lxml/doctestcompare.py b/libs/lxml/doctestcompare.py
new file mode 100644
index 000000000..1b0daa49a
--- /dev/null
+++ b/libs/lxml/doctestcompare.py
@@ -0,0 +1,507 @@
+"""
+lxml-based doctest output comparison.
+
+Note: normally, you should just import the `lxml.usedoctest` and
+`lxml.html.usedoctest` modules from within a doctest, instead of this
+one::
+
+ >>> import lxml.usedoctest # for XML output
+
+ >>> import lxml.html.usedoctest # for HTML output
+
+To use this module directly, you must call ``lxmldoctest.install()``,
+which will cause doctest to use this in all subsequent calls.
+
+This changes the way output is checked and comparisons are made for
+XML or HTML-like content.
+
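+XML or HTML content is noticed because the example starts with ``<``
+(it's HTML if it starts with ``<html``).  You can also use the
+``PARSE_HTML`` and ``PARSE_XML`` flags to force parsing.
+
+Some rough wildcard-like things are allowed.  Whitespace is generally
+ignored (except in attributes).  In text (attributes and text in the
+body) you can use ``...`` as a wildcard.  In an example it also
+matches any trailing tags in the element, though it does not match
+leading tags.  You may create a tag ``<any>`` or include an ``any``
+attribute in the tag.  An ``any`` tag matches any tag, while the
+attribute matches any and all attributes.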
+
+When a match fails, the reformatted example and the actual output are
+displayed (indented), and a rough diff-like output is given.  Anything
+marked with ``+`` is in the output but wasn't supposed to be, and
+similarly ``-`` means it's in the example but wasn't in the output.
+
+You can disable parsing on one line with ``# doctest:+NOPARSE_MARKUP``
+"""
+
+from lxml import etree
+import sys
+import re
+import doctest
+try:
+ from html import escape as html_escape
+except ImportError:
+ from cgi import escape as html_escape
+
+__all__ = ['PARSE_HTML', 'PARSE_XML', 'NOPARSE_MARKUP', 'LXMLOutputChecker',
+ 'LHTMLOutputChecker', 'install', 'temp_install']
+
+# ``basestring`` only exists on Python 2; fall back to ``(str, bytes)`` when
+# the name is not defined.
+try:
+ _basestring = basestring
+except NameError:
+ _basestring = (str, bytes)
+
+# Selects between the ``__func__``/``__code__`` and ``im_func``/``func_code``
+# attribute spellings in temp_install() and _RestoreChecker.
+_IS_PYTHON_3 = sys.version_info[0] >= 3
+
+# Custom doctest option flags; get_parser() tests them against the
+# ``optionflags`` it receives.
+PARSE_HTML = doctest.register_optionflag('PARSE_HTML')
+PARSE_XML = doctest.register_optionflag('PARSE_XML')
+NOPARSE_MARKUP = doctest.register_optionflag('NOPARSE_MARKUP')
+
+# Captured at import time, i.e. before install() may rebind
+# ``doctest.OutputChecker``; serves as the base class of LXMLOutputChecker.
+OutputChecker = doctest.OutputChecker
+
+def strip(v):
+ if v is None:
+ return None
+ else:
+ return v.strip()
+
+def norm_whitespace(v):
+ return _norm_whitespace_re.sub(' ', v)
+
+# Module-wide HTML parser used by html_fromstring(); created with
+# ``recover=False`` and ``remove_blank_text=True``.
+_html_parser = etree.HTMLParser(recover=False, remove_blank_text=True)
+
+def html_fromstring(html):
+ """Parse ``html`` with the shared ``_html_parser`` and return the result
+ of ``etree.fromstring``."""
+ return etree.fromstring(html, _html_parser)
+
+# We use this to distinguish repr()s from elements:
+# it matches text such as ``<Foo object at ...`` or ``<Element x at ...``.
+_repr_re = re.compile(r'^<[^>]+ (at|object) ')
+# Matches a run of two or more spaces, tabs or newlines (see norm_whitespace).
+_norm_whitespace_re = re.compile(r'[ \t\n][ \t\n]+')
+
+class LXMLOutputChecker(OutputChecker):
+ """doctest output checker that compares markup output as parsed documents.
+
+ ``check_output`` parses the expected and the actual text and compares the
+ resulting trees whenever ``get_parser`` selects a parser.
+ """
+
+ # HTML tag names; not referenced in the visible part of this class --
+ # presumably the elements that are written without an end tag (TODO confirm
+ # against html_empty_tag).
+ empty_tags = (
+ 'param', 'img', 'area', 'br', 'basefont', 'input',
+ 'base', 'meta', 'link', 'col')
+
+ def get_default_parser(self):
+ """Return the parser callable used by default: ``etree.XML``."""
+ return etree.XML
+
+ def check_output(self, want, got, optionflags):
+ """Return whether ``got`` matches ``want``.
+
+ When ``get_parser`` selects a parser, both strings are parsed and the
+ documents are compared with ``compare_docs``; a string that raises
+ ``etree.XMLSyntaxError`` counts as a mismatch. Without a parser the
+ plain doctest comparison is used.
+ """
+ # NOTE: temp_install() copies this method's code object onto the
+ # ``check_output`` function of the running doctest checker, so this body
+ # may execute with a foreign ``self``. In that case
+ # ``_temp_override_self`` carries the real lxml checker and
+ # ``_temp_call_super_check_output`` the original comparison. Keep the
+ # body limited to names that also exist in doctest's globals
+ # (temp_install() injects ``etree`` there).
+ alt_self = getattr(self, '_temp_override_self', None)
+ if alt_self is not None:
+ super_method = self._temp_call_super_check_output
+ self = alt_self
+ else:
+ super_method = OutputChecker.check_output
+ parser = self.get_parser(want, got, optionflags)
+ if not parser:
+ return super_method(
+ self, want, got, optionflags)
+ try:
+ want_doc = parser(want)
+ except etree.XMLSyntaxError:
+ return False
+ try:
+ got_doc = parser(got)
+ except etree.XMLSyntaxError:
+ return False
+ return self.compare_docs(want_doc, got_doc)
+
+ def get_parser(self, want, got, optionflags):
+ parser = None
+ if NOPARSE_MARKUP & optionflags:
+ return None
+ if PARSE_HTML & optionflags:
+ parser = html_fromstring
+ elif PARSE_XML & optionflags:
+ parser = etree.XML
+ elif (want.strip().lower().startswith('' % el.tag
+ return '<%s %s>' % (el.tag, ' '.join(attrs))
+
+ def format_end_tag(self, el):
+ if isinstance(el, etree.CommentBase):
+ # FIXME: probably PIs should be handled specially too?
+ return '-->'
+ return '%s>' % el.tag
+
+ def collect_diff(self, want, got, html, indent):
+ parts = []
+ if not len(want) and not len(got):
+ parts.append(' '*indent)
+ parts.append(self.collect_diff_tag(want, got))
+ if not self.html_empty_tag(got, html):
+ parts.append(self.collect_diff_text(want.text, got.text))
+ parts.append(self.collect_diff_end_tag(want, got))
+ parts.append(self.collect_diff_text(want.tail, got.tail))
+ parts.append('\n')
+ return ''.join(parts)
+ parts.append(' '*indent)
+ parts.append(self.collect_diff_tag(want, got))
+ parts.append('\n')
+ if strip(want.text) or strip(got.text):
+ parts.append(' '*indent)
+ parts.append(self.collect_diff_text(want.text, got.text))
+ parts.append('\n')
+ want_children = list(want)
+ got_children = list(got)
+ while want_children or got_children:
+ if not want_children:
+ parts.append(self.format_doc(got_children.pop(0), html, indent+2, '+'))
+ continue
+ if not got_children:
+ parts.append(self.format_doc(want_children.pop(0), html, indent+2, '-'))
+ continue
+ parts.append(self.collect_diff(
+ want_children.pop(0), got_children.pop(0), html, indent+2))
+ parts.append(' '*indent)
+ parts.append(self.collect_diff_end_tag(want, got))
+ parts.append('\n')
+ if strip(want.tail) or strip(got.tail):
+ parts.append(' '*indent)
+ parts.append(self.collect_diff_text(want.tail, got.tail))
+ parts.append('\n')
+ return ''.join(parts)
+
+ def collect_diff_tag(self, want, got):
+ if not self.tag_compare(want.tag, got.tag):
+ tag = '%s (got: %s)' % (want.tag, got.tag)
+ else:
+ tag = got.tag
+ attrs = []
+ any = want.tag == 'any' or 'any' in want.attrib
+ for name, value in sorted(got.attrib.items()):
+ if name not in want.attrib and not any:
+ attrs.append('+%s="%s"' % (name, self.format_text(value, False)))
+ else:
+ if name in want.attrib:
+ text = self.collect_diff_text(want.attrib[name], value, False)
+ else:
+ text = self.format_text(value, False)
+ attrs.append('%s="%s"' % (name, text))
+ if not any:
+ for name, value in sorted(want.attrib.items()):
+ if name in got.attrib:
+ continue
+ attrs.append('-%s="%s"' % (name, self.format_text(value, False)))
+ if attrs:
+ tag = '<%s %s>' % (tag, ' '.join(attrs))
+ else:
+ tag = '<%s>' % tag
+ return tag
+
+ def collect_diff_end_tag(self, want, got):
+ if want.tag != got.tag:
+ tag = '%s (got: %s)' % (want.tag, got.tag)
+ else:
+ tag = got.tag
+ return '%s>' % tag
+
+ def collect_diff_text(self, want, got, strip=True):
+ if self.text_compare(want, got, strip):
+ if not got:
+ return ''
+ return self.format_text(got, strip)
+ text = '%s (got: %s)' % (want, got)
+ return self.format_text(text, strip)
+
+class LHTMLOutputChecker(LXMLOutputChecker):
+ """LXMLOutputChecker variant whose default parser is ``html_fromstring``."""
+ def get_default_parser(self):
+ return html_fromstring
+
+def install(html=False):
+ """
+ Install doctestcompare for all future doctests.
+
+ If html is true, then by default the HTML parser will be used;
+ otherwise the XML parser is used.
+ """
+ if html:
+ doctest.OutputChecker = LHTMLOutputChecker
+ else:
+ doctest.OutputChecker = LXMLOutputChecker
+
+def temp_install(html=False, del_module=None):
+ """
+ Use this *inside* a doctest to enable this checker for this
+ doctest only.
+
+ If html is true, then by default the HTML parser will be used;
+ otherwise the XML parser is used.
+
+ ``del_module``, when given, is handed to ``_RestoreChecker``, which
+ removes that module name from ``sys.modules`` once the checker has been
+ restored.
+
+ Raises ``LookupError`` (from ``_find_doctest_frame``) when no doctest
+ frame is found on the call stack.
+ """
+ if html:
+ Checker = LHTMLOutputChecker
+ else:
+ Checker = LXMLOutputChecker
+ frame = _find_doctest_frame()
+ # ``self`` in the located frame is the running doctest runner object.
+ dt_self = frame.f_locals['self']
+ checker = Checker()
+ old_checker = dt_self._checker
+ dt_self._checker = checker
+ # The unfortunate thing is that there is a local variable 'check'
+ # in the function that runs the doctests, that is a bound method
+ # into the output checker. We have to update that. We can't
+ # modify the frame, so we have to modify the object in place. The
+ # only way to do this is to actually change the func_code
+ # attribute of the method. We change it, and then wait for
+ # __record_outcome to be run, which signals the end of the __run
+ # method, at which point we restore the previous check_output
+ # implementation.
+ if _IS_PYTHON_3:
+ check_func = frame.f_locals['check'].__func__
+ checker_check_func = checker.check_output.__func__
+ else:
+ check_func = frame.f_locals['check'].im_func
+ checker_check_func = checker.check_output.im_func
+ # Because we can't patch up func_globals, this is the only global
+ # in check_output that we care about:
+ doctest.etree = etree
+ # The instance is not kept here: its constructor installs itself on the
+ # runner and on the old checker, which hold the references.
+ _RestoreChecker(dt_self, old_checker, checker,
+ check_func, checker_check_func,
+ del_module)
+
+class _RestoreChecker(object):
+ """Installs the temporary checker patch and undoes it again.
+
+ Construction swaps the code object of ``check_func`` for that of
+ ``clone_func`` and replaces the runner's ``__record_outcome`` with this
+ object. Calling the object (i.e. when the runner records an outcome)
+ reverts both changes and forwards the call to the original function.
+ """
+ def __init__(self, dt_self, old_checker, new_checker, check_func, clone_func,
+ del_module):
+ self.dt_self = dt_self
+ self.checker = old_checker
+ # Read back by LXMLOutputChecker.check_output while the patch is active.
+ self.checker._temp_call_super_check_output = self.call_super
+ self.checker._temp_override_self = new_checker
+ self.check_func = check_func
+ self.clone_func = clone_func
+ self.del_module = del_module
+ self.install_clone()
+ self.install_dt_self()
+ def install_clone(self):
+ # Remember the original code object, then overwrite it with the clone's.
+ if _IS_PYTHON_3:
+ self.func_code = self.check_func.__code__
+ self.func_globals = self.check_func.__globals__
+ self.check_func.__code__ = self.clone_func.__code__
+ else:
+ self.func_code = self.check_func.func_code
+ self.func_globals = self.check_func.func_globals
+ self.check_func.func_code = self.clone_func.func_code
+ def uninstall_clone(self):
+ # Put back the code object saved by install_clone().
+ if _IS_PYTHON_3:
+ self.check_func.__code__ = self.func_code
+ else:
+ self.check_func.func_code = self.func_code
+ def install_dt_self(self):
+ # ``_DocTestRunner__record_outcome`` is the name-mangled form of the
+ # runner's private ``__record_outcome`` attribute.
+ self.prev_func = self.dt_self._DocTestRunner__record_outcome
+ self.dt_self._DocTestRunner__record_outcome = self
+ def uninstall_dt_self(self):
+ self.dt_self._DocTestRunner__record_outcome = self.prev_func
+ def uninstall_module(self):
+ # Drop ``del_module`` from sys.modules and, for a dotted name, also
+ # remove the attribute from its parent package.
+ if self.del_module:
+ import sys
+ del sys.modules[self.del_module]
+ if '.' in self.del_module:
+ package, module = self.del_module.rsplit('.', 1)
+ package_mod = sys.modules[package]
+ delattr(package_mod, module)
+ def __call__(self, *args, **kw):
+ # Stand-in for __record_outcome: undo every patch first, then delegate
+ # to the original function and finally unload the module, if requested.
+ self.uninstall_clone()
+ self.uninstall_dt_self()
+ del self.checker._temp_override_self
+ del self.checker._temp_call_super_check_output
+ result = self.prev_func(*args, **kw)
+ self.uninstall_module()
+ return result
+ def call_super(self, *args, **kw):
+ # Run the original check function with its own code temporarily
+ # restored; the clone is re-installed even if the call raises.
+ self.uninstall_clone()
+ try:
+ return self.check_func(*args, **kw)
+ finally:
+ self.install_clone()
+
+def _find_doctest_frame():
+ import sys
+ frame = sys._getframe(1)
+ while frame:
+ l = frame.f_locals
+ if 'BOOM' in l:
+ # Sign of doctest
+ return frame
+ frame = frame.f_back
+ raise LookupError(
+ "Could not find doctest (only use this function *inside* a doctest)")
+
+__test__ = {
+ 'basic': '''
+ >>> temp_install()
+ >>> print """stuff"""
+ ...
+ >>> print """"""
+
+
+
+ >>> print """blahblahblah""" # doctest: +NOPARSE_MARKUP, +ELLIPSIS
+ ...foo />
+ '''}
+
+# Run this module's doctests (including ``__test__``) when executed as a script.
+if __name__ == '__main__':
+ import doctest
+ doctest.testmod()
+
+
diff --git a/libs/lxml/etree.h b/libs/lxml/etree.h
new file mode 100644
index 000000000..dcf739840
--- /dev/null
+++ b/libs/lxml/etree.h
@@ -0,0 +1,223 @@
+/* Generated by Cython 0.29.2 */
+
+#ifndef __PYX_HAVE__lxml__etree
+#define __PYX_HAVE__lxml__etree
+
+struct LxmlDocument;
+struct LxmlElement;
+struct LxmlElementTree;
+struct LxmlElementTagMatcher;
+struct LxmlElementIterator;
+struct LxmlElementBase;
+struct LxmlElementClassLookup;
+struct LxmlFallbackElementClassLookup;
+
+/* "lxml/etree.pyx":318
+ *
+ * # type of a function that steps from node to node
+ * ctypedef public xmlNode* (*_node_to_node_function)(xmlNode*) # <<<<<<<<<<<<<<
+ *
+ *
+ */
+typedef xmlNode *(*_node_to_node_function)(xmlNode *);
+
+/* "lxml/etree.pyx":334
+ * @cython.final
+ * @cython.freelist(8)
+ * cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]: # <<<<<<<<<<<<<<
+ * u"""Internal base class to reference a libxml document.
+ *
+ */
+/* C object layout of the Cython class `_Document` quoted above.
+ * Generated by Cython: regenerate instead of editing by hand. */
+struct LxmlDocument {
+ PyObject_HEAD
+ struct __pyx_vtabstruct_4lxml_5etree__Document *__pyx_vtab;
+ int _ns_counter;
+ PyObject *_prefix_tail;
+ xmlDoc *_c_doc; /* the libxml document this object references */
+ struct __pyx_obj_4lxml_5etree__BaseParser *_parser;
+};
+
+/* "lxml/etree.pyx":683
+ *
+ * @cython.no_gc_clear
+ * cdef public class _Element [ type LxmlElementType, object LxmlElement ]: # <<<<<<<<<<<<<<
+ * u"""Element class.
+ *
+ */
+/* C object layout of the Cython class `_Element` quoted above. */
+struct LxmlElement {
+ PyObject_HEAD
+ struct LxmlDocument *_doc; /* LxmlDocument object associated with the element */
+ xmlNode *_c_node; /* presumably the wrapped libxml node -- confirm in etree.pyx */
+ PyObject *_tag;
+};
+
+/* "lxml/etree.pyx":1847
+ *
+ *
+ * cdef public class _ElementTree [ type LxmlElementTreeType, # <<<<<<<<<<<<<<
+ * object LxmlElementTree ]:
+ * cdef _Document _doc
+ */
+/* C object layout of the Cython class `_ElementTree` quoted above. */
+struct LxmlElementTree {
+ PyObject_HEAD
+ struct __pyx_vtabstruct_4lxml_5etree__ElementTree *__pyx_vtab;
+ struct LxmlDocument *_doc; /* the `cdef _Document _doc` attribute */
+ struct LxmlElement *_context_node;
+};
+
+/* "lxml/etree.pyx":2574
+ *
+ *
+ * cdef public class _ElementTagMatcher [ object LxmlElementTagMatcher, # <<<<<<<<<<<<<<
+ * type LxmlElementTagMatcherType ]:
+ * """
+ */
+/* C object layout of the Cython class `_ElementTagMatcher` quoted above. */
+struct LxmlElementTagMatcher {
+ PyObject_HEAD
+ struct __pyx_vtabstruct_4lxml_5etree__ElementTagMatcher *__pyx_vtab;
+ PyObject *_pystrings;
+ int _node_type;
+ char *_href; /* NOTE(review): ownership of _href/_name is not visible here */
+ char *_name;
+};
+
+/* "lxml/etree.pyx":2605
+ * self._name = NULL
+ *
+ * cdef public class _ElementIterator(_ElementTagMatcher) [ # <<<<<<<<<<<<<<
+ * object LxmlElementIterator, type LxmlElementIteratorType ]:
+ * """
+ */
+/* C object layout of `_ElementIterator`, which derives from
+ * `_ElementTagMatcher`: the base layout is embedded as the first member. */
+struct LxmlElementIterator {
+ struct LxmlElementTagMatcher __pyx_base;
+ struct LxmlElement *_node;
+ _node_to_node_function _next_element; /* steps from one xmlNode to another */
+};
+
+/* "src/lxml/classlookup.pxi":6
+ * # Custom Element classes
+ *
+ * cdef public class ElementBase(_Element) [ type LxmlElementBaseType, # <<<<<<<<<<<<<<
+ * object LxmlElementBase ]:
+ * u"""ElementBase(*children, attrib=None, nsmap=None, **_extra)
+ */
+/* C object layout of `ElementBase`: only the embedded `_Element` base, no
+ * additional members. */
+struct LxmlElementBase {
+ struct LxmlElement __pyx_base;
+};
+
+/* "src/lxml/classlookup.pxi":210
+ * # Element class lookup
+ *
+ * ctypedef public object (*_element_class_lookup_function)(object, _Document, xmlNode*) # <<<<<<<<<<<<<<
+ *
+ * # class to store element class lookup functions
+ */
+typedef PyObject *(*_element_class_lookup_function)(PyObject *, struct LxmlDocument *, xmlNode *);
+
+/* "src/lxml/classlookup.pxi":213
+ *
+ * # class to store element class lookup functions
+ * cdef public class ElementClassLookup [ type LxmlElementClassLookupType, # <<<<<<<<<<<<<<
+ * object LxmlElementClassLookup ]:
+ * u"""ElementClassLookup(self)
+ */
+/* C object layout of `ElementClassLookup`, the class that stores an element
+ * class lookup function. */
+struct LxmlElementClassLookup {
+ PyObject_HEAD
+ _element_class_lookup_function _lookup_function;
+};
+
+/* "src/lxml/classlookup.pxi":221
+ *
+ *
+ * cdef public class FallbackElementClassLookup(ElementClassLookup) \ # <<<<<<<<<<<<<<
+ * [ type LxmlFallbackElementClassLookupType,
+ * object LxmlFallbackElementClassLookup ]:
+ */
+/* C object layout of `FallbackElementClassLookup`, which derives from
+ * `ElementClassLookup`: the base layout is embedded as the first member. */
+struct LxmlFallbackElementClassLookup {
+ struct LxmlElementClassLookup __pyx_base;
+ struct __pyx_vtabstruct_4lxml_5etree_FallbackElementClassLookup *__pyx_vtab;
+ struct LxmlElementClassLookup *fallback; /* another lookup object */
+ _element_class_lookup_function _fallback_function;
+};
+
+#ifndef __PYX_HAVE_API__lxml__etree
+
+#ifndef __PYX_EXTERN_C
+ #ifdef __cplusplus
+ #define __PYX_EXTERN_C extern "C"
+ #else
+ #define __PYX_EXTERN_C extern
+ #endif
+#endif
+
+#ifndef DL_IMPORT
+ #define DL_IMPORT(_T) _T
+#endif
+
+__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlDocumentType;
+__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementType;
+__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementTreeType;
+__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementTagMatcherType;
+__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementIteratorType;
+__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementBaseType;
+__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementClassLookupType;
+__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlFallbackElementClassLookupType;
+
+__PYX_EXTERN_C struct LxmlElement *deepcopyNodeToDocument(struct LxmlDocument *, xmlNode *);
+__PYX_EXTERN_C struct LxmlElementTree *elementTreeFactory(struct LxmlElement *);
+__PYX_EXTERN_C struct LxmlElementTree *newElementTree(struct LxmlElement *, PyObject *);
+__PYX_EXTERN_C struct LxmlElementTree *adoptExternalDocument(xmlDoc *, PyObject *, int);
+__PYX_EXTERN_C struct LxmlElement *elementFactory(struct LxmlDocument *, xmlNode *);
+__PYX_EXTERN_C struct LxmlElement *makeElement(PyObject *, struct LxmlDocument *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *);
+__PYX_EXTERN_C struct LxmlElement *makeSubElement(struct LxmlElement *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *);
+__PYX_EXTERN_C void setElementClassLookupFunction(_element_class_lookup_function, PyObject *);
+__PYX_EXTERN_C PyObject *lookupDefaultElementClass(PyObject *, PyObject *, xmlNode *);
+__PYX_EXTERN_C PyObject *lookupNamespaceElementClass(PyObject *, PyObject *, xmlNode *);
+__PYX_EXTERN_C PyObject *callLookupFallback(struct LxmlFallbackElementClassLookup *, struct LxmlDocument *, xmlNode *);
+__PYX_EXTERN_C int tagMatches(xmlNode *, const xmlChar *, const xmlChar *);
+__PYX_EXTERN_C struct LxmlDocument *documentOrRaise(PyObject *);
+__PYX_EXTERN_C struct LxmlElement *rootNodeOrRaise(PyObject *);
+__PYX_EXTERN_C int hasText(xmlNode *);
+__PYX_EXTERN_C int hasTail(xmlNode *);
+__PYX_EXTERN_C PyObject *textOf(xmlNode *);
+__PYX_EXTERN_C PyObject *tailOf(xmlNode *);
+__PYX_EXTERN_C int setNodeText(xmlNode *, PyObject *);
+__PYX_EXTERN_C int setTailText(xmlNode *, PyObject *);
+__PYX_EXTERN_C PyObject *attributeValue(xmlNode *, xmlAttr *);
+__PYX_EXTERN_C PyObject *attributeValueFromNsName(xmlNode *, const xmlChar *, const xmlChar *);
+__PYX_EXTERN_C PyObject *getAttributeValue(struct LxmlElement *, PyObject *, PyObject *);
+__PYX_EXTERN_C PyObject *iterattributes(struct LxmlElement *, int);
+__PYX_EXTERN_C PyObject *collectAttributes(xmlNode *, int);
+__PYX_EXTERN_C int setAttributeValue(struct LxmlElement *, PyObject *, PyObject *);
+__PYX_EXTERN_C int delAttribute(struct LxmlElement *, PyObject *);
+__PYX_EXTERN_C int delAttributeFromNsName(xmlNode *, const xmlChar *, const xmlChar *);
+__PYX_EXTERN_C int hasChild(xmlNode *);
+__PYX_EXTERN_C xmlNode *findChild(xmlNode *, Py_ssize_t);
+__PYX_EXTERN_C xmlNode *findChildForwards(xmlNode *, Py_ssize_t);
+__PYX_EXTERN_C xmlNode *findChildBackwards(xmlNode *, Py_ssize_t);
+__PYX_EXTERN_C xmlNode *nextElement(xmlNode *);
+__PYX_EXTERN_C xmlNode *previousElement(xmlNode *);
+__PYX_EXTERN_C void appendChild(struct LxmlElement *, struct LxmlElement *);
+__PYX_EXTERN_C int appendChildToElement(struct LxmlElement *, struct LxmlElement *);
+__PYX_EXTERN_C PyObject *pyunicode(const xmlChar *);
+__PYX_EXTERN_C PyObject *utf8(PyObject *);
+__PYX_EXTERN_C PyObject *getNsTag(PyObject *);
+__PYX_EXTERN_C PyObject *getNsTagWithEmptyNs(PyObject *);
+__PYX_EXTERN_C PyObject *namespacedName(xmlNode *);
+__PYX_EXTERN_C PyObject *namespacedNameFromNsName(const xmlChar *, const xmlChar *);
+__PYX_EXTERN_C void iteratorStoreNext(struct LxmlElementIterator *, struct LxmlElement *);
+__PYX_EXTERN_C void initTagMatch(struct LxmlElementTagMatcher *, PyObject *);
+__PYX_EXTERN_C xmlNs *findOrBuildNodeNsPrefix(struct LxmlDocument *, xmlNode *, const xmlChar *, const xmlChar *);
+
+#endif /* !__PYX_HAVE_API__lxml__etree */
+
+/* WARNING: the interface of the module init function changed in CPython 3.5. */
+/* It now returns a PyModuleDef instance instead of a PyModule instance. */
+
+#if PY_MAJOR_VERSION < 3
+PyMODINIT_FUNC initetree(void);
+#else
+PyMODINIT_FUNC PyInit_etree(void);
+#endif
+
+#endif /* !__PYX_HAVE__lxml__etree */
diff --git a/libs/lxml/etree.pyd b/libs/lxml/etree.pyd
new file mode 100644
index 000000000..a03f974e6
Binary files /dev/null and b/libs/lxml/etree.pyd differ
diff --git a/libs/lxml/etree_api.h b/libs/lxml/etree_api.h
new file mode 100644
index 000000000..912f48c36
--- /dev/null
+++ b/libs/lxml/etree_api.h
@@ -0,0 +1,219 @@
+/* Generated by Cython 0.29.2 */
+
+#ifndef __PYX_HAVE_API__lxml__etree
+#define __PYX_HAVE_API__lxml__etree
+#ifdef __MINGW64__
+#define MS_WIN64
+#endif
+#include "Python.h"
+#include "etree.h"
+
+/* Pattern used for every entry below: a static function pointer that starts
+ * out as 0 and is filled in by import_lxml__etree(), plus a macro that maps
+ * the plain function name onto that pointer. */
+static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_deepcopyNodeToDocument)(struct LxmlDocument *, xmlNode *) = 0;
+#define deepcopyNodeToDocument __pyx_api_f_4lxml_5etree_deepcopyNodeToDocument
+static struct LxmlElementTree *(*__pyx_api_f_4lxml_5etree_elementTreeFactory)(struct LxmlElement *) = 0;
+#define elementTreeFactory __pyx_api_f_4lxml_5etree_elementTreeFactory
+static struct LxmlElementTree *(*__pyx_api_f_4lxml_5etree_newElementTree)(struct LxmlElement *, PyObject *) = 0;
+#define newElementTree __pyx_api_f_4lxml_5etree_newElementTree
+static struct LxmlElementTree *(*__pyx_api_f_4lxml_5etree_adoptExternalDocument)(xmlDoc *, PyObject *, int) = 0;
+#define adoptExternalDocument __pyx_api_f_4lxml_5etree_adoptExternalDocument
+static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_elementFactory)(struct LxmlDocument *, xmlNode *) = 0;
+#define elementFactory __pyx_api_f_4lxml_5etree_elementFactory
+static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_makeElement)(PyObject *, struct LxmlDocument *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *) = 0;
+#define makeElement __pyx_api_f_4lxml_5etree_makeElement
+static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_makeSubElement)(struct LxmlElement *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *) = 0;
+#define makeSubElement __pyx_api_f_4lxml_5etree_makeSubElement
+static void (*__pyx_api_f_4lxml_5etree_setElementClassLookupFunction)(_element_class_lookup_function, PyObject *) = 0;
+#define setElementClassLookupFunction __pyx_api_f_4lxml_5etree_setElementClassLookupFunction
+static PyObject *(*__pyx_api_f_4lxml_5etree_lookupDefaultElementClass)(PyObject *, PyObject *, xmlNode *) = 0;
+#define lookupDefaultElementClass __pyx_api_f_4lxml_5etree_lookupDefaultElementClass
+static PyObject *(*__pyx_api_f_4lxml_5etree_lookupNamespaceElementClass)(PyObject *, PyObject *, xmlNode *) = 0;
+#define lookupNamespaceElementClass __pyx_api_f_4lxml_5etree_lookupNamespaceElementClass
+static PyObject *(*__pyx_api_f_4lxml_5etree_callLookupFallback)(struct LxmlFallbackElementClassLookup *, struct LxmlDocument *, xmlNode *) = 0;
+#define callLookupFallback __pyx_api_f_4lxml_5etree_callLookupFallback
+static int (*__pyx_api_f_4lxml_5etree_tagMatches)(xmlNode *, const xmlChar *, const xmlChar *) = 0;
+#define tagMatches __pyx_api_f_4lxml_5etree_tagMatches
+static struct LxmlDocument *(*__pyx_api_f_4lxml_5etree_documentOrRaise)(PyObject *) = 0;
+#define documentOrRaise __pyx_api_f_4lxml_5etree_documentOrRaise
+static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_rootNodeOrRaise)(PyObject *) = 0;
+#define rootNodeOrRaise __pyx_api_f_4lxml_5etree_rootNodeOrRaise
+static int (*__pyx_api_f_4lxml_5etree_hasText)(xmlNode *) = 0;
+#define hasText __pyx_api_f_4lxml_5etree_hasText
+static int (*__pyx_api_f_4lxml_5etree_hasTail)(xmlNode *) = 0;
+#define hasTail __pyx_api_f_4lxml_5etree_hasTail
+static PyObject *(*__pyx_api_f_4lxml_5etree_textOf)(xmlNode *) = 0;
+#define textOf __pyx_api_f_4lxml_5etree_textOf
+static PyObject *(*__pyx_api_f_4lxml_5etree_tailOf)(xmlNode *) = 0;
+#define tailOf __pyx_api_f_4lxml_5etree_tailOf
+static int (*__pyx_api_f_4lxml_5etree_setNodeText)(xmlNode *, PyObject *) = 0;
+#define setNodeText __pyx_api_f_4lxml_5etree_setNodeText
+static int (*__pyx_api_f_4lxml_5etree_setTailText)(xmlNode *, PyObject *) = 0;
+#define setTailText __pyx_api_f_4lxml_5etree_setTailText
+static PyObject *(*__pyx_api_f_4lxml_5etree_attributeValue)(xmlNode *, xmlAttr *) = 0;
+#define attributeValue __pyx_api_f_4lxml_5etree_attributeValue
+static PyObject *(*__pyx_api_f_4lxml_5etree_attributeValueFromNsName)(xmlNode *, const xmlChar *, const xmlChar *) = 0;
+#define attributeValueFromNsName __pyx_api_f_4lxml_5etree_attributeValueFromNsName
+static PyObject *(*__pyx_api_f_4lxml_5etree_getAttributeValue)(struct LxmlElement *, PyObject *, PyObject *) = 0;
+#define getAttributeValue __pyx_api_f_4lxml_5etree_getAttributeValue
+static PyObject *(*__pyx_api_f_4lxml_5etree_iterattributes)(struct LxmlElement *, int) = 0;
+#define iterattributes __pyx_api_f_4lxml_5etree_iterattributes
+static PyObject *(*__pyx_api_f_4lxml_5etree_collectAttributes)(xmlNode *, int) = 0;
+#define collectAttributes __pyx_api_f_4lxml_5etree_collectAttributes
+static int (*__pyx_api_f_4lxml_5etree_setAttributeValue)(struct LxmlElement *, PyObject *, PyObject *) = 0;
+#define setAttributeValue __pyx_api_f_4lxml_5etree_setAttributeValue
+static int (*__pyx_api_f_4lxml_5etree_delAttribute)(struct LxmlElement *, PyObject *) = 0;
+#define delAttribute __pyx_api_f_4lxml_5etree_delAttribute
+static int (*__pyx_api_f_4lxml_5etree_delAttributeFromNsName)(xmlNode *, const xmlChar *, const xmlChar *) = 0;
+#define delAttributeFromNsName __pyx_api_f_4lxml_5etree_delAttributeFromNsName
+static int (*__pyx_api_f_4lxml_5etree_hasChild)(xmlNode *) = 0;
+#define hasChild __pyx_api_f_4lxml_5etree_hasChild
+static xmlNode *(*__pyx_api_f_4lxml_5etree_findChild)(xmlNode *, Py_ssize_t) = 0;
+#define findChild __pyx_api_f_4lxml_5etree_findChild
+static xmlNode *(*__pyx_api_f_4lxml_5etree_findChildForwards)(xmlNode *, Py_ssize_t) = 0;
+#define findChildForwards __pyx_api_f_4lxml_5etree_findChildForwards
+static xmlNode *(*__pyx_api_f_4lxml_5etree_findChildBackwards)(xmlNode *, Py_ssize_t) = 0;
+#define findChildBackwards __pyx_api_f_4lxml_5etree_findChildBackwards
+static xmlNode *(*__pyx_api_f_4lxml_5etree_nextElement)(xmlNode *) = 0;
+#define nextElement __pyx_api_f_4lxml_5etree_nextElement
+static xmlNode *(*__pyx_api_f_4lxml_5etree_previousElement)(xmlNode *) = 0;
+#define previousElement __pyx_api_f_4lxml_5etree_previousElement
+static void (*__pyx_api_f_4lxml_5etree_appendChild)(struct LxmlElement *, struct LxmlElement *) = 0;
+#define appendChild __pyx_api_f_4lxml_5etree_appendChild
+static int (*__pyx_api_f_4lxml_5etree_appendChildToElement)(struct LxmlElement *, struct LxmlElement *) = 0;
+#define appendChildToElement __pyx_api_f_4lxml_5etree_appendChildToElement
+static PyObject *(*__pyx_api_f_4lxml_5etree_pyunicode)(const xmlChar *) = 0;
+#define pyunicode __pyx_api_f_4lxml_5etree_pyunicode
+static PyObject *(*__pyx_api_f_4lxml_5etree_utf8)(PyObject *) = 0;
+#define utf8 __pyx_api_f_4lxml_5etree_utf8
+static PyObject *(*__pyx_api_f_4lxml_5etree_getNsTag)(PyObject *) = 0;
+#define getNsTag __pyx_api_f_4lxml_5etree_getNsTag
+static PyObject *(*__pyx_api_f_4lxml_5etree_getNsTagWithEmptyNs)(PyObject *) = 0;
+#define getNsTagWithEmptyNs __pyx_api_f_4lxml_5etree_getNsTagWithEmptyNs
+static PyObject *(*__pyx_api_f_4lxml_5etree_namespacedName)(xmlNode *) = 0;
+#define namespacedName __pyx_api_f_4lxml_5etree_namespacedName
+static PyObject *(*__pyx_api_f_4lxml_5etree_namespacedNameFromNsName)(const xmlChar *, const xmlChar *) = 0;
+#define namespacedNameFromNsName __pyx_api_f_4lxml_5etree_namespacedNameFromNsName
+static void (*__pyx_api_f_4lxml_5etree_iteratorStoreNext)(struct LxmlElementIterator *, struct LxmlElement *) = 0;
+#define iteratorStoreNext __pyx_api_f_4lxml_5etree_iteratorStoreNext
+static void (*__pyx_api_f_4lxml_5etree_initTagMatch)(struct LxmlElementTagMatcher *, PyObject *) = 0;
+#define initTagMatch __pyx_api_f_4lxml_5etree_initTagMatch
+static xmlNs *(*__pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix)(struct LxmlDocument *, xmlNode *, const xmlChar *, const xmlChar *) = 0;
+#define findOrBuildNodeNsPrefix __pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix
+#if !defined(__Pyx_PyIdentifier_FromString)
+#if PY_MAJOR_VERSION < 3
+ #define __Pyx_PyIdentifier_FromString(s) PyString_FromString(s)
+#else
+ #define __Pyx_PyIdentifier_FromString(s) PyUnicode_FromString(s)
+#endif
+#endif
+
+#ifndef __PYX_HAVE_RT_ImportFunction
+#define __PYX_HAVE_RT_ImportFunction
+/* Look up the C function `funcname` in `module.__pyx_capi__`, check that its
+ * recorded signature equals `sig`, and store the function pointer in `*f`.
+ * Returns 0 on success and -1 on failure. */
+static int __Pyx_ImportFunction(PyObject *module, const char *funcname, void (**f)(void), const char *sig) {
+ PyObject *d = 0;
+ PyObject *cobj = 0;
+ /* Union used to turn the object pointer read from the capsule into a
+ * function pointer without a direct cast. */
+ union {
+ void (*fp)(void);
+ void *p;
+ } tmp;
+ d = PyObject_GetAttrString(module, (char *)"__pyx_capi__");
+ if (!d)
+ goto bad;
+ /* cobj is never DECREF'd in this function; only d is released. */
+ cobj = PyDict_GetItemString(d, funcname);
+ if (!cobj) {
+ PyErr_Format(PyExc_ImportError,
+ "%.200s does not export expected C function %.200s",
+ PyModule_GetName(module), funcname);
+ goto bad;
+ }
+#if PY_VERSION_HEX >= 0x02070000
+ /* Capsule path: the capsule name carries the signature string. */
+ if (!PyCapsule_IsValid(cobj, sig)) {
+ PyErr_Format(PyExc_TypeError,
+ "C function %.200s.%.200s has wrong signature (expected %.500s, got %.500s)",
+ PyModule_GetName(module), funcname, sig, PyCapsule_GetName(cobj));
+ goto bad;
+ }
+ tmp.p = PyCapsule_GetPointer(cobj, sig);
+#else
+ /* CObject path: compare the stored description with sig character by
+ * character. */
+ {const char *desc, *s1, *s2;
+ desc = (const char *)PyCObject_GetDesc(cobj);
+ if (!desc)
+ goto bad;
+ s1 = desc; s2 = sig;
+ while (*s1 != '\0' && *s1 == *s2) { s1++; s2++; }
+ if (*s1 != *s2) {
+ PyErr_Format(PyExc_TypeError,
+ "C function %.200s.%.200s has wrong signature (expected %.500s, got %.500s)",
+ PyModule_GetName(module), funcname, sig, desc);
+ goto bad;
+ }
+ tmp.p = PyCObject_AsVoidPtr(cobj);}
+#endif
+ *f = tmp.fp;
+ if (!(*f))
+ goto bad;
+ Py_DECREF(d);
+ return 0;
+bad:
+ Py_XDECREF(d);
+ return -1;
+}
+#endif
+
+
+static int import_lxml__etree(void) {
+ PyObject *module = 0;
+ module = PyImport_ImportModule("lxml.etree");
+ if (!module) goto bad;
+ if (__Pyx_ImportFunction(module, "deepcopyNodeToDocument", (void (**)(void))&__pyx_api_f_4lxml_5etree_deepcopyNodeToDocument, "struct LxmlElement *(struct LxmlDocument *, xmlNode *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "elementTreeFactory", (void (**)(void))&__pyx_api_f_4lxml_5etree_elementTreeFactory, "struct LxmlElementTree *(struct LxmlElement *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "newElementTree", (void (**)(void))&__pyx_api_f_4lxml_5etree_newElementTree, "struct LxmlElementTree *(struct LxmlElement *, PyObject *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "adoptExternalDocument", (void (**)(void))&__pyx_api_f_4lxml_5etree_adoptExternalDocument, "struct LxmlElementTree *(xmlDoc *, PyObject *, int)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "elementFactory", (void (**)(void))&__pyx_api_f_4lxml_5etree_elementFactory, "struct LxmlElement *(struct LxmlDocument *, xmlNode *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "makeElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_makeElement, "struct LxmlElement *(PyObject *, struct LxmlDocument *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "makeSubElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_makeSubElement, "struct LxmlElement *(struct LxmlElement *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "setElementClassLookupFunction", (void (**)(void))&__pyx_api_f_4lxml_5etree_setElementClassLookupFunction, "void (_element_class_lookup_function, PyObject *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "lookupDefaultElementClass", (void (**)(void))&__pyx_api_f_4lxml_5etree_lookupDefaultElementClass, "PyObject *(PyObject *, PyObject *, xmlNode *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "lookupNamespaceElementClass", (void (**)(void))&__pyx_api_f_4lxml_5etree_lookupNamespaceElementClass, "PyObject *(PyObject *, PyObject *, xmlNode *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "callLookupFallback", (void (**)(void))&__pyx_api_f_4lxml_5etree_callLookupFallback, "PyObject *(struct LxmlFallbackElementClassLookup *, struct LxmlDocument *, xmlNode *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "tagMatches", (void (**)(void))&__pyx_api_f_4lxml_5etree_tagMatches, "int (xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "documentOrRaise", (void (**)(void))&__pyx_api_f_4lxml_5etree_documentOrRaise, "struct LxmlDocument *(PyObject *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "rootNodeOrRaise", (void (**)(void))&__pyx_api_f_4lxml_5etree_rootNodeOrRaise, "struct LxmlElement *(PyObject *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "hasText", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasText, "int (xmlNode *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "hasTail", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasTail, "int (xmlNode *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "textOf", (void (**)(void))&__pyx_api_f_4lxml_5etree_textOf, "PyObject *(xmlNode *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "tailOf", (void (**)(void))&__pyx_api_f_4lxml_5etree_tailOf, "PyObject *(xmlNode *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "setNodeText", (void (**)(void))&__pyx_api_f_4lxml_5etree_setNodeText, "int (xmlNode *, PyObject *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "setTailText", (void (**)(void))&__pyx_api_f_4lxml_5etree_setTailText, "int (xmlNode *, PyObject *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "attributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_attributeValue, "PyObject *(xmlNode *, xmlAttr *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "attributeValueFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_attributeValueFromNsName, "PyObject *(xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "getAttributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_getAttributeValue, "PyObject *(struct LxmlElement *, PyObject *, PyObject *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "iterattributes", (void (**)(void))&__pyx_api_f_4lxml_5etree_iterattributes, "PyObject *(struct LxmlElement *, int)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "collectAttributes", (void (**)(void))&__pyx_api_f_4lxml_5etree_collectAttributes, "PyObject *(xmlNode *, int)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "setAttributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_setAttributeValue, "int (struct LxmlElement *, PyObject *, PyObject *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "delAttribute", (void (**)(void))&__pyx_api_f_4lxml_5etree_delAttribute, "int (struct LxmlElement *, PyObject *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "delAttributeFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_delAttributeFromNsName, "int (xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "hasChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasChild, "int (xmlNode *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "findChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChild, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "findChildForwards", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChildForwards, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "findChildBackwards", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChildBackwards, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "nextElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_nextElement, "xmlNode *(xmlNode *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "previousElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_previousElement, "xmlNode *(xmlNode *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "appendChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_appendChild, "void (struct LxmlElement *, struct LxmlElement *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "appendChildToElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_appendChildToElement, "int (struct LxmlElement *, struct LxmlElement *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "pyunicode", (void (**)(void))&__pyx_api_f_4lxml_5etree_pyunicode, "PyObject *(const xmlChar *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "utf8", (void (**)(void))&__pyx_api_f_4lxml_5etree_utf8, "PyObject *(PyObject *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "getNsTag", (void (**)(void))&__pyx_api_f_4lxml_5etree_getNsTag, "PyObject *(PyObject *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "getNsTagWithEmptyNs", (void (**)(void))&__pyx_api_f_4lxml_5etree_getNsTagWithEmptyNs, "PyObject *(PyObject *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "namespacedName", (void (**)(void))&__pyx_api_f_4lxml_5etree_namespacedName, "PyObject *(xmlNode *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "namespacedNameFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_namespacedNameFromNsName, "PyObject *(const xmlChar *, const xmlChar *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "iteratorStoreNext", (void (**)(void))&__pyx_api_f_4lxml_5etree_iteratorStoreNext, "void (struct LxmlElementIterator *, struct LxmlElement *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "initTagMatch", (void (**)(void))&__pyx_api_f_4lxml_5etree_initTagMatch, "void (struct LxmlElementTagMatcher *, PyObject *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "findOrBuildNodeNsPrefix", (void (**)(void))&__pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix, "xmlNs *(struct LxmlDocument *, xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
+ Py_DECREF(module); module = 0;
+ return 0;
+ bad:
+ Py_XDECREF(module);
+ return -1;
+}
+
+#endif /* !__PYX_HAVE_API__lxml__etree */
diff --git a/libs/lxml/html/ElementSoup.py b/libs/lxml/html/ElementSoup.py
new file mode 100644
index 000000000..8e4fde13c
--- /dev/null
+++ b/libs/lxml/html/ElementSoup.py
@@ -0,0 +1,10 @@
+__doc__ = """Legacy interface to the BeautifulSoup HTML parser.
+"""
+
+__all__ = ["parse", "convert_tree"]
+
+# Explicit relative import of the sibling module: the implicit form
+# (``from soupparser import ...``) only resolves on Python 2.
+from .soupparser import convert_tree, parse as _parse
+
+def parse(file, beautifulsoup=None, makeelement=None):
+    """Parse ``file`` through ``soupparser.parse`` and return ``getroot()`` of the result."""
+    return _parse(file, beautifulsoup=beautifulsoup, makeelement=makeelement).getroot()
diff --git a/libs/lxml/html/__init__.py b/libs/lxml/html/__init__.py
new file mode 100644
index 000000000..5751f7097
--- /dev/null
+++ b/libs/lxml/html/__init__.py
@@ -0,0 +1,1926 @@
+# Copyright (c) 2004 Ian Bicking. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# 3. Neither the name of Ian Bicking nor the names of its contributors may
+# be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IAN BICKING OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""The ``lxml.html`` tool set for HTML handling.
+"""
+
+from __future__ import absolute_import
+
+# Public names of the package; each name is listed exactly once.
+__all__ = [
+    'document_fromstring', 'fragment_fromstring', 'fragments_fromstring', 'fromstring',
+    'tostring', 'Element', 'defs', 'open_in_browser', 'submit_form',
+    'find_rel_links', 'find_class', 'make_links_absolute',
+    'resolve_base_href', 'iterlinks', 'rewrite_links', 'parse']
+
+
+import copy
+import sys
+import re
+from functools import partial
+
+try:
+ from collections.abc import MutableMapping, MutableSet
+except ImportError:
+ from collections import MutableMapping, MutableSet
+
+from .. import etree
+from . import defs
+from ._setmixin import SetMixin
+
+try:
+ from urlparse import urljoin
+except ImportError:
+ # Python 3
+ from urllib.parse import urljoin
+
+# Python 2/3 compatibility: bind ``unicode`` when the builtin does not exist.
+try:
+ unicode
+except NameError:
+ # Python 3
+ unicode = str
+# Same for ``basestring``; the fallback is a tuple of types, which
+# isinstance() accepts as its second argument.
+try:
+ basestring
+except NameError:
+ # Python 3
+ basestring = (str, bytes)
+
+
+def __fix_docstring(s):
+    """Drop the string-literal prefix from lines of ``s`` that begin with one.
+
+    On Python 3 the prefix removed is ``u`` (lines starting, after optional
+    whitespace, with ``u'``); on Python 2 it is ``b``.  The leading
+    whitespace and the quote itself are kept.  Falsy input (``None`` or an
+    empty string) is returned unchanged.
+    """
+    if not s:
+        return s
+    pattern = r"^(\s*)u'" if sys.version_info[0] >= 3 else r"^(\s*)b'"
+    # re.M lets ``^`` match at the start of every line, not only the first.
+    return re.sub(pattern, r"\1'", s, flags=re.M)
+
+
+# Namespace URI bound to the ``x`` prefix in the XPath expressions below.
+XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml"
+
+# Pre-compiled XPath expressions.  Those that name an element match it both
+# without a namespace and in the XHTML namespace.
+# <a> elements carrying a ``rel`` attribute, on the descendant-or-self axis.
+_rel_links_xpath = etree.XPath("descendant-or-self::a[@rel]|descendant-or-self::x:a[@rel]",
+ namespaces={'x':XHTML_NAMESPACE})
+# <option> elements on the descendant-or-self axis.
+_options_xpath = etree.XPath("descendant-or-self::option|descendant-or-self::x:option",
+ namespaces={'x':XHTML_NAMESPACE})
+# <form> elements on the descendant-or-self axis.
+_forms_xpath = etree.XPath("descendant-or-self::form|descendant-or-self::x:form",
+ namespaces={'x':XHTML_NAMESPACE})
+#_class_xpath = etree.XPath(r"descendant-or-self::*[regexp:match(@class, concat('\b', $class_name, '\b'))]", {'regexp': 'http://exslt.org/regular-expressions'})
+# Elements whose whitespace-normalized ``class`` attribute contains
+# ``$class_name`` as a complete space-separated token.
+_class_xpath = etree.XPath("descendant-or-self::*[@class and contains(concat(' ', normalize-space(@class), ' '), concat(' ', $class_name, ' '))]")
+# Elements whose ``id`` attribute equals ``$id``.
+_id_xpath = etree.XPath("descendant-or-self::*[@id=$id]")
+# XPath ``string()`` of the context node.
+_collect_string_content = etree.XPath("string()")
+# Bound ``finditer`` over ``url(...)`` occurrences (case-insensitive); group 1
+# is the argument, which may be double-quoted, single-quoted or bare.
+_iter_css_urls = re.compile(r'url\(('+'["][^"]*["]|'+"['][^']*[']|"+r'[^)]*)\)', re.I).finditer
+# Bound ``finditer`` over ``@import "..."``; group 1 is the quoted text.
+_iter_css_imports = re.compile(r'@import "(.*?)"').finditer
+# <label> elements anywhere in the document whose ``for`` attribute equals ``$id``.
+_label_xpath = etree.XPath("//label[@for=$id]|//x:label[@for=$id]",
+ namespaces={'x':XHTML_NAMESPACE})
+# Matches one run of non-space characters.
+_archive_re = re.compile(r'[^ ]+')
+# Bound ``search`` (case-insensitive): skips everything up to the first ';',
+# then an optional "url =" label, and captures the remainder of the string in
+# the named group ``url``.  The group must be named -- a bare ``(?P.*)`` is
+# not valid regex syntax and fails when the module is imported.
+_parse_meta_refresh_url = re.compile(
+    r'[^;=]*;\s*(?:url\s*=\s*)?(?P<url>.*)$', re.I).search
+
+
+def _unquote_match(s, pos):
+    """Strip one pair of matching quotes from ``s`` and adjust ``pos``.
+
+    If ``s`` both starts and ends with ``"``, or both starts and ends with
+    ``'``, return ``(s without its first and last character, pos + 1)``.
+    Otherwise return ``(s, pos)`` unchanged.
+    """
+    for quote in ('"', "'"):
+        if s[:1] == quote and s[-1:] == quote:
+            return s[1:-1], pos + 1
+    return s, pos
+
+
+def _transform_result(typ, result):
+    """Convert the result back into the input type.
+
+    When ``typ`` is a ``bytes`` subclass, ``result`` is serialized with
+    ``tostring(..., encoding='utf-8')``; when it is a ``unicode`` subclass,
+    with ``encoding='unicode'``.  For any other type ``result`` is returned
+    as is.
+    """
+    if issubclass(typ, bytes):
+        encoding = 'utf-8'
+    elif issubclass(typ, unicode):
+        encoding = 'unicode'
+    else:
+        return result
+    return tostring(result, encoding=encoding)
+
+
+def _nons(tag):
+    """Return ``tag`` without a leading ``{XHTML_NAMESPACE}`` qualifier.
+
+    Non-string tags, and string tags that are not qualified with the XHTML
+    namespace, are returned unchanged.
+    """
+    if not isinstance(tag, basestring):
+        return tag
+    ns_end = len(XHTML_NAMESPACE) + 1
+    if tag[0] == '{' and tag[1:ns_end] == XHTML_NAMESPACE:
+        # Keep only what follows the last closing brace.
+        return tag.split('}')[-1]
+    return tag
+
+
+class Classes(MutableSet):
+    """Provides access to an element's class attribute as a set-like collection.
+    Usage::
+
+        >>> el = fromstring('<p class="hidden large">Text</p>')
+        >>> classes = el.classes  # or: classes = Classes(el.attrib)
+        >>> classes |= ['block', 'paragraph']
+        >>> el.get('class')
+        'hidden large block paragraph'
+        >>> classes.toggle('hidden')
+        False
+        >>> el.get('class')
+        'large block paragraph'
+        >>> classes -= ('some', 'classes', 'block')
+        >>> el.get('class')
+        'large paragraph'
+    """
+    def __init__(self, attributes):
+        """Wrap ``attributes``, a mapping that supports ``get``, item
+        assignment, item deletion and ``in`` for the ``'class'`` key.
+        """
+        self._attributes = attributes
+        # Zero-argument callable returning the current 'class' value,
+        # or '' when the attribute is not set.
+        self._get_class_value = partial(attributes.get, 'class', '')
+
+    def add(self, value):
+        """
+        Add a class.
+
+        This has no effect if the class is already present.
+        Raises ValueError if ``value`` is empty or contains whitespace.
+        """
+        if not value or re.search(r'\s', value):
+            raise ValueError("Invalid class name: %r" % value)
+        classes = self._get_class_value().split()
+        if value in classes:
+            return
+        classes.append(value)
+        self._attributes['class'] = ' '.join(classes)
+
+    def discard(self, value):
+        """
+        Remove a class if it is currently present.
+
+        If the class is not present, do nothing.
+        Raises ValueError if ``value`` is empty or contains whitespace.
+        """
+        if not value or re.search(r'\s', value):
+            raise ValueError("Invalid class name: %r" % value)
+        classes = [name for name in self._get_class_value().split()
+                   if name != value]
+        if classes:
+            self._attributes['class'] = ' '.join(classes)
+        elif 'class' in self._attributes:
+            # Nothing left: drop the attribute instead of storing ''.
+            del self._attributes['class']
+
+    def remove(self, value):
+        """
+        Remove a class; it must currently be present.
+
+        If the class is not present, raise a KeyError.
+        Raises ValueError if ``value`` is empty or contains whitespace.
+        """
+        if not value or re.search(r'\s', value):
+            raise ValueError("Invalid class name: %r" % value)
+        super(Classes, self).remove(value)
+
+    def __contains__(self, name):
+        classes = self._get_class_value()
+        # Substring test first, then the whole-token test on the split list.
+        return name in classes and name in classes.split()
+
+    def __iter__(self):
+        return iter(self._get_class_value().split())
+
+    def __len__(self):
+        return len(self._get_class_value().split())
+
+    # non-standard methods
+
+    def update(self, values):
+        """
+        Add all names from 'values'.
+
+        The attribute is only rewritten when at least one name was new.
+        """
+        classes = self._get_class_value().split()
+        extended = False
+        for value in values:
+            if value not in classes:
+                classes.append(value)
+                extended = True
+        if extended:
+            self._attributes['class'] = ' '.join(classes)
+
+    def toggle(self, value):
+        """
+        Add a class name if it isn't there yet, or remove it if it exists.
+
+        Returns true if the class was added (and is now enabled) and
+        false if it was removed (and is now disabled).
+        Raises ValueError if ``value`` is empty or contains whitespace.
+        """
+        if not value or re.search(r'\s', value):
+            raise ValueError("Invalid class name: %r" % value)
+        classes = self._get_class_value().split()
+        try:
+            classes.remove(value)
+            enabled = False
+        except ValueError:
+            classes.append(value)
+            enabled = True
+        if classes:
+            self._attributes['class'] = ' '.join(classes)
+        else:
+            # Only reached after removing the last class, so the
+            # attribute is known to exist.
+            del self._attributes['class']
+        return enabled
+
+
+class HtmlMixin(object):
+
+ def set(self, key, value=None):
+ """set(self, key, value=None)
+
+ Sets an element attribute. If no value is provided, or if the value is None,
+ creates a 'boolean' attribute without value, e.g. "<form novalidate></form>"
+ for ``form.set('novalidate')``.
+ """
+ # NOTE(review): super() is anchored on HtmlElement (defined outside this
+ # view), not on HtmlMixin -- confirm the class hierarchy before changing it.
+ super(HtmlElement, self).set(key, value)
+
+    @property
+    def classes(self):
+        """
+        A set-like ``Classes`` wrapper built on this element's ``attrib``.
+        """
+        attributes = self.attrib
+        return Classes(attributes)
+
+    @classes.setter
+    def classes(self, classes):
+        """
+        Store the class value held by a ``Classes`` object on this element;
+        an empty value removes an existing 'class' attribute.
+        """
+        assert isinstance(classes, Classes)  # only allow "el.classes |= ..." etc.
+        new_value = classes._get_class_value()
+        if not new_value:
+            if self.get('class') is not None:
+                del self.attrib['class']
+            return
+        self.set('class', new_value)
+
+    @property
+    def base_url(self):
+        """
+        Returns the base URL, given when the page was parsed.
+
+        Read from ``docinfo.URL`` of the element's root tree.  Use with
+        ``urlparse.urljoin(el.base_url, href)`` to get absolute URLs.
+        """
+        root_tree = self.getroottree()
+        return root_tree.docinfo.URL
+
+ @property
+ def forms(self):
+ """
+ Return a list of all the forms found by the module-level ``_forms_xpath``
+ query: ``form`` elements, plain or XHTML-namespaced, on the
+ descendant-or-self axis of this element.
+ """
+ return _forms_xpath(self)
+
+    @property
+    def body(self):
+        """
+        Return the <body> element.  Can be called from a child element
+        to get the document's body.
+
+        The first match of the document-wide query is returned; an empty
+        result makes the ``[0]`` lookup fail.
+        """
+        matches = self.xpath('//body|//x:body', namespaces={'x':XHTML_NAMESPACE})
+        return matches[0]
+
+    @property
+    def head(self):
+        """
+        Returns the <head> element.  Can be called from a child
+        element to get the document's head.
+
+        The first match of the document-wide query is returned; an empty
+        result makes the ``[0]`` lookup fail.
+        """
+        matches = self.xpath('//head|//x:head', namespaces={'x':XHTML_NAMESPACE})
+        return matches[0]
+
+ @property
+ def label(self):
+ """
+ Get or set any