pull/684/head
Louis Vézina 5 years ago
parent fb4b152c5b
commit 2b2fd4e8d7

@ -2,6 +2,7 @@
from __future__ import absolute_import
from __future__ import print_function
from six import PY3
import subprocess as sp
import time
import os
@ -43,7 +44,10 @@ def start_bazarr():
print("Bazarr starting...")
try:
for line in iter(ep.stdout.readline, ''):
sys.stdout.buffer.write(line)
if PY3:
sys.stdout.buffer.write(line)
else:
sys.stdout.write(line)
except KeyboardInterrupt:
pass

@ -26,7 +26,14 @@
import os
import re
import six
import yaml
try:
import yaml
except ImportError:
if six.PY3:
import yaml3 as yaml
else:
import yaml2 as yaml
from .. import plugins
from ..AppriseAsset import AppriseAsset

@ -17,17 +17,18 @@ http://www.crummy.com/software/BeautifulSoup/bs4/doc/
"""
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
__author__ = "Leonard Richardson (leonardr@segfault.org)"
__version__ = "4.8.0"
__copyright__ = "Copyright (c) 2004-2019 Leonard Richardson"
# Use of this source code is governed by the MIT license.
__version__ = "4.6.0"
__copyright__ = "Copyright (c) 2004-2017 Leonard Richardson"
__license__ = "MIT"
__all__ = ['BeautifulSoup']
import os
import re
import sys
import traceback
import warnings
@ -49,7 +50,7 @@ from .element import (
# The very first thing we do is give a useful error if someone is
# running this code under Python 3 without converting it.
'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'!='You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'<>'You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
class BeautifulSoup(Tag):
"""
@ -73,7 +74,7 @@ class BeautifulSoup(Tag):
like HTML's <br> tag), call handle_starttag and then
handle_endtag.
"""
ROOT_TAG_NAME = '[document]'
ROOT_TAG_NAME = u'[document]'
# If the end-user gives no indication which tree builder they
# want, look for one with these features.
@ -81,56 +82,16 @@ class BeautifulSoup(Tag):
ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'
NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, pass the additional argument 'features=\"%(parser)s\"' to the BeautifulSoup constructor.\n"
NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, change code that looks like this:\n\n BeautifulSoup(YOUR_MARKUP})\n\nto this:\n\n BeautifulSoup(YOUR_MARKUP, \"%(parser)s\")\n"
def __init__(self, markup="", features=None, builder=None,
parse_only=None, from_encoding=None, exclude_encodings=None,
**kwargs):
"""Constructor.
:param markup: A string or a file-like object representing
markup to be parsed.
:param features: Desirable features of the parser to be used. This
may be the name of a specific parser ("lxml", "lxml-xml",
"html.parser", or "html5lib") or it may be the type of markup
to be used ("html", "html5", "xml"). It's recommended that you
name a specific parser, so that Beautiful Soup gives you the
same results across platforms and virtual environments.
:param builder: A TreeBuilder subclass to instantiate (or
instance to use) instead of looking one up based on
`features`. You only need to use this if you've implemented a
custom TreeBuilder.
:param parse_only: A SoupStrainer. Only parts of the document
matching the SoupStrainer will be considered. This is useful
when parsing part of a document that would otherwise be too
large to fit into memory.
:param from_encoding: A string indicating the encoding of the
document to be parsed. Pass this in if Beautiful Soup is
guessing wrongly about the document's encoding.
:param exclude_encodings: A list of strings indicating
encodings known to be wrong. Pass this in if you don't know
the document's encoding but you know Beautiful Soup's guess is
wrong.
:param kwargs: For backwards compatibility purposes, the
constructor accepts certain keyword arguments used in
Beautiful Soup 3. None of these arguments do anything in
Beautiful Soup 4; they will result in a warning and then be ignored.
Apart from this, any keyword arguments passed into the BeautifulSoup
constructor are propagated to the TreeBuilder constructor. This
makes it possible to configure a TreeBuilder beyond saying
which one to use.
"""
"""The Soup object is initialized as the 'root tag', and the
provided markup (which can be a string or a file-like object)
is fed into the underlying parser."""
if 'convertEntities' in kwargs:
del kwargs['convertEntities']
warnings.warn(
"BS4 does not respect the convertEntities argument to the "
"BeautifulSoup constructor. Entities are always converted "
@ -181,22 +142,18 @@ class BeautifulSoup(Tag):
from_encoding = from_encoding or deprecated_argument(
"fromEncoding", "from_encoding")
if from_encoding and isinstance(markup, str):
if from_encoding and isinstance(markup, unicode):
warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.")
from_encoding = None
# We need this information to track whether or not the builder
# was specified well enough that we can omit the 'you need to
# specify a parser' warning.
original_builder = builder
original_features = features
if isinstance(builder, type):
# A builder class was passed in; it needs to be instantiated.
builder_class = builder
builder = None
elif builder is None:
if isinstance(features, str):
if len(kwargs) > 0:
arg = kwargs.keys().pop()
raise TypeError(
"__init__() got an unexpected keyword argument '%s'" % arg)
if builder is None:
original_features = features
if isinstance(features, basestring):
features = [features]
if features is None or len(features) == 0:
features = self.DEFAULT_BUILDER_FEATURES
@ -206,73 +163,41 @@ class BeautifulSoup(Tag):
"Couldn't find a tree builder with the features you "
"requested: %s. Do you need to install a parser library?"
% ",".join(features))
# At this point either we have a TreeBuilder instance in
# builder, or we have a builder_class that we can instantiate
# with the remaining **kwargs.
if builder is None:
builder = builder_class(**kwargs)
if not original_builder and not (
original_features == builder.NAME or
original_features in builder.ALTERNATE_NAMES
):
builder = builder_class()
if not (original_features == builder.NAME or
original_features in builder.ALTERNATE_NAMES):
if builder.is_xml:
markup_type = "XML"
else:
markup_type = "HTML"
# This code adapted from warnings.py so that we get the same line
# of code as our warnings.warn() call gets, even if the answer is wrong
# (as it may be in a multithreading situation).
caller = None
try:
caller = sys._getframe(1)
except ValueError:
pass
if caller:
globals = caller.f_globals
line_number = caller.f_lineno
else:
globals = sys.__dict__
line_number= 1
filename = globals.get('__file__')
if filename:
fnl = filename.lower()
if fnl.endswith((".pyc", ".pyo")):
filename = filename[:-1]
if filename:
# If there is no filename at all, the user is most likely in a REPL,
# and the warning is not necessary.
values = dict(
filename=filename,
line_number=line_number,
parser=builder.NAME,
markup_type=markup_type
)
warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % values, stacklevel=2)
else:
if kwargs:
warnings.warn("Keyword arguments to the BeautifulSoup constructor will be ignored. These would normally be passed into the TreeBuilder constructor, but a TreeBuilder instance was passed in as `builder`.")
caller = traceback.extract_stack()[0]
filename = caller[0]
line_number = caller[1]
warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict(
filename=filename,
line_number=line_number,
parser=builder.NAME,
markup_type=markup_type))
self.builder = builder
self.is_xml = builder.is_xml
self.known_xml = self.is_xml
self._namespaces = dict()
self.parse_only = parse_only
self.builder.soup = self
self.builder.initialize_soup(self)
self.parse_only = parse_only
if hasattr(markup, 'read'): # It's a file-type object.
markup = markup.read()
elif len(markup) <= 256 and (
(isinstance(markup, bytes) and not b'<' in markup)
or (isinstance(markup, str) and not '<' in markup)
or (isinstance(markup, unicode) and not u'<' in markup)
):
# Print out warnings for a couple beginner problems
# involving passing non-markup to Beautiful Soup.
# Beautiful Soup will still parse the input as markup,
# just in case that's what the user really wants.
if (isinstance(markup, str)
if (isinstance(markup, unicode)
and not os.path.supports_unicode_filenames):
possible_filename = markup.encode("utf8")
else:
@ -280,13 +205,13 @@ class BeautifulSoup(Tag):
is_file = False
try:
is_file = os.path.exists(possible_filename)
except Exception as e:
except Exception, e:
# This is almost certainly a problem involving
# characters not valid in filenames on this
# system. Just let it go.
pass
if is_file:
if isinstance(markup, str):
if isinstance(markup, unicode):
markup = markup.encode("utf8")
warnings.warn(
'"%s" looks like a filename, not markup. You should'
@ -338,9 +263,9 @@ class BeautifulSoup(Tag):
if isinstance(markup, bytes):
space = b' '
cant_start_with = (b"http:", b"https:")
elif isinstance(markup, str):
space = ' '
cant_start_with = ("http:", "https:")
elif isinstance(markup, unicode):
space = u' '
cant_start_with = (u"http:", u"https:")
else:
return
@ -377,10 +302,9 @@ class BeautifulSoup(Tag):
self.preserve_whitespace_tag_stack = []
self.pushTag(self)
def new_tag(self, name, namespace=None, nsprefix=None, attrs={}, **kwattrs):
def new_tag(self, name, namespace=None, nsprefix=None, **attrs):
"""Create a new tag associated with this soup."""
kwattrs.update(attrs)
return Tag(None, self.builder, name, namespace, nsprefix, kwattrs)
return Tag(None, self.builder, name, namespace, nsprefix, attrs)
def new_string(self, s, subclass=NavigableString):
"""Create a new NavigableString associated with this soup."""
@ -403,7 +327,7 @@ class BeautifulSoup(Tag):
def pushTag(self, tag):
#print "Push", tag.name
if self.currentTag is not None:
if self.currentTag:
self.currentTag.contents.append(tag)
self.tagStack.append(tag)
self.currentTag = self.tagStack[-1]
@ -412,7 +336,7 @@ class BeautifulSoup(Tag):
def endData(self, containerClass=NavigableString):
if self.current_data:
current_data = ''.join(self.current_data)
current_data = u''.join(self.current_data)
# If whitespace is not preserved, and this string contains
# nothing but ASCII spaces, replace it with a single space
# or newline.
@ -442,71 +366,60 @@ class BeautifulSoup(Tag):
def object_was_parsed(self, o, parent=None, most_recent_element=None):
"""Add an object to the parse tree."""
if parent is None:
parent = self.currentTag
if most_recent_element is not None:
previous_element = most_recent_element
else:
previous_element = self._most_recent_element
parent = parent or self.currentTag
previous_element = most_recent_element or self._most_recent_element
next_element = previous_sibling = next_sibling = None
if isinstance(o, Tag):
next_element = o.next_element
next_sibling = o.next_sibling
previous_sibling = o.previous_sibling
if previous_element is None:
if not previous_element:
previous_element = o.previous_element
fix = parent.next_element is not None
o.setup(parent, previous_element, next_element, previous_sibling, next_sibling)
self._most_recent_element = o
parent.contents.append(o)
# Check if we are inserting into an already parsed node.
if fix:
self._linkage_fixer(parent)
def _linkage_fixer(self, el):
"""Make sure linkage of this fragment is sound."""
first = el.contents[0]
child = el.contents[-1]
descendant = child
if child is first and el.parent is not None:
# Parent should be linked to first child
el.next_element = child
# We are no longer linked to whatever this element is
prev_el = child.previous_element
if prev_el is not None and prev_el is not el:
prev_el.next_element = None
# First child should be linked to the parent, and no previous siblings.
child.previous_element = el
child.previous_sibling = None
# We have no sibling as we've been appended as the last.
child.next_sibling = None
# This index is a tag, dig deeper for a "last descendant"
if isinstance(child, Tag) and child.contents:
descendant = child._last_descendant(False)
# As the final step, link last descendant. It should be linked
# to the parent's next sibling (if found), else walk up the chain
# and find a parent with a sibling. It should have no next sibling.
descendant.next_element = None
descendant.next_sibling = None
target = el
while True:
if target is None:
break
elif target.next_sibling is not None:
descendant.next_element = target.next_sibling
target.next_sibling.previous_element = child
break
target = target.parent
if parent.next_sibling:
# This node is being inserted into an element that has
# already been parsed. Deal with any dangling references.
index = len(parent.contents)-1
while index >= 0:
if parent.contents[index] is o:
break
index -= 1
else:
raise ValueError(
"Error building tree: supposedly %r was inserted "
"into %r after the fact, but I don't see it!" % (
o, parent
)
)
if index == 0:
previous_element = parent
previous_sibling = None
else:
previous_element = previous_sibling = parent.contents[index-1]
if index == len(parent.contents)-1:
next_element = parent.next_sibling
next_sibling = None
else:
next_element = next_sibling = parent.contents[index+1]
o.previous_element = previous_element
if previous_element:
previous_element.next_element = o
o.next_element = next_element
if next_element:
next_element.previous_element = o
o.next_sibling = next_sibling
if next_sibling:
next_sibling.previous_sibling = o
o.previous_sibling = previous_sibling
if previous_sibling:
previous_sibling.next_sibling = o
def _popToTag(self, name, nsprefix=None, inclusivePop=True):
"""Pops the tag stack up to and including the most recent
@ -552,7 +465,7 @@ class BeautifulSoup(Tag):
self.currentTag, self._most_recent_element)
if tag is None:
return tag
if self._most_recent_element is not None:
if self._most_recent_element:
self._most_recent_element.next_element = tag
self._most_recent_element = tag
self.pushTag(tag)
@ -577,9 +490,9 @@ class BeautifulSoup(Tag):
encoding_part = ''
if eventual_encoding != None:
encoding_part = ' encoding="%s"' % eventual_encoding
prefix = '<?xml version="1.0"%s?>\n' % encoding_part
prefix = u'<?xml version="1.0"%s?>\n' % encoding_part
else:
prefix = ''
prefix = u''
if not pretty_print:
indent_level = None
else:
@ -613,4 +526,4 @@ class FeatureNotFound(ValueError):
if __name__ == '__main__':
import sys
soup = BeautifulSoup(sys.stdin)
print(soup.prettify())
print soup.prettify()

@ -1,5 +1,5 @@
# Use of this source code is governed by the MIT license.
__license__ = "MIT"
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
from collections import defaultdict
import itertools
@ -7,7 +7,8 @@ import sys
from bs4.element import (
CharsetMetaAttributeValue,
ContentMetaAttributeValue,
nonwhitespace_re
HTMLAwareEntitySubstitution,
whitespace_re
)
__all__ = [
@ -89,46 +90,18 @@ class TreeBuilder(object):
is_xml = False
picklable = False
preserve_whitespace_tags = set()
empty_element_tags = None # A tag will be considered an empty-element
# tag when and only when it has no contents.
# A value for these tag/attribute combinations is a space- or
# comma-separated list of CDATA, rather than a single CDATA.
DEFAULT_CDATA_LIST_ATTRIBUTES = {}
DEFAULT_PRESERVE_WHITESPACE_TAGS = set()
USE_DEFAULT = object()
def __init__(self, multi_valued_attributes=USE_DEFAULT, preserve_whitespace_tags=USE_DEFAULT):
"""Constructor.
:param multi_valued_attributes: If this is set to None, the
TreeBuilder will not turn any values for attributes like
'class' into lists. Setting this do a dictionary will
customize this behavior; look at DEFAULT_CDATA_LIST_ATTRIBUTES
for an example.
Internally, these are called "CDATA list attributes", but that
probably doesn't make sense to an end-user, so the argument name
is `multi_valued_attributes`.
:param preserve_whitespace_tags:
"""
cdata_list_attributes = {}
def __init__(self):
self.soup = None
if multi_valued_attributes is self.USE_DEFAULT:
multi_valued_attributes = self.DEFAULT_CDATA_LIST_ATTRIBUTES
self.cdata_list_attributes = multi_valued_attributes
if preserve_whitespace_tags is self.USE_DEFAULT:
preserve_whitespace_tags = self.DEFAULT_PRESERVE_WHITESPACE_TAGS
self.preserve_whitespace_tags = preserve_whitespace_tags
def initialize_soup(self, soup):
"""The BeautifulSoup object has been initialized and is now
being associated with the TreeBuilder.
"""
self.soup = soup
def reset(self):
pass
@ -152,7 +125,7 @@ class TreeBuilder(object):
if self.empty_element_tags is None:
return True
return tag_name in self.empty_element_tags
def feed(self, markup):
raise NotImplementedError()
@ -187,14 +160,14 @@ class TreeBuilder(object):
universal = self.cdata_list_attributes.get('*', [])
tag_specific = self.cdata_list_attributes.get(
tag_name.lower(), None)
for attr in list(attrs.keys()):
for attr in attrs.keys():
if attr in universal or (tag_specific and attr in tag_specific):
# We have a "class"-type attribute whose string
# value is a whitespace-separated list of
# values. Split it into a list.
value = attrs[attr]
if isinstance(value, str):
values = nonwhitespace_re.findall(value)
if isinstance(value, basestring):
values = whitespace_re.split(value)
else:
# html5lib sometimes calls setAttributes twice
# for the same tag when rearranging the parse
@ -258,20 +231,15 @@ class HTMLTreeBuilder(TreeBuilder):
Such as which tags are empty-element tags.
"""
preserve_whitespace_tags = HTMLAwareEntitySubstitution.preserve_whitespace_tags
empty_element_tags = set([
# These are from HTML5.
'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr',
# These are from earlier versions of HTML and are removed in HTML5.
'basefont', 'bgsound', 'command', 'frame', 'image', 'isindex', 'nextid', 'spacer'
# These are from HTML4, removed in HTML5.
'spacer', 'frame'
])
# The HTML standard defines these as block-level elements. Beautiful
# Soup does not treat these elements differently from other elements,
# but it may do so eventually, and this information is available if
# you need to use it.
block_elements = set(["address", "article", "aside", "blockquote", "canvas", "dd", "div", "dl", "dt", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hr", "li", "main", "nav", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"])
# The HTML standard defines these attributes as containing a
# space-separated list of values, not a single value. That is,
# class="foo bar" means that the 'class' attribute has two values,
@ -279,7 +247,7 @@ class HTMLTreeBuilder(TreeBuilder):
# encounter one of these attributes, we will parse its value into
# a list of values if possible. Upon output, the list will be
# converted back into a string.
DEFAULT_CDATA_LIST_ATTRIBUTES = {
cdata_list_attributes = {
"*" : ['class', 'accesskey', 'dropzone'],
"a" : ['rel', 'rev'],
"link" : ['rel', 'rev'],
@ -296,8 +264,6 @@ class HTMLTreeBuilder(TreeBuilder):
"output" : ["for"],
}
DEFAULT_PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea'])
def set_up_substitutions(self, tag):
# We are only interested in <meta> tags
if tag.name != 'meta':

@ -1,5 +1,5 @@
# Use of this source code is governed by the MIT license.
__license__ = "MIT"
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
__all__ = [
'HTML5TreeBuilder',
@ -15,7 +15,7 @@ from bs4.builder import (
)
from bs4.element import (
NamespacedAttribute,
nonwhitespace_re,
whitespace_re,
)
import html5lib
from html5lib.constants import (
@ -33,7 +33,7 @@ try:
# Pre-0.99999999
from html5lib.treebuilders import _base as treebuilder_base
new_html5lib = False
except ImportError as e:
except ImportError, e:
# 0.99999999 and up
from html5lib.treebuilders import base as treebuilder_base
new_html5lib = True
@ -64,7 +64,7 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
parser = html5lib.HTMLParser(tree=self.create_treebuilder)
extra_kwargs = dict()
if not isinstance(markup, str):
if not isinstance(markup, unicode):
if new_html5lib:
extra_kwargs['override_encoding'] = self.user_specified_encoding
else:
@ -72,13 +72,13 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
doc = parser.parse(markup, **extra_kwargs)
# Set the character encoding detected by the tokenizer.
if isinstance(markup, str):
if isinstance(markup, unicode):
# We need to special-case this because html5lib sets
# charEncoding to UTF-8 if it gets Unicode input.
doc.original_encoding = None
else:
original_encoding = parser.tokenizer.stream.charEncoding[0]
if not isinstance(original_encoding, str):
if not isinstance(original_encoding, basestring):
# In 0.99999999 and up, the encoding is an html5lib
# Encoding object. We want to use a string for compatibility
# with other tree builders.
@ -92,7 +92,7 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
def test_fragment_to_document(self, fragment):
"""See `TreeBuilder`."""
return '<html><head></head><body>%s</body></html>' % fragment
return u'<html><head></head><body>%s</body></html>' % fragment
class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
@ -174,7 +174,7 @@ class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
rv.append("|%s<%s>" % (' ' * indent, name))
if element.attrs:
attributes = []
for name, value in list(element.attrs.items()):
for name, value in element.attrs.items():
if isinstance(name, NamespacedAttribute):
name = "%s %s" % (prefixes[name.namespace], name.name)
if isinstance(value, list):
@ -199,14 +199,14 @@ class AttrList(object):
def __setitem__(self, name, value):
# If this attribute is a multi-valued attribute for this element,
# turn its value into a list.
list_attr = self.element.cdata_list_attributes
list_attr = HTML5TreeBuilder.cdata_list_attributes
if (name in list_attr['*']
or (self.element.name in list_attr
and name in list_attr[self.element.name])):
# A node that is being cloned may have already undergone
# this procedure.
if not isinstance(value, list):
value = nonwhitespace_re.findall(value)
value = whitespace_re.split(value)
self.element[name] = value
def items(self):
return list(self.attrs.items())
@ -229,7 +229,7 @@ class Element(treebuilder_base.Node):
def appendChild(self, node):
string_child = child = None
if isinstance(node, str):
if isinstance(node, basestring):
# Some other piece of code decided to pass in a string
# instead of creating a TextElement object to contain the
# string.
@ -246,10 +246,10 @@ class Element(treebuilder_base.Node):
child = node.element
node.parent = self
if not isinstance(child, str) and child.parent is not None:
if not isinstance(child, basestring) and child.parent is not None:
node.element.extract()
if (string_child is not None and self.element.contents
if (string_child and self.element.contents
and self.element.contents[-1].__class__ == NavigableString):
# We are appending a string onto another string.
# TODO This has O(n^2) performance, for input like
@ -259,7 +259,7 @@ class Element(treebuilder_base.Node):
old_element.replace_with(new_element)
self.soup._most_recent_element = new_element
else:
if isinstance(node, str):
if isinstance(node, basestring):
# Create a brand new NavigableString from this string.
child = self.soup.new_string(node)
@ -299,7 +299,7 @@ class Element(treebuilder_base.Node):
self.soup.builder._replace_cdata_list_attribute_values(
self.name, attributes)
for name, value in list(attributes.items()):
for name, value in attributes.items():
self.element[name] = value
# The attributes may contain variables that need substitution.
@ -360,16 +360,16 @@ class Element(treebuilder_base.Node):
# Set the first child's previous_element and previous_sibling
# to elements within the new parent
first_child = to_append[0]
if new_parents_last_descendant is not None:
if new_parents_last_descendant:
first_child.previous_element = new_parents_last_descendant
else:
first_child.previous_element = new_parent_element
first_child.previous_sibling = new_parents_last_child
if new_parents_last_descendant is not None:
if new_parents_last_descendant:
new_parents_last_descendant.next_element = first_child
else:
new_parent_element.next_element = first_child
if new_parents_last_child is not None:
if new_parents_last_child:
new_parents_last_child.next_sibling = first_child
# Find the very last element being moved. It is now the
@ -379,7 +379,7 @@ class Element(treebuilder_base.Node):
last_childs_last_descendant = to_append[-1]._last_descendant(False, True)
last_childs_last_descendant.next_element = new_parents_last_descendant_next_element
if new_parents_last_descendant_next_element is not None:
if new_parents_last_descendant_next_element:
# TODO: This code has no test coverage and I'm not sure
# how to get html5lib to go through this path, but it's
# just the other side of the previous line.

@ -1,18 +1,17 @@
# encoding: utf-8
"""Use the HTMLParser library to parse HTML files that aren't too bad."""
# Use of this source code is governed by the MIT license.
__license__ = "MIT"
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
__all__ = [
'HTMLParserTreeBuilder',
]
from html.parser import HTMLParser
from HTMLParser import HTMLParser
try:
from html.parser import HTMLParseError
except ImportError as e:
from HTMLParser import HTMLParseError
except ImportError, e:
# HTMLParseError is removed in Python 3.5. Since it can never be
# thrown in 3.5, we can just define our own class as a placeholder.
class HTMLParseError(Exception):
@ -65,18 +64,7 @@ class BeautifulSoupHTMLParser(HTMLParser):
# order. It's a list of closing tags we've already handled and
# will ignore, assuming they ever show up.
self.already_closed_empty_element = []
def error(self, msg):
"""In Python 3, HTMLParser subclasses must implement error(), although this
requirement doesn't appear to be documented.
In Python 2, HTMLParser implements error() as raising an exception.
In any event, this method is called only on very strange markup and our best strategy
is to pretend it didn't happen and keep going.
"""
warnings.warn(msg)
def handle_startendtag(self, name, attrs):
# This is only called when the markup looks like
# <tag/>.
@ -141,26 +129,11 @@ class BeautifulSoupHTMLParser(HTMLParser):
else:
real_name = int(name)
data = None
if real_name < 256:
# HTML numeric entities are supposed to reference Unicode
# code points, but sometimes they reference code points in
# some other encoding (ahem, Windows-1252). E.g. &#147;
# instead of &#201; for LEFT DOUBLE QUOTATION MARK. This
# code tries to detect this situation and compensate.
for encoding in (self.soup.original_encoding, 'windows-1252'):
if not encoding:
continue
try:
data = bytearray([real_name]).decode(encoding)
except UnicodeDecodeError as e:
pass
if not data:
try:
data = chr(real_name)
except (ValueError, OverflowError) as e:
pass
data = data or "\N{REPLACEMENT CHARACTER}"
try:
data = unichr(real_name)
except (ValueError, OverflowError), e:
data = u"\N{REPLACEMENT CHARACTER}"
self.handle_data(data)
def handle_entityref(self, name):
@ -168,12 +141,7 @@ class BeautifulSoupHTMLParser(HTMLParser):
if character is not None:
data = character
else:
# If this were XML, it would be ambiguous whether "&foo"
# was an character entity reference with a missing
# semicolon or the literal string "&foo". Since this is
# HTML, we have a complete list of all character entity references,
# and this one wasn't found, so assume it's the literal string "&foo".
data = "&%s" % name
data = "&%s;" % name
self.handle_data(data)
def handle_comment(self, data):
@ -214,15 +182,12 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
NAME = HTMLPARSER
features = [NAME, HTML, STRICT]
def __init__(self, parser_args=None, parser_kwargs=None, **kwargs):
super(HTMLParserTreeBuilder, self).__init__(**kwargs)
parser_args = parser_args or []
parser_kwargs = parser_kwargs or {}
def __init__(self, *args, **kwargs):
if CONSTRUCTOR_TAKES_STRICT and not CONSTRUCTOR_STRICT_IS_DEPRECATED:
parser_kwargs['strict'] = False
kwargs['strict'] = False
if CONSTRUCTOR_TAKES_CONVERT_CHARREFS:
parser_kwargs['convert_charrefs'] = False
self.parser_args = (parser_args, parser_kwargs)
kwargs['convert_charrefs'] = False
self.parser_args = (args, kwargs)
def prepare_markup(self, markup, user_specified_encoding=None,
document_declared_encoding=None, exclude_encodings=None):
@ -231,7 +196,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
declared within markup, whether any characters had to be
replaced with REPLACEMENT CHARACTER).
"""
if isinstance(markup, str):
if isinstance(markup, unicode):
yield (markup, None, None, False)
return
@ -248,8 +213,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
parser.soup = self.soup
try:
parser.feed(markup)
parser.close()
except HTMLParseError as e:
except HTMLParseError, e:
warnings.warn(RuntimeWarning(
"Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
raise e

@ -1,18 +1,13 @@
# Use of this source code is governed by the MIT license.
__license__ = "MIT"
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
__all__ = [
'LXMLTreeBuilderForXML',
'LXMLTreeBuilder',
]
try:
from collections.abc import Callable # Python 3.6
except ImportError as e:
from collections import Callable
from io import BytesIO
from io import StringIO
from StringIO import StringIO
import collections
from lxml import etree
from bs4.element import (
Comment,
@ -33,10 +28,6 @@ from bs4.dammit import EncodingDetector
LXML = 'lxml'
def _invert(d):
"Invert a dictionary."
return dict((v,k) for k, v in list(d.items()))
class LXMLTreeBuilderForXML(TreeBuilder):
DEFAULT_PARSER_CLASS = etree.XMLParser
@ -53,29 +44,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
# This namespace mapping is specified in the XML Namespace
# standard.
DEFAULT_NSMAPS = dict(xml='http://www.w3.org/XML/1998/namespace')
DEFAULT_NSMAPS_INVERTED = _invert(DEFAULT_NSMAPS)
def initialize_soup(self, soup):
"""Let the BeautifulSoup object know about the standard namespace
mapping.
"""
super(LXMLTreeBuilderForXML, self).initialize_soup(soup)
self._register_namespaces(self.DEFAULT_NSMAPS)
def _register_namespaces(self, mapping):
"""Let the BeautifulSoup object know about namespaces encountered
while parsing the document.
This might be useful later on when creating CSS selectors.
"""
for key, value in list(mapping.items()):
if key and key not in self.soup._namespaces:
# Let the BeautifulSoup object know about a new namespace.
# If there are multiple namespaces defined with the same
# prefix, the first one in the document takes precedence.
self.soup._namespaces[key] = value
DEFAULT_NSMAPS = {'http://www.w3.org/XML/1998/namespace' : "xml"}
def default_parser(self, encoding):
# This can either return a parser object or a class, which
@ -89,12 +58,12 @@ class LXMLTreeBuilderForXML(TreeBuilder):
# Use the default parser.
parser = self.default_parser(encoding)
if isinstance(parser, Callable):
if isinstance(parser, collections.Callable):
# Instantiate the parser with default arguments
parser = parser(target=self, strip_cdata=False, encoding=encoding)
return parser
def __init__(self, parser=None, empty_element_tags=None, **kwargs):
def __init__(self, parser=None, empty_element_tags=None):
# TODO: Issue a warning if parser is present but not a
# callable, since that means there's no way to create new
# parsers for different encodings.
@ -102,9 +71,8 @@ class LXMLTreeBuilderForXML(TreeBuilder):
if empty_element_tags is not None:
self.empty_element_tags = set(empty_element_tags)
self.soup = None
self.nsmaps = [self.DEFAULT_NSMAPS_INVERTED]
super(LXMLTreeBuilderForXML, self).__init__(**kwargs)
self.nsmaps = [self.DEFAULT_NSMAPS]
def _getNsTag(self, tag):
# Split the namespace URL out of a fully-qualified lxml tag
# name. Copied from lxml's src/lxml/sax.py.
@ -133,12 +101,12 @@ class LXMLTreeBuilderForXML(TreeBuilder):
else:
self.processing_instruction_class = XMLProcessingInstruction
if isinstance(markup, str):
if isinstance(markup, unicode):
# We were given Unicode. Maybe lxml can parse Unicode on
# this system?
yield markup, None, document_declared_encoding, False
if isinstance(markup, str):
if isinstance(markup, unicode):
# No, apparently not. Convert the Unicode to UTF-8 and
# tell lxml to parse it as UTF-8.
yield (markup.encode("utf8"), "utf8",
@ -153,7 +121,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
def feed(self, markup):
if isinstance(markup, bytes):
markup = BytesIO(markup)
elif isinstance(markup, str):
elif isinstance(markup, unicode):
markup = StringIO(markup)
# Call feed() at least once, even if the markup is empty,
@ -168,36 +136,30 @@ class LXMLTreeBuilderForXML(TreeBuilder):
if len(data) != 0:
self.parser.feed(data)
self.parser.close()
except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
except (UnicodeDecodeError, LookupError, etree.ParserError), e:
raise ParserRejectedMarkup(str(e))
def close(self):
self.nsmaps = [self.DEFAULT_NSMAPS_INVERTED]
self.nsmaps = [self.DEFAULT_NSMAPS]
def start(self, name, attrs, nsmap={}):
# Make sure attrs is a mutable dict--lxml may send an immutable dictproxy.
attrs = dict(attrs)
nsprefix = None
# Invert each namespace map as it comes in.
if len(nsmap) == 0 and len(self.nsmaps) > 1:
# There are no new namespaces for this tag, but
# non-default namespaces are in play, so we need a
# separate tag stack to know when they end.
self.nsmaps.append(None)
if len(self.nsmaps) > 1:
# There are no new namespaces for this tag, but
# non-default namespaces are in play, so we need a
# separate tag stack to know when they end.
self.nsmaps.append(None)
elif len(nsmap) > 0:
# A new namespace mapping has come into play.
# First, Let the BeautifulSoup object know about it.
self._register_namespaces(nsmap)
# Then, add it to our running list of inverted namespace
# mappings.
self.nsmaps.append(_invert(nsmap))
inverted_nsmap = dict((value, key) for key, value in nsmap.items())
self.nsmaps.append(inverted_nsmap)
# Also treat the namespace mapping as a set of attributes on the
# tag, so we can recreate it later.
attrs = attrs.copy()
for prefix, namespace in list(nsmap.items()):
for prefix, namespace in nsmap.items():
attribute = NamespacedAttribute(
"xmlns", prefix, "http://www.w3.org/2000/xmlns/")
attrs[attribute] = namespace
@ -206,7 +168,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
# from lxml with namespaces attached to their names, and
# turn then into NamespacedAttribute objects.
new_attrs = {}
for attr, value in list(attrs.items()):
for attr, value in attrs.items():
namespace, attr = self._getNsTag(attr)
if namespace is None:
new_attrs[attr] = value
@ -266,7 +228,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
def test_fragment_to_document(self, fragment):
"""See `TreeBuilder`."""
return '<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment
return u'<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment
class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
@ -287,10 +249,10 @@ class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
self.parser = self.parser_for(encoding)
self.parser.feed(markup)
self.parser.close()
except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
except (UnicodeDecodeError, LookupError, etree.ParserError), e:
raise ParserRejectedMarkup(str(e))
def test_fragment_to_document(self, fragment):
"""See `TreeBuilder`."""
return '<html><body>%s</body></html>' % fragment
return u'<html><body>%s</body></html>' % fragment

@ -6,11 +6,12 @@ necessary. It is heavily based on code from Mark Pilgrim's Universal
Feed Parser. It works best on XML and HTML, but it does not rewrite the
XML or HTML to reflect a new encoding; that's the tree builder's job.
"""
# Use of this source code is governed by the MIT license.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
__license__ = "MIT"
import codecs
from html.entities import codepoint2name
from htmlentitydefs import codepoint2name
import re
import logging
import string
@ -45,9 +46,9 @@ except ImportError:
pass
xml_encoding_re = re.compile(
'^<\\?.*encoding=[\'"](.*?)[\'"].*\\?>'.encode(), re.I)
'^<\?.*encoding=[\'"](.*?)[\'"].*\?>'.encode(), re.I)
html_meta_re = re.compile(
'<\\s*meta[^>]+charset\\s*=\\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I)
'<\s*meta[^>]+charset\s*=\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I)
class EntitySubstitution(object):
@ -57,24 +58,15 @@ class EntitySubstitution(object):
lookup = {}
reverse_lookup = {}
characters_for_re = []
# &apos is an XHTML entity and an HTML 5, but not an HTML 4
# entity. We don't want to use it, but we want to recognize it on the way in.
#
# TODO: Ideally we would be able to recognize all HTML 5 named
# entities, but that's a little tricky.
extra = [(39, 'apos')]
for codepoint, name in list(codepoint2name.items()) + extra:
character = chr(codepoint)
if codepoint not in (34, 39):
for codepoint, name in list(codepoint2name.items()):
character = unichr(codepoint)
if codepoint != 34:
# There's no point in turning the quotation mark into
# &quot; or the single quote into &apos;, unless it
# happens within an attribute value, which is handled
# elsewhere.
# &quot;, unless it happens within an attribute value, which
# is handled elsewhere.
characters_for_re.append(character)
lookup[character] = name
# But we do want to recognize those entities on the way in and
# convert them to Unicode characters.
# But we do want to turn &quot; into the quotation mark.
reverse_lookup[name] = character
re_definition = "[%s]" % "".join(characters_for_re)
return lookup, reverse_lookup, re.compile(re_definition)
@ -90,7 +82,7 @@ class EntitySubstitution(object):
}
BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
"&(?!#\\d+;|#x[0-9a-fA-F]+;|\\w+;)"
"&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
")")
AMPERSAND_OR_BRACKET = re.compile("([<>&])")
@ -282,7 +274,7 @@ class EncodingDetector:
def strip_byte_order_mark(cls, data):
"""If a byte-order mark is present, strip it and return the encoding it implies."""
encoding = None
if isinstance(data, str):
if isinstance(data, unicode):
# Unicode data cannot have a byte-order mark.
return data, encoding
if (len(data) >= 4) and (data[:2] == b'\xfe\xff') \
@ -360,9 +352,9 @@ class UnicodeDammit:
markup, override_encodings, is_html, exclude_encodings)
# Short-circuit if the data is in Unicode to begin with.
if isinstance(markup, str) or markup == '':
if isinstance(markup, unicode) or markup == '':
self.markup = markup
self.unicode_markup = str(markup)
self.unicode_markup = unicode(markup)
self.original_encoding = None
return
@ -446,7 +438,7 @@ class UnicodeDammit:
def _to_unicode(self, data, encoding, errors="strict"):
'''Given a string and its encoding, decodes the string into Unicode.
%encoding is a string recognized by encodings.aliases'''
return str(data, encoding, errors)
return unicode(data, encoding, errors)
@property
def declared_html_encoding(self):

@ -1,11 +1,12 @@
"""Diagnostic functions, mainly for use when doing tech support."""
# Use of this source code is governed by the MIT license.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
__license__ = "MIT"
import cProfile
from io import StringIO
from html.parser import HTMLParser
from StringIO import StringIO
from HTMLParser import HTMLParser
import bs4
from bs4 import BeautifulSoup, __version__
from bs4.builder import builder_registry
@ -21,8 +22,8 @@ import cProfile
def diagnose(data):
"""Diagnostic suite for isolating common problems."""
print("Diagnostic running on Beautiful Soup %s" % __version__)
print("Python version %s" % sys.version)
print "Diagnostic running on Beautiful Soup %s" % __version__
print "Python version %s" % sys.version
basic_parsers = ["html.parser", "html5lib", "lxml"]
for name in basic_parsers:
@ -31,16 +32,16 @@ def diagnose(data):
break
else:
basic_parsers.remove(name)
print((
print (
"I noticed that %s is not installed. Installing it may help." %
name))
name)
if 'lxml' in basic_parsers:
basic_parsers.append("lxml-xml")
basic_parsers.append(["lxml", "xml"])
try:
from lxml import etree
print("Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION)))
except ImportError as e:
print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION))
except ImportError, e:
print (
"lxml is not installed or couldn't be imported.")
@ -48,43 +49,37 @@ def diagnose(data):
if 'html5lib' in basic_parsers:
try:
import html5lib
print("Found html5lib version %s" % html5lib.__version__)
except ImportError as e:
print "Found html5lib version %s" % html5lib.__version__
except ImportError, e:
print (
"html5lib is not installed or couldn't be imported.")
if hasattr(data, 'read'):
data = data.read()
elif os.path.exists(data):
print '"%s" looks like a filename. Reading data from the file.' % data
with open(data) as fp:
data = fp.read()
elif data.startswith("http:") or data.startswith("https:"):
print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data)
print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.")
print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data
print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup."
return
else:
try:
if os.path.exists(data):
print('"%s" looks like a filename. Reading data from the file.' % data)
with open(data) as fp:
data = fp.read()
except ValueError:
# This can happen on some platforms when the 'filename' is
# too long. Assume it's data and not a filename.
pass
print()
print
for parser in basic_parsers:
print("Trying to parse your markup with %s" % parser)
print "Trying to parse your markup with %s" % parser
success = False
try:
soup = BeautifulSoup(data, features=parser)
soup = BeautifulSoup(data, parser)
success = True
except Exception as e:
print("%s could not parse the markup." % parser)
except Exception, e:
print "%s could not parse the markup." % parser
traceback.print_exc()
if success:
print("Here's what %s did with the markup:" % parser)
print(soup.prettify())
print "Here's what %s did with the markup:" % parser
print soup.prettify()
print("-" * 80)
print "-" * 80
def lxml_trace(data, html=True, **kwargs):
"""Print out the lxml events that occur during parsing.
@ -94,7 +89,7 @@ def lxml_trace(data, html=True, **kwargs):
"""
from lxml import etree
for event, element in etree.iterparse(StringIO(data), html=html, **kwargs):
print(("%s, %4s, %s" % (event, element.tag, element.text)))
print("%s, %4s, %s" % (event, element.tag, element.text))
class AnnouncingParser(HTMLParser):
"""Announces HTMLParser parse events, without doing anything else."""
@ -154,7 +149,7 @@ def rword(length=5):
def rsentence(length=4):
"Generate a random sentence-like string."
return " ".join(rword(random.randint(4,9)) for i in list(range(length)))
return " ".join(rword(random.randint(4,9)) for i in range(length))
def rdoc(num_elements=1000):
"""Randomly generate an invalid HTML document."""
@ -176,9 +171,9 @@ def rdoc(num_elements=1000):
def benchmark_parsers(num_elements=100000):
"""Very basic head-to-head performance benchmark."""
print("Comparative parser benchmark on Beautiful Soup %s" % __version__)
print "Comparative parser benchmark on Beautiful Soup %s" % __version__
data = rdoc(num_elements)
print("Generated a large invalid HTML document (%d bytes)." % len(data))
print "Generated a large invalid HTML document (%d bytes)." % len(data)
for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
success = False
@ -187,24 +182,24 @@ def benchmark_parsers(num_elements=100000):
soup = BeautifulSoup(data, parser)
b = time.time()
success = True
except Exception as e:
print("%s could not parse the markup." % parser)
except Exception, e:
print "%s could not parse the markup." % parser
traceback.print_exc()
if success:
print("BS4+%s parsed the markup in %.2fs." % (parser, b-a))
print "BS4+%s parsed the markup in %.2fs." % (parser, b-a)
from lxml import etree
a = time.time()
etree.HTML(data)
b = time.time()
print("Raw lxml parsed the markup in %.2fs." % (b-a))
print "Raw lxml parsed the markup in %.2fs." % (b-a)
import html5lib
parser = html5lib.HTMLParser()
a = time.time()
parser.parse(data)
b = time.time()
print("Raw html5lib parsed the markup in %.2fs." % (b-a))
print "Raw html5lib parsed the markup in %.2fs." % (b-a)
def profile(num_elements=100000, parser="lxml"):

File diff suppressed because it is too large Load Diff

@ -1,99 +0,0 @@
from bs4.dammit import EntitySubstitution
class Formatter(EntitySubstitution):
"""Describes a strategy to use when outputting a parse tree to a string.
Some parts of this strategy come from the distinction between
HTML4, HTML5, and XML. Others are configurable by the user.
"""
# Registries of XML and HTML formatters.
XML_FORMATTERS = {}
HTML_FORMATTERS = {}
HTML = 'html'
XML = 'xml'
HTML_DEFAULTS = dict(
cdata_containing_tags=set(["script", "style"]),
)
def _default(self, language, value, kwarg):
if value is not None:
return value
if language == self.XML:
return set()
return self.HTML_DEFAULTS[kwarg]
def __init__(
self, language=None, entity_substitution=None,
void_element_close_prefix='/', cdata_containing_tags=None,
):
"""
:param void_element_close_prefix: By default, represent void
elements as <tag/> rather than <tag>
"""
self.language = language
self.entity_substitution = entity_substitution
self.void_element_close_prefix = void_element_close_prefix
self.cdata_containing_tags = self._default(
language, cdata_containing_tags, 'cdata_containing_tags'
)
def substitute(self, ns):
"""Process a string that needs to undergo entity substitution."""
if not self.entity_substitution:
return ns
from .element import NavigableString
if (isinstance(ns, NavigableString)
and ns.parent is not None
and ns.parent.name in self.cdata_containing_tags):
# Do nothing.
return ns
# Substitute.
return self.entity_substitution(ns)
def attribute_value(self, value):
"""Process the value of an attribute."""
return self.substitute(value)
def attributes(self, tag):
"""Reorder a tag's attributes however you want."""
return sorted(tag.attrs.items())
class HTMLFormatter(Formatter):
REGISTRY = {}
def __init__(self, *args, **kwargs):
return super(HTMLFormatter, self).__init__(self.HTML, *args, **kwargs)
class XMLFormatter(Formatter):
REGISTRY = {}
def __init__(self, *args, **kwargs):
return super(XMLFormatter, self).__init__(self.XML, *args, **kwargs)
# Set up aliases for the default formatters.
HTMLFormatter.REGISTRY['html'] = HTMLFormatter(
entity_substitution=EntitySubstitution.substitute_html
)
HTMLFormatter.REGISTRY["html5"] = HTMLFormatter(
entity_substitution=EntitySubstitution.substitute_html,
void_element_close_prefix = None
)
HTMLFormatter.REGISTRY["minimal"] = HTMLFormatter(
entity_substitution=EntitySubstitution.substitute_xml
)
HTMLFormatter.REGISTRY[None] = HTMLFormatter(
entity_substitution=None
)
XMLFormatter.REGISTRY["html"] = XMLFormatter(
entity_substitution=EntitySubstitution.substitute_html
)
XMLFormatter.REGISTRY["minimal"] = XMLFormatter(
entity_substitution=EntitySubstitution.substitute_xml
)
XMLFormatter.REGISTRY[None] = Formatter(
Formatter(Formatter.XML, entity_substitution=None)
)

@ -1,7 +1,7 @@
# encoding: utf-8
"""Helper classes for tests."""
# Use of this source code is governed by the MIT license.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
__license__ = "MIT"
import pickle
@ -16,66 +16,29 @@ from bs4.element import (
ContentMetaAttributeValue,
Doctype,
SoupStrainer,
Tag
)
from bs4.builder import HTMLParserTreeBuilder
default_builder = HTMLParserTreeBuilder
BAD_DOCUMENT = """A bare string
<!DOCTYPE xsl:stylesheet SYSTEM "htmlent.dtd">
<!DOCTYPE xsl:stylesheet PUBLIC "htmlent.dtd">
<div><![CDATA[A CDATA section where it doesn't belong]]></div>
<div><svg><![CDATA[HTML5 does allow CDATA sections in SVG]]></svg></div>
<div>A <meta> tag</div>
<div>A <br> tag that supposedly has contents.</br></div>
<div>AT&T</div>
<div><textarea>Within a textarea, markup like <b> tags and <&<&amp; should be treated as literal</textarea></div>
<div><script>if (i < 2) { alert("<b>Markup within script tags should be treated as literal.</b>"); }</script></div>
<div>This numeric entity is missing the final semicolon: <x t="pi&#241ata"></div>
<div><a href="http://example.com/</a> that attribute value never got closed</div>
<div><a href="foo</a>, </a><a href="bar">that attribute value was closed by the subsequent tag</a></div>
<! This document starts with a bogus declaration ><div>a</div>
<div>This document contains <!an incomplete declaration <div>(do you see it?)</div>
<div>This document ends with <!an incomplete declaration
<div><a style={height:21px;}>That attribute value was bogus</a></div>
<! DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">The doctype is invalid because it contains extra whitespace
<div><table><td nowrap>That boolean attribute had no value</td></table></div>
<div>Here's a nonexistent entity: &#foo; (do you see it?)</div>
<div>This document ends before the entity finishes: &gt
<div><p>Paragraphs shouldn't contain block display elements, but this one does: <dl><dt>you see?</dt></p>
<b b="20" a="1" b="10" a="2" a="3" a="4">Multiple values for the same attribute.</b>
<div><table><tr><td>Here's a table</td></tr></table></div>
<div><table id="1"><tr><td>Here's a nested table:<table id="2"><tr><td>foo</td></tr></table></td></div>
<div>This tag contains nothing but whitespace: <b> </b></div>
<div><blockquote><p><b>This p tag is cut off by</blockquote></p>the end of the blockquote tag</div>
<div><table><div>This table contains bare markup</div></table></div>
<div><div id="1">\n <a href="link1">This link is never closed.\n</div>\n<div id="2">\n <div id="3">\n <a href="link2">This link is closed.</a>\n </div>\n</div></div>
<div>This document contains a <!DOCTYPE surprise>surprise doctype</div>
<div><a><B><Cd><EFG>Mixed case tags are folded to lowercase</efg></CD></b></A></div>
<div><our\u2603>Tag name contains Unicode characters</our\u2603></div>
<div><a \u2603="snowman">Attribute name contains Unicode characters</a></div>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
"""
class SoupTest(unittest.TestCase):
@property
def default_builder(self):
return default_builder
return default_builder()
def soup(self, markup, **kwargs):
"""Build a Beautiful Soup object from markup."""
builder = kwargs.pop('builder', self.default_builder)
return BeautifulSoup(markup, builder=builder, **kwargs)
def document_for(self, markup, **kwargs):
def document_for(self, markup):
"""Turn an HTML fragment into a document.
The details depend on the builder.
"""
return self.default_builder(**kwargs).test_fragment_to_document(markup)
return self.default_builder.test_fragment_to_document(markup)
def assertSoupEquals(self, to_parse, compare_parsed_to=None):
builder = self.default_builder
@ -96,121 +59,6 @@ class SoupTest(unittest.TestCase):
self.assertEqual(earlier, e.previous_element)
earlier = e
def linkage_validator(self, el, _recursive_call=False):
"""Ensure proper linkage throughout the document."""
descendant = None
# Document element should have no previous element or previous sibling.
# It also shouldn't have a next sibling.
if el.parent is None:
assert el.previous_element is None,\
"Bad previous_element\nNODE: {}\nPREV: {}\nEXPECTED: {}".format(
el, el.previous_element, None
)
assert el.previous_sibling is None,\
"Bad previous_sibling\nNODE: {}\nPREV: {}\nEXPECTED: {}".format(
el, el.previous_sibling, None
)
assert el.next_sibling is None,\
"Bad next_sibling\nNODE: {}\nNEXT: {}\nEXPECTED: {}".format(
el, el.next_sibling, None
)
idx = 0
child = None
last_child = None
last_idx = len(el.contents) - 1
for child in el.contents:
descendant = None
# Parent should link next element to their first child
# That child should have no previous sibling
if idx == 0:
if el.parent is not None:
assert el.next_element is child,\
"Bad next_element\nNODE: {}\nNEXT: {}\nEXPECTED: {}".format(
el, el.next_element, child
)
assert child.previous_element is el,\
"Bad previous_element\nNODE: {}\nPREV: {}\nEXPECTED: {}".format(
child, child.previous_element, el
)
assert child.previous_sibling is None,\
"Bad previous_sibling\nNODE: {}\nPREV {}\nEXPECTED: {}".format(
child, child.previous_sibling, None
)
# If not the first child, previous index should link as sibling to this index
# Previous element should match the last index or the last bubbled up descendant
else:
assert child.previous_sibling is el.contents[idx - 1],\
"Bad previous_sibling\nNODE: {}\nPREV {}\nEXPECTED {}".format(
child, child.previous_sibling, el.contents[idx - 1]
)
assert el.contents[idx - 1].next_sibling is child,\
"Bad next_sibling\nNODE: {}\nNEXT {}\nEXPECTED {}".format(
el.contents[idx - 1], el.contents[idx - 1].next_sibling, child
)
if last_child is not None:
assert child.previous_element is last_child,\
"Bad previous_element\nNODE: {}\nPREV {}\nEXPECTED {}\nCONTENTS {}".format(
child, child.previous_element, last_child, child.parent.contents
)
assert last_child.next_element is child,\
"Bad next_element\nNODE: {}\nNEXT {}\nEXPECTED {}".format(
last_child, last_child.next_element, child
)
if isinstance(child, Tag) and child.contents:
descendant = self.linkage_validator(child, True)
# A bubbled up descendant should have no next siblings
assert descendant.next_sibling is None,\
"Bad next_sibling\nNODE: {}\nNEXT {}\nEXPECTED {}".format(
descendant, descendant.next_sibling, None
)
# Mark last child as either the bubbled up descendant or the current child
if descendant is not None:
last_child = descendant
else:
last_child = child
# If last child, there are non next siblings
if idx == last_idx:
assert child.next_sibling is None,\
"Bad next_sibling\nNODE: {}\nNEXT {}\nEXPECTED {}".format(
child, child.next_sibling, None
)
idx += 1
child = descendant if descendant is not None else child
if child is None:
child = el
if not _recursive_call and child is not None:
target = el
while True:
if target is None:
assert child.next_element is None, \
"Bad next_element\nNODE: {}\nNEXT {}\nEXPECTED {}".format(
child, child.next_element, None
)
break
elif target.next_sibling is not None:
assert child.next_element is target.next_sibling, \
"Bad next_element\nNODE: {}\nNEXT {}\nEXPECTED {}".format(
child, child.next_element, target.next_sibling
)
break
target = target.parent
# We are done, so nothing to return
return None
else:
# Return the child to the recursive caller
return child
class HTMLTreeBuilderSmokeTest(object):
"""A basic test of a treebuilder's competence.
@ -232,7 +80,7 @@ class HTMLTreeBuilderSmokeTest(object):
soup = self.soup("")
new_tag = soup.new_tag(name)
self.assertEqual(True, new_tag.is_empty_element)
def test_pickle_and_unpickle_identity(self):
# Pickling a tree, then unpickling it, yields a tree identical
# to the original.
@ -302,20 +150,12 @@ class HTMLTreeBuilderSmokeTest(object):
soup.encode("utf-8").replace(b"\n", b""),
markup.replace(b"\n", b""))
def test_namespaced_html(self):
"""When a namespaced XML document is parsed as HTML it should
be treated as HTML with weird tag names.
"""
markup = b"""<ns1:foo>content</ns1:foo><ns1:foo/><ns2:foo/>"""
soup = self.soup(markup)
self.assertEqual(2, len(soup.find_all("ns1:foo")))
def test_processing_instruction(self):
# We test both Unicode and bytestring to verify that
# process_markup correctly sets processing_instruction_class
# even when the markup is already Unicode and there is no
# need to process anything.
markup = """<?PITarget PIContent?>"""
markup = u"""<?PITarget PIContent?>"""
soup = self.soup(markup)
self.assertEqual(markup, soup.decode())
@ -452,18 +292,6 @@ Hello, world!
"<tbody><tr><td>Bar</td></tr></tbody>"
"<tfoot><tr><td>Baz</td></tr></tfoot></table>")
def test_multivalued_attribute_with_whitespace(self):
# Whitespace separating the values of a multi-valued attribute
# should be ignored.
markup = '<div class=" foo bar "></a>'
soup = self.soup(markup)
self.assertEqual(['foo', 'bar'], soup.div['class'])
# If you search by the literal name of the class it's like the whitespace
# wasn't there.
self.assertEqual(soup.div, soup.find('div', class_="foo bar"))
def test_deeply_nested_multivalued_attribute(self):
# html5lib can set the attributes of the same tag many times
# as it rearranges the tree. This has caused problems with
@ -483,41 +311,15 @@ Hello, world!
def test_angle_brackets_in_attribute_values_are_escaped(self):
self.assertSoupEquals('<a b="<a>"></a>', '<a b="&lt;a&gt;"></a>')
def test_strings_resembling_character_entity_references(self):
# "&T" and "&p" look like incomplete character entities, but they are
# not.
self.assertSoupEquals(
"<p>&bull; AT&T is in the s&p 500</p>",
"<p>\u2022 AT&amp;T is in the s&amp;p 500</p>"
)
def test_apos_entity(self):
self.assertSoupEquals(
"<p>Bob&apos;s Bar</p>",
"<p>Bob's Bar</p>",
)
def test_entities_in_foreign_document_encoding(self):
# &#147; and &#148; are invalid numeric entities referencing
# Windows-1252 characters. &#45; references a character common
# to Windows-1252 and Unicode, and &#9731; references a
# character only found in Unicode.
#
# All of these entities should be converted to Unicode
# characters.
markup = "<p>&#147;Hello&#148; &#45;&#9731;</p>"
soup = self.soup(markup)
self.assertEqual("“Hello” -☃", soup.p.string)
def test_entities_in_attributes_converted_to_unicode(self):
expect = '<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>'
expect = u'<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>'
self.assertSoupEquals('<p id="pi&#241;ata"></p>', expect)
self.assertSoupEquals('<p id="pi&#xf1;ata"></p>', expect)
self.assertSoupEquals('<p id="pi&#Xf1;ata"></p>', expect)
self.assertSoupEquals('<p id="pi&ntilde;ata"></p>', expect)
def test_entities_in_text_converted_to_unicode(self):
expect = '<p>pi\N{LATIN SMALL LETTER N WITH TILDE}ata</p>'
expect = u'<p>pi\N{LATIN SMALL LETTER N WITH TILDE}ata</p>'
self.assertSoupEquals("<p>pi&#241;ata</p>", expect)
self.assertSoupEquals("<p>pi&#xf1;ata</p>", expect)
self.assertSoupEquals("<p>pi&#Xf1;ata</p>", expect)
@ -528,11 +330,11 @@ Hello, world!
'<p>I said "good day!"</p>')
def test_out_of_range_entity(self):
expect = "\N{REPLACEMENT CHARACTER}"
expect = u"\N{REPLACEMENT CHARACTER}"
self.assertSoupEquals("&#10000000000000;", expect)
self.assertSoupEquals("&#x10000000000000;", expect)
self.assertSoupEquals("&#1000000000;", expect)
def test_multipart_strings(self):
"Mostly to prevent a recurrence of a bug in the html5lib treebuilder."
soup = self.soup("<html><h2>\nfoo</h2><p></p></html>")
@ -606,9 +408,9 @@ Hello, world!
# A seemingly innocuous document... but it's in Unicode! And
# it contains characters that can't be represented in the
# encoding found in the declaration! The horror!
markup = '<html><head><meta encoding="euc-jp"></head><body>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</body>'
markup = u'<html><head><meta encoding="euc-jp"></head><body>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</body>'
soup = self.soup(markup)
self.assertEqual('Sacr\xe9 bleu!', soup.body.string)
self.assertEqual(u'Sacr\xe9 bleu!', soup.body.string)
def test_soupstrainer(self):
"""Parsers should be able to work with SoupStrainers."""
@ -648,7 +450,7 @@ Hello, world!
# Both XML and HTML entities are converted to Unicode characters
# during parsing.
text = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>"
expected = "<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>"
expected = u"<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>"
self.assertSoupEquals(text, expected)
def test_smart_quotes_converted_on_the_way_in(self):
@ -658,15 +460,15 @@ Hello, world!
soup = self.soup(quote)
self.assertEqual(
soup.p.string,
"\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}")
u"\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}")
def test_non_breaking_spaces_converted_on_the_way_in(self):
soup = self.soup("<a>&nbsp;&nbsp;</a>")
self.assertEqual(soup.a.string, "\N{NO-BREAK SPACE}" * 2)
self.assertEqual(soup.a.string, u"\N{NO-BREAK SPACE}" * 2)
def test_entities_converted_on_the_way_out(self):
text = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>"
expected = "<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>".encode("utf-8")
expected = u"<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>".encode("utf-8")
soup = self.soup(text)
self.assertEqual(soup.p.encode("utf-8"), expected)
@ -675,7 +477,7 @@ Hello, world!
# easy-to-understand document.
# Here it is in Unicode. Note that it claims to be in ISO-Latin-1.
unicode_html = '<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>'
unicode_html = u'<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>'
# That's because we're going to encode it into ISO-Latin-1, and use
# that to test.
@ -784,13 +586,6 @@ Hello, world!
data.a['foo'] = 'bar'
self.assertEqual('<a foo="bar">text</a>', data.a.decode())
def test_worst_case(self):
"""Test the worst case (currently) for linking issues."""
soup = self.soup(BAD_DOCUMENT)
self.linkage_validator(soup)
class XMLTreeBuilderSmokeTest(object):
def test_pickle_and_unpickle_identity(self):
@ -829,17 +624,6 @@ class XMLTreeBuilderSmokeTest(object):
self.assertEqual(
soup.encode("utf-8"), markup)
def test_nested_namespaces(self):
doc = b"""<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<parent xmlns="http://ns1/">
<child xmlns="http://ns2/" xmlns:ns3="http://ns3/">
<grandchild ns3:attr="value" xmlns="http://ns4/"/>
</child>
</parent>"""
soup = self.soup(doc)
self.assertEqual(doc, soup.encode())
def test_formatter_processes_script_tag_for_xml_documents(self):
doc = """
<script type="text/javascript">
@ -853,15 +637,15 @@ class XMLTreeBuilderSmokeTest(object):
self.assertTrue(b"&lt; &lt; hey &gt; &gt;" in encoded)
def test_can_parse_unicode_document(self):
markup = '<?xml version="1.0" encoding="euc-jp"><root>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</root>'
markup = u'<?xml version="1.0" encoding="euc-jp"><root>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</root>'
soup = self.soup(markup)
self.assertEqual('Sacr\xe9 bleu!', soup.root.string)
self.assertEqual(u'Sacr\xe9 bleu!', soup.root.string)
def test_popping_namespaced_tag(self):
markup = '<rss xmlns:dc="foo"><dc:creator>b</dc:creator><dc:date>2012-07-02T20:33:42Z</dc:date><dc:rights>c</dc:rights><image>d</image></rss>'
soup = self.soup(markup)
self.assertEqual(
str(soup.rss), markup)
unicode(soup.rss), markup)
def test_docstring_includes_correct_encoding(self):
soup = self.soup("<root/>")
@ -892,17 +676,17 @@ class XMLTreeBuilderSmokeTest(object):
def test_closing_namespaced_tag(self):
markup = '<p xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:date>20010504</dc:date></p>'
soup = self.soup(markup)
self.assertEqual(str(soup.p), markup)
self.assertEqual(unicode(soup.p), markup)
def test_namespaced_attributes(self):
markup = '<foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><bar xsi:schemaLocation="http://www.example.com"/></foo>'
soup = self.soup(markup)
self.assertEqual(str(soup.foo), markup)
self.assertEqual(unicode(soup.foo), markup)
def test_namespaced_attributes_xml_namespace(self):
markup = '<foo xml:lang="fr">bar</foo>'
soup = self.soup(markup)
self.assertEqual(str(soup.foo), markup)
self.assertEqual(unicode(soup.foo), markup)
def test_find_by_prefixed_name(self):
doc = """<?xml version="1.0" encoding="utf-8"?>
@ -937,12 +721,6 @@ class XMLTreeBuilderSmokeTest(object):
# The two tags have the same namespace prefix.
self.assertEqual(tag.prefix, duplicate.prefix)
def test_worst_case(self):
"""Test the worst case (currently) for linking issues."""
soup = self.soup(BAD_DOCUMENT)
self.linkage_validator(soup)
class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest):
"""Smoke test for a tree builder that supports HTML5."""

@ -5,7 +5,7 @@ import warnings
try:
from bs4.builder import HTML5TreeBuilder
HTML5LIB_PRESENT = True
except ImportError as e:
except ImportError, e:
HTML5LIB_PRESENT = False
from bs4.element import SoupStrainer
from bs4.testing import (
@ -22,7 +22,7 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
@property
def default_builder(self):
return HTML5TreeBuilder
return HTML5TreeBuilder()
def test_soupstrainer(self):
# The html5lib tree builder does not support SoupStrainers.
@ -74,14 +74,14 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
def test_reparented_markup(self):
markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>'
soup = self.soup(markup)
self.assertEqual("<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p></body>", soup.body.decode())
self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p></body>", soup.body.decode())
self.assertEqual(2, len(soup.find_all('p')))
def test_reparented_markup_ends_with_whitespace(self):
markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>\n'
soup = self.soup(markup)
self.assertEqual("<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode())
self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode())
self.assertEqual(2, len(soup.find_all('p')))
def test_reparented_markup_containing_identical_whitespace_nodes(self):
@ -127,44 +127,4 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
def test_foster_parenting(self):
markup = b"""<table><td></tbody>A"""
soup = self.soup(markup)
self.assertEqual("<body>A<table><tbody><tr><td></td></tr></tbody></table></body>", soup.body.decode())
def test_extraction(self):
"""
Test that extraction does not destroy the tree.
https://bugs.launchpad.net/beautifulsoup/+bug/1782928
"""
markup = """
<html><head></head>
<style>
</style><script></script><body><p>hello</p></body></html>
"""
soup = self.soup(markup)
[s.extract() for s in soup('script')]
[s.extract() for s in soup('style')]
self.assertEqual(len(soup.find_all("p")), 1)
def test_empty_comment(self):
"""
Test that empty comment does not break structure.
https://bugs.launchpad.net/beautifulsoup/+bug/1806598
"""
markup = """
<html>
<body>
<form>
<!----><input type="text">
</form>
</body>
</html>
"""
soup = self.soup(markup)
inputs = []
for form in soup.find_all('form'):
inputs.extend(form.find_all('input'))
self.assertEqual(len(inputs), 1)
self.assertEqual(u"<body>A<table><tbody><tr><td></td></tr></tbody></table></body>", soup.body.decode())

@ -5,11 +5,12 @@ from pdb import set_trace
import pickle
from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest
from bs4.builder import HTMLParserTreeBuilder
from bs4.builder._htmlparser import BeautifulSoupHTMLParser
class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
default_builder = HTMLParserTreeBuilder
@property
def default_builder(self):
return HTMLParserTreeBuilder()
def test_namespaced_system_doctype(self):
# html.parser can't handle namespaced doctypes, so skip this one.
@ -31,17 +32,3 @@ class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
def test_redundant_empty_element_closing_tags(self):
self.assertSoupEquals('<br></br><br></br><br></br>', "<br/><br/><br/>")
self.assertSoupEquals('</br></br></br>', "")
def test_empty_element(self):
# This verifies that any buffered data present when the parser
# finishes working is handled.
self.assertSoupEquals("foo &# bar", "foo &amp;# bar")
class TestHTMLParserSubclass(SoupTest):
def test_error(self):
"""Verify that our HTMLParser subclass implements error() in a way
that doesn't cause a crash.
"""
parser = BeautifulSoupHTMLParser()
parser.error("don't crash")

@ -7,7 +7,7 @@ try:
import lxml.etree
LXML_PRESENT = True
LXML_VERSION = lxml.etree.LXML_VERSION
except ImportError as e:
except ImportError, e:
LXML_PRESENT = False
LXML_VERSION = (0,)
@ -36,7 +36,7 @@ class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
@property
def default_builder(self):
return LXMLTreeBuilder
return LXMLTreeBuilder()
def test_out_of_range_entity(self):
self.assertSoupEquals(
@ -46,12 +46,6 @@ class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
self.assertSoupEquals(
"<p>foo&#1000000000;bar</p>", "<p>foobar</p>")
def test_entities_in_foreign_document_encoding(self):
# We can't implement this case correctly because by the time we
# hear about markup like "&#147;", it's been (incorrectly) converted into
# a string like u'\x93'
pass
# In lxml < 2.3.5, an empty doctype causes a segfault. Skip this
# test if an old version of lxml is installed.
@ -68,7 +62,7 @@ class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
# if one is installed.
with warnings.catch_warnings(record=True) as w:
soup = BeautifulStoneSoup("<b />")
self.assertEqual("<b/>", str(soup.b))
self.assertEqual(u"<b/>", unicode(soup.b))
self.assertTrue("BeautifulStoneSoup class is deprecated" in str(w[0].message))
@skipIf(
@ -79,22 +73,4 @@ class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest):
@property
def default_builder(self):
return LXMLTreeBuilderForXML
def test_namespace_indexing(self):
# We should not track un-prefixed namespaces as we can only hold one
# and it will be recognized as the default namespace by soupsieve,
# which may be confusing in some situations. When no namespace is provided
# for a selector, the default namespace (if defined) is assumed.
soup = self.soup(
'<?xml version="1.1"?>\n'
'<root>'
'<tag xmlns="http://unprefixed-namespace.com">content</tag>'
'<prefix:tag xmlns:prefix="http://prefixed-namespace.com">content</tag>'
'</root>'
)
self.assertEqual(
soup._namespaces,
{'xml': 'http://www.w3.org/XML/1998/namespace', 'prefix': 'http://prefixed-namespace.com'}
)
return LXMLTreeBuilderForXML()

@ -24,7 +24,6 @@ from bs4.dammit import (
EncodingDetector,
)
from bs4.testing import (
default_builder,
SoupTest,
skipIf,
)
@ -33,7 +32,7 @@ import warnings
try:
from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
LXML_PRESENT = True
except ImportError as e:
except ImportError, e:
LXML_PRESENT = False
PYTHON_3_PRE_3_2 = (sys.version_info[0] == 3 and sys.version_info < (3,2))
@ -41,86 +40,21 @@ PYTHON_3_PRE_3_2 = (sys.version_info[0] == 3 and sys.version_info < (3,2))
class TestConstructor(SoupTest):
def test_short_unicode_input(self):
data = "<h1>éé</h1>"
data = u"<h1>éé</h1>"
soup = self.soup(data)
self.assertEqual("éé", soup.h1.string)
self.assertEqual(u"éé", soup.h1.string)
def test_embedded_null(self):
data = "<h1>foo\0bar</h1>"
data = u"<h1>foo\0bar</h1>"
soup = self.soup(data)
self.assertEqual("foo\0bar", soup.h1.string)
self.assertEqual(u"foo\0bar", soup.h1.string)
def test_exclude_encodings(self):
utf8_data = "Räksmörgås".encode("utf-8")
utf8_data = u"Räksmörgås".encode("utf-8")
soup = self.soup(utf8_data, exclude_encodings=["utf-8"])
self.assertEqual("windows-1252", soup.original_encoding)
def test_custom_builder_class(self):
# Verify that you can pass in a custom Builder class and
# it'll be instantiated with the appropriate keyword arguments.
class Mock(object):
def __init__(self, **kwargs):
self.called_with = kwargs
self.is_xml = True
def initialize_soup(self, soup):
pass
def prepare_markup(self, *args, **kwargs):
return ''
kwargs = dict(
var="value",
# This is a deprecated BS3-era keyword argument, which
# will be stripped out.
convertEntities=True,
)
with warnings.catch_warnings(record=True):
soup = BeautifulSoup('', builder=Mock, **kwargs)
assert isinstance(soup.builder, Mock)
self.assertEqual(dict(var="value"), soup.builder.called_with)
# You can also instantiate the TreeBuilder yourself. In this
# case, that specific object is used and any keyword arguments
# to the BeautifulSoup constructor are ignored.
builder = Mock(**kwargs)
with warnings.catch_warnings(record=True) as w:
soup = BeautifulSoup(
'', builder=builder, ignored_value=True,
)
msg = str(w[0].message)
assert msg.startswith("Keyword arguments to the BeautifulSoup constructor will be ignored.")
self.assertEqual(builder, soup.builder)
self.assertEqual(kwargs, builder.called_with)
def test_cdata_list_attributes(self):
# Most attribute values are represented as scalars, but the
# HTML standard says that some attributes, like 'class' have
# space-separated lists as values.
markup = '<a id=" an id " class=" a class "></a>'
soup = self.soup(markup)
# Note that the spaces are stripped for 'class' but not for 'id'.
a = soup.a
self.assertEqual(" an id ", a['id'])
self.assertEqual(["a", "class"], a['class'])
# TreeBuilder takes an argument called 'mutli_valued_attributes' which lets
# you customize or disable this. As always, you can customize the TreeBuilder
# by passing in a keyword argument to the BeautifulSoup constructor.
soup = self.soup(markup, builder=default_builder, multi_valued_attributes=None)
self.assertEqual(" a class ", soup.a['class'])
# Here are two ways of saying that `id` is a multi-valued
# attribute in this context, but 'class' is not.
for switcheroo in ({'*': 'id'}, {'a': 'id'}):
with warnings.catch_warnings(record=True) as w:
# This will create a warning about not explicitly
# specifying a parser, but we'll ignore it.
soup = self.soup(markup, builder=None, multi_valued_attributes=switcheroo)
a = soup.a
self.assertEqual(["an", "id"], a['id'])
self.assertEqual(" a class ", a['class'])
class TestWarnings(SoupTest):
def _no_parser_specified(self, s, is_there=True):
@ -195,7 +129,7 @@ class TestWarnings(SoupTest):
with warnings.catch_warnings(record=True) as warning_list:
# note - this url must differ from the bytes one otherwise
# python's warnings system swallows the second warning
soup = self.soup("http://www.crummyunicode.com/")
soup = self.soup(u"http://www.crummyunicode.com/")
self.assertTrue(any("looks like a URL" in str(w.message)
for w in warning_list))
@ -207,7 +141,7 @@ class TestWarnings(SoupTest):
def test_url_warning_with_unicode_and_space(self):
with warnings.catch_warnings(record=True) as warning_list:
soup = self.soup("http://www.crummyuncode.com/ is great")
soup = self.soup(u"http://www.crummyuncode.com/ is great")
self.assertFalse(any("looks like a URL" in str(w.message)
for w in warning_list))
@ -229,9 +163,9 @@ class TestEntitySubstitution(unittest.TestCase):
def test_simple_html_substitution(self):
# Unicode characters corresponding to named HTML entites
# are substituted, and no others.
s = "foo\u2200\N{SNOWMAN}\u00f5bar"
s = u"foo\u2200\N{SNOWMAN}\u00f5bar"
self.assertEqual(self.sub.substitute_html(s),
"foo&forall;\N{SNOWMAN}&otilde;bar")
u"foo&forall;\N{SNOWMAN}&otilde;bar")
def test_smart_quote_substitution(self):
# MS smart quotes are a common source of frustration, so we
@ -283,7 +217,7 @@ class TestEntitySubstitution(unittest.TestCase):
self.assertEqual(
self.sub.substitute_xml_containing_entities("&Aacute;T&T"),
"&Aacute;T&amp;T")
def test_quotes_not_html_substituted(self):
"""There's no need to do this except inside attribute values."""
text = 'Bob\'s "bar"'
@ -296,7 +230,7 @@ class TestEncodingConversion(SoupTest):
def setUp(self):
super(TestEncodingConversion, self).setUp()
self.unicode_data = '<html><head><meta charset="utf-8"/></head><body><foo>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</foo></body></html>'
self.unicode_data = u'<html><head><meta charset="utf-8"/></head><body><foo>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</foo></body></html>'
self.utf8_data = self.unicode_data.encode("utf-8")
# Just so you know what it looks like.
self.assertEqual(
@ -316,7 +250,7 @@ class TestEncodingConversion(SoupTest):
ascii = b"<foo>a</foo>"
soup_from_ascii = self.soup(ascii)
unicode_output = soup_from_ascii.decode()
self.assertTrue(isinstance(unicode_output, str))
self.assertTrue(isinstance(unicode_output, unicode))
self.assertEqual(unicode_output, self.document_for(ascii.decode()))
self.assertEqual(soup_from_ascii.original_encoding.lower(), "utf-8")
finally:
@ -328,7 +262,7 @@ class TestEncodingConversion(SoupTest):
# is not set.
soup_from_unicode = self.soup(self.unicode_data)
self.assertEqual(soup_from_unicode.decode(), self.unicode_data)
self.assertEqual(soup_from_unicode.foo.string, 'Sacr\xe9 bleu!')
self.assertEqual(soup_from_unicode.foo.string, u'Sacr\xe9 bleu!')
self.assertEqual(soup_from_unicode.original_encoding, None)
def test_utf8_in_unicode_out(self):
@ -336,7 +270,7 @@ class TestEncodingConversion(SoupTest):
# attribute is set.
soup_from_utf8 = self.soup(self.utf8_data)
self.assertEqual(soup_from_utf8.decode(), self.unicode_data)
self.assertEqual(soup_from_utf8.foo.string, 'Sacr\xe9 bleu!')
self.assertEqual(soup_from_utf8.foo.string, u'Sacr\xe9 bleu!')
def test_utf8_out(self):
# The internal data structures can be encoded as UTF-8.
@ -347,14 +281,14 @@ class TestEncodingConversion(SoupTest):
PYTHON_3_PRE_3_2,
"Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.")
def test_attribute_name_containing_unicode_characters(self):
markup = '<div><a \N{SNOWMAN}="snowman"></a></div>'
markup = u'<div><a \N{SNOWMAN}="snowman"></a></div>'
self.assertEqual(self.soup(markup).div.encode("utf8"), markup.encode("utf8"))
class TestUnicodeDammit(unittest.TestCase):
"""Standalone tests of UnicodeDammit."""
def test_unicode_input(self):
markup = "I'm already Unicode! \N{SNOWMAN}"
markup = u"I'm already Unicode! \N{SNOWMAN}"
dammit = UnicodeDammit(markup)
self.assertEqual(dammit.unicode_markup, markup)
@ -362,7 +296,7 @@ class TestUnicodeDammit(unittest.TestCase):
markup = b"<foo>\x91\x92\x93\x94</foo>"
dammit = UnicodeDammit(markup)
self.assertEqual(
dammit.unicode_markup, "<foo>\u2018\u2019\u201c\u201d</foo>")
dammit.unicode_markup, u"<foo>\u2018\u2019\u201c\u201d</foo>")
def test_smart_quotes_to_xml_entities(self):
markup = b"<foo>\x91\x92\x93\x94</foo>"
@ -386,14 +320,14 @@ class TestUnicodeDammit(unittest.TestCase):
utf8 = b"Sacr\xc3\xa9 bleu! \xe2\x98\x83"
dammit = UnicodeDammit(utf8)
self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
self.assertEqual(dammit.unicode_markup, 'Sacr\xe9 bleu! \N{SNOWMAN}')
self.assertEqual(dammit.unicode_markup, u'Sacr\xe9 bleu! \N{SNOWMAN}')
def test_convert_hebrew(self):
hebrew = b"\xed\xe5\xec\xf9"
dammit = UnicodeDammit(hebrew, ["iso-8859-8"])
self.assertEqual(dammit.original_encoding.lower(), 'iso-8859-8')
self.assertEqual(dammit.unicode_markup, '\u05dd\u05d5\u05dc\u05e9')
self.assertEqual(dammit.unicode_markup, u'\u05dd\u05d5\u05dc\u05e9')
def test_dont_see_smart_quotes_where_there_are_none(self):
utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
@ -402,19 +336,19 @@ class TestUnicodeDammit(unittest.TestCase):
self.assertEqual(dammit.unicode_markup.encode("utf-8"), utf_8)
def test_ignore_inappropriate_codecs(self):
utf8_data = "Räksmörgås".encode("utf-8")
utf8_data = u"Räksmörgås".encode("utf-8")
dammit = UnicodeDammit(utf8_data, ["iso-8859-8"])
self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
def test_ignore_invalid_codecs(self):
utf8_data = "Räksmörgås".encode("utf-8")
utf8_data = u"Räksmörgås".encode("utf-8")
for bad_encoding in ['.utf8', '...', 'utF---16.!']:
dammit = UnicodeDammit(utf8_data, [bad_encoding])
self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
def test_exclude_encodings(self):
# This is UTF-8.
utf8_data = "Räksmörgås".encode("utf-8")
utf8_data = u"Räksmörgås".encode("utf-8")
# But if we exclude UTF-8 from consideration, the guess is
# Windows-1252.
@ -430,7 +364,7 @@ class TestUnicodeDammit(unittest.TestCase):
detected = EncodingDetector(
b'<?xml version="1.0" encoding="UTF-\xdb" ?>')
encodings = list(detected.encodings)
assert 'utf-\N{REPLACEMENT CHARACTER}' in encodings
assert u'utf-\N{REPLACEMENT CHARACTER}' in encodings
def test_detect_html5_style_meta_tag(self):
@ -470,7 +404,7 @@ class TestUnicodeDammit(unittest.TestCase):
bs4.dammit.chardet_dammit = noop
dammit = UnicodeDammit(doc)
self.assertEqual(True, dammit.contains_replacement_characters)
self.assertTrue("\ufffd" in dammit.unicode_markup)
self.assertTrue(u"\ufffd" in dammit.unicode_markup)
soup = BeautifulSoup(doc, "html.parser")
self.assertTrue(soup.contains_replacement_characters)
@ -482,17 +416,17 @@ class TestUnicodeDammit(unittest.TestCase):
# A document written in UTF-16LE will have its byte order marker stripped.
data = b'\xff\xfe<\x00a\x00>\x00\xe1\x00\xe9\x00<\x00/\x00a\x00>\x00'
dammit = UnicodeDammit(data)
self.assertEqual("<a>áé</a>", dammit.unicode_markup)
self.assertEqual(u"<a>áé</a>", dammit.unicode_markup)
self.assertEqual("utf-16le", dammit.original_encoding)
def test_detwingle(self):
# Here's a UTF8 document.
utf8 = ("\N{SNOWMAN}" * 3).encode("utf8")
utf8 = (u"\N{SNOWMAN}" * 3).encode("utf8")
# Here's a Windows-1252 document.
windows_1252 = (
"\N{LEFT DOUBLE QUOTATION MARK}Hi, I like Windows!"
"\N{RIGHT DOUBLE QUOTATION MARK}").encode("windows_1252")
u"\N{LEFT DOUBLE QUOTATION MARK}Hi, I like Windows!"
u"\N{RIGHT DOUBLE QUOTATION MARK}").encode("windows_1252")
# Through some unholy alchemy, they've been stuck together.
doc = utf8 + windows_1252 + utf8
@ -507,7 +441,7 @@ class TestUnicodeDammit(unittest.TestCase):
fixed = UnicodeDammit.detwingle(doc)
self.assertEqual(
"☃☃☃“Hi, I like Windows!”☃☃☃", fixed.decode("utf8"))
u"☃☃☃“Hi, I like Windows!”☃☃☃", fixed.decode("utf8"))
def test_detwingle_ignores_multibyte_characters(self):
# Each of these characters has a UTF-8 representation ending
@ -515,9 +449,9 @@ class TestUnicodeDammit(unittest.TestCase):
# Windows-1252. But our code knows to skip over multibyte
# UTF-8 characters, so they'll survive the process unscathed.
for tricky_unicode_char in (
"\N{LATIN SMALL LIGATURE OE}", # 2-byte char '\xc5\x93'
"\N{LATIN SUBSCRIPT SMALL LETTER X}", # 3-byte char '\xe2\x82\x93'
"\xf0\x90\x90\x93", # This is a CJK character, not sure which one.
u"\N{LATIN SMALL LIGATURE OE}", # 2-byte char '\xc5\x93'
u"\N{LATIN SUBSCRIPT SMALL LETTER X}", # 3-byte char '\xe2\x82\x93'
u"\xf0\x90\x90\x93", # This is a CJK character, not sure which one.
):
input = tricky_unicode_char.encode("utf8")
self.assertTrue(input.endswith(b'\x93'))

@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
"""Tests for Beautiful Soup's tree traversal methods.
@ -25,7 +26,6 @@ from bs4.element import (
Comment,
Declaration,
Doctype,
Formatter,
NavigableString,
SoupStrainer,
Tag,
@ -71,13 +71,13 @@ class TestFind(TreeTest):
self.assertEqual(soup.find("b").string, "2")
def test_unicode_text_find(self):
soup = self.soup('<h1>Räksmörgås</h1>')
self.assertEqual(soup.find(string='Räksmörgås'), 'Räksmörgås')
soup = self.soup(u'<h1>Räksmörgås</h1>')
self.assertEqual(soup.find(string=u'Räksmörgås'), u'Räksmörgås')
def test_unicode_attribute_find(self):
soup = self.soup('<h1 id="Räksmörgås">here it is</h1>')
soup = self.soup(u'<h1 id="Räksmörgås">here it is</h1>')
str(soup)
self.assertEqual("here it is", soup.find(id='Räksmörgås').text)
self.assertEqual("here it is", soup.find(id=u'Räksmörgås').text)
def test_find_everything(self):
@ -97,17 +97,17 @@ class TestFindAll(TreeTest):
"""You can search the tree for text nodes."""
soup = self.soup("<html>Foo<b>bar</b>\xbb</html>")
# Exact match.
self.assertEqual(soup.find_all(string="bar"), ["bar"])
self.assertEqual(soup.find_all(text="bar"), ["bar"])
self.assertEqual(soup.find_all(string="bar"), [u"bar"])
self.assertEqual(soup.find_all(text="bar"), [u"bar"])
# Match any of a number of strings.
self.assertEqual(
soup.find_all(text=["Foo", "bar"]), ["Foo", "bar"])
soup.find_all(text=["Foo", "bar"]), [u"Foo", u"bar"])
# Match a regular expression.
self.assertEqual(soup.find_all(text=re.compile('.*')),
["Foo", "bar", '\xbb'])
[u"Foo", u"bar", u'\xbb'])
# Match anything.
self.assertEqual(soup.find_all(text=True),
["Foo", "bar", '\xbb'])
[u"Foo", u"bar", u'\xbb'])
def test_find_all_limit(self):
"""You can limit the number of items returned by find_all."""
@ -250,8 +250,8 @@ class TestFindAllByAttribute(TreeTest):
["Matching a.", "Matching b."])
def test_find_all_by_utf8_attribute_value(self):
peace = "םולש".encode("utf8")
data = '<a title="םולש"></a>'.encode("utf8")
peace = u"םולש".encode("utf8")
data = u'<a title="םולש"></a>'.encode("utf8")
soup = self.soup(data)
self.assertEqual([soup.a], soup.find_all(title=peace))
self.assertEqual([soup.a], soup.find_all(title=peace.decode("utf8")))
@ -417,48 +417,6 @@ class TestFindAllByAttribute(TreeTest):
self.assertEqual([], soup.find_all(id=1, text="bar"))
class TestSmooth(TreeTest):
"""Test Tag.smooth."""
def test_smooth(self):
soup = self.soup("<div>a</div>")
div = soup.div
div.append("b")
div.append("c")
div.append(Comment("Comment 1"))
div.append(Comment("Comment 2"))
div.append("d")
builder = self.default_builder()
span = Tag(soup, builder, 'span')
span.append('1')
span.append('2')
div.append(span)
# At this point the tree has a bunch of adjacent
# NavigableStrings. This is normal, but it has no meaning in
# terms of HTML, so we may want to smooth things out for
# output.
# Since the <span> tag has two children, its .string is None.
self.assertEqual(None, div.span.string)
self.assertEqual(7, len(div.contents))
div.smooth()
self.assertEqual(5, len(div.contents))
# The three strings at the beginning of div.contents have been
# merged into on string.
#
self.assertEqual('abc', div.contents[0])
# The call is recursive -- the <span> tag was also smoothed.
self.assertEqual('12', div.span.string)
# The two comments have _not_ been merged, even though
# comments are strings. Merging comments would change the
# meaning of the HTML.
self.assertEqual('Comment 1', div.contents[1])
self.assertEqual('Comment 2', div.contents[2])
class TestIndex(TreeTest):
@ -647,7 +605,7 @@ class SiblingTest(TreeTest):
</html>'''
# All that whitespace looks good but makes the tests more
# difficult. Get rid of it.
markup = re.compile(r"\n\s*").sub("", markup)
markup = re.compile("\n\s*").sub("", markup)
self.tree = self.soup(markup)
@ -745,12 +703,12 @@ class TestTagCreation(SoupTest):
"""Test the ability to create new tags."""
def test_new_tag(self):
soup = self.soup("")
new_tag = soup.new_tag("foo", bar="baz", attrs={"name": "a name"})
new_tag = soup.new_tag("foo", bar="baz")
self.assertTrue(isinstance(new_tag, Tag))
self.assertEqual("foo", new_tag.name)
self.assertEqual(dict(bar="baz", name="a name"), new_tag.attrs)
self.assertEqual(dict(bar="baz"), new_tag.attrs)
self.assertEqual(None, new_tag.parent)
def test_tag_inherits_self_closing_rules_from_builder(self):
if XML_BUILDER_PRESENT:
xml_soup = BeautifulSoup("", "lxml-xml")
@ -863,26 +821,6 @@ class TestTreeModification(SoupTest):
soup = self.soup(text)
self.assertRaises(ValueError, soup.a.insert, 0, soup.a)
def test_insert_beautifulsoup_object_inserts_children(self):
"""Inserting one BeautifulSoup object into another actually inserts all
of its children -- you'll never combine BeautifulSoup objects.
"""
soup = self.soup("<p>And now, a word:</p><p>And we're back.</p>")
text = "<p>p2</p><p>p3</p>"
to_insert = self.soup(text)
soup.insert(1, to_insert)
for i in soup.descendants:
assert not isinstance(i, BeautifulSoup)
p1, p2, p3, p4 = list(soup.children)
self.assertEqual("And now, a word:", p1.string)
self.assertEqual("p2", p2.string)
self.assertEqual("p3", p3.string)
self.assertEqual("And we're back.", p4.string)
def test_replace_with_maintains_next_element_throughout(self):
soup = self.soup('<p><a>one</a><b>three</b></p>')
a = soup.a
@ -939,7 +877,7 @@ class TestTreeModification(SoupTest):
self.assertEqual(soup.a.contents[0].next_element, "bar")
def test_insert_tag(self):
builder = self.default_builder()
builder = self.default_builder
soup = self.soup(
"<a><b>Find</b><c>lady!</c><d></d></a>", builder=builder)
magic_tag = Tag(soup, builder, 'magictag')
@ -974,13 +912,6 @@ class TestTreeModification(SoupTest):
soup.a.append(soup.b)
self.assertEqual(data, soup.decode())
def test_extend(self):
data = "<a><b><c><d><e><f><g></g></f></e></d></c></b></a>"
soup = self.soup(data)
l = [soup.g, soup.f, soup.e, soup.d, soup.c, soup.b]
soup.a.extend(l)
self.assertEqual("<a><g></g><f></f><e></e><d></d><c></c><b></b></a>", soup.decode())
def test_move_tag_to_beginning_of_parent(self):
data = "<a><b></b><c></c><d></d></a>"
soup = self.soup(data)
@ -1007,29 +938,6 @@ class TestTreeModification(SoupTest):
self.assertEqual(
soup.decode(), self.document_for("QUUX<b>bar</b><a>foo</a>BAZ"))
# Can't insert an element before itself.
b = soup.b
self.assertRaises(ValueError, b.insert_before, b)
# Can't insert before if an element has no parent.
b.extract()
self.assertRaises(ValueError, b.insert_before, "nope")
# Can insert an identical element
soup = self.soup("<a>")
soup.a.insert_before(soup.new_tag("a"))
def test_insert_multiple_before(self):
soup = self.soup("<a>foo</a><b>bar</b>")
soup.b.insert_before("BAZ", " ", "QUUX")
soup.a.insert_before("QUUX", " ", "BAZ")
self.assertEqual(
soup.decode(), self.document_for("QUUX BAZ<a>foo</a>BAZ QUUX<b>bar</b>"))
soup.a.insert_before(soup.b, "FOO")
self.assertEqual(
soup.decode(), self.document_for("QUUX BAZ<b>bar</b>FOO<a>foo</a>BAZ QUUX"))
def test_insert_after(self):
soup = self.soup("<a>foo</a><b>bar</b>")
soup.b.insert_after("BAZ")
@ -1040,28 +948,6 @@ class TestTreeModification(SoupTest):
self.assertEqual(
soup.decode(), self.document_for("QUUX<b>bar</b><a>foo</a>BAZ"))
# Can't insert an element after itself.
b = soup.b
self.assertRaises(ValueError, b.insert_after, b)
# Can't insert after if an element has no parent.
b.extract()
self.assertRaises(ValueError, b.insert_after, "nope")
# Can insert an identical element
soup = self.soup("<a>")
soup.a.insert_before(soup.new_tag("a"))
def test_insert_multiple_after(self):
soup = self.soup("<a>foo</a><b>bar</b>")
soup.b.insert_after("BAZ", " ", "QUUX")
soup.a.insert_after("QUUX", " ", "BAZ")
self.assertEqual(
soup.decode(), self.document_for("<a>foo</a>QUUX BAZ<b>bar</b>BAZ QUUX"))
soup.b.insert_after(soup.a, "FOO ")
self.assertEqual(
soup.decode(), self.document_for("QUUX BAZ<b>bar</b><a>foo</a>FOO BAZ QUUX"))
def test_insert_after_raises_exception_if_after_has_no_meaning(self):
soup = self.soup("")
tag = soup.new_tag("a")
@ -1225,7 +1111,7 @@ class TestTreeModification(SoupTest):
<script>baz</script>
</html>""")
[soup.script.extract() for i in soup.find_all("script")]
self.assertEqual("<body>\n\n<a></a>\n</body>", str(soup.body))
self.assertEqual("<body>\n\n<a></a>\n</body>", unicode(soup.body))
def test_extract_works_when_element_is_surrounded_by_identical_strings(self):
@ -1300,7 +1186,7 @@ class TestElementObjects(SoupTest):
tag = soup.bTag
self.assertEqual(soup.b, tag)
self.assertEqual(
'.bTag is deprecated, use .find("b") instead. If you really were looking for a tag called bTag, use .find("bTag")',
'.bTag is deprecated, use .find("b") instead.',
str(w[0].message))
def test_has_attr(self):
@ -1463,19 +1349,19 @@ class TestPersistence(SoupTest):
soup = BeautifulSoup(b'<p>&nbsp;</p>', 'html.parser')
encoding = soup.original_encoding
copy = soup.__copy__()
self.assertEqual("<p> </p>", str(copy))
self.assertEqual(u"<p> </p>", unicode(copy))
self.assertEqual(encoding, copy.original_encoding)
def test_unicode_pickle(self):
# A tree containing Unicode characters can be pickled.
html = "<b>\N{SNOWMAN}</b>"
html = u"<b>\N{SNOWMAN}</b>"
soup = self.soup(html)
dumped = pickle.dumps(soup, pickle.HIGHEST_PROTOCOL)
loaded = pickle.loads(dumped)
self.assertEqual(loaded.decode(), soup.decode())
def test_copy_navigablestring_is_not_attached_to_tree(self):
html = "<b>Foo<a></a></b><b>Bar</b>"
html = u"<b>Foo<a></a></b><b>Bar</b>"
soup = self.soup(html)
s1 = soup.find(string="Foo")
s2 = copy.copy(s1)
@ -1487,7 +1373,7 @@ class TestPersistence(SoupTest):
self.assertEqual(None, s2.previous_element)
def test_copy_navigablestring_subclass_has_same_type(self):
html = "<b><!--Foo--></b>"
html = u"<b><!--Foo--></b>"
soup = self.soup(html)
s1 = soup.string
s2 = copy.copy(s1)
@ -1495,19 +1381,19 @@ class TestPersistence(SoupTest):
self.assertTrue(isinstance(s2, Comment))
def test_copy_entire_soup(self):
html = "<div><b>Foo<a></a></b><b>Bar</b></div>end"
html = u"<div><b>Foo<a></a></b><b>Bar</b></div>end"
soup = self.soup(html)
soup_copy = copy.copy(soup)
self.assertEqual(soup, soup_copy)
def test_copy_tag_copies_contents(self):
html = "<div><b>Foo<a></a></b><b>Bar</b></div>end"
html = u"<div><b>Foo<a></a></b><b>Bar</b></div>end"
soup = self.soup(html)
div = soup.div
div_copy = copy.copy(div)
# The two tags look the same, and evaluate to equal.
self.assertEqual(str(div), str(div_copy))
self.assertEqual(unicode(div), unicode(div_copy))
self.assertEqual(div, div_copy)
# But they're not the same object.
@ -1523,75 +1409,67 @@ class TestPersistence(SoupTest):
class TestSubstitutions(SoupTest):
def test_default_formatter_is_minimal(self):
markup = "<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
markup = u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
soup = self.soup(markup)
decoded = soup.decode(formatter="minimal")
# The < is converted back into &lt; but the e-with-acute is left alone.
self.assertEqual(
decoded,
self.document_for(
"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"))
u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"))
def test_formatter_html(self):
markup = "<br><b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
markup = u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
soup = self.soup(markup)
decoded = soup.decode(formatter="html")
self.assertEqual(
decoded,
self.document_for("<br/><b>&lt;&lt;Sacr&eacute; bleu!&gt;&gt;</b>"))
self.document_for("<b>&lt;&lt;Sacr&eacute; bleu!&gt;&gt;</b>"))
def test_formatter_html5(self):
markup = "<br><b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
soup = self.soup(markup)
decoded = soup.decode(formatter="html5")
self.assertEqual(
decoded,
self.document_for("<br><b>&lt;&lt;Sacr&eacute; bleu!&gt;&gt;</b>"))
def test_formatter_minimal(self):
markup = "<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
markup = u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
soup = self.soup(markup)
decoded = soup.decode(formatter="minimal")
# The < is converted back into &lt; but the e-with-acute is left alone.
self.assertEqual(
decoded,
self.document_for(
"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"))
u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"))
def test_formatter_null(self):
markup = "<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
markup = u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
soup = self.soup(markup)
decoded = soup.decode(formatter=None)
# Neither the angle brackets nor the e-with-acute are converted.
# This is not valid HTML, but it's what the user wanted.
self.assertEqual(decoded,
self.document_for("<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"))
self.document_for(u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"))
def test_formatter_custom(self):
markup = "<b>&lt;foo&gt;</b><b>bar</b><br/>"
markup = u"<b>&lt;foo&gt;</b><b>bar</b>"
soup = self.soup(markup)
decoded = soup.decode(formatter = lambda x: x.upper())
# Instead of normal entity conversion code, the custom
# callable is called on every string.
self.assertEqual(
decoded,
self.document_for("<b><FOO></b><b>BAR</b><br/>"))
self.document_for(u"<b><FOO></b><b>BAR</b>"))
def test_formatter_is_run_on_attribute_values(self):
markup = '<a href="http://a.com?a=b&c=é">e</a>'
markup = u'<a href="http://a.com?a=b&c=é">e</a>'
soup = self.soup(markup)
a = soup.a
expect_minimal = '<a href="http://a.com?a=b&amp;c=é">e</a>'
expect_minimal = u'<a href="http://a.com?a=b&amp;c=é">e</a>'
self.assertEqual(expect_minimal, a.decode())
self.assertEqual(expect_minimal, a.decode(formatter="minimal"))
expect_html = '<a href="http://a.com?a=b&amp;c=&eacute;">e</a>'
expect_html = u'<a href="http://a.com?a=b&amp;c=&eacute;">e</a>'
self.assertEqual(expect_html, a.decode(formatter="html"))
self.assertEqual(markup, a.decode(formatter=None))
expect_upper = '<a href="HTTP://A.COM?A=B&C=É">E</a>'
expect_upper = u'<a href="HTTP://A.COM?A=B&C=É">E</a>'
self.assertEqual(expect_upper, a.decode(formatter=lambda x: x.upper()))
def test_formatter_skips_script_tag_for_html_documents(self):
@ -1613,28 +1491,28 @@ class TestSubstitutions(SoupTest):
self.assertTrue(b"< < hey > >" in encoded)
def test_prettify_leaves_preformatted_text_alone(self):
soup = self.soup("<div> foo <pre> \tbar\n \n </pre> baz <textarea> eee\nfff\t</textarea></div>")
soup = self.soup("<div> foo <pre> \tbar\n \n </pre> baz ")
# Everything outside the <pre> tag is reformatted, but everything
# inside is left alone.
self.assertEqual(
'<div>\n foo\n <pre> \tbar\n \n </pre>\n baz\n <textarea> eee\nfff\t</textarea>\n</div>',
u'<div>\n foo\n <pre> \tbar\n \n </pre>\n baz\n</div>',
soup.div.prettify())
def test_prettify_accepts_formatter_function(self):
def test_prettify_accepts_formatter(self):
soup = BeautifulSoup("<html><body>foo</body></html>", 'html.parser')
pretty = soup.prettify(formatter = lambda x: x.upper())
self.assertTrue("FOO" in pretty)
def test_prettify_outputs_unicode_by_default(self):
soup = self.soup("<a></a>")
self.assertEqual(str, type(soup.prettify()))
self.assertEqual(unicode, type(soup.prettify()))
def test_prettify_can_encode_data(self):
soup = self.soup("<a></a>")
self.assertEqual(bytes, type(soup.prettify("utf-8")))
def test_html_entity_substitution_off_by_default(self):
markup = "<b>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</b>"
markup = u"<b>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</b>"
soup = self.soup(markup)
encoded = soup.b.encode("utf-8")
self.assertEqual(encoded, markup.encode('utf-8'))
@ -1678,77 +1556,54 @@ class TestEncoding(SoupTest):
"""Test the ability to encode objects into strings."""
def test_unicode_string_can_be_encoded(self):
html = "<b>\N{SNOWMAN}</b>"
html = u"<b>\N{SNOWMAN}</b>"
soup = self.soup(html)
self.assertEqual(soup.b.string.encode("utf-8"),
"\N{SNOWMAN}".encode("utf-8"))
u"\N{SNOWMAN}".encode("utf-8"))
def test_tag_containing_unicode_string_can_be_encoded(self):
html = "<b>\N{SNOWMAN}</b>"
html = u"<b>\N{SNOWMAN}</b>"
soup = self.soup(html)
self.assertEqual(
soup.b.encode("utf-8"), html.encode("utf-8"))
def test_encoding_substitutes_unrecognized_characters_by_default(self):
html = "<b>\N{SNOWMAN}</b>"
html = u"<b>\N{SNOWMAN}</b>"
soup = self.soup(html)
self.assertEqual(soup.b.encode("ascii"), b"<b>&#9731;</b>")
def test_encoding_can_be_made_strict(self):
html = "<b>\N{SNOWMAN}</b>"
html = u"<b>\N{SNOWMAN}</b>"
soup = self.soup(html)
self.assertRaises(
UnicodeEncodeError, soup.encode, "ascii", errors="strict")
def test_decode_contents(self):
html = "<b>\N{SNOWMAN}</b>"
html = u"<b>\N{SNOWMAN}</b>"
soup = self.soup(html)
self.assertEqual("\N{SNOWMAN}", soup.b.decode_contents())
self.assertEqual(u"\N{SNOWMAN}", soup.b.decode_contents())
def test_encode_contents(self):
html = "<b>\N{SNOWMAN}</b>"
html = u"<b>\N{SNOWMAN}</b>"
soup = self.soup(html)
self.assertEqual(
"\N{SNOWMAN}".encode("utf8"), soup.b.encode_contents(
u"\N{SNOWMAN}".encode("utf8"), soup.b.encode_contents(
encoding="utf8"))
def test_deprecated_renderContents(self):
html = "<b>\N{SNOWMAN}</b>"
html = u"<b>\N{SNOWMAN}</b>"
soup = self.soup(html)
self.assertEqual(
"\N{SNOWMAN}".encode("utf8"), soup.b.renderContents())
u"\N{SNOWMAN}".encode("utf8"), soup.b.renderContents())
def test_repr(self):
html = "<b>\N{SNOWMAN}</b>"
html = u"<b>\N{SNOWMAN}</b>"
soup = self.soup(html)
if PY3K:
self.assertEqual(html, repr(soup))
else:
self.assertEqual(b'<b>\\u2603</b>', repr(soup))
class TestFormatter(SoupTest):
def test_sort_attributes(self):
# Test the ability to override Formatter.attributes() to,
# e.g., disable the normal sorting of attributes.
class UnsortedFormatter(Formatter):
def attributes(self, tag):
self.called_with = tag
for k, v in sorted(tag.attrs.items()):
if k == 'ignore':
continue
yield k,v
soup = self.soup('<p cval="1" aval="2" ignore="ignored"></p>')
formatter = UnsortedFormatter()
decoded = soup.decode(formatter=formatter)
# attributes() was called on the <p> tag. It filtered out one
# attribute and sorted the other two.
self.assertEqual(formatter.called_with, soup.p)
self.assertEqual('<p aval="2" cval="1"></p>', decoded)
class TestNavigableStringSubclasses(SoupTest):
def test_cdata(self):
@ -1865,7 +1720,7 @@ class TestSoupSelector(TreeTest):
els = self.soup.select('title')
self.assertEqual(len(els), 1)
self.assertEqual(els[0].name, 'title')
self.assertEqual(els[0].contents, ['The title'])
self.assertEqual(els[0].contents, [u'The title'])
def test_one_tag_many(self):
els = self.soup.select('div')
@ -1900,7 +1755,7 @@ class TestSoupSelector(TreeTest):
self.assertEqual(len(self.soup.select('del')), 0)
def test_invalid_tag(self):
self.assertRaises(SyntaxError, self.soup.select, 'tag%t')
self.assertRaises(ValueError, self.soup.select, 'tag%t')
def test_select_dashed_tag_ids(self):
self.assertSelects('custom-dashed-tag', ['dash1', 'dash2'])
@ -1911,7 +1766,7 @@ class TestSoupSelector(TreeTest):
self.assertEqual(dashed[0]['id'], 'dash2')
def test_dashed_tag_text(self):
self.assertEqual(self.soup.select('body > custom-dashed-tag')[0].text, 'Hello there.')
self.assertEqual(self.soup.select('body > custom-dashed-tag')[0].text, u'Hello there.')
def test_select_dashed_matches_find_all(self):
self.assertEqual(self.soup.select('custom-dashed-tag'), self.soup.find_all('custom-dashed-tag'))
@ -2091,31 +1946,32 @@ class TestSoupSelector(TreeTest):
NotImplementedError, self.soup.select, "a:no-such-pseudoclass")
self.assertRaises(
SyntaxError, self.soup.select, "a:nth-of-type(a)")
NotImplementedError, self.soup.select, "a:nth-of-type(a)")
def test_nth_of_type(self):
# Try to select first paragraph
els = self.soup.select('div#inner p:nth-of-type(1)')
self.assertEqual(len(els), 1)
self.assertEqual(els[0].string, 'Some text')
self.assertEqual(els[0].string, u'Some text')
# Try to select third paragraph
els = self.soup.select('div#inner p:nth-of-type(3)')
self.assertEqual(len(els), 1)
self.assertEqual(els[0].string, 'Another')
self.assertEqual(els[0].string, u'Another')
# Try to select (non-existent!) fourth paragraph
els = self.soup.select('div#inner p:nth-of-type(4)')
self.assertEqual(len(els), 0)
# Zero will select no tags.
els = self.soup.select('div p:nth-of-type(0)')
self.assertEqual(len(els), 0)
# Pass in an invalid value.
self.assertRaises(
ValueError, self.soup.select, 'div p:nth-of-type(0)')
def test_nth_of_type_direct_descendant(self):
els = self.soup.select('div#inner > p:nth-of-type(1)')
self.assertEqual(len(els), 1)
self.assertEqual(els[0].string, 'Some text')
self.assertEqual(els[0].string, u'Some text')
def test_id_child_selector_nth_of_type(self):
self.assertSelects('#inner > p:nth-of-type(2)', ['p1'])
@ -2147,7 +2003,7 @@ class TestSoupSelector(TreeTest):
self.assertEqual([], self.soup.select('#inner ~ h2'))
def test_dangling_combinator(self):
self.assertRaises(SyntaxError, self.soup.select, 'h1 >')
self.assertRaises(ValueError, self.soup.select, 'h1 >')
def test_sibling_combinator_wont_select_same_tag_twice(self):
self.assertSelects('p[lang] ~ p', ['lang-en-gb', 'lang-en-us', 'lang-fr'])
@ -2178,8 +2034,8 @@ class TestSoupSelector(TreeTest):
self.assertSelects('div x,y, z', ['xid', 'yid', 'zida', 'zidb', 'zidab', 'zidac'])
def test_invalid_multiple_select(self):
self.assertRaises(SyntaxError, self.soup.select, ',x, y')
self.assertRaises(SyntaxError, self.soup.select, 'x,,y')
self.assertRaises(ValueError, self.soup.select, ',x, y')
self.assertRaises(ValueError, self.soup.select, 'x,,y')
def test_multiple_select_attrs(self):
self.assertSelects('p[lang=en], p[lang=en-gb]', ['lang-en', 'lang-en-gb'])
@ -2190,16 +2046,5 @@ class TestSoupSelector(TreeTest):
def test_multiple_select_nested(self):
self.assertSelects('body > div > x, y > z', ['xid', 'zidb'])
def test_select_duplicate_elements(self):
# When markup contains duplicate elements, a multiple select
# will find all of them.
markup = '<div class="c1"/><div class="c2"/><div class="c1"/>'
soup = BeautifulSoup(markup, 'html.parser')
selected = soup.select(".c1, .c2")
self.assertEqual(3, len(selected))
# Verify that find_all finds the same elements, though because
# of an implementation detail it finds them in a different
# order.
for element in soup.find_all(class_=['c1', 'c2']):
assert element in selected

@ -1,32 +0,0 @@
Copyright (c) 2013, Ethan Furman.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
Redistributions of source code must retain the above
copyright notice, this list of conditions and the
following disclaimer.
Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials
provided with the distribution.
Neither the name Ethan Furman nor the names of any
contributors may be used to endorse or promote products
derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

@ -1,3 +0,0 @@
enum34 is the new Python stdlib enum module available in Python 3.4
backported for previous versions of Python from 2.4 to 3.3.
tested on 2.6, 2.7, and 3.3+

@ -1,837 +0,0 @@
"""Python Enumerations"""
import sys as _sys
__all__ = ['Enum', 'IntEnum', 'unique']
version = 1, 1, 6
pyver = float('%s.%s' % _sys.version_info[:2])
try:
any
except NameError:
def any(iterable):
for element in iterable:
if element:
return True
return False
try:
from collections import OrderedDict
except ImportError:
OrderedDict = None
try:
basestring
except NameError:
# In Python 2 basestring is the ancestor of both str and unicode
# in Python 3 it's just str, but was missing in 3.1
basestring = str
try:
unicode
except NameError:
# In Python 3 unicode no longer exists (it's just str)
unicode = str
class _RouteClassAttributeToGetattr(object):
"""Route attribute access on a class to __getattr__.
This is a descriptor, used to define attributes that act differently when
accessed through an instance and through a class. Instance access remains
normal, but access to an attribute through a class will be routed to the
class's __getattr__ method; this is done by raising AttributeError.
"""
def __init__(self, fget=None):
self.fget = fget
def __get__(self, instance, ownerclass=None):
if instance is None:
raise AttributeError()
return self.fget(instance)
def __set__(self, instance, value):
raise AttributeError("can't set attribute")
def __delete__(self, instance):
raise AttributeError("can't delete attribute")
def _is_descriptor(obj):
"""Returns True if obj is a descriptor, False otherwise."""
return (
hasattr(obj, '__get__') or
hasattr(obj, '__set__') or
hasattr(obj, '__delete__'))
def _is_dunder(name):
"""Returns True if a __dunder__ name, False otherwise."""
return (name[:2] == name[-2:] == '__' and
name[2:3] != '_' and
name[-3:-2] != '_' and
len(name) > 4)
def _is_sunder(name):
"""Returns True if a _sunder_ name, False otherwise."""
return (name[0] == name[-1] == '_' and
name[1:2] != '_' and
name[-2:-1] != '_' and
len(name) > 2)
def _make_class_unpicklable(cls):
"""Make the given class un-picklable."""
def _break_on_call_reduce(self, protocol=None):
raise TypeError('%r cannot be pickled' % self)
cls.__reduce_ex__ = _break_on_call_reduce
cls.__module__ = '<unknown>'
class _EnumDict(dict):
"""Track enum member order and ensure member names are not reused.
EnumMeta will use the names found in self._member_names as the
enumeration member names.
"""
def __init__(self):
super(_EnumDict, self).__init__()
self._member_names = []
def __setitem__(self, key, value):
"""Changes anything not dundered or not a descriptor.
If a descriptor is added with the same name as an enum member, the name
is removed from _member_names (this may leave a hole in the numerical
sequence of values).
If an enum member name is used twice, an error is raised; duplicate
values are not checked for.
Single underscore (sunder) names are reserved.
Note: in 3.x __order__ is simply discarded as a not necessary piece
leftover from 2.x
"""
if pyver >= 3.0 and key in ('_order_', '__order__'):
return
elif key == '__order__':
key = '_order_'
if _is_sunder(key):
if key != '_order_':
raise ValueError('_names_ are reserved for future Enum use')
elif _is_dunder(key):
pass
elif key in self._member_names:
# descriptor overwriting an enum?
raise TypeError('Attempted to reuse key: %r' % key)
elif not _is_descriptor(value):
if key in self:
# enum overwriting a descriptor?
raise TypeError('Key already defined as: %r' % self[key])
self._member_names.append(key)
super(_EnumDict, self).__setitem__(key, value)
# Dummy value for Enum as EnumMeta explicity checks for it, but of course until
# EnumMeta finishes running the first time the Enum class doesn't exist. This
# is also why there are checks in EnumMeta like `if Enum is not None`
Enum = None
class EnumMeta(type):
"""Metaclass for Enum"""
@classmethod
def __prepare__(metacls, cls, bases):
return _EnumDict()
def __new__(metacls, cls, bases, classdict):
# an Enum class is final once enumeration items have been defined; it
# cannot be mixed with other types (int, float, etc.) if it has an
# inherited __new__ unless a new __new__ is defined (or the resulting
# class will fail).
if type(classdict) is dict:
original_dict = classdict
classdict = _EnumDict()
for k, v in original_dict.items():
classdict[k] = v
member_type, first_enum = metacls._get_mixins_(bases)
__new__, save_new, use_args = metacls._find_new_(classdict, member_type,
first_enum)
# save enum items into separate mapping so they don't get baked into
# the new class
members = dict((k, classdict[k]) for k in classdict._member_names)
for name in classdict._member_names:
del classdict[name]
# py2 support for definition order
_order_ = classdict.get('_order_')
if _order_ is None:
if pyver < 3.0:
try:
_order_ = [name for (name, value) in sorted(members.items(), key=lambda item: item[1])]
except TypeError:
_order_ = [name for name in sorted(members.keys())]
else:
_order_ = classdict._member_names
else:
del classdict['_order_']
if pyver < 3.0:
_order_ = _order_.replace(',', ' ').split()
aliases = [name for name in members if name not in _order_]
_order_ += aliases
# check for illegal enum names (any others?)
invalid_names = set(members) & set(['mro'])
if invalid_names:
raise ValueError('Invalid enum member name(s): %s' % (
', '.join(invalid_names), ))
# save attributes from super classes so we know if we can take
# the shortcut of storing members in the class dict
base_attributes = set([a for b in bases for a in b.__dict__])
# create our new Enum type
enum_class = super(EnumMeta, metacls).__new__(metacls, cls, bases, classdict)
enum_class._member_names_ = [] # names in random order
if OrderedDict is not None:
enum_class._member_map_ = OrderedDict()
else:
enum_class._member_map_ = {} # name->value map
enum_class._member_type_ = member_type
# Reverse value->name map for hashable values.
enum_class._value2member_map_ = {}
# instantiate them, checking for duplicates as we go
# we instantiate first instead of checking for duplicates first in case
# a custom __new__ is doing something funky with the values -- such as
# auto-numbering ;)
if __new__ is None:
__new__ = enum_class.__new__
for member_name in _order_:
value = members[member_name]
if not isinstance(value, tuple):
args = (value, )
else:
args = value
if member_type is tuple: # special case for tuple enums
args = (args, ) # wrap it one more time
if not use_args or not args:
enum_member = __new__(enum_class)
if not hasattr(enum_member, '_value_'):
enum_member._value_ = value
else:
enum_member = __new__(enum_class, *args)
if not hasattr(enum_member, '_value_'):
enum_member._value_ = member_type(*args)
value = enum_member._value_
enum_member._name_ = member_name
enum_member.__objclass__ = enum_class
enum_member.__init__(*args)
# If another member with the same value was already defined, the
# new member becomes an alias to the existing one.
for name, canonical_member in enum_class._member_map_.items():
if canonical_member.value == enum_member._value_:
enum_member = canonical_member
break
else:
# Aliases don't appear in member names (only in __members__).
enum_class._member_names_.append(member_name)
# performance boost for any member that would not shadow
# a DynamicClassAttribute (aka _RouteClassAttributeToGetattr)
if member_name not in base_attributes:
setattr(enum_class, member_name, enum_member)
# now add to _member_map_
enum_class._member_map_[member_name] = enum_member
try:
# This may fail if value is not hashable. We can't add the value
# to the map, and by-value lookups for this value will be
# linear.
enum_class._value2member_map_[value] = enum_member
except TypeError:
pass
# If a custom type is mixed into the Enum, and it does not know how
# to pickle itself, pickle.dumps will succeed but pickle.loads will
# fail. Rather than have the error show up later and possibly far
# from the source, sabotage the pickle protocol for this class so
# that pickle.dumps also fails.
#
# However, if the new class implements its own __reduce_ex__, do not
# sabotage -- it's on them to make sure it works correctly. We use
# __reduce_ex__ instead of any of the others as it is preferred by
# pickle over __reduce__, and it handles all pickle protocols.
unpicklable = False
if '__reduce_ex__' not in classdict:
if member_type is not object:
methods = ('__getnewargs_ex__', '__getnewargs__',
'__reduce_ex__', '__reduce__')
if not any(m in member_type.__dict__ for m in methods):
_make_class_unpicklable(enum_class)
unpicklable = True
# double check that repr and friends are not the mixin's or various
# things break (such as pickle)
for name in ('__repr__', '__str__', '__format__', '__reduce_ex__'):
class_method = getattr(enum_class, name)
obj_method = getattr(member_type, name, None)
enum_method = getattr(first_enum, name, None)
if name not in classdict and class_method is not enum_method:
if name == '__reduce_ex__' and unpicklable:
continue
setattr(enum_class, name, enum_method)
# method resolution and int's are not playing nice
# Python's less than 2.6 use __cmp__
if pyver < 2.6:
if issubclass(enum_class, int):
setattr(enum_class, '__cmp__', getattr(int, '__cmp__'))
elif pyver < 3.0:
if issubclass(enum_class, int):
for method in (
'__le__',
'__lt__',
'__gt__',
'__ge__',
'__eq__',
'__ne__',
'__hash__',
):
setattr(enum_class, method, getattr(int, method))
# replace any other __new__ with our own (as long as Enum is not None,
# anyway) -- again, this is to support pickle
if Enum is not None:
# if the user defined their own __new__, save it before it gets
# clobbered in case they subclass later
if save_new:
setattr(enum_class, '__member_new__', enum_class.__dict__['__new__'])
setattr(enum_class, '__new__', Enum.__dict__['__new__'])
return enum_class
def __bool__(cls):
"""
classes/types should always be True.
"""
return True
def __call__(cls, value, names=None, module=None, type=None, start=1):
"""Either returns an existing member, or creates a new enum class.
This method is used both when an enum class is given a value to match
to an enumeration member (i.e. Color(3)) and for the functional API
(i.e. Color = Enum('Color', names='red green blue')).
When used for the functional API: `module`, if set, will be stored in
the new class' __module__ attribute; `type`, if set, will be mixed in
as the first base class.
Note: if `module` is not set this routine will attempt to discover the
calling module by walking the frame stack; if this is unsuccessful
the resulting class will not be pickleable.
"""
if names is None: # simple value lookup
return cls.__new__(cls, value)
# otherwise, functional API: we're creating a new Enum type
return cls._create_(value, names, module=module, type=type, start=start)
def __contains__(cls, member):
return isinstance(member, cls) and member.name in cls._member_map_
def __delattr__(cls, attr):
# nicer error message when someone tries to delete an attribute
# (see issue19025).
if attr in cls._member_map_:
raise AttributeError(
"%s: cannot delete Enum member." % cls.__name__)
super(EnumMeta, cls).__delattr__(attr)
def __dir__(self):
return (['__class__', '__doc__', '__members__', '__module__'] +
self._member_names_)
@property
def __members__(cls):
"""Returns a mapping of member name->value.
This mapping lists all enum members, including aliases. Note that this
is a copy of the internal mapping.
"""
return cls._member_map_.copy()
def __getattr__(cls, name):
"""Return the enum member matching `name`
We use __getattr__ instead of descriptors or inserting into the enum
class' __dict__ in order to support `name` and `value` being both
properties for enum members (which live in the class' __dict__) and
enum members themselves.
"""
if _is_dunder(name):
raise AttributeError(name)
try:
return cls._member_map_[name]
except KeyError:
raise AttributeError(name)
def __getitem__(cls, name):
return cls._member_map_[name]
def __iter__(cls):
return (cls._member_map_[name] for name in cls._member_names_)
def __reversed__(cls):
return (cls._member_map_[name] for name in reversed(cls._member_names_))
def __len__(cls):
return len(cls._member_names_)
__nonzero__ = __bool__
def __repr__(cls):
return "<enum %r>" % cls.__name__
def __setattr__(cls, name, value):
"""Block attempts to reassign Enum members.
A simple assignment to the class namespace only changes one of the
several possible ways to get an Enum member from the Enum class,
resulting in an inconsistent Enumeration.
"""
member_map = cls.__dict__.get('_member_map_', {})
if name in member_map:
raise AttributeError('Cannot reassign members.')
super(EnumMeta, cls).__setattr__(name, value)
def _create_(cls, class_name, names=None, module=None, type=None, start=1):
"""Convenience method to create a new Enum class.
`names` can be:
* A string containing member names, separated either with spaces or
commas. Values are auto-numbered from 1.
* An iterable of member names. Values are auto-numbered from 1.
* An iterable of (member name, value) pairs.
* A mapping of member name -> value.
"""
if pyver < 3.0:
# if class_name is unicode, attempt a conversion to ASCII
if isinstance(class_name, unicode):
try:
class_name = class_name.encode('ascii')
except UnicodeEncodeError:
raise TypeError('%r is not representable in ASCII' % class_name)
metacls = cls.__class__
if type is None:
bases = (cls, )
else:
bases = (type, cls)
classdict = metacls.__prepare__(class_name, bases)
_order_ = []
# special processing needed for names?
if isinstance(names, basestring):
names = names.replace(',', ' ').split()
if isinstance(names, (tuple, list)) and isinstance(names[0], basestring):
names = [(e, i+start) for (i, e) in enumerate(names)]
# Here, names is either an iterable of (name, value) or a mapping.
item = None # in case names is empty
for item in names:
if isinstance(item, basestring):
member_name, member_value = item, names[item]
else:
member_name, member_value = item
classdict[member_name] = member_value
_order_.append(member_name)
# only set _order_ in classdict if name/value was not from a mapping
if not isinstance(item, basestring):
classdict['_order_'] = ' '.join(_order_)
enum_class = metacls.__new__(metacls, class_name, bases, classdict)
# TODO: replace the frame hack if a blessed way to know the calling
# module is ever developed
if module is None:
try:
module = _sys._getframe(2).f_globals['__name__']
except (AttributeError, ValueError):
pass
if module is None:
_make_class_unpicklable(enum_class)
else:
enum_class.__module__ = module
return enum_class
@staticmethod
def _get_mixins_(bases):
"""Returns the type for creating enum members, and the first inherited
enum class.
bases: the tuple of bases that was given to __new__
"""
if not bases or Enum is None:
return object, Enum
# double check that we are not subclassing a class with existing
# enumeration members; while we're at it, see if any other data
# type has been mixed in so we can use the correct __new__
member_type = first_enum = None
for base in bases:
if (base is not Enum and
issubclass(base, Enum) and
base._member_names_):
raise TypeError("Cannot extend enumerations")
# base is now the last base in bases
if not issubclass(base, Enum):
raise TypeError("new enumerations must be created as "
"`ClassName([mixin_type,] enum_type)`")
# get correct mix-in type (either mix-in type of Enum subclass, or
# first base if last base is Enum)
if not issubclass(bases[0], Enum):
member_type = bases[0] # first data type
first_enum = bases[-1] # enum type
else:
for base in bases[0].__mro__:
# most common: (IntEnum, int, Enum, object)
# possible: (<Enum 'AutoIntEnum'>, <Enum 'IntEnum'>,
# <class 'int'>, <Enum 'Enum'>,
# <class 'object'>)
if issubclass(base, Enum):
if first_enum is None:
first_enum = base
else:
if member_type is None:
member_type = base
return member_type, first_enum
if pyver < 3.0:
@staticmethod
def _find_new_(classdict, member_type, first_enum):
"""Returns the __new__ to be used for creating the enum members.
classdict: the class dictionary given to __new__
member_type: the data type whose __new__ will be used by default
first_enum: enumeration to check for an overriding __new__
"""
# now find the correct __new__, checking to see of one was defined
# by the user; also check earlier enum classes in case a __new__ was
# saved as __member_new__
__new__ = classdict.get('__new__', None)
if __new__:
return None, True, True # __new__, save_new, use_args
N__new__ = getattr(None, '__new__')
O__new__ = getattr(object, '__new__')
if Enum is None:
E__new__ = N__new__
else:
E__new__ = Enum.__dict__['__new__']
# check all possibles for __member_new__ before falling back to
# __new__
for method in ('__member_new__', '__new__'):
for possible in (member_type, first_enum):
try:
target = possible.__dict__[method]
except (AttributeError, KeyError):
target = getattr(possible, method, None)
if target not in [
None,
N__new__,
O__new__,
E__new__,
]:
if method == '__member_new__':
classdict['__new__'] = target
return None, False, True
if isinstance(target, staticmethod):
target = target.__get__(member_type)
__new__ = target
break
if __new__ is not None:
break
else:
__new__ = object.__new__
# if a non-object.__new__ is used then whatever value/tuple was
# assigned to the enum member name will be passed to __new__ and to the
# new enum member's __init__
if __new__ is object.__new__:
use_args = False
else:
use_args = True
return __new__, False, use_args
else:
@staticmethod
def _find_new_(classdict, member_type, first_enum):
"""Returns the __new__ to be used for creating the enum members.
classdict: the class dictionary given to __new__
member_type: the data type whose __new__ will be used by default
first_enum: enumeration to check for an overriding __new__
"""
# now find the correct __new__, checking to see of one was defined
# by the user; also check earlier enum classes in case a __new__ was
# saved as __member_new__
__new__ = classdict.get('__new__', None)
# should __new__ be saved as __member_new__ later?
save_new = __new__ is not None
if __new__ is None:
# check all possibles for __member_new__ before falling back to
# __new__
for method in ('__member_new__', '__new__'):
for possible in (member_type, first_enum):
target = getattr(possible, method, None)
if target not in (
None,
None.__new__,
object.__new__,
Enum.__new__,
):
__new__ = target
break
if __new__ is not None:
break
else:
__new__ = object.__new__
# if a non-object.__new__ is used then whatever value/tuple was
# assigned to the enum member name will be passed to __new__ and to the
# new enum member's __init__
if __new__ is object.__new__:
use_args = False
else:
use_args = True
return __new__, save_new, use_args
########################################################
# In order to support Python 2 and 3 with a single
# codebase we have to create the Enum methods separately
# and then use the `type(name, bases, dict)` method to
# create the class.
########################################################
temp_enum_dict = {}
temp_enum_dict['__doc__'] = "Generic enumeration.\n\n Derive from this class to define new enumerations.\n\n"
def __new__(cls, value):
# all enum instances are actually created during class construction
# without calling this method; this method is called by the metaclass'
# __call__ (i.e. Color(3) ), and by pickle
if type(value) is cls:
# For lookups like Color(Color.red)
value = value.value
#return value
# by-value search for a matching enum member
# see if it's in the reverse mapping (for hashable values)
try:
if value in cls._value2member_map_:
return cls._value2member_map_[value]
except TypeError:
# not there, now do long search -- O(n) behavior
for member in cls._member_map_.values():
if member.value == value:
return member
raise ValueError("%s is not a valid %s" % (value, cls.__name__))
temp_enum_dict['__new__'] = __new__
del __new__
def __repr__(self):
return "<%s.%s: %r>" % (
self.__class__.__name__, self._name_, self._value_)
temp_enum_dict['__repr__'] = __repr__
del __repr__
def __str__(self):
return "%s.%s" % (self.__class__.__name__, self._name_)
temp_enum_dict['__str__'] = __str__
del __str__
if pyver >= 3.0:
def __dir__(self):
added_behavior = [
m
for cls in self.__class__.mro()
for m in cls.__dict__
if m[0] != '_' and m not in self._member_map_
]
return (['__class__', '__doc__', '__module__', ] + added_behavior)
temp_enum_dict['__dir__'] = __dir__
del __dir__
def __format__(self, format_spec):
# mixed-in Enums should use the mixed-in type's __format__, otherwise
# we can get strange results with the Enum name showing up instead of
# the value
# pure Enum branch
if self._member_type_ is object:
cls = str
val = str(self)
# mix-in branch
else:
cls = self._member_type_
val = self.value
return cls.__format__(val, format_spec)
temp_enum_dict['__format__'] = __format__
del __format__
####################################
# Python's less than 2.6 use __cmp__
if pyver < 2.6:
def __cmp__(self, other):
if type(other) is self.__class__:
if self is other:
return 0
return -1
return NotImplemented
raise TypeError("unorderable types: %s() and %s()" % (self.__class__.__name__, other.__class__.__name__))
temp_enum_dict['__cmp__'] = __cmp__
del __cmp__
else:
def __le__(self, other):
raise TypeError("unorderable types: %s() <= %s()" % (self.__class__.__name__, other.__class__.__name__))
temp_enum_dict['__le__'] = __le__
del __le__
def __lt__(self, other):
raise TypeError("unorderable types: %s() < %s()" % (self.__class__.__name__, other.__class__.__name__))
temp_enum_dict['__lt__'] = __lt__
del __lt__
def __ge__(self, other):
raise TypeError("unorderable types: %s() >= %s()" % (self.__class__.__name__, other.__class__.__name__))
temp_enum_dict['__ge__'] = __ge__
del __ge__
def __gt__(self, other):
raise TypeError("unorderable types: %s() > %s()" % (self.__class__.__name__, other.__class__.__name__))
temp_enum_dict['__gt__'] = __gt__
del __gt__
def __eq__(self, other):
if type(other) is self.__class__:
return self is other
return NotImplemented
temp_enum_dict['__eq__'] = __eq__
del __eq__
def __ne__(self, other):
if type(other) is self.__class__:
return self is not other
return NotImplemented
temp_enum_dict['__ne__'] = __ne__
del __ne__
def __hash__(self):
return hash(self._name_)
temp_enum_dict['__hash__'] = __hash__
del __hash__
def __reduce_ex__(self, proto):
return self.__class__, (self._value_, )
temp_enum_dict['__reduce_ex__'] = __reduce_ex__
del __reduce_ex__
# _RouteClassAttributeToGetattr is used to provide access to the `name`
# and `value` properties of enum members while keeping some measure of
# protection from modification, while still allowing for an enumeration
# to have members named `name` and `value`. This works because enumeration
# members are not set directly on the enum class -- __getattr__ is
# used to look them up.
@_RouteClassAttributeToGetattr
def name(self):
return self._name_
temp_enum_dict['name'] = name
del name
@_RouteClassAttributeToGetattr
def value(self):
return self._value_
temp_enum_dict['value'] = value
del value
@classmethod
def _convert(cls, name, module, filter, source=None):
"""
Create a new Enum subclass that replaces a collection of global constants
"""
# convert all constants from source (or module) that pass filter() to
# a new Enum called name, and export the enum and its members back to
# module;
# also, replace the __reduce_ex__ method so unpickling works in
# previous Python versions
module_globals = vars(_sys.modules[module])
if source:
source = vars(source)
else:
source = module_globals
members = dict((name, value) for name, value in source.items() if filter(name))
cls = cls(name, members, module=module)
cls.__reduce_ex__ = _reduce_ex_by_name
module_globals.update(cls.__members__)
module_globals[name] = cls
return cls
temp_enum_dict['_convert'] = _convert
del _convert
Enum = EnumMeta('Enum', (object, ), temp_enum_dict)
del temp_enum_dict
# Enum has now been created
###########################
class IntEnum(int, Enum):
"""Enum where members are also (and must be) ints"""
def _reduce_ex_by_name(self, proto):
return self.name
def unique(enumeration):
"""Class decorator that ensures only unique members exist in an enumeration."""
duplicates = []
for name, member in enumeration.__members__.items():
if name != member.name:
duplicates.append((name, member.name))
if duplicates:
duplicate_names = ', '.join(
["%s -> %s" % (alias, name) for (alias, name) in duplicates]
)
raise ValueError('duplicate names found in %r: %s' %
(enumeration, duplicate_names)
)
return enumeration

File diff suppressed because it is too large Load Diff

@ -1,735 +0,0 @@
``enum`` --- support for enumerations
========================================
.. :synopsis: enumerations are sets of symbolic names bound to unique, constant
values.
.. :moduleauthor:: Ethan Furman <ethan@stoneleaf.us>
.. :sectionauthor:: Barry Warsaw <barry@python.org>,
.. :sectionauthor:: Eli Bendersky <eliben@gmail.com>,
.. :sectionauthor:: Ethan Furman <ethan@stoneleaf.us>
----------------
An enumeration is a set of symbolic names (members) bound to unique, constant
values. Within an enumeration, the members can be compared by identity, and
the enumeration itself can be iterated over.
Module Contents
---------------
This module defines two enumeration classes that can be used to define unique
sets of names and values: ``Enum`` and ``IntEnum``. It also defines
one decorator, ``unique``.
``Enum``
Base class for creating enumerated constants. See section `Functional API`_
for an alternate construction syntax.
``IntEnum``
Base class for creating enumerated constants that are also subclasses of ``int``.
``unique``
Enum class decorator that ensures only one name is bound to any one value.
Creating an Enum
----------------
Enumerations are created using the ``class`` syntax, which makes them
easy to read and write. An alternative creation method is described in
`Functional API`_. To define an enumeration, subclass ``Enum`` as
follows::
>>> from enum import Enum
>>> class Color(Enum):
... red = 1
... green = 2
... blue = 3
Note: Nomenclature
- The class ``Color`` is an *enumeration* (or *enum*)
- The attributes ``Color.red``, ``Color.green``, etc., are
*enumeration members* (or *enum members*).
- The enum members have *names* and *values* (the name of
``Color.red`` is ``red``, the value of ``Color.blue`` is
``3``, etc.)
Note:
Even though we use the ``class`` syntax to create Enums, Enums
are not normal Python classes. See `How are Enums different?`_ for
more details.
Enumeration members have human readable string representations::
>>> print(Color.red)
Color.red
...while their ``repr`` has more information::
>>> print(repr(Color.red))
<Color.red: 1>
The *type* of an enumeration member is the enumeration it belongs to::
>>> type(Color.red)
<enum 'Color'>
>>> isinstance(Color.green, Color)
True
>>>
Enum members also have a property that contains just their item name::
>>> print(Color.red.name)
red
Enumerations support iteration. In Python 3.x definition order is used; in
Python 2.x the definition order is not available, but class attribute
``__order__`` is supported; otherwise, value order is used::
>>> class Shake(Enum):
... __order__ = 'vanilla chocolate cookies mint' # only needed in 2.x
... vanilla = 7
... chocolate = 4
... cookies = 9
... mint = 3
...
>>> for shake in Shake:
... print(shake)
...
Shake.vanilla
Shake.chocolate
Shake.cookies
Shake.mint
The ``__order__`` attribute is always removed, and in 3.x it is also ignored
(order is definition order); however, in the stdlib version it will be ignored
but not removed.
Enumeration members are hashable, so they can be used in dictionaries and sets::
>>> apples = {}
>>> apples[Color.red] = 'red delicious'
>>> apples[Color.green] = 'granny smith'
>>> apples == {Color.red: 'red delicious', Color.green: 'granny smith'}
True
Programmatic access to enumeration members and their attributes
---------------------------------------------------------------
Sometimes it's useful to access members in enumerations programmatically (i.e.
situations where ``Color.red`` won't do because the exact color is not known
at program-writing time). ``Enum`` allows such access::
>>> Color(1)
<Color.red: 1>
>>> Color(3)
<Color.blue: 3>
If you want to access enum members by *name*, use item access::
>>> Color['red']
<Color.red: 1>
>>> Color['green']
<Color.green: 2>
If have an enum member and need its ``name`` or ``value``::
>>> member = Color.red
>>> member.name
'red'
>>> member.value
1
Duplicating enum members and values
-----------------------------------
Having two enum members (or any other attribute) with the same name is invalid;
in Python 3.x this would raise an error, but in Python 2.x the second member
simply overwrites the first::
>>> # python 2.x
>>> class Shape(Enum):
... square = 2
... square = 3
...
>>> Shape.square
<Shape.square: 3>
>>> # python 3.x
>>> class Shape(Enum):
... square = 2
... square = 3
Traceback (most recent call last):
...
TypeError: Attempted to reuse key: 'square'
However, two enum members are allowed to have the same value. Given two members
A and B with the same value (and A defined first), B is an alias to A. By-value
lookup of the value of A and B will return A. By-name lookup of B will also
return A::
>>> class Shape(Enum):
... __order__ = 'square diamond circle alias_for_square' # only needed in 2.x
... square = 2
... diamond = 1
... circle = 3
... alias_for_square = 2
...
>>> Shape.square
<Shape.square: 2>
>>> Shape.alias_for_square
<Shape.square: 2>
>>> Shape(2)
<Shape.square: 2>
Allowing aliases is not always desirable. ``unique`` can be used to ensure
that none exist in a particular enumeration::
>>> from enum import unique
>>> @unique
... class Mistake(Enum):
... __order__ = 'one two three four' # only needed in 2.x
... one = 1
... two = 2
... three = 3
... four = 3
Traceback (most recent call last):
...
ValueError: duplicate names found in <enum 'Mistake'>: four -> three
Iterating over the members of an enum does not provide the aliases::
>>> list(Shape)
[<Shape.square: 2>, <Shape.diamond: 1>, <Shape.circle: 3>]
The special attribute ``__members__`` is a dictionary mapping names to members.
It includes all names defined in the enumeration, including the aliases::
>>> for name, member in sorted(Shape.__members__.items()):
... name, member
...
('alias_for_square', <Shape.square: 2>)
('circle', <Shape.circle: 3>)
('diamond', <Shape.diamond: 1>)
('square', <Shape.square: 2>)
The ``__members__`` attribute can be used for detailed programmatic access to
the enumeration members. For example, finding all the aliases::
>>> [name for name, member in Shape.__members__.items() if member.name != name]
['alias_for_square']
Comparisons
-----------
Enumeration members are compared by identity::
>>> Color.red is Color.red
True
>>> Color.red is Color.blue
False
>>> Color.red is not Color.blue
True
Ordered comparisons between enumeration values are *not* supported. Enum
members are not integers (but see `IntEnum`_ below)::
>>> Color.red < Color.blue
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: unorderable types: Color() < Color()
.. warning::
In Python 2 *everything* is ordered, even though the ordering may not
make sense. If you want your enumerations to have a sensible ordering
check out the `OrderedEnum`_ recipe below.
Equality comparisons are defined though::
>>> Color.blue == Color.red
False
>>> Color.blue != Color.red
True
>>> Color.blue == Color.blue
True
Comparisons against non-enumeration values will always compare not equal
(again, ``IntEnum`` was explicitly designed to behave differently, see
below)::
>>> Color.blue == 2
False
Allowed members and attributes of enumerations
----------------------------------------------
The examples above use integers for enumeration values. Using integers is
short and handy (and provided by default by the `Functional API`_), but not
strictly enforced. In the vast majority of use-cases, one doesn't care what
the actual value of an enumeration is. But if the value *is* important,
enumerations can have arbitrary values.
Enumerations are Python classes, and can have methods and special methods as
usual. If we have this enumeration::
>>> class Mood(Enum):
... funky = 1
... happy = 3
...
... def describe(self):
... # self is the member here
... return self.name, self.value
...
... def __str__(self):
... return 'my custom str! {0}'.format(self.value)
...
... @classmethod
... def favorite_mood(cls):
... # cls here is the enumeration
... return cls.happy
Then::
>>> Mood.favorite_mood()
<Mood.happy: 3>
>>> Mood.happy.describe()
('happy', 3)
>>> str(Mood.funky)
'my custom str! 1'
The rules for what is allowed are as follows: _sunder_ names (starting and
ending with a single underscore) are reserved by enum and cannot be used;
all other attributes defined within an enumeration will become members of this
enumeration, with the exception of *__dunder__* names and descriptors (methods
are also descriptors).
Note:
If your enumeration defines ``__new__`` and/or ``__init__`` then
whatever value(s) were given to the enum member will be passed into
those methods. See `Planet`_ for an example.
Restricted subclassing of enumerations
--------------------------------------
Subclassing an enumeration is allowed only if the enumeration does not define
any members. So this is forbidden::
>>> class MoreColor(Color):
... pink = 17
Traceback (most recent call last):
...
TypeError: Cannot extend enumerations
But this is allowed::
>>> class Foo(Enum):
... def some_behavior(self):
... pass
...
>>> class Bar(Foo):
... happy = 1
... sad = 2
...
Allowing subclassing of enums that define members would lead to a violation of
some important invariants of types and instances. On the other hand, it makes
sense to allow sharing some common behavior between a group of enumerations.
(See `OrderedEnum`_ for an example.)
Pickling
--------
Enumerations can be pickled and unpickled::
>>> from enum.test_enum import Fruit
>>> from pickle import dumps, loads
>>> Fruit.tomato is loads(dumps(Fruit.tomato, 2))
True
The usual restrictions for pickling apply: picklable enums must be defined in
the top level of a module, since unpickling requires them to be importable
from that module.
Note:
With pickle protocol version 4 (introduced in Python 3.4) it is possible
to easily pickle enums nested in other classes.
Functional API
--------------
The ``Enum`` class is callable, providing the following functional API::
>>> Animal = Enum('Animal', 'ant bee cat dog')
>>> Animal
<enum 'Animal'>
>>> Animal.ant
<Animal.ant: 1>
>>> Animal.ant.value
1
>>> list(Animal)
[<Animal.ant: 1>, <Animal.bee: 2>, <Animal.cat: 3>, <Animal.dog: 4>]
The semantics of this API resemble ``namedtuple``. The first argument
of the call to ``Enum`` is the name of the enumeration.
The second argument is the *source* of enumeration member names. It can be a
whitespace-separated string of names, a sequence of names, a sequence of
2-tuples with key/value pairs, or a mapping (e.g. dictionary) of names to
values. The last two options enable assigning arbitrary values to
enumerations; the others auto-assign increasing integers starting with 1. A
new class derived from ``Enum`` is returned. In other words, the above
assignment to ``Animal`` is equivalent to::
>>> class Animals(Enum):
... ant = 1
... bee = 2
... cat = 3
... dog = 4
Pickling enums created with the functional API can be tricky as frame stack
implementation details are used to try and figure out which module the
enumeration is being created in (e.g. it will fail if you use a utility
function in separate module, and also may not work on IronPython or Jython).
The solution is to specify the module name explicitly as follows::
>>> Animals = Enum('Animals', 'ant bee cat dog', module=__name__)
Derived Enumerations
--------------------
IntEnum
^^^^^^^
A variation of ``Enum`` is provided which is also a subclass of
``int``. Members of an ``IntEnum`` can be compared to integers;
by extension, integer enumerations of different types can also be compared
to each other::
>>> from enum import IntEnum
>>> class Shape(IntEnum):
... circle = 1
... square = 2
...
>>> class Request(IntEnum):
... post = 1
... get = 2
...
>>> Shape == 1
False
>>> Shape.circle == 1
True
>>> Shape.circle == Request.post
True
However, they still can't be compared to standard ``Enum`` enumerations::
>>> class Shape(IntEnum):
... circle = 1
... square = 2
...
>>> class Color(Enum):
... red = 1
... green = 2
...
>>> Shape.circle == Color.red
False
``IntEnum`` values behave like integers in other ways you'd expect::
>>> int(Shape.circle)
1
>>> ['a', 'b', 'c'][Shape.circle]
'b'
>>> [i for i in range(Shape.square)]
[0, 1]
For the vast majority of code, ``Enum`` is strongly recommended,
since ``IntEnum`` breaks some semantic promises of an enumeration (by
being comparable to integers, and thus by transitivity to other
unrelated enumerations). It should be used only in special cases where
there's no other choice; for example, when integer constants are
replaced with enumerations and backwards compatibility is required with code
that still expects integers.
Others
^^^^^^
While ``IntEnum`` is part of the ``enum`` module, it would be very
simple to implement independently::
class IntEnum(int, Enum):
pass
This demonstrates how similar derived enumerations can be defined; for example
a ``StrEnum`` that mixes in ``str`` instead of ``int``.
Some rules:
1. When subclassing ``Enum``, mix-in types must appear before
``Enum`` itself in the sequence of bases, as in the ``IntEnum``
example above.
2. While ``Enum`` can have members of any type, once you mix in an
additional type, all the members must have values of that type, e.g.
``int`` above. This restriction does not apply to mix-ins which only
add methods and don't specify another data type such as ``int`` or
``str``.
3. When another data type is mixed in, the ``value`` attribute is *not the
same* as the enum member itself, although it is equivalant and will compare
equal.
4. %-style formatting: ``%s`` and ``%r`` call ``Enum``'s ``__str__`` and
``__repr__`` respectively; other codes (such as ``%i`` or ``%h`` for
IntEnum) treat the enum member as its mixed-in type.
Note: Prior to Python 3.4 there is a bug in ``str``'s %-formatting: ``int``
subclasses are printed as strings and not numbers when the ``%d``, ``%i``,
or ``%u`` codes are used.
5. ``str.__format__`` (or ``format``) will use the mixed-in
type's ``__format__``. If the ``Enum``'s ``str`` or
``repr`` is desired use the ``!s`` or ``!r`` ``str`` format codes.
Decorators
----------
unique
^^^^^^
A ``class`` decorator specifically for enumerations. It searches an
enumeration's ``__members__`` gathering any aliases it finds; if any are
found ``ValueError`` is raised with the details::
>>> @unique
... class NoDupes(Enum):
... first = 'one'
... second = 'two'
... third = 'two'
Traceback (most recent call last):
...
ValueError: duplicate names found in <enum 'NoDupes'>: third -> second
Interesting examples
--------------------
While ``Enum`` and ``IntEnum`` are expected to cover the majority of
use-cases, they cannot cover them all. Here are recipes for some different
types of enumerations that can be used directly, or as examples for creating
one's own.
AutoNumber
^^^^^^^^^^
Avoids having to specify the value for each enumeration member::
>>> class AutoNumber(Enum):
... def __new__(cls):
... value = len(cls.__members__) + 1
... obj = object.__new__(cls)
... obj._value_ = value
... return obj
...
>>> class Color(AutoNumber):
... __order__ = "red green blue" # only needed in 2.x
... red = ()
... green = ()
... blue = ()
...
>>> Color.green.value == 2
True
Note:
The `__new__` method, if defined, is used during creation of the Enum
members; it is then replaced by Enum's `__new__` which is used after
class creation for lookup of existing members. Due to the way Enums are
supposed to behave, there is no way to customize Enum's `__new__`.
UniqueEnum
^^^^^^^^^^
Raises an error if a duplicate member name is found instead of creating an
alias::
>>> class UniqueEnum(Enum):
... def __init__(self, *args):
... cls = self.__class__
... if any(self.value == e.value for e in cls):
... a = self.name
... e = cls(self.value).name
... raise ValueError(
... "aliases not allowed in UniqueEnum: %r --> %r"
... % (a, e))
...
>>> class Color(UniqueEnum):
... red = 1
... green = 2
... blue = 3
... grene = 2
Traceback (most recent call last):
...
ValueError: aliases not allowed in UniqueEnum: 'grene' --> 'green'
OrderedEnum
^^^^^^^^^^^
An ordered enumeration that is not based on ``IntEnum`` and so maintains
the normal ``Enum`` invariants (such as not being comparable to other
enumerations)::
>>> class OrderedEnum(Enum):
... def __ge__(self, other):
... if self.__class__ is other.__class__:
... return self._value_ >= other._value_
... return NotImplemented
... def __gt__(self, other):
... if self.__class__ is other.__class__:
... return self._value_ > other._value_
... return NotImplemented
... def __le__(self, other):
... if self.__class__ is other.__class__:
... return self._value_ <= other._value_
... return NotImplemented
... def __lt__(self, other):
... if self.__class__ is other.__class__:
... return self._value_ < other._value_
... return NotImplemented
...
>>> class Grade(OrderedEnum):
... __ordered__ = 'A B C D F'
... A = 5
... B = 4
... C = 3
... D = 2
... F = 1
...
>>> Grade.C < Grade.A
True
Planet
^^^^^^
If ``__new__`` or ``__init__`` is defined the value of the enum member
will be passed to those methods::
>>> class Planet(Enum):
... MERCURY = (3.303e+23, 2.4397e6)
... VENUS = (4.869e+24, 6.0518e6)
... EARTH = (5.976e+24, 6.37814e6)
... MARS = (6.421e+23, 3.3972e6)
... JUPITER = (1.9e+27, 7.1492e7)
... SATURN = (5.688e+26, 6.0268e7)
... URANUS = (8.686e+25, 2.5559e7)
... NEPTUNE = (1.024e+26, 2.4746e7)
... def __init__(self, mass, radius):
... self.mass = mass # in kilograms
... self.radius = radius # in meters
... @property
... def surface_gravity(self):
... # universal gravitational constant (m3 kg-1 s-2)
... G = 6.67300E-11
... return G * self.mass / (self.radius * self.radius)
...
>>> Planet.EARTH.value
(5.976e+24, 6378140.0)
>>> Planet.EARTH.surface_gravity
9.802652743337129
How are Enums different?
------------------------
Enums have a custom metaclass that affects many aspects of both derived Enum
classes and their instances (members).
Enum Classes
^^^^^^^^^^^^
The ``EnumMeta`` metaclass is responsible for providing the
``__contains__``, ``__dir__``, ``__iter__`` and other methods that
allow one to do things with an ``Enum`` class that fail on a typical
class, such as ``list(Color)`` or ``some_var in Color``. ``EnumMeta`` is
responsible for ensuring that various other methods on the final ``Enum``
class are correct (such as ``__new__``, ``__getnewargs__``,
``__str__`` and ``__repr__``).
.. note::
``__dir__`` is not changed in the Python 2 line as it messes up some
of the decorators included in the stdlib.
Enum Members (aka instances)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The most interesting thing about Enum members is that they are singletons.
``EnumMeta`` creates them all while it is creating the ``Enum``
class itself, and then puts a custom ``__new__`` in place to ensure
that no new ones are ever instantiated by returning only the existing
member instances.
Finer Points
^^^^^^^^^^^^
``Enum`` members are instances of an ``Enum`` class, and even though they
are accessible as `EnumClass.member1.member2`, they should not be
accessed directly from the member as that lookup may fail or, worse,
return something besides the ``Enum`` member you were looking for
(changed in version 1.1.1)::
>>> class FieldTypes(Enum):
... name = 1
... value = 2
... size = 3
...
>>> FieldTypes.value.size
<FieldTypes.size: 3>
>>> FieldTypes.size.value
3
The ``__members__`` attribute is only available on the class.
In Python 3.x ``__members__`` is always an ``OrderedDict``, with the order being
the definition order. In Python 2.7 ``__members__`` is an ``OrderedDict`` if
``__order__`` was specified, and a plain ``dict`` otherwise. In all other Python
2.x versions ``__members__`` is a plain ``dict`` even if ``__order__`` was specified
as the ``OrderedDict`` type didn't exist yet.
If you give your ``Enum`` subclass extra methods, like the `Planet`_
class above, those methods will show up in a `dir` of the member,
but not of the class::
>>> dir(Planet)
['EARTH', 'JUPITER', 'MARS', 'MERCURY', 'NEPTUNE', 'SATURN', 'URANUS',
'VENUS', '__class__', '__doc__', '__members__', '__module__']
>>> dir(Planet.EARTH)
['__class__', '__doc__', '__module__', 'name', 'surface_gravity', 'value']
A ``__new__`` method will only be used for the creation of the
``Enum`` members -- after that it is replaced. This means if you wish to
change how ``Enum`` members are looked up you either have to write a
helper function or a ``classmethod``.

File diff suppressed because it is too large Load Diff

@ -8,10 +8,16 @@ __copyright__ = "Copyright 2010, S Anand"
__license__ = "WTFPL"
from datetime import datetime
from six import PY3
def _df(seconds, denominator=1, text='', past=True):
if past: return str(round(seconds / denominator)) + text + ' ago'
else: return 'in ' + str(round(seconds / denominator)) + text
if PY3:
result = str(round(seconds / denominator))
else:
result = str((seconds + denominator/2)/ denominator)
if past: return result + text + ' ago'
else: return 'in ' + result + text
def date(time=False, asdays=False, short=False):
'''Returns a pretty formatted date.

@ -1,4 +1,5 @@
from __future__ import absolute_import
from six import PY3
import json
import subprocess
from os import path
@ -176,14 +177,25 @@ class VideoFileParser:
"""
command = [parser] + commandArgs + [inputFile]
completedProcess = subprocess.run(
command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8"
)
if completedProcess.returncode != 0:
raise IOError(
"Error occurred during execution - " + completedProcess.stderr
if PY3:
completedProcess = subprocess.run(
command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8"
)
return completedProcess.stdout
if completedProcess.returncode != 0:
raise IOError(
"Error occurred during execution - " + completedProcess.stderr
)
return completedProcess.stdout
else:
try:
completedProcess = subprocess.check_output(
command, stderr=subprocess.STDOUT
)
except subprocess.CalledProcessError as e:
raise IOError(
"Error occurred during execution - " + e.output
)
return completedProcess
@staticmethod
def _checkExecutable(executable):

@ -26,7 +26,13 @@ this script that does the job by parsing the website"s pages.
# imports
import re
import enum
from six import PY3
if PY3:
import enum
else:
import enum2 as enum
import sys
import requests
import time

@ -1,6 +1,7 @@
# coding=utf-8
from __future__ import absolute_import
from __future__ import unicode_literals
import re
from subzero.language import Language

@ -1,5 +1,6 @@
# coding=utf-8
from __future__ import absolute_import
from __future__ import unicode_literals
import re
from subzero.modification.mods import SubtitleTextModification, empty_line_post_processors, EmptyEntryError, TAG

@ -1,402 +0,0 @@
from .error import *
from .tokens import *
from .events import *
from .nodes import *
from .loader import *
from .dumper import *
__version__ = '5.1.2'
try:
from .cyaml import *
__with_libyaml__ = True
except ImportError:
__with_libyaml__ = False
import io
#------------------------------------------------------------------------------
# Warnings control
#------------------------------------------------------------------------------
# 'Global' warnings state:
_warnings_enabled = {
'YAMLLoadWarning': True,
}
# Get or set global warnings' state
def warnings(settings=None):
if settings is None:
return _warnings_enabled
if type(settings) is dict:
for key in settings:
if key in _warnings_enabled:
_warnings_enabled[key] = settings[key]
# Warn when load() is called without Loader=...
class YAMLLoadWarning(RuntimeWarning):
pass
def load_warning(method):
if _warnings_enabled['YAMLLoadWarning'] is False:
return
import warnings
message = (
"calling yaml.%s() without Loader=... is deprecated, as the "
"default Loader is unsafe. Please read "
"https://msg.pyyaml.org/load for full details."
) % method
warnings.warn(message, YAMLLoadWarning, stacklevel=3)
#------------------------------------------------------------------------------
def scan(stream, Loader=Loader):
"""
Scan a YAML stream and produce scanning tokens.
"""
loader = Loader(stream)
try:
while loader.check_token():
yield loader.get_token()
finally:
loader.dispose()
def parse(stream, Loader=Loader):
"""
Parse a YAML stream and produce parsing events.
"""
loader = Loader(stream)
try:
while loader.check_event():
yield loader.get_event()
finally:
loader.dispose()
def compose(stream, Loader=Loader):
"""
Parse the first YAML document in a stream
and produce the corresponding representation tree.
"""
loader = Loader(stream)
try:
return loader.get_single_node()
finally:
loader.dispose()
def compose_all(stream, Loader=Loader):
"""
Parse all YAML documents in a stream
and produce corresponding representation trees.
"""
loader = Loader(stream)
try:
while loader.check_node():
yield loader.get_node()
finally:
loader.dispose()
def load(stream, Loader=None):
"""
Parse the first YAML document in a stream
and produce the corresponding Python object.
"""
if Loader is None:
load_warning('load')
Loader = FullLoader
loader = Loader(stream)
try:
return loader.get_single_data()
finally:
loader.dispose()
def load_all(stream, Loader=None):
"""
Parse all YAML documents in a stream
and produce corresponding Python objects.
"""
if Loader is None:
load_warning('load_all')
Loader = FullLoader
loader = Loader(stream)
try:
while loader.check_data():
yield loader.get_data()
finally:
loader.dispose()
def full_load(stream):
"""
Parse the first YAML document in a stream
and produce the corresponding Python object.
Resolve all tags except those known to be
unsafe on untrusted input.
"""
return load(stream, FullLoader)
def full_load_all(stream):
"""
Parse all YAML documents in a stream
and produce corresponding Python objects.
Resolve all tags except those known to be
unsafe on untrusted input.
"""
return load_all(stream, FullLoader)
def safe_load(stream):
"""
Parse the first YAML document in a stream
and produce the corresponding Python object.
Resolve only basic YAML tags. This is known
to be safe for untrusted input.
"""
return load(stream, SafeLoader)
def safe_load_all(stream):
"""
Parse all YAML documents in a stream
and produce corresponding Python objects.
Resolve only basic YAML tags. This is known
to be safe for untrusted input.
"""
return load_all(stream, SafeLoader)
def unsafe_load(stream):
"""
Parse the first YAML document in a stream
and produce the corresponding Python object.
Resolve all tags, even those known to be
unsafe on untrusted input.
"""
return load(stream, UnsafeLoader)
def unsafe_load_all(stream):
"""
Parse all YAML documents in a stream
and produce corresponding Python objects.
Resolve all tags, even those known to be
unsafe on untrusted input.
"""
return load_all(stream, UnsafeLoader)
def emit(events, stream=None, Dumper=Dumper,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None):
"""
Emit YAML parsing events into a stream.
If stream is None, return the produced string instead.
"""
getvalue = None
if stream is None:
stream = io.StringIO()
getvalue = stream.getvalue
dumper = Dumper(stream, canonical=canonical, indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break)
try:
for event in events:
dumper.emit(event)
finally:
dumper.dispose()
if getvalue:
return getvalue()
def serialize_all(nodes, stream=None, Dumper=Dumper,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None):
"""
Serialize a sequence of representation trees into a YAML stream.
If stream is None, return the produced string instead.
"""
getvalue = None
if stream is None:
if encoding is None:
stream = io.StringIO()
else:
stream = io.BytesIO()
getvalue = stream.getvalue
dumper = Dumper(stream, canonical=canonical, indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break,
encoding=encoding, version=version, tags=tags,
explicit_start=explicit_start, explicit_end=explicit_end)
try:
dumper.open()
for node in nodes:
dumper.serialize(node)
dumper.close()
finally:
dumper.dispose()
if getvalue:
return getvalue()
def serialize(node, stream=None, Dumper=Dumper, **kwds):
"""
Serialize a representation tree into a YAML stream.
If stream is None, return the produced string instead.
"""
return serialize_all([node], stream, Dumper=Dumper, **kwds)
def dump_all(documents, stream=None, Dumper=Dumper,
default_style=None, default_flow_style=False,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None, sort_keys=True):
"""
Serialize a sequence of Python objects into a YAML stream.
If stream is None, return the produced string instead.
"""
getvalue = None
if stream is None:
if encoding is None:
stream = io.StringIO()
else:
stream = io.BytesIO()
getvalue = stream.getvalue
dumper = Dumper(stream, default_style=default_style,
default_flow_style=default_flow_style,
canonical=canonical, indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break,
encoding=encoding, version=version, tags=tags,
explicit_start=explicit_start, explicit_end=explicit_end, sort_keys=sort_keys)
try:
dumper.open()
for data in documents:
dumper.represent(data)
dumper.close()
finally:
dumper.dispose()
if getvalue:
return getvalue()
def dump(data, stream=None, Dumper=Dumper, **kwds):
"""
Serialize a Python object into a YAML stream.
If stream is None, return the produced string instead.
"""
return dump_all([data], stream, Dumper=Dumper, **kwds)
def safe_dump_all(documents, stream=None, **kwds):
"""
Serialize a sequence of Python objects into a YAML stream.
Produce only basic YAML tags.
If stream is None, return the produced string instead.
"""
return dump_all(documents, stream, Dumper=SafeDumper, **kwds)
def safe_dump(data, stream=None, **kwds):
"""
Serialize a Python object into a YAML stream.
Produce only basic YAML tags.
If stream is None, return the produced string instead.
"""
return dump_all([data], stream, Dumper=SafeDumper, **kwds)
def add_implicit_resolver(tag, regexp, first=None,
Loader=Loader, Dumper=Dumper):
"""
Add an implicit scalar detector.
If an implicit scalar value matches the given regexp,
the corresponding tag is assigned to the scalar.
first is a sequence of possible initial characters or None.
"""
Loader.add_implicit_resolver(tag, regexp, first)
Dumper.add_implicit_resolver(tag, regexp, first)
def add_path_resolver(tag, path, kind=None, Loader=Loader, Dumper=Dumper):
"""
Add a path based resolver for the given tag.
A path is a list of keys that forms a path
to a node in the representation tree.
Keys can be string values, integers, or None.
"""
Loader.add_path_resolver(tag, path, kind)
Dumper.add_path_resolver(tag, path, kind)
def add_constructor(tag, constructor, Loader=Loader):
"""
Add a constructor for the given tag.
Constructor is a function that accepts a Loader instance
and a node object and produces the corresponding Python object.
"""
Loader.add_constructor(tag, constructor)
def add_multi_constructor(tag_prefix, multi_constructor, Loader=Loader):
"""
Add a multi-constructor for the given tag prefix.
Multi-constructor is called for a node if its tag starts with tag_prefix.
Multi-constructor accepts a Loader instance, a tag suffix,
and a node object and produces the corresponding Python object.
"""
Loader.add_multi_constructor(tag_prefix, multi_constructor)
def add_representer(data_type, representer, Dumper=Dumper):
"""
Add a representer for the given type.
Representer is a function accepting a Dumper instance
and an instance of the given data type
and producing the corresponding representation node.
"""
Dumper.add_representer(data_type, representer)
def add_multi_representer(data_type, multi_representer, Dumper=Dumper):
"""
Add a representer for the given type.
Multi-representer is a function accepting a Dumper instance
and an instance of the given data type or subtype
and producing the corresponding representation node.
"""
Dumper.add_multi_representer(data_type, multi_representer)
class YAMLObjectMetaclass(type):
"""
The metaclass for YAMLObject.
"""
def __init__(cls, name, bases, kwds):
super(YAMLObjectMetaclass, cls).__init__(name, bases, kwds)
if 'yaml_tag' in kwds and kwds['yaml_tag'] is not None:
cls.yaml_loader.add_constructor(cls.yaml_tag, cls.from_yaml)
cls.yaml_dumper.add_representer(cls, cls.to_yaml)
class YAMLObject(metaclass=YAMLObjectMetaclass):
"""
An object that can dump itself to a YAML stream
and load itself from a YAML stream.
"""
__slots__ = () # no direct instantiation, so allow immutable subclasses
yaml_loader = Loader
yaml_dumper = Dumper
yaml_tag = None
yaml_flow_style = None
@classmethod
def from_yaml(cls, loader, node):
"""
Convert a representation node to a Python object.
"""
return loader.construct_yaml_object(node, cls)
@classmethod
def to_yaml(cls, dumper, data):
"""
Convert a Python object to a representation node.
"""
return dumper.represent_yaml_object(cls.yaml_tag, data, cls,
flow_style=cls.yaml_flow_style)

@ -1,139 +0,0 @@
__all__ = ['Composer', 'ComposerError']
from .error import MarkedYAMLError
from .events import *
from .nodes import *
class ComposerError(MarkedYAMLError):
pass
class Composer:
def __init__(self):
self.anchors = {}
def check_node(self):
# Drop the STREAM-START event.
if self.check_event(StreamStartEvent):
self.get_event()
# If there are more documents available?
return not self.check_event(StreamEndEvent)
def get_node(self):
# Get the root node of the next document.
if not self.check_event(StreamEndEvent):
return self.compose_document()
def get_single_node(self):
# Drop the STREAM-START event.
self.get_event()
# Compose a document if the stream is not empty.
document = None
if not self.check_event(StreamEndEvent):
document = self.compose_document()
# Ensure that the stream contains no more documents.
if not self.check_event(StreamEndEvent):
event = self.get_event()
raise ComposerError("expected a single document in the stream",
document.start_mark, "but found another document",
event.start_mark)
# Drop the STREAM-END event.
self.get_event()
return document
def compose_document(self):
# Drop the DOCUMENT-START event.
self.get_event()
# Compose the root node.
node = self.compose_node(None, None)
# Drop the DOCUMENT-END event.
self.get_event()
self.anchors = {}
return node
def compose_node(self, parent, index):
if self.check_event(AliasEvent):
event = self.get_event()
anchor = event.anchor
if anchor not in self.anchors:
raise ComposerError(None, None, "found undefined alias %r"
% anchor, event.start_mark)
return self.anchors[anchor]
event = self.peek_event()
anchor = event.anchor
if anchor is not None:
if anchor in self.anchors:
raise ComposerError("found duplicate anchor %r; first occurrence"
% anchor, self.anchors[anchor].start_mark,
"second occurrence", event.start_mark)
self.descend_resolver(parent, index)
if self.check_event(ScalarEvent):
node = self.compose_scalar_node(anchor)
elif self.check_event(SequenceStartEvent):
node = self.compose_sequence_node(anchor)
elif self.check_event(MappingStartEvent):
node = self.compose_mapping_node(anchor)
self.ascend_resolver()
return node
def compose_scalar_node(self, anchor):
event = self.get_event()
tag = event.tag
if tag is None or tag == '!':
tag = self.resolve(ScalarNode, event.value, event.implicit)
node = ScalarNode(tag, event.value,
event.start_mark, event.end_mark, style=event.style)
if anchor is not None:
self.anchors[anchor] = node
return node
def compose_sequence_node(self, anchor):
start_event = self.get_event()
tag = start_event.tag
if tag is None or tag == '!':
tag = self.resolve(SequenceNode, None, start_event.implicit)
node = SequenceNode(tag, [],
start_event.start_mark, None,
flow_style=start_event.flow_style)
if anchor is not None:
self.anchors[anchor] = node
index = 0
while not self.check_event(SequenceEndEvent):
node.value.append(self.compose_node(node, index))
index += 1
end_event = self.get_event()
node.end_mark = end_event.end_mark
return node
def compose_mapping_node(self, anchor):
start_event = self.get_event()
tag = start_event.tag
if tag is None or tag == '!':
tag = self.resolve(MappingNode, None, start_event.implicit)
node = MappingNode(tag, [],
start_event.start_mark, None,
flow_style=start_event.flow_style)
if anchor is not None:
self.anchors[anchor] = node
while not self.check_event(MappingEndEvent):
#key_event = self.peek_event()
item_key = self.compose_node(node, None)
#if item_key in node.value:
# raise ComposerError("while composing a mapping", start_event.start_mark,
# "found duplicate key", key_event.start_mark)
item_value = self.compose_node(node, item_key)
#node.value[item_key] = item_value
node.value.append((item_key, item_value))
end_event = self.get_event()
node.end_mark = end_event.end_mark
return node

@ -1,720 +0,0 @@
__all__ = [
'BaseConstructor',
'SafeConstructor',
'FullConstructor',
'UnsafeConstructor',
'Constructor',
'ConstructorError'
]
from .error import *
from .nodes import *
import collections.abc, datetime, base64, binascii, re, sys, types
class ConstructorError(MarkedYAMLError):
pass
class BaseConstructor:
yaml_constructors = {}
yaml_multi_constructors = {}
def __init__(self):
self.constructed_objects = {}
self.recursive_objects = {}
self.state_generators = []
self.deep_construct = False
def check_data(self):
# If there are more documents available?
return self.check_node()
def get_data(self):
# Construct and return the next document.
if self.check_node():
return self.construct_document(self.get_node())
def get_single_data(self):
# Ensure that the stream contains a single document and construct it.
node = self.get_single_node()
if node is not None:
return self.construct_document(node)
return None
def construct_document(self, node):
data = self.construct_object(node)
while self.state_generators:
state_generators = self.state_generators
self.state_generators = []
for generator in state_generators:
for dummy in generator:
pass
self.constructed_objects = {}
self.recursive_objects = {}
self.deep_construct = False
return data
def construct_object(self, node, deep=False):
if node in self.constructed_objects:
return self.constructed_objects[node]
if deep:
old_deep = self.deep_construct
self.deep_construct = True
if node in self.recursive_objects:
raise ConstructorError(None, None,
"found unconstructable recursive node", node.start_mark)
self.recursive_objects[node] = None
constructor = None
tag_suffix = None
if node.tag in self.yaml_constructors:
constructor = self.yaml_constructors[node.tag]
else:
for tag_prefix in self.yaml_multi_constructors:
if node.tag.startswith(tag_prefix):
tag_suffix = node.tag[len(tag_prefix):]
constructor = self.yaml_multi_constructors[tag_prefix]
break
else:
if None in self.yaml_multi_constructors:
tag_suffix = node.tag
constructor = self.yaml_multi_constructors[None]
elif None in self.yaml_constructors:
constructor = self.yaml_constructors[None]
elif isinstance(node, ScalarNode):
constructor = self.__class__.construct_scalar
elif isinstance(node, SequenceNode):
constructor = self.__class__.construct_sequence
elif isinstance(node, MappingNode):
constructor = self.__class__.construct_mapping
if tag_suffix is None:
data = constructor(self, node)
else:
data = constructor(self, tag_suffix, node)
if isinstance(data, types.GeneratorType):
generator = data
data = next(generator)
if self.deep_construct:
for dummy in generator:
pass
else:
self.state_generators.append(generator)
self.constructed_objects[node] = data
del self.recursive_objects[node]
if deep:
self.deep_construct = old_deep
return data
def construct_scalar(self, node):
if not isinstance(node, ScalarNode):
raise ConstructorError(None, None,
"expected a scalar node, but found %s" % node.id,
node.start_mark)
return node.value
def construct_sequence(self, node, deep=False):
if not isinstance(node, SequenceNode):
raise ConstructorError(None, None,
"expected a sequence node, but found %s" % node.id,
node.start_mark)
return [self.construct_object(child, deep=deep)
for child in node.value]
def construct_mapping(self, node, deep=False):
if not isinstance(node, MappingNode):
raise ConstructorError(None, None,
"expected a mapping node, but found %s" % node.id,
node.start_mark)
mapping = {}
for key_node, value_node in node.value:
key = self.construct_object(key_node, deep=deep)
if not isinstance(key, collections.abc.Hashable):
raise ConstructorError("while constructing a mapping", node.start_mark,
"found unhashable key", key_node.start_mark)
value = self.construct_object(value_node, deep=deep)
mapping[key] = value
return mapping
def construct_pairs(self, node, deep=False):
if not isinstance(node, MappingNode):
raise ConstructorError(None, None,
"expected a mapping node, but found %s" % node.id,
node.start_mark)
pairs = []
for key_node, value_node in node.value:
key = self.construct_object(key_node, deep=deep)
value = self.construct_object(value_node, deep=deep)
pairs.append((key, value))
return pairs
@classmethod
def add_constructor(cls, tag, constructor):
if not 'yaml_constructors' in cls.__dict__:
cls.yaml_constructors = cls.yaml_constructors.copy()
cls.yaml_constructors[tag] = constructor
@classmethod
def add_multi_constructor(cls, tag_prefix, multi_constructor):
if not 'yaml_multi_constructors' in cls.__dict__:
cls.yaml_multi_constructors = cls.yaml_multi_constructors.copy()
cls.yaml_multi_constructors[tag_prefix] = multi_constructor
class SafeConstructor(BaseConstructor):
def construct_scalar(self, node):
if isinstance(node, MappingNode):
for key_node, value_node in node.value:
if key_node.tag == 'tag:yaml.org,2002:value':
return self.construct_scalar(value_node)
return super().construct_scalar(node)
def flatten_mapping(self, node):
merge = []
index = 0
while index < len(node.value):
key_node, value_node = node.value[index]
if key_node.tag == 'tag:yaml.org,2002:merge':
del node.value[index]
if isinstance(value_node, MappingNode):
self.flatten_mapping(value_node)
merge.extend(value_node.value)
elif isinstance(value_node, SequenceNode):
submerge = []
for subnode in value_node.value:
if not isinstance(subnode, MappingNode):
raise ConstructorError("while constructing a mapping",
node.start_mark,
"expected a mapping for merging, but found %s"
% subnode.id, subnode.start_mark)
self.flatten_mapping(subnode)
submerge.append(subnode.value)
submerge.reverse()
for value in submerge:
merge.extend(value)
else:
raise ConstructorError("while constructing a mapping", node.start_mark,
"expected a mapping or list of mappings for merging, but found %s"
% value_node.id, value_node.start_mark)
elif key_node.tag == 'tag:yaml.org,2002:value':
key_node.tag = 'tag:yaml.org,2002:str'
index += 1
else:
index += 1
if merge:
node.value = merge + node.value
def construct_mapping(self, node, deep=False):
if isinstance(node, MappingNode):
self.flatten_mapping(node)
return super().construct_mapping(node, deep=deep)
def construct_yaml_null(self, node):
self.construct_scalar(node)
return None
bool_values = {
'yes': True,
'no': False,
'true': True,
'false': False,
'on': True,
'off': False,
}
def construct_yaml_bool(self, node):
value = self.construct_scalar(node)
return self.bool_values[value.lower()]
def construct_yaml_int(self, node):
value = self.construct_scalar(node)
value = value.replace('_', '')
sign = +1
if value[0] == '-':
sign = -1
if value[0] in '+-':
value = value[1:]
if value == '0':
return 0
elif value.startswith('0b'):
return sign*int(value[2:], 2)
elif value.startswith('0x'):
return sign*int(value[2:], 16)
elif value[0] == '0':
return sign*int(value, 8)
elif ':' in value:
digits = [int(part) for part in value.split(':')]
digits.reverse()
base = 1
value = 0
for digit in digits:
value += digit*base
base *= 60
return sign*value
else:
return sign*int(value)
inf_value = 1e300
while inf_value != inf_value*inf_value:
inf_value *= inf_value
nan_value = -inf_value/inf_value # Trying to make a quiet NaN (like C99).
def construct_yaml_float(self, node):
value = self.construct_scalar(node)
value = value.replace('_', '').lower()
sign = +1
if value[0] == '-':
sign = -1
if value[0] in '+-':
value = value[1:]
if value == '.inf':
return sign*self.inf_value
elif value == '.nan':
return self.nan_value
elif ':' in value:
digits = [float(part) for part in value.split(':')]
digits.reverse()
base = 1
value = 0.0
for digit in digits:
value += digit*base
base *= 60
return sign*value
else:
return sign*float(value)
def construct_yaml_binary(self, node):
try:
value = self.construct_scalar(node).encode('ascii')
except UnicodeEncodeError as exc:
raise ConstructorError(None, None,
"failed to convert base64 data into ascii: %s" % exc,
node.start_mark)
try:
if hasattr(base64, 'decodebytes'):
return base64.decodebytes(value)
else:
return base64.decodestring(value)
except binascii.Error as exc:
raise ConstructorError(None, None,
"failed to decode base64 data: %s" % exc, node.start_mark)
timestamp_regexp = re.compile(
r'''^(?P<year>[0-9][0-9][0-9][0-9])
-(?P<month>[0-9][0-9]?)
-(?P<day>[0-9][0-9]?)
(?:(?:[Tt]|[ \t]+)
(?P<hour>[0-9][0-9]?)
:(?P<minute>[0-9][0-9])
:(?P<second>[0-9][0-9])
(?:\.(?P<fraction>[0-9]*))?
(?:[ \t]*(?P<tz>Z|(?P<tz_sign>[-+])(?P<tz_hour>[0-9][0-9]?)
(?::(?P<tz_minute>[0-9][0-9]))?))?)?$''', re.X)
def construct_yaml_timestamp(self, node):
value = self.construct_scalar(node)
match = self.timestamp_regexp.match(node.value)
values = match.groupdict()
year = int(values['year'])
month = int(values['month'])
day = int(values['day'])
if not values['hour']:
return datetime.date(year, month, day)
hour = int(values['hour'])
minute = int(values['minute'])
second = int(values['second'])
fraction = 0
if values['fraction']:
fraction = values['fraction'][:6]
while len(fraction) < 6:
fraction += '0'
fraction = int(fraction)
delta = None
if values['tz_sign']:
tz_hour = int(values['tz_hour'])
tz_minute = int(values['tz_minute'] or 0)
delta = datetime.timedelta(hours=tz_hour, minutes=tz_minute)
if values['tz_sign'] == '-':
delta = -delta
data = datetime.datetime(year, month, day, hour, minute, second, fraction)
if delta:
data -= delta
return data
def construct_yaml_omap(self, node):
# Note: we do not check for duplicate keys, because it's too
# CPU-expensive.
omap = []
yield omap
if not isinstance(node, SequenceNode):
raise ConstructorError("while constructing an ordered map", node.start_mark,
"expected a sequence, but found %s" % node.id, node.start_mark)
for subnode in node.value:
if not isinstance(subnode, MappingNode):
raise ConstructorError("while constructing an ordered map", node.start_mark,
"expected a mapping of length 1, but found %s" % subnode.id,
subnode.start_mark)
if len(subnode.value) != 1:
raise ConstructorError("while constructing an ordered map", node.start_mark,
"expected a single mapping item, but found %d items" % len(subnode.value),
subnode.start_mark)
key_node, value_node = subnode.value[0]
key = self.construct_object(key_node)
value = self.construct_object(value_node)
omap.append((key, value))
def construct_yaml_pairs(self, node):
# Note: the same code as `construct_yaml_omap`.
pairs = []
yield pairs
if not isinstance(node, SequenceNode):
raise ConstructorError("while constructing pairs", node.start_mark,
"expected a sequence, but found %s" % node.id, node.start_mark)
for subnode in node.value:
if not isinstance(subnode, MappingNode):
raise ConstructorError("while constructing pairs", node.start_mark,
"expected a mapping of length 1, but found %s" % subnode.id,
subnode.start_mark)
if len(subnode.value) != 1:
raise ConstructorError("while constructing pairs", node.start_mark,
"expected a single mapping item, but found %d items" % len(subnode.value),
subnode.start_mark)
key_node, value_node = subnode.value[0]
key = self.construct_object(key_node)
value = self.construct_object(value_node)
pairs.append((key, value))
def construct_yaml_set(self, node):
data = set()
yield data
value = self.construct_mapping(node)
data.update(value)
def construct_yaml_str(self, node):
return self.construct_scalar(node)
def construct_yaml_seq(self, node):
data = []
yield data
data.extend(self.construct_sequence(node))
def construct_yaml_map(self, node):
data = {}
yield data
value = self.construct_mapping(node)
data.update(value)
def construct_yaml_object(self, node, cls):
data = cls.__new__(cls)
yield data
if hasattr(data, '__setstate__'):
state = self.construct_mapping(node, deep=True)
data.__setstate__(state)
else:
state = self.construct_mapping(node)
data.__dict__.update(state)
def construct_undefined(self, node):
raise ConstructorError(None, None,
"could not determine a constructor for the tag %r" % node.tag,
node.start_mark)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:null',
SafeConstructor.construct_yaml_null)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:bool',
SafeConstructor.construct_yaml_bool)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:int',
SafeConstructor.construct_yaml_int)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:float',
SafeConstructor.construct_yaml_float)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:binary',
SafeConstructor.construct_yaml_binary)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:timestamp',
SafeConstructor.construct_yaml_timestamp)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:omap',
SafeConstructor.construct_yaml_omap)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:pairs',
SafeConstructor.construct_yaml_pairs)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:set',
SafeConstructor.construct_yaml_set)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:str',
SafeConstructor.construct_yaml_str)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:seq',
SafeConstructor.construct_yaml_seq)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:map',
SafeConstructor.construct_yaml_map)
SafeConstructor.add_constructor(None,
SafeConstructor.construct_undefined)
class FullConstructor(SafeConstructor):
def construct_python_str(self, node):
return self.construct_scalar(node)
def construct_python_unicode(self, node):
return self.construct_scalar(node)
def construct_python_bytes(self, node):
try:
value = self.construct_scalar(node).encode('ascii')
except UnicodeEncodeError as exc:
raise ConstructorError(None, None,
"failed to convert base64 data into ascii: %s" % exc,
node.start_mark)
try:
if hasattr(base64, 'decodebytes'):
return base64.decodebytes(value)
else:
return base64.decodestring(value)
except binascii.Error as exc:
raise ConstructorError(None, None,
"failed to decode base64 data: %s" % exc, node.start_mark)
def construct_python_long(self, node):
return self.construct_yaml_int(node)
def construct_python_complex(self, node):
return complex(self.construct_scalar(node))
def construct_python_tuple(self, node):
return tuple(self.construct_sequence(node))
def find_python_module(self, name, mark, unsafe=False):
if not name:
raise ConstructorError("while constructing a Python module", mark,
"expected non-empty name appended to the tag", mark)
if unsafe:
try:
__import__(name)
except ImportError as exc:
raise ConstructorError("while constructing a Python module", mark,
"cannot find module %r (%s)" % (name, exc), mark)
if not name in sys.modules:
raise ConstructorError("while constructing a Python module", mark,
"module %r is not imported" % name, mark)
return sys.modules[name]
def find_python_name(self, name, mark, unsafe=False):
if not name:
raise ConstructorError("while constructing a Python object", mark,
"expected non-empty name appended to the tag", mark)
if '.' in name:
module_name, object_name = name.rsplit('.', 1)
else:
module_name = 'builtins'
object_name = name
if unsafe:
try:
__import__(module_name)
except ImportError as exc:
raise ConstructorError("while constructing a Python object", mark,
"cannot find module %r (%s)" % (module_name, exc), mark)
if not module_name in sys.modules:
raise ConstructorError("while constructing a Python object", mark,
"module %r is not imported" % module_name, mark)
module = sys.modules[module_name]
if not hasattr(module, object_name):
raise ConstructorError("while constructing a Python object", mark,
"cannot find %r in the module %r"
% (object_name, module.__name__), mark)
return getattr(module, object_name)
def construct_python_name(self, suffix, node):
value = self.construct_scalar(node)
if value:
raise ConstructorError("while constructing a Python name", node.start_mark,
"expected the empty value, but found %r" % value, node.start_mark)
return self.find_python_name(suffix, node.start_mark)
def construct_python_module(self, suffix, node):
value = self.construct_scalar(node)
if value:
raise ConstructorError("while constructing a Python module", node.start_mark,
"expected the empty value, but found %r" % value, node.start_mark)
return self.find_python_module(suffix, node.start_mark)
def make_python_instance(self, suffix, node,
args=None, kwds=None, newobj=False, unsafe=False):
if not args:
args = []
if not kwds:
kwds = {}
cls = self.find_python_name(suffix, node.start_mark)
if not (unsafe or isinstance(cls, type)):
raise ConstructorError("while constructing a Python instance", node.start_mark,
"expected a class, but found %r" % type(cls),
node.start_mark)
if newobj and isinstance(cls, type):
return cls.__new__(cls, *args, **kwds)
else:
return cls(*args, **kwds)
def set_python_instance_state(self, instance, state):
if hasattr(instance, '__setstate__'):
instance.__setstate__(state)
else:
slotstate = {}
if isinstance(state, tuple) and len(state) == 2:
state, slotstate = state
if hasattr(instance, '__dict__'):
instance.__dict__.update(state)
elif state:
slotstate.update(state)
for key, value in slotstate.items():
setattr(object, key, value)
def construct_python_object(self, suffix, node):
# Format:
# !!python/object:module.name { ... state ... }
instance = self.make_python_instance(suffix, node, newobj=True)
yield instance
deep = hasattr(instance, '__setstate__')
state = self.construct_mapping(node, deep=deep)
self.set_python_instance_state(instance, state)
def construct_python_object_apply(self, suffix, node, newobj=False):
# Format:
# !!python/object/apply # (or !!python/object/new)
# args: [ ... arguments ... ]
# kwds: { ... keywords ... }
# state: ... state ...
# listitems: [ ... listitems ... ]
# dictitems: { ... dictitems ... }
# or short format:
# !!python/object/apply [ ... arguments ... ]
# The difference between !!python/object/apply and !!python/object/new
# is how an object is created, check make_python_instance for details.
if isinstance(node, SequenceNode):
args = self.construct_sequence(node, deep=True)
kwds = {}
state = {}
listitems = []
dictitems = {}
else:
value = self.construct_mapping(node, deep=True)
args = value.get('args', [])
kwds = value.get('kwds', {})
state = value.get('state', {})
listitems = value.get('listitems', [])
dictitems = value.get('dictitems', {})
instance = self.make_python_instance(suffix, node, args, kwds, newobj)
if state:
self.set_python_instance_state(instance, state)
if listitems:
instance.extend(listitems)
if dictitems:
for key in dictitems:
instance[key] = dictitems[key]
return instance
def construct_python_object_new(self, suffix, node):
return self.construct_python_object_apply(suffix, node, newobj=True)
FullConstructor.add_constructor(
'tag:yaml.org,2002:python/none',
FullConstructor.construct_yaml_null)
FullConstructor.add_constructor(
'tag:yaml.org,2002:python/bool',
FullConstructor.construct_yaml_bool)
FullConstructor.add_constructor(
'tag:yaml.org,2002:python/str',
FullConstructor.construct_python_str)
FullConstructor.add_constructor(
'tag:yaml.org,2002:python/unicode',
FullConstructor.construct_python_unicode)
FullConstructor.add_constructor(
'tag:yaml.org,2002:python/bytes',
FullConstructor.construct_python_bytes)
FullConstructor.add_constructor(
'tag:yaml.org,2002:python/int',
FullConstructor.construct_yaml_int)
FullConstructor.add_constructor(
'tag:yaml.org,2002:python/long',
FullConstructor.construct_python_long)
FullConstructor.add_constructor(
'tag:yaml.org,2002:python/float',
FullConstructor.construct_yaml_float)
FullConstructor.add_constructor(
'tag:yaml.org,2002:python/complex',
FullConstructor.construct_python_complex)
FullConstructor.add_constructor(
'tag:yaml.org,2002:python/list',
FullConstructor.construct_yaml_seq)
FullConstructor.add_constructor(
'tag:yaml.org,2002:python/tuple',
FullConstructor.construct_python_tuple)
FullConstructor.add_constructor(
'tag:yaml.org,2002:python/dict',
FullConstructor.construct_yaml_map)
FullConstructor.add_multi_constructor(
'tag:yaml.org,2002:python/name:',
FullConstructor.construct_python_name)
FullConstructor.add_multi_constructor(
'tag:yaml.org,2002:python/module:',
FullConstructor.construct_python_module)
FullConstructor.add_multi_constructor(
'tag:yaml.org,2002:python/object:',
FullConstructor.construct_python_object)
FullConstructor.add_multi_constructor(
'tag:yaml.org,2002:python/object/apply:',
FullConstructor.construct_python_object_apply)
FullConstructor.add_multi_constructor(
'tag:yaml.org,2002:python/object/new:',
FullConstructor.construct_python_object_new)
class UnsafeConstructor(FullConstructor):
def find_python_module(self, name, mark):
return super(UnsafeConstructor, self).find_python_module(name, mark, unsafe=True)
def find_python_name(self, name, mark):
return super(UnsafeConstructor, self).find_python_name(name, mark, unsafe=True)
def make_python_instance(self, suffix, node, args=None, kwds=None, newobj=False):
return super(UnsafeConstructor, self).make_python_instance(
suffix, node, args, kwds, newobj, unsafe=True)
# Constructor is same as UnsafeConstructor. Need to leave this in place in case
# people have extended it directly.
class Constructor(UnsafeConstructor):
pass

@ -1,101 +0,0 @@
__all__ = [
'CBaseLoader', 'CSafeLoader', 'CFullLoader', 'CUnsafeLoader', 'CLoader',
'CBaseDumper', 'CSafeDumper', 'CDumper'
]
from _yaml import CParser, CEmitter
from .constructor import *
from .serializer import *
from .representer import *
from .resolver import *
class CBaseLoader(CParser, BaseConstructor, BaseResolver):
def __init__(self, stream):
CParser.__init__(self, stream)
BaseConstructor.__init__(self)
BaseResolver.__init__(self)
class CSafeLoader(CParser, SafeConstructor, Resolver):
def __init__(self, stream):
CParser.__init__(self, stream)
SafeConstructor.__init__(self)
Resolver.__init__(self)
class CFullLoader(CParser, FullConstructor, Resolver):
def __init__(self, stream):
CParser.__init__(self, stream)
FullConstructor.__init__(self)
Resolver.__init__(self)
class CUnsafeLoader(CParser, UnsafeConstructor, Resolver):
def __init__(self, stream):
CParser.__init__(self, stream)
UnsafeConstructor.__init__(self)
Resolver.__init__(self)
class CLoader(CParser, Constructor, Resolver):
def __init__(self, stream):
CParser.__init__(self, stream)
Constructor.__init__(self)
Resolver.__init__(self)
class CBaseDumper(CEmitter, BaseRepresenter, BaseResolver):
def __init__(self, stream,
default_style=None, default_flow_style=False,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None, sort_keys=True):
CEmitter.__init__(self, stream, canonical=canonical,
indent=indent, width=width, encoding=encoding,
allow_unicode=allow_unicode, line_break=line_break,
explicit_start=explicit_start, explicit_end=explicit_end,
version=version, tags=tags)
Representer.__init__(self, default_style=default_style,
default_flow_style=default_flow_style, sort_keys=sort_keys)
Resolver.__init__(self)
class CSafeDumper(CEmitter, SafeRepresenter, Resolver):
def __init__(self, stream,
default_style=None, default_flow_style=False,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None, sort_keys=True):
CEmitter.__init__(self, stream, canonical=canonical,
indent=indent, width=width, encoding=encoding,
allow_unicode=allow_unicode, line_break=line_break,
explicit_start=explicit_start, explicit_end=explicit_end,
version=version, tags=tags)
SafeRepresenter.__init__(self, default_style=default_style,
default_flow_style=default_flow_style, sort_keys=sort_keys)
Resolver.__init__(self)
class CDumper(CEmitter, Serializer, Representer, Resolver):
def __init__(self, stream,
default_style=None, default_flow_style=False,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None, sort_keys=True):
CEmitter.__init__(self, stream, canonical=canonical,
indent=indent, width=width, encoding=encoding,
allow_unicode=allow_unicode, line_break=line_break,
explicit_start=explicit_start, explicit_end=explicit_end,
version=version, tags=tags)
Representer.__init__(self, default_style=default_style,
default_flow_style=default_flow_style, sort_keys=sort_keys)
Resolver.__init__(self)

@ -1,62 +0,0 @@
__all__ = ['BaseDumper', 'SafeDumper', 'Dumper']
from .emitter import *
from .serializer import *
from .representer import *
from .resolver import *
class BaseDumper(Emitter, Serializer, BaseRepresenter, BaseResolver):
def __init__(self, stream,
default_style=None, default_flow_style=False,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None, sort_keys=True):
Emitter.__init__(self, stream, canonical=canonical,
indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break)
Serializer.__init__(self, encoding=encoding,
explicit_start=explicit_start, explicit_end=explicit_end,
version=version, tags=tags)
Representer.__init__(self, default_style=default_style,
default_flow_style=default_flow_style, sort_keys=sort_keys)
Resolver.__init__(self)
class SafeDumper(Emitter, Serializer, SafeRepresenter, Resolver):
def __init__(self, stream,
default_style=None, default_flow_style=False,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None, sort_keys=True):
Emitter.__init__(self, stream, canonical=canonical,
indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break)
Serializer.__init__(self, encoding=encoding,
explicit_start=explicit_start, explicit_end=explicit_end,
version=version, tags=tags)
SafeRepresenter.__init__(self, default_style=default_style,
default_flow_style=default_flow_style, sort_keys=sort_keys)
Resolver.__init__(self)
class Dumper(Emitter, Serializer, Representer, Resolver):
def __init__(self, stream,
default_style=None, default_flow_style=False,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None, sort_keys=True):
Emitter.__init__(self, stream, canonical=canonical,
indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break)
Serializer.__init__(self, encoding=encoding,
explicit_start=explicit_start, explicit_end=explicit_end,
version=version, tags=tags)
Representer.__init__(self, default_style=default_style,
default_flow_style=default_flow_style, sort_keys=sort_keys)
Resolver.__init__(self)

File diff suppressed because it is too large Load Diff

@ -1,75 +0,0 @@
__all__ = ['Mark', 'YAMLError', 'MarkedYAMLError']
class Mark:
def __init__(self, name, index, line, column, buffer, pointer):
self.name = name
self.index = index
self.line = line
self.column = column
self.buffer = buffer
self.pointer = pointer
def get_snippet(self, indent=4, max_length=75):
if self.buffer is None:
return None
head = ''
start = self.pointer
while start > 0 and self.buffer[start-1] not in '\0\r\n\x85\u2028\u2029':
start -= 1
if self.pointer-start > max_length/2-1:
head = ' ... '
start += 5
break
tail = ''
end = self.pointer
while end < len(self.buffer) and self.buffer[end] not in '\0\r\n\x85\u2028\u2029':
end += 1
if end-self.pointer > max_length/2-1:
tail = ' ... '
end -= 5
break
snippet = self.buffer[start:end]
return ' '*indent + head + snippet + tail + '\n' \
+ ' '*(indent+self.pointer-start+len(head)) + '^'
def __str__(self):
snippet = self.get_snippet()
where = " in \"%s\", line %d, column %d" \
% (self.name, self.line+1, self.column+1)
if snippet is not None:
where += ":\n"+snippet
return where
class YAMLError(Exception):
pass
class MarkedYAMLError(YAMLError):
def __init__(self, context=None, context_mark=None,
problem=None, problem_mark=None, note=None):
self.context = context
self.context_mark = context_mark
self.problem = problem
self.problem_mark = problem_mark
self.note = note
def __str__(self):
lines = []
if self.context is not None:
lines.append(self.context)
if self.context_mark is not None \
and (self.problem is None or self.problem_mark is None
or self.context_mark.name != self.problem_mark.name
or self.context_mark.line != self.problem_mark.line
or self.context_mark.column != self.problem_mark.column):
lines.append(str(self.context_mark))
if self.problem is not None:
lines.append(self.problem)
if self.problem_mark is not None:
lines.append(str(self.problem_mark))
if self.note is not None:
lines.append(self.note)
return '\n'.join(lines)

@ -1,86 +0,0 @@
# Abstract classes.
class Event(object):
def __init__(self, start_mark=None, end_mark=None):
self.start_mark = start_mark
self.end_mark = end_mark
def __repr__(self):
attributes = [key for key in ['anchor', 'tag', 'implicit', 'value']
if hasattr(self, key)]
arguments = ', '.join(['%s=%r' % (key, getattr(self, key))
for key in attributes])
return '%s(%s)' % (self.__class__.__name__, arguments)
class NodeEvent(Event):
def __init__(self, anchor, start_mark=None, end_mark=None):
self.anchor = anchor
self.start_mark = start_mark
self.end_mark = end_mark
class CollectionStartEvent(NodeEvent):
def __init__(self, anchor, tag, implicit, start_mark=None, end_mark=None,
flow_style=None):
self.anchor = anchor
self.tag = tag
self.implicit = implicit
self.start_mark = start_mark
self.end_mark = end_mark
self.flow_style = flow_style
class CollectionEndEvent(Event):
pass
# Implementations.
class StreamStartEvent(Event):
def __init__(self, start_mark=None, end_mark=None, encoding=None):
self.start_mark = start_mark
self.end_mark = end_mark
self.encoding = encoding
class StreamEndEvent(Event):
pass
class DocumentStartEvent(Event):
def __init__(self, start_mark=None, end_mark=None,
explicit=None, version=None, tags=None):
self.start_mark = start_mark
self.end_mark = end_mark
self.explicit = explicit
self.version = version
self.tags = tags
class DocumentEndEvent(Event):
def __init__(self, start_mark=None, end_mark=None,
explicit=None):
self.start_mark = start_mark
self.end_mark = end_mark
self.explicit = explicit
class AliasEvent(NodeEvent):
pass
class ScalarEvent(NodeEvent):
def __init__(self, anchor, tag, implicit, value,
start_mark=None, end_mark=None, style=None):
self.anchor = anchor
self.tag = tag
self.implicit = implicit
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
self.style = style
class SequenceStartEvent(CollectionStartEvent):
pass
class SequenceEndEvent(CollectionEndEvent):
pass
class MappingStartEvent(CollectionStartEvent):
pass
class MappingEndEvent(CollectionEndEvent):
pass

@ -1,63 +0,0 @@
__all__ = ['BaseLoader', 'FullLoader', 'SafeLoader', 'Loader', 'UnsafeLoader']
from .reader import *
from .scanner import *
from .parser import *
from .composer import *
from .constructor import *
from .resolver import *
class BaseLoader(Reader, Scanner, Parser, Composer, BaseConstructor, BaseResolver):
def __init__(self, stream):
Reader.__init__(self, stream)
Scanner.__init__(self)
Parser.__init__(self)
Composer.__init__(self)
BaseConstructor.__init__(self)
BaseResolver.__init__(self)
class FullLoader(Reader, Scanner, Parser, Composer, FullConstructor, Resolver):
def __init__(self, stream):
Reader.__init__(self, stream)
Scanner.__init__(self)
Parser.__init__(self)
Composer.__init__(self)
FullConstructor.__init__(self)
Resolver.__init__(self)
class SafeLoader(Reader, Scanner, Parser, Composer, SafeConstructor, Resolver):
def __init__(self, stream):
Reader.__init__(self, stream)
Scanner.__init__(self)
Parser.__init__(self)
Composer.__init__(self)
SafeConstructor.__init__(self)
Resolver.__init__(self)
class Loader(Reader, Scanner, Parser, Composer, Constructor, Resolver):
def __init__(self, stream):
Reader.__init__(self, stream)
Scanner.__init__(self)
Parser.__init__(self)
Composer.__init__(self)
Constructor.__init__(self)
Resolver.__init__(self)
# UnsafeLoader is the same as Loader (which is and was always unsafe on
# untrusted input). Use of either Loader or UnsafeLoader should be rare, since
# FullLoad should be able to load almost all YAML safely. Loader is left intact
# to ensure backwards compatability.
class UnsafeLoader(Reader, Scanner, Parser, Composer, Constructor, Resolver):
def __init__(self, stream):
Reader.__init__(self, stream)
Scanner.__init__(self)
Parser.__init__(self)
Composer.__init__(self)
Constructor.__init__(self)
Resolver.__init__(self)

@ -1,49 +0,0 @@
class Node(object):
def __init__(self, tag, value, start_mark, end_mark):
self.tag = tag
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
def __repr__(self):
value = self.value
#if isinstance(value, list):
# if len(value) == 0:
# value = '<empty>'
# elif len(value) == 1:
# value = '<1 item>'
# else:
# value = '<%d items>' % len(value)
#else:
# if len(value) > 75:
# value = repr(value[:70]+u' ... ')
# else:
# value = repr(value)
value = repr(value)
return '%s(tag=%r, value=%s)' % (self.__class__.__name__, self.tag, value)
class ScalarNode(Node):
id = 'scalar'
def __init__(self, tag, value,
start_mark=None, end_mark=None, style=None):
self.tag = tag
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
self.style = style
class CollectionNode(Node):
def __init__(self, tag, value,
start_mark=None, end_mark=None, flow_style=None):
self.tag = tag
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
self.flow_style = flow_style
class SequenceNode(CollectionNode):
id = 'sequence'
class MappingNode(CollectionNode):
id = 'mapping'

@ -1,589 +0,0 @@
# The following YAML grammar is LL(1) and is parsed by a recursive descent
# parser.
#
# stream ::= STREAM-START implicit_document? explicit_document* STREAM-END
# implicit_document ::= block_node DOCUMENT-END*
# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
# block_node_or_indentless_sequence ::=
# ALIAS
# | properties (block_content | indentless_block_sequence)?
# | block_content
# | indentless_block_sequence
# block_node ::= ALIAS
# | properties block_content?
# | block_content
# flow_node ::= ALIAS
# | properties flow_content?
# | flow_content
# properties ::= TAG ANCHOR? | ANCHOR TAG?
# block_content ::= block_collection | flow_collection | SCALAR
# flow_content ::= flow_collection | SCALAR
# block_collection ::= block_sequence | block_mapping
# flow_collection ::= flow_sequence | flow_mapping
# block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
# indentless_sequence ::= (BLOCK-ENTRY block_node?)+
# block_mapping ::= BLOCK-MAPPING_START
# ((KEY block_node_or_indentless_sequence?)?
# (VALUE block_node_or_indentless_sequence?)?)*
# BLOCK-END
# flow_sequence ::= FLOW-SEQUENCE-START
# (flow_sequence_entry FLOW-ENTRY)*
# flow_sequence_entry?
# FLOW-SEQUENCE-END
# flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
# flow_mapping ::= FLOW-MAPPING-START
# (flow_mapping_entry FLOW-ENTRY)*
# flow_mapping_entry?
# FLOW-MAPPING-END
# flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
#
# FIRST sets:
#
# stream: { STREAM-START }
# explicit_document: { DIRECTIVE DOCUMENT-START }
# implicit_document: FIRST(block_node)
# block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START }
# flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START }
# block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
# flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
# block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START }
# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
# block_sequence: { BLOCK-SEQUENCE-START }
# block_mapping: { BLOCK-MAPPING-START }
# block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY }
# indentless_sequence: { ENTRY }
# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
# flow_sequence: { FLOW-SEQUENCE-START }
# flow_mapping: { FLOW-MAPPING-START }
# flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
# flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
__all__ = ['Parser', 'ParserError']
from .error import MarkedYAMLError
from .tokens import *
from .events import *
from .scanner import *
class ParserError(MarkedYAMLError):
pass
class Parser:
# Since writing a recursive-descendant parser is a straightforward task, we
# do not give many comments here.
DEFAULT_TAGS = {
'!': '!',
'!!': 'tag:yaml.org,2002:',
}
def __init__(self):
self.current_event = None
self.yaml_version = None
self.tag_handles = {}
self.states = []
self.marks = []
self.state = self.parse_stream_start
def dispose(self):
# Reset the state attributes (to clear self-references)
self.states = []
self.state = None
def check_event(self, *choices):
# Check the type of the next event.
if self.current_event is None:
if self.state:
self.current_event = self.state()
if self.current_event is not None:
if not choices:
return True
for choice in choices:
if isinstance(self.current_event, choice):
return True
return False
def peek_event(self):
# Get the next event.
if self.current_event is None:
if self.state:
self.current_event = self.state()
return self.current_event
def get_event(self):
# Get the next event and proceed further.
if self.current_event is None:
if self.state:
self.current_event = self.state()
value = self.current_event
self.current_event = None
return value
# stream ::= STREAM-START implicit_document? explicit_document* STREAM-END
# implicit_document ::= block_node DOCUMENT-END*
# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
def parse_stream_start(self):
# Parse the stream start.
token = self.get_token()
event = StreamStartEvent(token.start_mark, token.end_mark,
encoding=token.encoding)
# Prepare the next state.
self.state = self.parse_implicit_document_start
return event
def parse_implicit_document_start(self):
# Parse an implicit document.
if not self.check_token(DirectiveToken, DocumentStartToken,
StreamEndToken):
self.tag_handles = self.DEFAULT_TAGS
token = self.peek_token()
start_mark = end_mark = token.start_mark
event = DocumentStartEvent(start_mark, end_mark,
explicit=False)
# Prepare the next state.
self.states.append(self.parse_document_end)
self.state = self.parse_block_node
return event
else:
return self.parse_document_start()
def parse_document_start(self):
# Parse any extra document end indicators.
while self.check_token(DocumentEndToken):
self.get_token()
# Parse an explicit document.
if not self.check_token(StreamEndToken):
token = self.peek_token()
start_mark = token.start_mark
version, tags = self.process_directives()
if not self.check_token(DocumentStartToken):
raise ParserError(None, None,
"expected '<document start>', but found %r"
% self.peek_token().id,
self.peek_token().start_mark)
token = self.get_token()
end_mark = token.end_mark
event = DocumentStartEvent(start_mark, end_mark,
explicit=True, version=version, tags=tags)
self.states.append(self.parse_document_end)
self.state = self.parse_document_content
else:
# Parse the end of the stream.
token = self.get_token()
event = StreamEndEvent(token.start_mark, token.end_mark)
assert not self.states
assert not self.marks
self.state = None
return event
def parse_document_end(self):
# Parse the document end.
token = self.peek_token()
start_mark = end_mark = token.start_mark
explicit = False
if self.check_token(DocumentEndToken):
token = self.get_token()
end_mark = token.end_mark
explicit = True
event = DocumentEndEvent(start_mark, end_mark,
explicit=explicit)
# Prepare the next state.
self.state = self.parse_document_start
return event
def parse_document_content(self):
if self.check_token(DirectiveToken,
DocumentStartToken, DocumentEndToken, StreamEndToken):
event = self.process_empty_scalar(self.peek_token().start_mark)
self.state = self.states.pop()
return event
else:
return self.parse_block_node()
def process_directives(self):
self.yaml_version = None
self.tag_handles = {}
while self.check_token(DirectiveToken):
token = self.get_token()
if token.name == 'YAML':
if self.yaml_version is not None:
raise ParserError(None, None,
"found duplicate YAML directive", token.start_mark)
major, minor = token.value
if major != 1:
raise ParserError(None, None,
"found incompatible YAML document (version 1.* is required)",
token.start_mark)
self.yaml_version = token.value
elif token.name == 'TAG':
handle, prefix = token.value
if handle in self.tag_handles:
raise ParserError(None, None,
"duplicate tag handle %r" % handle,
token.start_mark)
self.tag_handles[handle] = prefix
if self.tag_handles:
value = self.yaml_version, self.tag_handles.copy()
else:
value = self.yaml_version, None
for key in self.DEFAULT_TAGS:
if key not in self.tag_handles:
self.tag_handles[key] = self.DEFAULT_TAGS[key]
return value
# block_node_or_indentless_sequence ::= ALIAS
# | properties (block_content | indentless_block_sequence)?
# | block_content
# | indentless_block_sequence
# block_node ::= ALIAS
# | properties block_content?
# | block_content
# flow_node ::= ALIAS
# | properties flow_content?
# | flow_content
# properties ::= TAG ANCHOR? | ANCHOR TAG?
# block_content ::= block_collection | flow_collection | SCALAR
# flow_content ::= flow_collection | SCALAR
# block_collection ::= block_sequence | block_mapping
# flow_collection ::= flow_sequence | flow_mapping
def parse_block_node(self):
return self.parse_node(block=True)
def parse_flow_node(self):
return self.parse_node()
def parse_block_node_or_indentless_sequence(self):
return self.parse_node(block=True, indentless_sequence=True)
def parse_node(self, block=False, indentless_sequence=False):
if self.check_token(AliasToken):
token = self.get_token()
event = AliasEvent(token.value, token.start_mark, token.end_mark)
self.state = self.states.pop()
else:
anchor = None
tag = None
start_mark = end_mark = tag_mark = None
if self.check_token(AnchorToken):
token = self.get_token()
start_mark = token.start_mark
end_mark = token.end_mark
anchor = token.value
if self.check_token(TagToken):
token = self.get_token()
tag_mark = token.start_mark
end_mark = token.end_mark
tag = token.value
elif self.check_token(TagToken):
token = self.get_token()
start_mark = tag_mark = token.start_mark
end_mark = token.end_mark
tag = token.value
if self.check_token(AnchorToken):
token = self.get_token()
end_mark = token.end_mark
anchor = token.value
if tag is not None:
handle, suffix = tag
if handle is not None:
if handle not in self.tag_handles:
raise ParserError("while parsing a node", start_mark,
"found undefined tag handle %r" % handle,
tag_mark)
tag = self.tag_handles[handle]+suffix
else:
tag = suffix
#if tag == '!':
# raise ParserError("while parsing a node", start_mark,
# "found non-specific tag '!'", tag_mark,
# "Please check 'http://pyyaml.org/wiki/YAMLNonSpecificTag' and share your opinion.")
if start_mark is None:
start_mark = end_mark = self.peek_token().start_mark
event = None
implicit = (tag is None or tag == '!')
if indentless_sequence and self.check_token(BlockEntryToken):
end_mark = self.peek_token().end_mark
event = SequenceStartEvent(anchor, tag, implicit,
start_mark, end_mark)
self.state = self.parse_indentless_sequence_entry
else:
if self.check_token(ScalarToken):
token = self.get_token()
end_mark = token.end_mark
if (token.plain and tag is None) or tag == '!':
implicit = (True, False)
elif tag is None:
implicit = (False, True)
else:
implicit = (False, False)
event = ScalarEvent(anchor, tag, implicit, token.value,
start_mark, end_mark, style=token.style)
self.state = self.states.pop()
elif self.check_token(FlowSequenceStartToken):
end_mark = self.peek_token().end_mark
event = SequenceStartEvent(anchor, tag, implicit,
start_mark, end_mark, flow_style=True)
self.state = self.parse_flow_sequence_first_entry
elif self.check_token(FlowMappingStartToken):
end_mark = self.peek_token().end_mark
event = MappingStartEvent(anchor, tag, implicit,
start_mark, end_mark, flow_style=True)
self.state = self.parse_flow_mapping_first_key
elif block and self.check_token(BlockSequenceStartToken):
end_mark = self.peek_token().start_mark
event = SequenceStartEvent(anchor, tag, implicit,
start_mark, end_mark, flow_style=False)
self.state = self.parse_block_sequence_first_entry
elif block and self.check_token(BlockMappingStartToken):
end_mark = self.peek_token().start_mark
event = MappingStartEvent(anchor, tag, implicit,
start_mark, end_mark, flow_style=False)
self.state = self.parse_block_mapping_first_key
elif anchor is not None or tag is not None:
# Empty scalars are allowed even if a tag or an anchor is
# specified.
event = ScalarEvent(anchor, tag, (implicit, False), '',
start_mark, end_mark)
self.state = self.states.pop()
else:
if block:
node = 'block'
else:
node = 'flow'
token = self.peek_token()
raise ParserError("while parsing a %s node" % node, start_mark,
"expected the node content, but found %r" % token.id,
token.start_mark)
return event
# block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
def parse_block_sequence_first_entry(self):
token = self.get_token()
self.marks.append(token.start_mark)
return self.parse_block_sequence_entry()
def parse_block_sequence_entry(self):
if self.check_token(BlockEntryToken):
token = self.get_token()
if not self.check_token(BlockEntryToken, BlockEndToken):
self.states.append(self.parse_block_sequence_entry)
return self.parse_block_node()
else:
self.state = self.parse_block_sequence_entry
return self.process_empty_scalar(token.end_mark)
if not self.check_token(BlockEndToken):
token = self.peek_token()
raise ParserError("while parsing a block collection", self.marks[-1],
"expected <block end>, but found %r" % token.id, token.start_mark)
token = self.get_token()
event = SequenceEndEvent(token.start_mark, token.end_mark)
self.state = self.states.pop()
self.marks.pop()
return event
# indentless_sequence ::= (BLOCK-ENTRY block_node?)+
def parse_indentless_sequence_entry(self):
if self.check_token(BlockEntryToken):
token = self.get_token()
if not self.check_token(BlockEntryToken,
KeyToken, ValueToken, BlockEndToken):
self.states.append(self.parse_indentless_sequence_entry)
return self.parse_block_node()
else:
self.state = self.parse_indentless_sequence_entry
return self.process_empty_scalar(token.end_mark)
token = self.peek_token()
event = SequenceEndEvent(token.start_mark, token.start_mark)
self.state = self.states.pop()
return event
# block_mapping ::= BLOCK-MAPPING_START
# ((KEY block_node_or_indentless_sequence?)?
# (VALUE block_node_or_indentless_sequence?)?)*
# BLOCK-END
def parse_block_mapping_first_key(self):
token = self.get_token()
self.marks.append(token.start_mark)
return self.parse_block_mapping_key()
def parse_block_mapping_key(self):
if self.check_token(KeyToken):
token = self.get_token()
if not self.check_token(KeyToken, ValueToken, BlockEndToken):
self.states.append(self.parse_block_mapping_value)
return self.parse_block_node_or_indentless_sequence()
else:
self.state = self.parse_block_mapping_value
return self.process_empty_scalar(token.end_mark)
if not self.check_token(BlockEndToken):
token = self.peek_token()
raise ParserError("while parsing a block mapping", self.marks[-1],
"expected <block end>, but found %r" % token.id, token.start_mark)
token = self.get_token()
event = MappingEndEvent(token.start_mark, token.end_mark)
self.state = self.states.pop()
self.marks.pop()
return event
def parse_block_mapping_value(self):
if self.check_token(ValueToken):
token = self.get_token()
if not self.check_token(KeyToken, ValueToken, BlockEndToken):
self.states.append(self.parse_block_mapping_key)
return self.parse_block_node_or_indentless_sequence()
else:
self.state = self.parse_block_mapping_key
return self.process_empty_scalar(token.end_mark)
else:
self.state = self.parse_block_mapping_key
token = self.peek_token()
return self.process_empty_scalar(token.start_mark)
# flow_sequence ::= FLOW-SEQUENCE-START
# (flow_sequence_entry FLOW-ENTRY)*
# flow_sequence_entry?
# FLOW-SEQUENCE-END
# flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
#
# Note that while production rules for both flow_sequence_entry and
# flow_mapping_entry are equal, their interpretations are different.
# For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?`
# generate an inline mapping (set syntax).
def parse_flow_sequence_first_entry(self):
token = self.get_token()
self.marks.append(token.start_mark)
return self.parse_flow_sequence_entry(first=True)
def parse_flow_sequence_entry(self, first=False):
if not self.check_token(FlowSequenceEndToken):
if not first:
if self.check_token(FlowEntryToken):
self.get_token()
else:
token = self.peek_token()
raise ParserError("while parsing a flow sequence", self.marks[-1],
"expected ',' or ']', but got %r" % token.id, token.start_mark)
if self.check_token(KeyToken):
token = self.peek_token()
event = MappingStartEvent(None, None, True,
token.start_mark, token.end_mark,
flow_style=True)
self.state = self.parse_flow_sequence_entry_mapping_key
return event
elif not self.check_token(FlowSequenceEndToken):
self.states.append(self.parse_flow_sequence_entry)
return self.parse_flow_node()
token = self.get_token()
event = SequenceEndEvent(token.start_mark, token.end_mark)
self.state = self.states.pop()
self.marks.pop()
return event
def parse_flow_sequence_entry_mapping_key(self):
token = self.get_token()
if not self.check_token(ValueToken,
FlowEntryToken, FlowSequenceEndToken):
self.states.append(self.parse_flow_sequence_entry_mapping_value)
return self.parse_flow_node()
else:
self.state = self.parse_flow_sequence_entry_mapping_value
return self.process_empty_scalar(token.end_mark)
def parse_flow_sequence_entry_mapping_value(self):
if self.check_token(ValueToken):
token = self.get_token()
if not self.check_token(FlowEntryToken, FlowSequenceEndToken):
self.states.append(self.parse_flow_sequence_entry_mapping_end)
return self.parse_flow_node()
else:
self.state = self.parse_flow_sequence_entry_mapping_end
return self.process_empty_scalar(token.end_mark)
else:
self.state = self.parse_flow_sequence_entry_mapping_end
token = self.peek_token()
return self.process_empty_scalar(token.start_mark)
def parse_flow_sequence_entry_mapping_end(self):
self.state = self.parse_flow_sequence_entry
token = self.peek_token()
return MappingEndEvent(token.start_mark, token.start_mark)
# flow_mapping ::= FLOW-MAPPING-START
# (flow_mapping_entry FLOW-ENTRY)*
# flow_mapping_entry?
# FLOW-MAPPING-END
# flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
def parse_flow_mapping_first_key(self):
token = self.get_token()
self.marks.append(token.start_mark)
return self.parse_flow_mapping_key(first=True)
def parse_flow_mapping_key(self, first=False):
if not self.check_token(FlowMappingEndToken):
if not first:
if self.check_token(FlowEntryToken):
self.get_token()
else:
token = self.peek_token()
raise ParserError("while parsing a flow mapping", self.marks[-1],
"expected ',' or '}', but got %r" % token.id, token.start_mark)
if self.check_token(KeyToken):
token = self.get_token()
if not self.check_token(ValueToken,
FlowEntryToken, FlowMappingEndToken):
self.states.append(self.parse_flow_mapping_value)
return self.parse_flow_node()
else:
self.state = self.parse_flow_mapping_value
return self.process_empty_scalar(token.end_mark)
elif not self.check_token(FlowMappingEndToken):
self.states.append(self.parse_flow_mapping_empty_value)
return self.parse_flow_node()
token = self.get_token()
event = MappingEndEvent(token.start_mark, token.end_mark)
self.state = self.states.pop()
self.marks.pop()
return event
def parse_flow_mapping_value(self):
if self.check_token(ValueToken):
token = self.get_token()
if not self.check_token(FlowEntryToken, FlowMappingEndToken):
self.states.append(self.parse_flow_mapping_key)
return self.parse_flow_node()
else:
self.state = self.parse_flow_mapping_key
return self.process_empty_scalar(token.end_mark)
else:
self.state = self.parse_flow_mapping_key
token = self.peek_token()
return self.process_empty_scalar(token.start_mark)
def parse_flow_mapping_empty_value(self):
self.state = self.parse_flow_mapping_key
return self.process_empty_scalar(self.peek_token().start_mark)
def process_empty_scalar(self, mark):
return ScalarEvent(None, None, (True, False), '', mark, mark)

@ -1,185 +0,0 @@
# This module contains abstractions for the input stream. You don't have to
# looks further, there are no pretty code.
#
# We define two classes here.
#
# Mark(source, line, column)
# It's just a record and its only use is producing nice error messages.
# Parser does not use it for any other purposes.
#
# Reader(source, data)
# Reader determines the encoding of `data` and converts it to unicode.
# Reader provides the following methods and attributes:
# reader.peek(length=1) - return the next `length` characters
# reader.forward(length=1) - move the current position to `length` characters.
# reader.index - the number of the current character.
# reader.line, stream.column - the line and the column of the current character.
__all__ = ['Reader', 'ReaderError']
from .error import YAMLError, Mark
import codecs, re
class ReaderError(YAMLError):
def __init__(self, name, position, character, encoding, reason):
self.name = name
self.character = character
self.position = position
self.encoding = encoding
self.reason = reason
def __str__(self):
if isinstance(self.character, bytes):
return "'%s' codec can't decode byte #x%02x: %s\n" \
" in \"%s\", position %d" \
% (self.encoding, ord(self.character), self.reason,
self.name, self.position)
else:
return "unacceptable character #x%04x: %s\n" \
" in \"%s\", position %d" \
% (self.character, self.reason,
self.name, self.position)
class Reader(object):
# Reader:
# - determines the data encoding and converts it to a unicode string,
# - checks if characters are in allowed range,
# - adds '\0' to the end.
# Reader accepts
# - a `bytes` object,
# - a `str` object,
# - a file-like object with its `read` method returning `str`,
# - a file-like object with its `read` method returning `unicode`.
# Yeah, it's ugly and slow.
def __init__(self, stream):
self.name = None
self.stream = None
self.stream_pointer = 0
self.eof = True
self.buffer = ''
self.pointer = 0
self.raw_buffer = None
self.raw_decode = None
self.encoding = None
self.index = 0
self.line = 0
self.column = 0
if isinstance(stream, str):
self.name = "<unicode string>"
self.check_printable(stream)
self.buffer = stream+'\0'
elif isinstance(stream, bytes):
self.name = "<byte string>"
self.raw_buffer = stream
self.determine_encoding()
else:
self.stream = stream
self.name = getattr(stream, 'name', "<file>")
self.eof = False
self.raw_buffer = None
self.determine_encoding()
def peek(self, index=0):
try:
return self.buffer[self.pointer+index]
except IndexError:
self.update(index+1)
return self.buffer[self.pointer+index]
def prefix(self, length=1):
if self.pointer+length >= len(self.buffer):
self.update(length)
return self.buffer[self.pointer:self.pointer+length]
def forward(self, length=1):
if self.pointer+length+1 >= len(self.buffer):
self.update(length+1)
while length:
ch = self.buffer[self.pointer]
self.pointer += 1
self.index += 1
if ch in '\n\x85\u2028\u2029' \
or (ch == '\r' and self.buffer[self.pointer] != '\n'):
self.line += 1
self.column = 0
elif ch != '\uFEFF':
self.column += 1
length -= 1
def get_mark(self):
if self.stream is None:
return Mark(self.name, self.index, self.line, self.column,
self.buffer, self.pointer)
else:
return Mark(self.name, self.index, self.line, self.column,
None, None)
def determine_encoding(self):
while not self.eof and (self.raw_buffer is None or len(self.raw_buffer) < 2):
self.update_raw()
if isinstance(self.raw_buffer, bytes):
if self.raw_buffer.startswith(codecs.BOM_UTF16_LE):
self.raw_decode = codecs.utf_16_le_decode
self.encoding = 'utf-16-le'
elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE):
self.raw_decode = codecs.utf_16_be_decode
self.encoding = 'utf-16-be'
else:
self.raw_decode = codecs.utf_8_decode
self.encoding = 'utf-8'
self.update(1)
NON_PRINTABLE = re.compile('[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]')
def check_printable(self, data):
match = self.NON_PRINTABLE.search(data)
if match:
character = match.group()
position = self.index+(len(self.buffer)-self.pointer)+match.start()
raise ReaderError(self.name, position, ord(character),
'unicode', "special characters are not allowed")
def update(self, length):
if self.raw_buffer is None:
return
self.buffer = self.buffer[self.pointer:]
self.pointer = 0
while len(self.buffer) < length:
if not self.eof:
self.update_raw()
if self.raw_decode is not None:
try:
data, converted = self.raw_decode(self.raw_buffer,
'strict', self.eof)
except UnicodeDecodeError as exc:
character = self.raw_buffer[exc.start]
if self.stream is not None:
position = self.stream_pointer-len(self.raw_buffer)+exc.start
else:
position = exc.start
raise ReaderError(self.name, position, character,
exc.encoding, exc.reason)
else:
data = self.raw_buffer
converted = len(data)
self.check_printable(data)
self.buffer += data
self.raw_buffer = self.raw_buffer[converted:]
if self.eof:
self.buffer += '\0'
self.raw_buffer = None
break
def update_raw(self, size=4096):
data = self.stream.read(size)
if self.raw_buffer is None:
self.raw_buffer = data
else:
self.raw_buffer += data
self.stream_pointer += len(data)
if not data:
self.eof = True

@ -1,389 +0,0 @@
__all__ = ['BaseRepresenter', 'SafeRepresenter', 'Representer',
'RepresenterError']
from .error import *
from .nodes import *
import datetime, sys, copyreg, types, base64, collections
class RepresenterError(YAMLError):
pass
class BaseRepresenter:
yaml_representers = {}
yaml_multi_representers = {}
def __init__(self, default_style=None, default_flow_style=False, sort_keys=True):
self.default_style = default_style
self.sort_keys = sort_keys
self.default_flow_style = default_flow_style
self.represented_objects = {}
self.object_keeper = []
self.alias_key = None
def represent(self, data):
node = self.represent_data(data)
self.serialize(node)
self.represented_objects = {}
self.object_keeper = []
self.alias_key = None
def represent_data(self, data):
if self.ignore_aliases(data):
self.alias_key = None
else:
self.alias_key = id(data)
if self.alias_key is not None:
if self.alias_key in self.represented_objects:
node = self.represented_objects[self.alias_key]
#if node is None:
# raise RepresenterError("recursive objects are not allowed: %r" % data)
return node
#self.represented_objects[alias_key] = None
self.object_keeper.append(data)
data_types = type(data).__mro__
if data_types[0] in self.yaml_representers:
node = self.yaml_representers[data_types[0]](self, data)
else:
for data_type in data_types:
if data_type in self.yaml_multi_representers:
node = self.yaml_multi_representers[data_type](self, data)
break
else:
if None in self.yaml_multi_representers:
node = self.yaml_multi_representers[None](self, data)
elif None in self.yaml_representers:
node = self.yaml_representers[None](self, data)
else:
node = ScalarNode(None, str(data))
#if alias_key is not None:
# self.represented_objects[alias_key] = node
return node
@classmethod
def add_representer(cls, data_type, representer):
if not 'yaml_representers' in cls.__dict__:
cls.yaml_representers = cls.yaml_representers.copy()
cls.yaml_representers[data_type] = representer
@classmethod
def add_multi_representer(cls, data_type, representer):
if not 'yaml_multi_representers' in cls.__dict__:
cls.yaml_multi_representers = cls.yaml_multi_representers.copy()
cls.yaml_multi_representers[data_type] = representer
def represent_scalar(self, tag, value, style=None):
if style is None:
style = self.default_style
node = ScalarNode(tag, value, style=style)
if self.alias_key is not None:
self.represented_objects[self.alias_key] = node
return node
def represent_sequence(self, tag, sequence, flow_style=None):
value = []
node = SequenceNode(tag, value, flow_style=flow_style)
if self.alias_key is not None:
self.represented_objects[self.alias_key] = node
best_style = True
for item in sequence:
node_item = self.represent_data(item)
if not (isinstance(node_item, ScalarNode) and not node_item.style):
best_style = False
value.append(node_item)
if flow_style is None:
if self.default_flow_style is not None:
node.flow_style = self.default_flow_style
else:
node.flow_style = best_style
return node
def represent_mapping(self, tag, mapping, flow_style=None):
value = []
node = MappingNode(tag, value, flow_style=flow_style)
if self.alias_key is not None:
self.represented_objects[self.alias_key] = node
best_style = True
if hasattr(mapping, 'items'):
mapping = list(mapping.items())
if self.sort_keys:
try:
mapping = sorted(mapping)
except TypeError:
pass
for item_key, item_value in mapping:
node_key = self.represent_data(item_key)
node_value = self.represent_data(item_value)
if not (isinstance(node_key, ScalarNode) and not node_key.style):
best_style = False
if not (isinstance(node_value, ScalarNode) and not node_value.style):
best_style = False
value.append((node_key, node_value))
if flow_style is None:
if self.default_flow_style is not None:
node.flow_style = self.default_flow_style
else:
node.flow_style = best_style
return node
def ignore_aliases(self, data):
return False
class SafeRepresenter(BaseRepresenter):
def ignore_aliases(self, data):
if data is None:
return True
if isinstance(data, tuple) and data == ():
return True
if isinstance(data, (str, bytes, bool, int, float)):
return True
def represent_none(self, data):
return self.represent_scalar('tag:yaml.org,2002:null', 'null')
def represent_str(self, data):
return self.represent_scalar('tag:yaml.org,2002:str', data)
def represent_binary(self, data):
if hasattr(base64, 'encodebytes'):
data = base64.encodebytes(data).decode('ascii')
else:
data = base64.encodestring(data).decode('ascii')
return self.represent_scalar('tag:yaml.org,2002:binary', data, style='|')
def represent_bool(self, data):
if data:
value = 'true'
else:
value = 'false'
return self.represent_scalar('tag:yaml.org,2002:bool', value)
def represent_int(self, data):
return self.represent_scalar('tag:yaml.org,2002:int', str(data))
inf_value = 1e300
while repr(inf_value) != repr(inf_value*inf_value):
inf_value *= inf_value
def represent_float(self, data):
if data != data or (data == 0.0 and data == 1.0):
value = '.nan'
elif data == self.inf_value:
value = '.inf'
elif data == -self.inf_value:
value = '-.inf'
else:
value = repr(data).lower()
# Note that in some cases `repr(data)` represents a float number
# without the decimal parts. For instance:
# >>> repr(1e17)
# '1e17'
# Unfortunately, this is not a valid float representation according
# to the definition of the `!!float` tag. We fix this by adding
# '.0' before the 'e' symbol.
if '.' not in value and 'e' in value:
value = value.replace('e', '.0e', 1)
return self.represent_scalar('tag:yaml.org,2002:float', value)
def represent_list(self, data):
#pairs = (len(data) > 0 and isinstance(data, list))
#if pairs:
# for item in data:
# if not isinstance(item, tuple) or len(item) != 2:
# pairs = False
# break
#if not pairs:
return self.represent_sequence('tag:yaml.org,2002:seq', data)
#value = []
#for item_key, item_value in data:
# value.append(self.represent_mapping(u'tag:yaml.org,2002:map',
# [(item_key, item_value)]))
#return SequenceNode(u'tag:yaml.org,2002:pairs', value)
def represent_dict(self, data):
return self.represent_mapping('tag:yaml.org,2002:map', data)
def represent_set(self, data):
value = {}
for key in data:
value[key] = None
return self.represent_mapping('tag:yaml.org,2002:set', value)
def represent_date(self, data):
value = data.isoformat()
return self.represent_scalar('tag:yaml.org,2002:timestamp', value)
def represent_datetime(self, data):
value = data.isoformat(' ')
return self.represent_scalar('tag:yaml.org,2002:timestamp', value)
def represent_yaml_object(self, tag, data, cls, flow_style=None):
if hasattr(data, '__getstate__'):
state = data.__getstate__()
else:
state = data.__dict__.copy()
return self.represent_mapping(tag, state, flow_style=flow_style)
def represent_undefined(self, data):
raise RepresenterError("cannot represent an object", data)
SafeRepresenter.add_representer(type(None),
SafeRepresenter.represent_none)
SafeRepresenter.add_representer(str,
SafeRepresenter.represent_str)
SafeRepresenter.add_representer(bytes,
SafeRepresenter.represent_binary)
SafeRepresenter.add_representer(bool,
SafeRepresenter.represent_bool)
SafeRepresenter.add_representer(int,
SafeRepresenter.represent_int)
SafeRepresenter.add_representer(float,
SafeRepresenter.represent_float)
SafeRepresenter.add_representer(list,
SafeRepresenter.represent_list)
SafeRepresenter.add_representer(tuple,
SafeRepresenter.represent_list)
SafeRepresenter.add_representer(dict,
SafeRepresenter.represent_dict)
SafeRepresenter.add_representer(set,
SafeRepresenter.represent_set)
SafeRepresenter.add_representer(datetime.date,
SafeRepresenter.represent_date)
SafeRepresenter.add_representer(datetime.datetime,
SafeRepresenter.represent_datetime)
SafeRepresenter.add_representer(None,
SafeRepresenter.represent_undefined)
class Representer(SafeRepresenter):
def represent_complex(self, data):
if data.imag == 0.0:
data = '%r' % data.real
elif data.real == 0.0:
data = '%rj' % data.imag
elif data.imag > 0:
data = '%r+%rj' % (data.real, data.imag)
else:
data = '%r%rj' % (data.real, data.imag)
return self.represent_scalar('tag:yaml.org,2002:python/complex', data)
def represent_tuple(self, data):
return self.represent_sequence('tag:yaml.org,2002:python/tuple', data)
def represent_name(self, data):
name = '%s.%s' % (data.__module__, data.__name__)
return self.represent_scalar('tag:yaml.org,2002:python/name:'+name, '')
def represent_module(self, data):
return self.represent_scalar(
'tag:yaml.org,2002:python/module:'+data.__name__, '')
def represent_object(self, data):
# We use __reduce__ API to save the data. data.__reduce__ returns
# a tuple of length 2-5:
# (function, args, state, listitems, dictitems)
# For reconstructing, we calls function(*args), then set its state,
# listitems, and dictitems if they are not None.
# A special case is when function.__name__ == '__newobj__'. In this
# case we create the object with args[0].__new__(*args).
# Another special case is when __reduce__ returns a string - we don't
# support it.
# We produce a !!python/object, !!python/object/new or
# !!python/object/apply node.
cls = type(data)
if cls in copyreg.dispatch_table:
reduce = copyreg.dispatch_table[cls](data)
elif hasattr(data, '__reduce_ex__'):
reduce = data.__reduce_ex__(2)
elif hasattr(data, '__reduce__'):
reduce = data.__reduce__()
else:
raise RepresenterError("cannot represent an object", data)
reduce = (list(reduce)+[None]*5)[:5]
function, args, state, listitems, dictitems = reduce
args = list(args)
if state is None:
state = {}
if listitems is not None:
listitems = list(listitems)
if dictitems is not None:
dictitems = dict(dictitems)
if function.__name__ == '__newobj__':
function = args[0]
args = args[1:]
tag = 'tag:yaml.org,2002:python/object/new:'
newobj = True
else:
tag = 'tag:yaml.org,2002:python/object/apply:'
newobj = False
function_name = '%s.%s' % (function.__module__, function.__name__)
if not args and not listitems and not dictitems \
and isinstance(state, dict) and newobj:
return self.represent_mapping(
'tag:yaml.org,2002:python/object:'+function_name, state)
if not listitems and not dictitems \
and isinstance(state, dict) and not state:
return self.represent_sequence(tag+function_name, args)
value = {}
if args:
value['args'] = args
if state or not isinstance(state, dict):
value['state'] = state
if listitems:
value['listitems'] = listitems
if dictitems:
value['dictitems'] = dictitems
return self.represent_mapping(tag+function_name, value)
def represent_ordered_dict(self, data):
# Provide uniform representation across different Python versions.
data_type = type(data)
tag = 'tag:yaml.org,2002:python/object/apply:%s.%s' \
% (data_type.__module__, data_type.__name__)
items = [[key, value] for key, value in data.items()]
return self.represent_sequence(tag, [items])
Representer.add_representer(complex,
Representer.represent_complex)
Representer.add_representer(tuple,
Representer.represent_tuple)
Representer.add_representer(type,
Representer.represent_name)
Representer.add_representer(collections.OrderedDict,
Representer.represent_ordered_dict)
Representer.add_representer(types.FunctionType,
Representer.represent_name)
Representer.add_representer(types.BuiltinFunctionType,
Representer.represent_name)
Representer.add_representer(types.ModuleType,
Representer.represent_module)
Representer.add_multi_representer(object,
Representer.represent_object)

@ -1,227 +0,0 @@
__all__ = ['BaseResolver', 'Resolver']
from .error import *
from .nodes import *
import re
class ResolverError(YAMLError):
pass
class BaseResolver:
DEFAULT_SCALAR_TAG = 'tag:yaml.org,2002:str'
DEFAULT_SEQUENCE_TAG = 'tag:yaml.org,2002:seq'
DEFAULT_MAPPING_TAG = 'tag:yaml.org,2002:map'
yaml_implicit_resolvers = {}
yaml_path_resolvers = {}
def __init__(self):
self.resolver_exact_paths = []
self.resolver_prefix_paths = []
@classmethod
def add_implicit_resolver(cls, tag, regexp, first):
if not 'yaml_implicit_resolvers' in cls.__dict__:
implicit_resolvers = {}
for key in cls.yaml_implicit_resolvers:
implicit_resolvers[key] = cls.yaml_implicit_resolvers[key][:]
cls.yaml_implicit_resolvers = implicit_resolvers
if first is None:
first = [None]
for ch in first:
cls.yaml_implicit_resolvers.setdefault(ch, []).append((tag, regexp))
@classmethod
def add_path_resolver(cls, tag, path, kind=None):
# Note: `add_path_resolver` is experimental. The API could be changed.
# `new_path` is a pattern that is matched against the path from the
# root to the node that is being considered. `node_path` elements are
# tuples `(node_check, index_check)`. `node_check` is a node class:
# `ScalarNode`, `SequenceNode`, `MappingNode` or `None`. `None`
# matches any kind of a node. `index_check` could be `None`, a boolean
# value, a string value, or a number. `None` and `False` match against
# any _value_ of sequence and mapping nodes. `True` matches against
# any _key_ of a mapping node. A string `index_check` matches against
# a mapping value that corresponds to a scalar key which content is
# equal to the `index_check` value. An integer `index_check` matches
# against a sequence value with the index equal to `index_check`.
if not 'yaml_path_resolvers' in cls.__dict__:
cls.yaml_path_resolvers = cls.yaml_path_resolvers.copy()
new_path = []
for element in path:
if isinstance(element, (list, tuple)):
if len(element) == 2:
node_check, index_check = element
elif len(element) == 1:
node_check = element[0]
index_check = True
else:
raise ResolverError("Invalid path element: %s" % element)
else:
node_check = None
index_check = element
if node_check is str:
node_check = ScalarNode
elif node_check is list:
node_check = SequenceNode
elif node_check is dict:
node_check = MappingNode
elif node_check not in [ScalarNode, SequenceNode, MappingNode] \
and not isinstance(node_check, str) \
and node_check is not None:
raise ResolverError("Invalid node checker: %s" % node_check)
if not isinstance(index_check, (str, int)) \
and index_check is not None:
raise ResolverError("Invalid index checker: %s" % index_check)
new_path.append((node_check, index_check))
if kind is str:
kind = ScalarNode
elif kind is list:
kind = SequenceNode
elif kind is dict:
kind = MappingNode
elif kind not in [ScalarNode, SequenceNode, MappingNode] \
and kind is not None:
raise ResolverError("Invalid node kind: %s" % kind)
cls.yaml_path_resolvers[tuple(new_path), kind] = tag
def descend_resolver(self, current_node, current_index):
if not self.yaml_path_resolvers:
return
exact_paths = {}
prefix_paths = []
if current_node:
depth = len(self.resolver_prefix_paths)
for path, kind in self.resolver_prefix_paths[-1]:
if self.check_resolver_prefix(depth, path, kind,
current_node, current_index):
if len(path) > depth:
prefix_paths.append((path, kind))
else:
exact_paths[kind] = self.yaml_path_resolvers[path, kind]
else:
for path, kind in self.yaml_path_resolvers:
if not path:
exact_paths[kind] = self.yaml_path_resolvers[path, kind]
else:
prefix_paths.append((path, kind))
self.resolver_exact_paths.append(exact_paths)
self.resolver_prefix_paths.append(prefix_paths)
def ascend_resolver(self):
if not self.yaml_path_resolvers:
return
self.resolver_exact_paths.pop()
self.resolver_prefix_paths.pop()
def check_resolver_prefix(self, depth, path, kind,
current_node, current_index):
node_check, index_check = path[depth-1]
if isinstance(node_check, str):
if current_node.tag != node_check:
return
elif node_check is not None:
if not isinstance(current_node, node_check):
return
if index_check is True and current_index is not None:
return
if (index_check is False or index_check is None) \
and current_index is None:
return
if isinstance(index_check, str):
if not (isinstance(current_index, ScalarNode)
and index_check == current_index.value):
return
elif isinstance(index_check, int) and not isinstance(index_check, bool):
if index_check != current_index:
return
return True
def resolve(self, kind, value, implicit):
if kind is ScalarNode and implicit[0]:
if value == '':
resolvers = self.yaml_implicit_resolvers.get('', [])
else:
resolvers = self.yaml_implicit_resolvers.get(value[0], [])
resolvers += self.yaml_implicit_resolvers.get(None, [])
for tag, regexp in resolvers:
if regexp.match(value):
return tag
implicit = implicit[1]
if self.yaml_path_resolvers:
exact_paths = self.resolver_exact_paths[-1]
if kind in exact_paths:
return exact_paths[kind]
if None in exact_paths:
return exact_paths[None]
if kind is ScalarNode:
return self.DEFAULT_SCALAR_TAG
elif kind is SequenceNode:
return self.DEFAULT_SEQUENCE_TAG
elif kind is MappingNode:
return self.DEFAULT_MAPPING_TAG
class Resolver(BaseResolver):
pass
Resolver.add_implicit_resolver(
'tag:yaml.org,2002:bool',
re.compile(r'''^(?:yes|Yes|YES|no|No|NO
|true|True|TRUE|false|False|FALSE
|on|On|ON|off|Off|OFF)$''', re.X),
list('yYnNtTfFoO'))
Resolver.add_implicit_resolver(
'tag:yaml.org,2002:float',
re.compile(r'''^(?:[-+]?(?:[0-9][0-9_]*)\.[0-9_]*(?:[eE][-+][0-9]+)?
|\.[0-9_]+(?:[eE][-+][0-9]+)?
|[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\.[0-9_]*
|[-+]?\.(?:inf|Inf|INF)
|\.(?:nan|NaN|NAN))$''', re.X),
list('-+0123456789.'))
Resolver.add_implicit_resolver(
'tag:yaml.org,2002:int',
re.compile(r'''^(?:[-+]?0b[0-1_]+
|[-+]?0[0-7_]+
|[-+]?(?:0|[1-9][0-9_]*)
|[-+]?0x[0-9a-fA-F_]+
|[-+]?[1-9][0-9_]*(?::[0-5]?[0-9])+)$''', re.X),
list('-+0123456789'))
Resolver.add_implicit_resolver(
'tag:yaml.org,2002:merge',
re.compile(r'^(?:<<)$'),
['<'])
Resolver.add_implicit_resolver(
'tag:yaml.org,2002:null',
re.compile(r'''^(?: ~
|null|Null|NULL
| )$''', re.X),
['~', 'n', 'N', ''])
Resolver.add_implicit_resolver(
'tag:yaml.org,2002:timestamp',
re.compile(r'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]
|[0-9][0-9][0-9][0-9] -[0-9][0-9]? -[0-9][0-9]?
(?:[Tt]|[ \t]+)[0-9][0-9]?
:[0-9][0-9] :[0-9][0-9] (?:\.[0-9]*)?
(?:[ \t]*(?:Z|[-+][0-9][0-9]?(?::[0-9][0-9])?))?)$''', re.X),
list('0123456789'))
Resolver.add_implicit_resolver(
'tag:yaml.org,2002:value',
re.compile(r'^(?:=)$'),
['='])
# The following resolver is only for documentation purposes. It cannot work
# because plain scalars cannot start with '!', '&', or '*'.
Resolver.add_implicit_resolver(
'tag:yaml.org,2002:yaml',
re.compile(r'^(?:!|&|\*)$'),
list('!&*'))

File diff suppressed because it is too large Load Diff

@ -1,111 +0,0 @@
__all__ = ['Serializer', 'SerializerError']
from .error import YAMLError
from .events import *
from .nodes import *
class SerializerError(YAMLError):
pass
class Serializer:
ANCHOR_TEMPLATE = 'id%03d'
def __init__(self, encoding=None,
explicit_start=None, explicit_end=None, version=None, tags=None):
self.use_encoding = encoding
self.use_explicit_start = explicit_start
self.use_explicit_end = explicit_end
self.use_version = version
self.use_tags = tags
self.serialized_nodes = {}
self.anchors = {}
self.last_anchor_id = 0
self.closed = None
def open(self):
if self.closed is None:
self.emit(StreamStartEvent(encoding=self.use_encoding))
self.closed = False
elif self.closed:
raise SerializerError("serializer is closed")
else:
raise SerializerError("serializer is already opened")
def close(self):
if self.closed is None:
raise SerializerError("serializer is not opened")
elif not self.closed:
self.emit(StreamEndEvent())
self.closed = True
#def __del__(self):
# self.close()
def serialize(self, node):
if self.closed is None:
raise SerializerError("serializer is not opened")
elif self.closed:
raise SerializerError("serializer is closed")
self.emit(DocumentStartEvent(explicit=self.use_explicit_start,
version=self.use_version, tags=self.use_tags))
self.anchor_node(node)
self.serialize_node(node, None, None)
self.emit(DocumentEndEvent(explicit=self.use_explicit_end))
self.serialized_nodes = {}
self.anchors = {}
self.last_anchor_id = 0
def anchor_node(self, node):
if node in self.anchors:
if self.anchors[node] is None:
self.anchors[node] = self.generate_anchor(node)
else:
self.anchors[node] = None
if isinstance(node, SequenceNode):
for item in node.value:
self.anchor_node(item)
elif isinstance(node, MappingNode):
for key, value in node.value:
self.anchor_node(key)
self.anchor_node(value)
def generate_anchor(self, node):
self.last_anchor_id += 1
return self.ANCHOR_TEMPLATE % self.last_anchor_id
def serialize_node(self, node, parent, index):
alias = self.anchors[node]
if node in self.serialized_nodes:
self.emit(AliasEvent(alias))
else:
self.serialized_nodes[node] = True
self.descend_resolver(parent, index)
if isinstance(node, ScalarNode):
detected_tag = self.resolve(ScalarNode, node.value, (True, False))
default_tag = self.resolve(ScalarNode, node.value, (False, True))
implicit = (node.tag == detected_tag), (node.tag == default_tag)
self.emit(ScalarEvent(alias, node.tag, implicit, node.value,
style=node.style))
elif isinstance(node, SequenceNode):
implicit = (node.tag
== self.resolve(SequenceNode, node.value, True))
self.emit(SequenceStartEvent(alias, node.tag, implicit,
flow_style=node.flow_style))
index = 0
for item in node.value:
self.serialize_node(item, node, index)
index += 1
self.emit(SequenceEndEvent())
elif isinstance(node, MappingNode):
implicit = (node.tag
== self.resolve(MappingNode, node.value, True))
self.emit(MappingStartEvent(alias, node.tag, implicit,
flow_style=node.flow_style))
for key, value in node.value:
self.serialize_node(key, node, None)
self.serialize_node(value, node, key)
self.emit(MappingEndEvent())
self.ascend_resolver()

@ -1,104 +0,0 @@
class Token(object):
def __init__(self, start_mark, end_mark):
self.start_mark = start_mark
self.end_mark = end_mark
def __repr__(self):
attributes = [key for key in self.__dict__
if not key.endswith('_mark')]
attributes.sort()
arguments = ', '.join(['%s=%r' % (key, getattr(self, key))
for key in attributes])
return '%s(%s)' % (self.__class__.__name__, arguments)
#class BOMToken(Token):
# id = '<byte order mark>'
class DirectiveToken(Token):
id = '<directive>'
def __init__(self, name, value, start_mark, end_mark):
self.name = name
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
class DocumentStartToken(Token):
id = '<document start>'
class DocumentEndToken(Token):
id = '<document end>'
class StreamStartToken(Token):
id = '<stream start>'
def __init__(self, start_mark=None, end_mark=None,
encoding=None):
self.start_mark = start_mark
self.end_mark = end_mark
self.encoding = encoding
class StreamEndToken(Token):
id = '<stream end>'
class BlockSequenceStartToken(Token):
id = '<block sequence start>'
class BlockMappingStartToken(Token):
id = '<block mapping start>'
class BlockEndToken(Token):
id = '<block end>'
class FlowSequenceStartToken(Token):
id = '['
class FlowMappingStartToken(Token):
id = '{'
class FlowSequenceEndToken(Token):
id = ']'
class FlowMappingEndToken(Token):
id = '}'
class KeyToken(Token):
id = '?'
class ValueToken(Token):
id = ':'
class BlockEntryToken(Token):
id = '-'
class FlowEntryToken(Token):
id = ','
class AliasToken(Token):
id = '<alias>'
def __init__(self, value, start_mark, end_mark):
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
class AnchorToken(Token):
id = '<anchor>'
def __init__(self, value, start_mark, end_mark):
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
class TagToken(Token):
id = '<tag>'
def __init__(self, value, start_mark, end_mark):
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
class ScalarToken(Token):
id = '<scalar>'
def __init__(self, value, plain, start_mark, end_mark, style=None):
self.value = value
self.plain = plain
self.start_mark = start_mark
self.end_mark = end_mark
self.style = style

@ -1,406 +0,0 @@
from error import *
from tokens import *
from events import *
from nodes import *
from loader import *
from dumper import *
__version__ = '5.1'
try:
from cyaml import *
__with_libyaml__ = True
except ImportError:
__with_libyaml__ = False
#------------------------------------------------------------------------------
# Warnings control
#------------------------------------------------------------------------------
# 'Global' warnings state:
_warnings_enabled = {
'YAMLLoadWarning': True,
}
# Get or set global warnings' state
def warnings(settings=None):
if settings is None:
return _warnings_enabled
if type(settings) is dict:
for key in settings:
if key in _warnings_enabled:
_warnings_enabled[key] = settings[key]
# Warn when load() is called without Loader=...
class YAMLLoadWarning(RuntimeWarning):
pass
def load_warning(method):
if _warnings_enabled['YAMLLoadWarning'] is False:
return
import warnings
message = (
"calling yaml.%s() without Loader=... is deprecated, as the "
"default Loader is unsafe. Please read "
"https://msg.pyyaml.org/load for full details."
) % method
warnings.warn(message, YAMLLoadWarning, stacklevel=3)
#------------------------------------------------------------------------------
def scan(stream, Loader=Loader):
"""
Scan a YAML stream and produce scanning tokens.
"""
loader = Loader(stream)
try:
while loader.check_token():
yield loader.get_token()
finally:
loader.dispose()
def parse(stream, Loader=Loader):
"""
Parse a YAML stream and produce parsing events.
"""
loader = Loader(stream)
try:
while loader.check_event():
yield loader.get_event()
finally:
loader.dispose()
def compose(stream, Loader=Loader):
"""
Parse the first YAML document in a stream
and produce the corresponding representation tree.
"""
loader = Loader(stream)
try:
return loader.get_single_node()
finally:
loader.dispose()
def compose_all(stream, Loader=Loader):
"""
Parse all YAML documents in a stream
and produce corresponding representation trees.
"""
loader = Loader(stream)
try:
while loader.check_node():
yield loader.get_node()
finally:
loader.dispose()
def load(stream, Loader=None):
"""
Parse the first YAML document in a stream
and produce the corresponding Python object.
"""
if Loader is None:
load_warning('load')
Loader = FullLoader
loader = Loader(stream)
try:
return loader.get_single_data()
finally:
loader.dispose()
def load_all(stream, Loader=None):
"""
Parse all YAML documents in a stream
and produce corresponding Python objects.
"""
if Loader is None:
load_warning('load_all')
Loader = FullLoader
loader = Loader(stream)
try:
while loader.check_data():
yield loader.get_data()
finally:
loader.dispose()
def full_load(stream):
"""
Parse the first YAML document in a stream
and produce the corresponding Python object.
Resolve all tags except those known to be
unsafe on untrusted input.
"""
return load(stream, FullLoader)
def full_load_all(stream):
"""
Parse all YAML documents in a stream
and produce corresponding Python objects.
Resolve all tags except those known to be
unsafe on untrusted input.
"""
return load_all(stream, FullLoader)
def safe_load(stream):
"""
Parse the first YAML document in a stream
and produce the corresponding Python object.
Resolve only basic YAML tags. This is known
to be safe for untrusted input.
"""
return load(stream, SafeLoader)
def safe_load_all(stream):
"""
Parse all YAML documents in a stream
and produce corresponding Python objects.
Resolve only basic YAML tags. This is known
to be safe for untrusted input.
"""
return load_all(stream, SafeLoader)
def unsafe_load(stream):
"""
Parse the first YAML document in a stream
and produce the corresponding Python object.
Resolve all tags, even those known to be
unsafe on untrusted input.
"""
return load(stream, UnsafeLoader)
def unsafe_load_all(stream):
"""
Parse all YAML documents in a stream
and produce corresponding Python objects.
Resolve all tags, even those known to be
unsafe on untrusted input.
"""
return load_all(stream, UnsafeLoader)
def emit(events, stream=None, Dumper=Dumper,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None):
"""
Emit YAML parsing events into a stream.
If stream is None, return the produced string instead.
"""
getvalue = None
if stream is None:
from StringIO import StringIO
stream = StringIO()
getvalue = stream.getvalue
dumper = Dumper(stream, canonical=canonical, indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break)
try:
for event in events:
dumper.emit(event)
finally:
dumper.dispose()
if getvalue:
return getvalue()
def serialize_all(nodes, stream=None, Dumper=Dumper,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding='utf-8', explicit_start=None, explicit_end=None,
version=None, tags=None):
"""
Serialize a sequence of representation trees into a YAML stream.
If stream is None, return the produced string instead.
"""
getvalue = None
if stream is None:
if encoding is None:
from StringIO import StringIO
else:
from cStringIO import StringIO
stream = StringIO()
getvalue = stream.getvalue
dumper = Dumper(stream, canonical=canonical, indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break,
encoding=encoding, version=version, tags=tags,
explicit_start=explicit_start, explicit_end=explicit_end)
try:
dumper.open()
for node in nodes:
dumper.serialize(node)
dumper.close()
finally:
dumper.dispose()
if getvalue:
return getvalue()
def serialize(node, stream=None, Dumper=Dumper, **kwds):
"""
Serialize a representation tree into a YAML stream.
If stream is None, return the produced string instead.
"""
return serialize_all([node], stream, Dumper=Dumper, **kwds)
def dump_all(documents, stream=None, Dumper=Dumper,
default_style=None, default_flow_style=False,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding='utf-8', explicit_start=None, explicit_end=None,
version=None, tags=None, sort_keys=True):
"""
Serialize a sequence of Python objects into a YAML stream.
If stream is None, return the produced string instead.
"""
getvalue = None
if stream is None:
if encoding is None:
from StringIO import StringIO
else:
from cStringIO import StringIO
stream = StringIO()
getvalue = stream.getvalue
dumper = Dumper(stream, default_style=default_style,
default_flow_style=default_flow_style,
canonical=canonical, indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break,
encoding=encoding, version=version, tags=tags,
explicit_start=explicit_start, explicit_end=explicit_end, sort_keys=sort_keys)
try:
dumper.open()
for data in documents:
dumper.represent(data)
dumper.close()
finally:
dumper.dispose()
if getvalue:
return getvalue()
def dump(data, stream=None, Dumper=Dumper, **kwds):
"""
Serialize a Python object into a YAML stream.
If stream is None, return the produced string instead.
"""
return dump_all([data], stream, Dumper=Dumper, **kwds)
def safe_dump_all(documents, stream=None, **kwds):
"""
Serialize a sequence of Python objects into a YAML stream.
Produce only basic YAML tags.
If stream is None, return the produced string instead.
"""
return dump_all(documents, stream, Dumper=SafeDumper, **kwds)
def safe_dump(data, stream=None, **kwds):
"""
Serialize a Python object into a YAML stream.
Produce only basic YAML tags.
If stream is None, return the produced string instead.
"""
return dump_all([data], stream, Dumper=SafeDumper, **kwds)
def add_implicit_resolver(tag, regexp, first=None,
Loader=Loader, Dumper=Dumper):
"""
Add an implicit scalar detector.
If an implicit scalar value matches the given regexp,
the corresponding tag is assigned to the scalar.
first is a sequence of possible initial characters or None.
"""
Loader.add_implicit_resolver(tag, regexp, first)
Dumper.add_implicit_resolver(tag, regexp, first)
def add_path_resolver(tag, path, kind=None, Loader=Loader, Dumper=Dumper):
"""
Add a path based resolver for the given tag.
A path is a list of keys that forms a path
to a node in the representation tree.
Keys can be string values, integers, or None.
"""
Loader.add_path_resolver(tag, path, kind)
Dumper.add_path_resolver(tag, path, kind)
def add_constructor(tag, constructor, Loader=Loader):
"""
Add a constructor for the given tag.
Constructor is a function that accepts a Loader instance
and a node object and produces the corresponding Python object.
"""
Loader.add_constructor(tag, constructor)
def add_multi_constructor(tag_prefix, multi_constructor, Loader=Loader):
"""
Add a multi-constructor for the given tag prefix.
Multi-constructor is called for a node if its tag starts with tag_prefix.
Multi-constructor accepts a Loader instance, a tag suffix,
and a node object and produces the corresponding Python object.
"""
Loader.add_multi_constructor(tag_prefix, multi_constructor)
def add_representer(data_type, representer, Dumper=Dumper):
"""
Add a representer for the given type.
Representer is a function accepting a Dumper instance
and an instance of the given data type
and producing the corresponding representation node.
"""
Dumper.add_representer(data_type, representer)
def add_multi_representer(data_type, multi_representer, Dumper=Dumper):
"""
Add a representer for the given type.
Multi-representer is a function accepting a Dumper instance
and an instance of the given data type or subtype
and producing the corresponding representation node.
"""
Dumper.add_multi_representer(data_type, multi_representer)
class YAMLObjectMetaclass(type):
"""
The metaclass for YAMLObject.
"""
def __init__(cls, name, bases, kwds):
super(YAMLObjectMetaclass, cls).__init__(name, bases, kwds)
if 'yaml_tag' in kwds and kwds['yaml_tag'] is not None:
cls.yaml_loader.add_constructor(cls.yaml_tag, cls.from_yaml)
cls.yaml_dumper.add_representer(cls, cls.to_yaml)
class YAMLObject(object):
"""
An object that can dump itself to a YAML stream
and load itself from a YAML stream.
"""
__metaclass__ = YAMLObjectMetaclass
__slots__ = () # no direct instantiation, so allow immutable subclasses
yaml_loader = Loader
yaml_dumper = Dumper
yaml_tag = None
yaml_flow_style = None
def from_yaml(cls, loader, node):
"""
Convert a representation node to a Python object.
"""
return loader.construct_yaml_object(node, cls)
from_yaml = classmethod(from_yaml)
def to_yaml(cls, dumper, data):
"""
Convert a Python object to a representation node.
"""
return dumper.represent_yaml_object(cls.yaml_tag, data, cls,
flow_style=cls.yaml_flow_style)
to_yaml = classmethod(to_yaml)

@ -1,139 +0,0 @@
__all__ = ['Composer', 'ComposerError']
from error import MarkedYAMLError
from events import *
from nodes import *
class ComposerError(MarkedYAMLError):
pass
class Composer(object):
def __init__(self):
self.anchors = {}
def check_node(self):
# Drop the STREAM-START event.
if self.check_event(StreamStartEvent):
self.get_event()
# If there are more documents available?
return not self.check_event(StreamEndEvent)
def get_node(self):
# Get the root node of the next document.
if not self.check_event(StreamEndEvent):
return self.compose_document()
def get_single_node(self):
# Drop the STREAM-START event.
self.get_event()
# Compose a document if the stream is not empty.
document = None
if not self.check_event(StreamEndEvent):
document = self.compose_document()
# Ensure that the stream contains no more documents.
if not self.check_event(StreamEndEvent):
event = self.get_event()
raise ComposerError("expected a single document in the stream",
document.start_mark, "but found another document",
event.start_mark)
# Drop the STREAM-END event.
self.get_event()
return document
def compose_document(self):
# Drop the DOCUMENT-START event.
self.get_event()
# Compose the root node.
node = self.compose_node(None, None)
# Drop the DOCUMENT-END event.
self.get_event()
self.anchors = {}
return node
def compose_node(self, parent, index):
if self.check_event(AliasEvent):
event = self.get_event()
anchor = event.anchor
if anchor not in self.anchors:
raise ComposerError(None, None, "found undefined alias %r"
% anchor.encode('utf-8'), event.start_mark)
return self.anchors[anchor]
event = self.peek_event()
anchor = event.anchor
if anchor is not None:
if anchor in self.anchors:
raise ComposerError("found duplicate anchor %r; first occurrence"
% anchor.encode('utf-8'), self.anchors[anchor].start_mark,
"second occurrence", event.start_mark)
self.descend_resolver(parent, index)
if self.check_event(ScalarEvent):
node = self.compose_scalar_node(anchor)
elif self.check_event(SequenceStartEvent):
node = self.compose_sequence_node(anchor)
elif self.check_event(MappingStartEvent):
node = self.compose_mapping_node(anchor)
self.ascend_resolver()
return node
def compose_scalar_node(self, anchor):
event = self.get_event()
tag = event.tag
if tag is None or tag == u'!':
tag = self.resolve(ScalarNode, event.value, event.implicit)
node = ScalarNode(tag, event.value,
event.start_mark, event.end_mark, style=event.style)
if anchor is not None:
self.anchors[anchor] = node
return node
def compose_sequence_node(self, anchor):
start_event = self.get_event()
tag = start_event.tag
if tag is None or tag == u'!':
tag = self.resolve(SequenceNode, None, start_event.implicit)
node = SequenceNode(tag, [],
start_event.start_mark, None,
flow_style=start_event.flow_style)
if anchor is not None:
self.anchors[anchor] = node
index = 0
while not self.check_event(SequenceEndEvent):
node.value.append(self.compose_node(node, index))
index += 1
end_event = self.get_event()
node.end_mark = end_event.end_mark
return node
def compose_mapping_node(self, anchor):
start_event = self.get_event()
tag = start_event.tag
if tag is None or tag == u'!':
tag = self.resolve(MappingNode, None, start_event.implicit)
node = MappingNode(tag, [],
start_event.start_mark, None,
flow_style=start_event.flow_style)
if anchor is not None:
self.anchors[anchor] = node
while not self.check_event(MappingEndEvent):
#key_event = self.peek_event()
item_key = self.compose_node(node, None)
#if item_key in node.value:
# raise ComposerError("while composing a mapping", start_event.start_mark,
# "found duplicate key", key_event.start_mark)
item_value = self.compose_node(node, item_key)
#node.value[item_key] = item_value
node.value.append((item_key, item_value))
end_event = self.get_event()
node.end_mark = end_event.end_mark
return node

@ -1,709 +0,0 @@
__all__ = [
'BaseConstructor',
'SafeConstructor',
'FullConstructor',
'UnsafeConstructor',
'Constructor',
'ConstructorError'
]
from error import *
from nodes import *
import datetime
import binascii, re, sys, types
class ConstructorError(MarkedYAMLError):
pass
class BaseConstructor(object):
yaml_constructors = {}
yaml_multi_constructors = {}
def __init__(self):
self.constructed_objects = {}
self.recursive_objects = {}
self.state_generators = []
self.deep_construct = False
def check_data(self):
# If there are more documents available?
return self.check_node()
def get_data(self):
# Construct and return the next document.
if self.check_node():
return self.construct_document(self.get_node())
def get_single_data(self):
# Ensure that the stream contains a single document and construct it.
node = self.get_single_node()
if node is not None:
return self.construct_document(node)
return None
def construct_document(self, node):
data = self.construct_object(node)
while self.state_generators:
state_generators = self.state_generators
self.state_generators = []
for generator in state_generators:
for dummy in generator:
pass
self.constructed_objects = {}
self.recursive_objects = {}
self.deep_construct = False
return data
def construct_object(self, node, deep=False):
if node in self.constructed_objects:
return self.constructed_objects[node]
if deep:
old_deep = self.deep_construct
self.deep_construct = True
if node in self.recursive_objects:
raise ConstructorError(None, None,
"found unconstructable recursive node", node.start_mark)
self.recursive_objects[node] = None
constructor = None
tag_suffix = None
if node.tag in self.yaml_constructors:
constructor = self.yaml_constructors[node.tag]
else:
for tag_prefix in self.yaml_multi_constructors:
if node.tag.startswith(tag_prefix):
tag_suffix = node.tag[len(tag_prefix):]
constructor = self.yaml_multi_constructors[tag_prefix]
break
else:
if None in self.yaml_multi_constructors:
tag_suffix = node.tag
constructor = self.yaml_multi_constructors[None]
elif None in self.yaml_constructors:
constructor = self.yaml_constructors[None]
elif isinstance(node, ScalarNode):
constructor = self.__class__.construct_scalar
elif isinstance(node, SequenceNode):
constructor = self.__class__.construct_sequence
elif isinstance(node, MappingNode):
constructor = self.__class__.construct_mapping
if tag_suffix is None:
data = constructor(self, node)
else:
data = constructor(self, tag_suffix, node)
if isinstance(data, types.GeneratorType):
generator = data
data = generator.next()
if self.deep_construct:
for dummy in generator:
pass
else:
self.state_generators.append(generator)
self.constructed_objects[node] = data
del self.recursive_objects[node]
if deep:
self.deep_construct = old_deep
return data
def construct_scalar(self, node):
if not isinstance(node, ScalarNode):
raise ConstructorError(None, None,
"expected a scalar node, but found %s" % node.id,
node.start_mark)
return node.value
def construct_sequence(self, node, deep=False):
if not isinstance(node, SequenceNode):
raise ConstructorError(None, None,
"expected a sequence node, but found %s" % node.id,
node.start_mark)
return [self.construct_object(child, deep=deep)
for child in node.value]
def construct_mapping(self, node, deep=False):
if not isinstance(node, MappingNode):
raise ConstructorError(None, None,
"expected a mapping node, but found %s" % node.id,
node.start_mark)
mapping = {}
for key_node, value_node in node.value:
key = self.construct_object(key_node, deep=deep)
try:
hash(key)
except TypeError, exc:
raise ConstructorError("while constructing a mapping", node.start_mark,
"found unacceptable key (%s)" % exc, key_node.start_mark)
value = self.construct_object(value_node, deep=deep)
mapping[key] = value
return mapping
def construct_pairs(self, node, deep=False):
if not isinstance(node, MappingNode):
raise ConstructorError(None, None,
"expected a mapping node, but found %s" % node.id,
node.start_mark)
pairs = []
for key_node, value_node in node.value:
key = self.construct_object(key_node, deep=deep)
value = self.construct_object(value_node, deep=deep)
pairs.append((key, value))
return pairs
def add_constructor(cls, tag, constructor):
if not 'yaml_constructors' in cls.__dict__:
cls.yaml_constructors = cls.yaml_constructors.copy()
cls.yaml_constructors[tag] = constructor
add_constructor = classmethod(add_constructor)
def add_multi_constructor(cls, tag_prefix, multi_constructor):
if not 'yaml_multi_constructors' in cls.__dict__:
cls.yaml_multi_constructors = cls.yaml_multi_constructors.copy()
cls.yaml_multi_constructors[tag_prefix] = multi_constructor
add_multi_constructor = classmethod(add_multi_constructor)
class SafeConstructor(BaseConstructor):
def construct_scalar(self, node):
if isinstance(node, MappingNode):
for key_node, value_node in node.value:
if key_node.tag == u'tag:yaml.org,2002:value':
return self.construct_scalar(value_node)
return BaseConstructor.construct_scalar(self, node)
def flatten_mapping(self, node):
merge = []
index = 0
while index < len(node.value):
key_node, value_node = node.value[index]
if key_node.tag == u'tag:yaml.org,2002:merge':
del node.value[index]
if isinstance(value_node, MappingNode):
self.flatten_mapping(value_node)
merge.extend(value_node.value)
elif isinstance(value_node, SequenceNode):
submerge = []
for subnode in value_node.value:
if not isinstance(subnode, MappingNode):
raise ConstructorError("while constructing a mapping",
node.start_mark,
"expected a mapping for merging, but found %s"
% subnode.id, subnode.start_mark)
self.flatten_mapping(subnode)
submerge.append(subnode.value)
submerge.reverse()
for value in submerge:
merge.extend(value)
else:
raise ConstructorError("while constructing a mapping", node.start_mark,
"expected a mapping or list of mappings for merging, but found %s"
% value_node.id, value_node.start_mark)
elif key_node.tag == u'tag:yaml.org,2002:value':
key_node.tag = u'tag:yaml.org,2002:str'
index += 1
else:
index += 1
if merge:
node.value = merge + node.value
def construct_mapping(self, node, deep=False):
if isinstance(node, MappingNode):
self.flatten_mapping(node)
return BaseConstructor.construct_mapping(self, node, deep=deep)
def construct_yaml_null(self, node):
self.construct_scalar(node)
return None
bool_values = {
u'yes': True,
u'no': False,
u'true': True,
u'false': False,
u'on': True,
u'off': False,
}
def construct_yaml_bool(self, node):
value = self.construct_scalar(node)
return self.bool_values[value.lower()]
def construct_yaml_int(self, node):
value = str(self.construct_scalar(node))
value = value.replace('_', '')
sign = +1
if value[0] == '-':
sign = -1
if value[0] in '+-':
value = value[1:]
if value == '0':
return 0
elif value.startswith('0b'):
return sign*int(value[2:], 2)
elif value.startswith('0x'):
return sign*int(value[2:], 16)
elif value[0] == '0':
return sign*int(value, 8)
elif ':' in value:
digits = [int(part) for part in value.split(':')]
digits.reverse()
base = 1
value = 0
for digit in digits:
value += digit*base
base *= 60
return sign*value
else:
return sign*int(value)
inf_value = 1e300
while inf_value != inf_value*inf_value:
inf_value *= inf_value
nan_value = -inf_value/inf_value # Trying to make a quiet NaN (like C99).
def construct_yaml_float(self, node):
value = str(self.construct_scalar(node))
value = value.replace('_', '').lower()
sign = +1
if value[0] == '-':
sign = -1
if value[0] in '+-':
value = value[1:]
if value == '.inf':
return sign*self.inf_value
elif value == '.nan':
return self.nan_value
elif ':' in value:
digits = [float(part) for part in value.split(':')]
digits.reverse()
base = 1
value = 0.0
for digit in digits:
value += digit*base
base *= 60
return sign*value
else:
return sign*float(value)
def construct_yaml_binary(self, node):
value = self.construct_scalar(node)
try:
return str(value).decode('base64')
except (binascii.Error, UnicodeEncodeError), exc:
raise ConstructorError(None, None,
"failed to decode base64 data: %s" % exc, node.start_mark)
timestamp_regexp = re.compile(
ur'''^(?P<year>[0-9][0-9][0-9][0-9])
-(?P<month>[0-9][0-9]?)
-(?P<day>[0-9][0-9]?)
(?:(?:[Tt]|[ \t]+)
(?P<hour>[0-9][0-9]?)
:(?P<minute>[0-9][0-9])
:(?P<second>[0-9][0-9])
(?:\.(?P<fraction>[0-9]*))?
(?:[ \t]*(?P<tz>Z|(?P<tz_sign>[-+])(?P<tz_hour>[0-9][0-9]?)
(?::(?P<tz_minute>[0-9][0-9]))?))?)?$''', re.X)
def construct_yaml_timestamp(self, node):
value = self.construct_scalar(node)
match = self.timestamp_regexp.match(node.value)
values = match.groupdict()
year = int(values['year'])
month = int(values['month'])
day = int(values['day'])
if not values['hour']:
return datetime.date(year, month, day)
hour = int(values['hour'])
minute = int(values['minute'])
second = int(values['second'])
fraction = 0
if values['fraction']:
fraction = values['fraction'][:6]
while len(fraction) < 6:
fraction += '0'
fraction = int(fraction)
delta = None
if values['tz_sign']:
tz_hour = int(values['tz_hour'])
tz_minute = int(values['tz_minute'] or 0)
delta = datetime.timedelta(hours=tz_hour, minutes=tz_minute)
if values['tz_sign'] == '-':
delta = -delta
data = datetime.datetime(year, month, day, hour, minute, second, fraction)
if delta:
data -= delta
return data
def construct_yaml_omap(self, node):
# Note: we do not check for duplicate keys, because it's too
# CPU-expensive.
omap = []
yield omap
if not isinstance(node, SequenceNode):
raise ConstructorError("while constructing an ordered map", node.start_mark,
"expected a sequence, but found %s" % node.id, node.start_mark)
for subnode in node.value:
if not isinstance(subnode, MappingNode):
raise ConstructorError("while constructing an ordered map", node.start_mark,
"expected a mapping of length 1, but found %s" % subnode.id,
subnode.start_mark)
if len(subnode.value) != 1:
raise ConstructorError("while constructing an ordered map", node.start_mark,
"expected a single mapping item, but found %d items" % len(subnode.value),
subnode.start_mark)
key_node, value_node = subnode.value[0]
key = self.construct_object(key_node)
value = self.construct_object(value_node)
omap.append((key, value))
def construct_yaml_pairs(self, node):
# Note: the same code as `construct_yaml_omap`.
pairs = []
yield pairs
if not isinstance(node, SequenceNode):
raise ConstructorError("while constructing pairs", node.start_mark,
"expected a sequence, but found %s" % node.id, node.start_mark)
for subnode in node.value:
if not isinstance(subnode, MappingNode):
raise ConstructorError("while constructing pairs", node.start_mark,
"expected a mapping of length 1, but found %s" % subnode.id,
subnode.start_mark)
if len(subnode.value) != 1:
raise ConstructorError("while constructing pairs", node.start_mark,
"expected a single mapping item, but found %d items" % len(subnode.value),
subnode.start_mark)
key_node, value_node = subnode.value[0]
key = self.construct_object(key_node)
value = self.construct_object(value_node)
pairs.append((key, value))
def construct_yaml_set(self, node):
data = set()
yield data
value = self.construct_mapping(node)
data.update(value)
def construct_yaml_str(self, node):
value = self.construct_scalar(node)
try:
return value.encode('ascii')
except UnicodeEncodeError:
return value
def construct_yaml_seq(self, node):
data = []
yield data
data.extend(self.construct_sequence(node))
def construct_yaml_map(self, node):
data = {}
yield data
value = self.construct_mapping(node)
data.update(value)
def construct_yaml_object(self, node, cls):
data = cls.__new__(cls)
yield data
if hasattr(data, '__setstate__'):
state = self.construct_mapping(node, deep=True)
data.__setstate__(state)
else:
state = self.construct_mapping(node)
data.__dict__.update(state)
def construct_undefined(self, node):
raise ConstructorError(None, None,
"could not determine a constructor for the tag %r" % node.tag.encode('utf-8'),
node.start_mark)
SafeConstructor.add_constructor(
u'tag:yaml.org,2002:null',
SafeConstructor.construct_yaml_null)
SafeConstructor.add_constructor(
u'tag:yaml.org,2002:bool',
SafeConstructor.construct_yaml_bool)
SafeConstructor.add_constructor(
u'tag:yaml.org,2002:int',
SafeConstructor.construct_yaml_int)
SafeConstructor.add_constructor(
u'tag:yaml.org,2002:float',
SafeConstructor.construct_yaml_float)
SafeConstructor.add_constructor(
u'tag:yaml.org,2002:binary',
SafeConstructor.construct_yaml_binary)
SafeConstructor.add_constructor(
u'tag:yaml.org,2002:timestamp',
SafeConstructor.construct_yaml_timestamp)
SafeConstructor.add_constructor(
u'tag:yaml.org,2002:omap',
SafeConstructor.construct_yaml_omap)
SafeConstructor.add_constructor(
u'tag:yaml.org,2002:pairs',
SafeConstructor.construct_yaml_pairs)
SafeConstructor.add_constructor(
u'tag:yaml.org,2002:set',
SafeConstructor.construct_yaml_set)
SafeConstructor.add_constructor(
u'tag:yaml.org,2002:str',
SafeConstructor.construct_yaml_str)
SafeConstructor.add_constructor(
u'tag:yaml.org,2002:seq',
SafeConstructor.construct_yaml_seq)
SafeConstructor.add_constructor(
u'tag:yaml.org,2002:map',
SafeConstructor.construct_yaml_map)
SafeConstructor.add_constructor(None,
SafeConstructor.construct_undefined)
class FullConstructor(SafeConstructor):
def construct_python_str(self, node):
return self.construct_scalar(node).encode('utf-8')
def construct_python_unicode(self, node):
return self.construct_scalar(node)
def construct_python_long(self, node):
return long(self.construct_yaml_int(node))
def construct_python_complex(self, node):
return complex(self.construct_scalar(node))
def construct_python_tuple(self, node):
return tuple(self.construct_sequence(node))
def find_python_module(self, name, mark, unsafe=False):
if not name:
raise ConstructorError("while constructing a Python module", mark,
"expected non-empty name appended to the tag", mark)
if unsafe:
try:
__import__(name)
except ImportError, exc:
raise ConstructorError("while constructing a Python module", mark,
"cannot find module %r (%s)" % (name.encode('utf-8'), exc), mark)
if not name in sys.modules:
raise ConstructorError("while constructing a Python module", mark,
"module %r is not imported" % name.encode('utf-8'), mark)
return sys.modules[name]
def find_python_name(self, name, mark, unsafe=False):
if not name:
raise ConstructorError("while constructing a Python object", mark,
"expected non-empty name appended to the tag", mark)
if u'.' in name:
module_name, object_name = name.rsplit('.', 1)
else:
module_name = '__builtin__'
object_name = name
if unsafe:
try:
__import__(module_name)
except ImportError, exc:
raise ConstructorError("while constructing a Python object", mark,
"cannot find module %r (%s)" % (module_name.encode('utf-8'), exc), mark)
if not module_name in sys.modules:
raise ConstructorError("while constructing a Python object", mark,
"module %r is not imported" % module_name.encode('utf-8'), mark)
module = sys.modules[module_name]
if not hasattr(module, object_name):
raise ConstructorError("while constructing a Python object", mark,
"cannot find %r in the module %r" % (object_name.encode('utf-8'),
module.__name__), mark)
return getattr(module, object_name)
def construct_python_name(self, suffix, node):
value = self.construct_scalar(node)
if value:
raise ConstructorError("while constructing a Python name", node.start_mark,
"expected the empty value, but found %r" % value.encode('utf-8'),
node.start_mark)
return self.find_python_name(suffix, node.start_mark)
def construct_python_module(self, suffix, node):
value = self.construct_scalar(node)
if value:
raise ConstructorError("while constructing a Python module", node.start_mark,
"expected the empty value, but found %r" % value.encode('utf-8'),
node.start_mark)
return self.find_python_module(suffix, node.start_mark)
class classobj: pass
def make_python_instance(self, suffix, node,
args=None, kwds=None, newobj=False, unsafe=False):
if not args:
args = []
if not kwds:
kwds = {}
cls = self.find_python_name(suffix, node.start_mark)
if not (unsafe or isinstance(cls, type) or isinstance(cls, type(self.classobj))):
raise ConstructorError("while constructing a Python instance", node.start_mark,
"expected a class, but found %r" % type(cls),
node.start_mark)
if newobj and isinstance(cls, type(self.classobj)) \
and not args and not kwds:
instance = self.classobj()
instance.__class__ = cls
return instance
elif newobj and isinstance(cls, type):
return cls.__new__(cls, *args, **kwds)
else:
return cls(*args, **kwds)
def set_python_instance_state(self, instance, state):
if hasattr(instance, '__setstate__'):
instance.__setstate__(state)
else:
slotstate = {}
if isinstance(state, tuple) and len(state) == 2:
state, slotstate = state
if hasattr(instance, '__dict__'):
instance.__dict__.update(state)
elif state:
slotstate.update(state)
for key, value in slotstate.items():
setattr(object, key, value)
def construct_python_object(self, suffix, node):
# Format:
# !!python/object:module.name { ... state ... }
instance = self.make_python_instance(suffix, node, newobj=True)
yield instance
deep = hasattr(instance, '__setstate__')
state = self.construct_mapping(node, deep=deep)
self.set_python_instance_state(instance, state)
def construct_python_object_apply(self, suffix, node, newobj=False):
# Format:
# !!python/object/apply # (or !!python/object/new)
# args: [ ... arguments ... ]
# kwds: { ... keywords ... }
# state: ... state ...
# listitems: [ ... listitems ... ]
# dictitems: { ... dictitems ... }
# or short format:
# !!python/object/apply [ ... arguments ... ]
# The difference between !!python/object/apply and !!python/object/new
# is how an object is created, check make_python_instance for details.
if isinstance(node, SequenceNode):
args = self.construct_sequence(node, deep=True)
kwds = {}
state = {}
listitems = []
dictitems = {}
else:
value = self.construct_mapping(node, deep=True)
args = value.get('args', [])
kwds = value.get('kwds', {})
state = value.get('state', {})
listitems = value.get('listitems', [])
dictitems = value.get('dictitems', {})
instance = self.make_python_instance(suffix, node, args, kwds, newobj)
if state:
self.set_python_instance_state(instance, state)
if listitems:
instance.extend(listitems)
if dictitems:
for key in dictitems:
instance[key] = dictitems[key]
return instance
def construct_python_object_new(self, suffix, node):
return self.construct_python_object_apply(suffix, node, newobj=True)
FullConstructor.add_constructor(
u'tag:yaml.org,2002:python/none',
FullConstructor.construct_yaml_null)
FullConstructor.add_constructor(
u'tag:yaml.org,2002:python/bool',
FullConstructor.construct_yaml_bool)
FullConstructor.add_constructor(
u'tag:yaml.org,2002:python/str',
FullConstructor.construct_python_str)
FullConstructor.add_constructor(
u'tag:yaml.org,2002:python/unicode',
FullConstructor.construct_python_unicode)
FullConstructor.add_constructor(
u'tag:yaml.org,2002:python/int',
FullConstructor.construct_yaml_int)
FullConstructor.add_constructor(
u'tag:yaml.org,2002:python/long',
FullConstructor.construct_python_long)
FullConstructor.add_constructor(
u'tag:yaml.org,2002:python/float',
FullConstructor.construct_yaml_float)
FullConstructor.add_constructor(
u'tag:yaml.org,2002:python/complex',
FullConstructor.construct_python_complex)
FullConstructor.add_constructor(
u'tag:yaml.org,2002:python/list',
FullConstructor.construct_yaml_seq)
FullConstructor.add_constructor(
u'tag:yaml.org,2002:python/tuple',
FullConstructor.construct_python_tuple)
FullConstructor.add_constructor(
u'tag:yaml.org,2002:python/dict',
FullConstructor.construct_yaml_map)
FullConstructor.add_multi_constructor(
u'tag:yaml.org,2002:python/name:',
FullConstructor.construct_python_name)
FullConstructor.add_multi_constructor(
u'tag:yaml.org,2002:python/module:',
FullConstructor.construct_python_module)
FullConstructor.add_multi_constructor(
u'tag:yaml.org,2002:python/object:',
FullConstructor.construct_python_object)
FullConstructor.add_multi_constructor(
u'tag:yaml.org,2002:python/object/apply:',
FullConstructor.construct_python_object_apply)
FullConstructor.add_multi_constructor(
u'tag:yaml.org,2002:python/object/new:',
FullConstructor.construct_python_object_new)
class UnsafeConstructor(FullConstructor):
def find_python_module(self, name, mark):
return super(UnsafeConstructor, self).find_python_module(name, mark, unsafe=True)
def find_python_name(self, name, mark):
return super(UnsafeConstructor, self).find_python_name(name, mark, unsafe=True)
def make_python_instance(self, suffix, node, args=None, kwds=None, newobj=False):
return super(UnsafeConstructor, self).make_python_instance(
suffix, node, args, kwds, newobj, unsafe=True)
# Constructor is same as UnsafeConstructor. Need to leave this in place in case
# people have extended it directly.
class Constructor(UnsafeConstructor):
pass

@ -1,101 +0,0 @@
__all__ = [
'CBaseLoader', 'CSafeLoader', 'CFullLoader', 'CUnsafeLoader', 'CLoader',
'CBaseDumper', 'CSafeDumper', 'CDumper'
]
from _yaml import CParser, CEmitter
from constructor import *
from serializer import *
from representer import *
from resolver import *
class CBaseLoader(CParser, BaseConstructor, BaseResolver):
def __init__(self, stream):
CParser.__init__(self, stream)
BaseConstructor.__init__(self)
BaseResolver.__init__(self)
class CSafeLoader(CParser, SafeConstructor, Resolver):
def __init__(self, stream):
CParser.__init__(self, stream)
SafeConstructor.__init__(self)
Resolver.__init__(self)
class CFullLoader(CParser, FullConstructor, Resolver):
def __init__(self, stream):
CParser.__init__(self, stream)
FullConstructor.__init__(self)
Resolver.__init__(self)
class CUnsafeLoader(CParser, UnsafeConstructor, Resolver):
def __init__(self, stream):
CParser.__init__(self, stream)
UnsafeConstructor.__init__(self)
Resolver.__init__(self)
class CLoader(CParser, Constructor, Resolver):
def __init__(self, stream):
CParser.__init__(self, stream)
Constructor.__init__(self)
Resolver.__init__(self)
class CBaseDumper(CEmitter, BaseRepresenter, BaseResolver):
def __init__(self, stream,
default_style=None, default_flow_style=False,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None, sort_keys=True):
CEmitter.__init__(self, stream, canonical=canonical,
indent=indent, width=width, encoding=encoding,
allow_unicode=allow_unicode, line_break=line_break,
explicit_start=explicit_start, explicit_end=explicit_end,
version=version, tags=tags)
Representer.__init__(self, default_style=default_style,
default_flow_style=default_flow_style, sort_keys=sort_keys)
Resolver.__init__(self)
class CSafeDumper(CEmitter, SafeRepresenter, Resolver):
def __init__(self, stream,
default_style=None, default_flow_style=False,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None, sort_keys=True):
CEmitter.__init__(self, stream, canonical=canonical,
indent=indent, width=width, encoding=encoding,
allow_unicode=allow_unicode, line_break=line_break,
explicit_start=explicit_start, explicit_end=explicit_end,
version=version, tags=tags)
SafeRepresenter.__init__(self, default_style=default_style,
default_flow_style=default_flow_style, sort_keys=sort_keys)
Resolver.__init__(self)
class CDumper(CEmitter, Serializer, Representer, Resolver):
def __init__(self, stream,
default_style=None, default_flow_style=False,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None, sort_keys=True):
CEmitter.__init__(self, stream, canonical=canonical,
indent=indent, width=width, encoding=encoding,
allow_unicode=allow_unicode, line_break=line_break,
explicit_start=explicit_start, explicit_end=explicit_end,
version=version, tags=tags)
Representer.__init__(self, default_style=default_style,
default_flow_style=default_flow_style, sort_keys=sort_keys)
Resolver.__init__(self)

@ -1,62 +0,0 @@
__all__ = ['BaseDumper', 'SafeDumper', 'Dumper']
from emitter import *
from serializer import *
from representer import *
from resolver import *
class BaseDumper(Emitter, Serializer, BaseRepresenter, BaseResolver):
def __init__(self, stream,
default_style=None, default_flow_style=False,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None, sort_keys=True):
Emitter.__init__(self, stream, canonical=canonical,
indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break)
Serializer.__init__(self, encoding=encoding,
explicit_start=explicit_start, explicit_end=explicit_end,
version=version, tags=tags)
Representer.__init__(self, default_style=default_style,
default_flow_style=default_flow_style, sort_keys=sort_keys)
Resolver.__init__(self)
class SafeDumper(Emitter, Serializer, SafeRepresenter, Resolver):
def __init__(self, stream,
default_style=None, default_flow_style=False,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None, sort_keys=True):
Emitter.__init__(self, stream, canonical=canonical,
indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break)
Serializer.__init__(self, encoding=encoding,
explicit_start=explicit_start, explicit_end=explicit_end,
version=version, tags=tags)
SafeRepresenter.__init__(self, default_style=default_style,
default_flow_style=default_flow_style, sort_keys=sort_keys)
Resolver.__init__(self)
class Dumper(Emitter, Serializer, Representer, Resolver):
def __init__(self, stream,
default_style=None, default_flow_style=False,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None, sort_keys=True):
Emitter.__init__(self, stream, canonical=canonical,
indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break)
Serializer.__init__(self, encoding=encoding,
explicit_start=explicit_start, explicit_end=explicit_end,
version=version, tags=tags)
Representer.__init__(self, default_style=default_style,
default_flow_style=default_flow_style, sort_keys=sort_keys)
Resolver.__init__(self)

File diff suppressed because it is too large Load Diff

@ -1,75 +0,0 @@
__all__ = ['Mark', 'YAMLError', 'MarkedYAMLError']
class Mark(object):
def __init__(self, name, index, line, column, buffer, pointer):
self.name = name
self.index = index
self.line = line
self.column = column
self.buffer = buffer
self.pointer = pointer
def get_snippet(self, indent=4, max_length=75):
if self.buffer is None:
return None
head = ''
start = self.pointer
while start > 0 and self.buffer[start-1] not in u'\0\r\n\x85\u2028\u2029':
start -= 1
if self.pointer-start > max_length/2-1:
head = ' ... '
start += 5
break
tail = ''
end = self.pointer
while end < len(self.buffer) and self.buffer[end] not in u'\0\r\n\x85\u2028\u2029':
end += 1
if end-self.pointer > max_length/2-1:
tail = ' ... '
end -= 5
break
snippet = self.buffer[start:end].encode('utf-8')
return ' '*indent + head + snippet + tail + '\n' \
+ ' '*(indent+self.pointer-start+len(head)) + '^'
def __str__(self):
snippet = self.get_snippet()
where = " in \"%s\", line %d, column %d" \
% (self.name, self.line+1, self.column+1)
if snippet is not None:
where += ":\n"+snippet
return where
class YAMLError(Exception):
pass
class MarkedYAMLError(YAMLError):
def __init__(self, context=None, context_mark=None,
problem=None, problem_mark=None, note=None):
self.context = context
self.context_mark = context_mark
self.problem = problem
self.problem_mark = problem_mark
self.note = note
def __str__(self):
lines = []
if self.context is not None:
lines.append(self.context)
if self.context_mark is not None \
and (self.problem is None or self.problem_mark is None
or self.context_mark.name != self.problem_mark.name
or self.context_mark.line != self.problem_mark.line
or self.context_mark.column != self.problem_mark.column):
lines.append(str(self.context_mark))
if self.problem is not None:
lines.append(self.problem)
if self.problem_mark is not None:
lines.append(str(self.problem_mark))
if self.note is not None:
lines.append(self.note)
return '\n'.join(lines)

@ -1,86 +0,0 @@
# Abstract classes.
class Event(object):
def __init__(self, start_mark=None, end_mark=None):
self.start_mark = start_mark
self.end_mark = end_mark
def __repr__(self):
attributes = [key for key in ['anchor', 'tag', 'implicit', 'value']
if hasattr(self, key)]
arguments = ', '.join(['%s=%r' % (key, getattr(self, key))
for key in attributes])
return '%s(%s)' % (self.__class__.__name__, arguments)
class NodeEvent(Event):
def __init__(self, anchor, start_mark=None, end_mark=None):
self.anchor = anchor
self.start_mark = start_mark
self.end_mark = end_mark
class CollectionStartEvent(NodeEvent):
def __init__(self, anchor, tag, implicit, start_mark=None, end_mark=None,
flow_style=None):
self.anchor = anchor
self.tag = tag
self.implicit = implicit
self.start_mark = start_mark
self.end_mark = end_mark
self.flow_style = flow_style
class CollectionEndEvent(Event):
pass
# Implementations.
class StreamStartEvent(Event):
def __init__(self, start_mark=None, end_mark=None, encoding=None):
self.start_mark = start_mark
self.end_mark = end_mark
self.encoding = encoding
class StreamEndEvent(Event):
pass
class DocumentStartEvent(Event):
def __init__(self, start_mark=None, end_mark=None,
explicit=None, version=None, tags=None):
self.start_mark = start_mark
self.end_mark = end_mark
self.explicit = explicit
self.version = version
self.tags = tags
class DocumentEndEvent(Event):
def __init__(self, start_mark=None, end_mark=None,
explicit=None):
self.start_mark = start_mark
self.end_mark = end_mark
self.explicit = explicit
class AliasEvent(NodeEvent):
pass
class ScalarEvent(NodeEvent):
def __init__(self, anchor, tag, implicit, value,
start_mark=None, end_mark=None, style=None):
self.anchor = anchor
self.tag = tag
self.implicit = implicit
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
self.style = style
class SequenceStartEvent(CollectionStartEvent):
pass
class SequenceEndEvent(CollectionEndEvent):
pass
class MappingStartEvent(CollectionStartEvent):
pass
class MappingEndEvent(CollectionEndEvent):
pass

@ -1,63 +0,0 @@
__all__ = ['BaseLoader', 'FullLoader', 'SafeLoader', 'Loader', 'UnsafeLoader']
from reader import *
from scanner import *
from parser import *
from composer import *
from constructor import *
from resolver import *
class BaseLoader(Reader, Scanner, Parser, Composer, BaseConstructor, BaseResolver):
def __init__(self, stream):
Reader.__init__(self, stream)
Scanner.__init__(self)
Parser.__init__(self)
Composer.__init__(self)
BaseConstructor.__init__(self)
BaseResolver.__init__(self)
class FullLoader(Reader, Scanner, Parser, Composer, FullConstructor, Resolver):
def __init__(self, stream):
Reader.__init__(self, stream)
Scanner.__init__(self)
Parser.__init__(self)
Composer.__init__(self)
FullConstructor.__init__(self)
Resolver.__init__(self)
class SafeLoader(Reader, Scanner, Parser, Composer, SafeConstructor, Resolver):
def __init__(self, stream):
Reader.__init__(self, stream)
Scanner.__init__(self)
Parser.__init__(self)
Composer.__init__(self)
SafeConstructor.__init__(self)
Resolver.__init__(self)
class Loader(Reader, Scanner, Parser, Composer, Constructor, Resolver):
def __init__(self, stream):
Reader.__init__(self, stream)
Scanner.__init__(self)
Parser.__init__(self)
Composer.__init__(self)
Constructor.__init__(self)
Resolver.__init__(self)
# UnsafeLoader is the same as Loader (which is and was always unsafe on
# untrusted input). Use of either Loader or UnsafeLoader should be rare, since
# FullLoad should be able to load almost all YAML safely. Loader is left intact
# to ensure backwards compatability.
class UnsafeLoader(Reader, Scanner, Parser, Composer, Constructor, Resolver):
def __init__(self, stream):
Reader.__init__(self, stream)
Scanner.__init__(self)
Parser.__init__(self)
Composer.__init__(self)
Constructor.__init__(self)
Resolver.__init__(self)

@ -1,49 +0,0 @@
class Node(object):
def __init__(self, tag, value, start_mark, end_mark):
self.tag = tag
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
def __repr__(self):
value = self.value
#if isinstance(value, list):
# if len(value) == 0:
# value = '<empty>'
# elif len(value) == 1:
# value = '<1 item>'
# else:
# value = '<%d items>' % len(value)
#else:
# if len(value) > 75:
# value = repr(value[:70]+u' ... ')
# else:
# value = repr(value)
value = repr(value)
return '%s(tag=%r, value=%s)' % (self.__class__.__name__, self.tag, value)
class ScalarNode(Node):
id = 'scalar'
def __init__(self, tag, value,
start_mark=None, end_mark=None, style=None):
self.tag = tag
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
self.style = style
class CollectionNode(Node):
def __init__(self, tag, value,
start_mark=None, end_mark=None, flow_style=None):
self.tag = tag
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
self.flow_style = flow_style
class SequenceNode(CollectionNode):
id = 'sequence'
class MappingNode(CollectionNode):
id = 'mapping'

@ -1,589 +0,0 @@
# The following YAML grammar is LL(1) and is parsed by a recursive descent
# parser.
#
# stream ::= STREAM-START implicit_document? explicit_document* STREAM-END
# implicit_document ::= block_node DOCUMENT-END*
# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
# block_node_or_indentless_sequence ::=
# ALIAS
# | properties (block_content | indentless_block_sequence)?
# | block_content
# | indentless_block_sequence
# block_node ::= ALIAS
# | properties block_content?
# | block_content
# flow_node ::= ALIAS
# | properties flow_content?
# | flow_content
# properties ::= TAG ANCHOR? | ANCHOR TAG?
# block_content ::= block_collection | flow_collection | SCALAR
# flow_content ::= flow_collection | SCALAR
# block_collection ::= block_sequence | block_mapping
# flow_collection ::= flow_sequence | flow_mapping
# block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
# indentless_sequence ::= (BLOCK-ENTRY block_node?)+
# block_mapping ::= BLOCK-MAPPING_START
# ((KEY block_node_or_indentless_sequence?)?
# (VALUE block_node_or_indentless_sequence?)?)*
# BLOCK-END
# flow_sequence ::= FLOW-SEQUENCE-START
# (flow_sequence_entry FLOW-ENTRY)*
# flow_sequence_entry?
# FLOW-SEQUENCE-END
# flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
# flow_mapping ::= FLOW-MAPPING-START
# (flow_mapping_entry FLOW-ENTRY)*
# flow_mapping_entry?
# FLOW-MAPPING-END
# flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
#
# FIRST sets:
#
# stream: { STREAM-START }
# explicit_document: { DIRECTIVE DOCUMENT-START }
# implicit_document: FIRST(block_node)
# block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START }
# flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START }
# block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
# flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
# block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START }
# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
# block_sequence: { BLOCK-SEQUENCE-START }
# block_mapping: { BLOCK-MAPPING-START }
# block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY }
# indentless_sequence: { ENTRY }
# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
# flow_sequence: { FLOW-SEQUENCE-START }
# flow_mapping: { FLOW-MAPPING-START }
# flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
# flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
__all__ = ['Parser', 'ParserError']
from error import MarkedYAMLError
from tokens import *
from events import *
from scanner import *
class ParserError(MarkedYAMLError):
pass
class Parser(object):
# Since writing a recursive-descendant parser is a straightforward task, we
# do not give many comments here.
DEFAULT_TAGS = {
u'!': u'!',
u'!!': u'tag:yaml.org,2002:',
}
def __init__(self):
self.current_event = None
self.yaml_version = None
self.tag_handles = {}
self.states = []
self.marks = []
self.state = self.parse_stream_start
def dispose(self):
# Reset the state attributes (to clear self-references)
self.states = []
self.state = None
def check_event(self, *choices):
# Check the type of the next event.
if self.current_event is None:
if self.state:
self.current_event = self.state()
if self.current_event is not None:
if not choices:
return True
for choice in choices:
if isinstance(self.current_event, choice):
return True
return False
def peek_event(self):
# Get the next event.
if self.current_event is None:
if self.state:
self.current_event = self.state()
return self.current_event
def get_event(self):
# Get the next event and proceed further.
if self.current_event is None:
if self.state:
self.current_event = self.state()
value = self.current_event
self.current_event = None
return value
# stream ::= STREAM-START implicit_document? explicit_document* STREAM-END
# implicit_document ::= block_node DOCUMENT-END*
# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
def parse_stream_start(self):
# Parse the stream start.
token = self.get_token()
event = StreamStartEvent(token.start_mark, token.end_mark,
encoding=token.encoding)
# Prepare the next state.
self.state = self.parse_implicit_document_start
return event
def parse_implicit_document_start(self):
# Parse an implicit document.
if not self.check_token(DirectiveToken, DocumentStartToken,
StreamEndToken):
self.tag_handles = self.DEFAULT_TAGS
token = self.peek_token()
start_mark = end_mark = token.start_mark
event = DocumentStartEvent(start_mark, end_mark,
explicit=False)
# Prepare the next state.
self.states.append(self.parse_document_end)
self.state = self.parse_block_node
return event
else:
return self.parse_document_start()
def parse_document_start(self):
# Parse any extra document end indicators.
while self.check_token(DocumentEndToken):
self.get_token()
# Parse an explicit document.
if not self.check_token(StreamEndToken):
token = self.peek_token()
start_mark = token.start_mark
version, tags = self.process_directives()
if not self.check_token(DocumentStartToken):
raise ParserError(None, None,
"expected '<document start>', but found %r"
% self.peek_token().id,
self.peek_token().start_mark)
token = self.get_token()
end_mark = token.end_mark
event = DocumentStartEvent(start_mark, end_mark,
explicit=True, version=version, tags=tags)
self.states.append(self.parse_document_end)
self.state = self.parse_document_content
else:
# Parse the end of the stream.
token = self.get_token()
event = StreamEndEvent(token.start_mark, token.end_mark)
assert not self.states
assert not self.marks
self.state = None
return event
def parse_document_end(self):
# Parse the document end.
token = self.peek_token()
start_mark = end_mark = token.start_mark
explicit = False
if self.check_token(DocumentEndToken):
token = self.get_token()
end_mark = token.end_mark
explicit = True
event = DocumentEndEvent(start_mark, end_mark,
explicit=explicit)
# Prepare the next state.
self.state = self.parse_document_start
return event
def parse_document_content(self):
if self.check_token(DirectiveToken,
DocumentStartToken, DocumentEndToken, StreamEndToken):
event = self.process_empty_scalar(self.peek_token().start_mark)
self.state = self.states.pop()
return event
else:
return self.parse_block_node()
def process_directives(self):
self.yaml_version = None
self.tag_handles = {}
while self.check_token(DirectiveToken):
token = self.get_token()
if token.name == u'YAML':
if self.yaml_version is not None:
raise ParserError(None, None,
"found duplicate YAML directive", token.start_mark)
major, minor = token.value
if major != 1:
raise ParserError(None, None,
"found incompatible YAML document (version 1.* is required)",
token.start_mark)
self.yaml_version = token.value
elif token.name == u'TAG':
handle, prefix = token.value
if handle in self.tag_handles:
raise ParserError(None, None,
"duplicate tag handle %r" % handle.encode('utf-8'),
token.start_mark)
self.tag_handles[handle] = prefix
if self.tag_handles:
value = self.yaml_version, self.tag_handles.copy()
else:
value = self.yaml_version, None
for key in self.DEFAULT_TAGS:
if key not in self.tag_handles:
self.tag_handles[key] = self.DEFAULT_TAGS[key]
return value
# block_node_or_indentless_sequence ::= ALIAS
# | properties (block_content | indentless_block_sequence)?
# | block_content
# | indentless_block_sequence
# block_node ::= ALIAS
# | properties block_content?
# | block_content
# flow_node ::= ALIAS
# | properties flow_content?
# | flow_content
# properties ::= TAG ANCHOR? | ANCHOR TAG?
# block_content ::= block_collection | flow_collection | SCALAR
# flow_content ::= flow_collection | SCALAR
# block_collection ::= block_sequence | block_mapping
# flow_collection ::= flow_sequence | flow_mapping
def parse_block_node(self):
return self.parse_node(block=True)
def parse_flow_node(self):
return self.parse_node()
def parse_block_node_or_indentless_sequence(self):
return self.parse_node(block=True, indentless_sequence=True)
def parse_node(self, block=False, indentless_sequence=False):
if self.check_token(AliasToken):
token = self.get_token()
event = AliasEvent(token.value, token.start_mark, token.end_mark)
self.state = self.states.pop()
else:
anchor = None
tag = None
start_mark = end_mark = tag_mark = None
if self.check_token(AnchorToken):
token = self.get_token()
start_mark = token.start_mark
end_mark = token.end_mark
anchor = token.value
if self.check_token(TagToken):
token = self.get_token()
tag_mark = token.start_mark
end_mark = token.end_mark
tag = token.value
elif self.check_token(TagToken):
token = self.get_token()
start_mark = tag_mark = token.start_mark
end_mark = token.end_mark
tag = token.value
if self.check_token(AnchorToken):
token = self.get_token()
end_mark = token.end_mark
anchor = token.value
if tag is not None:
handle, suffix = tag
if handle is not None:
if handle not in self.tag_handles:
raise ParserError("while parsing a node", start_mark,
"found undefined tag handle %r" % handle.encode('utf-8'),
tag_mark)
tag = self.tag_handles[handle]+suffix
else:
tag = suffix
#if tag == u'!':
# raise ParserError("while parsing a node", start_mark,
# "found non-specific tag '!'", tag_mark,
# "Please check 'http://pyyaml.org/wiki/YAMLNonSpecificTag' and share your opinion.")
if start_mark is None:
start_mark = end_mark = self.peek_token().start_mark
event = None
implicit = (tag is None or tag == u'!')
if indentless_sequence and self.check_token(BlockEntryToken):
end_mark = self.peek_token().end_mark
event = SequenceStartEvent(anchor, tag, implicit,
start_mark, end_mark)
self.state = self.parse_indentless_sequence_entry
else:
if self.check_token(ScalarToken):
token = self.get_token()
end_mark = token.end_mark
if (token.plain and tag is None) or tag == u'!':
implicit = (True, False)
elif tag is None:
implicit = (False, True)
else:
implicit = (False, False)
event = ScalarEvent(anchor, tag, implicit, token.value,
start_mark, end_mark, style=token.style)
self.state = self.states.pop()
elif self.check_token(FlowSequenceStartToken):
end_mark = self.peek_token().end_mark
event = SequenceStartEvent(anchor, tag, implicit,
start_mark, end_mark, flow_style=True)
self.state = self.parse_flow_sequence_first_entry
elif self.check_token(FlowMappingStartToken):
end_mark = self.peek_token().end_mark
event = MappingStartEvent(anchor, tag, implicit,
start_mark, end_mark, flow_style=True)
self.state = self.parse_flow_mapping_first_key
elif block and self.check_token(BlockSequenceStartToken):
end_mark = self.peek_token().start_mark
event = SequenceStartEvent(anchor, tag, implicit,
start_mark, end_mark, flow_style=False)
self.state = self.parse_block_sequence_first_entry
elif block and self.check_token(BlockMappingStartToken):
end_mark = self.peek_token().start_mark
event = MappingStartEvent(anchor, tag, implicit,
start_mark, end_mark, flow_style=False)
self.state = self.parse_block_mapping_first_key
elif anchor is not None or tag is not None:
# Empty scalars are allowed even if a tag or an anchor is
# specified.
event = ScalarEvent(anchor, tag, (implicit, False), u'',
start_mark, end_mark)
self.state = self.states.pop()
else:
if block:
node = 'block'
else:
node = 'flow'
token = self.peek_token()
raise ParserError("while parsing a %s node" % node, start_mark,
"expected the node content, but found %r" % token.id,
token.start_mark)
return event
# block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
def parse_block_sequence_first_entry(self):
token = self.get_token()
self.marks.append(token.start_mark)
return self.parse_block_sequence_entry()
def parse_block_sequence_entry(self):
if self.check_token(BlockEntryToken):
token = self.get_token()
if not self.check_token(BlockEntryToken, BlockEndToken):
self.states.append(self.parse_block_sequence_entry)
return self.parse_block_node()
else:
self.state = self.parse_block_sequence_entry
return self.process_empty_scalar(token.end_mark)
if not self.check_token(BlockEndToken):
token = self.peek_token()
raise ParserError("while parsing a block collection", self.marks[-1],
"expected <block end>, but found %r" % token.id, token.start_mark)
token = self.get_token()
event = SequenceEndEvent(token.start_mark, token.end_mark)
self.state = self.states.pop()
self.marks.pop()
return event
# indentless_sequence ::= (BLOCK-ENTRY block_node?)+
def parse_indentless_sequence_entry(self):
if self.check_token(BlockEntryToken):
token = self.get_token()
if not self.check_token(BlockEntryToken,
KeyToken, ValueToken, BlockEndToken):
self.states.append(self.parse_indentless_sequence_entry)
return self.parse_block_node()
else:
self.state = self.parse_indentless_sequence_entry
return self.process_empty_scalar(token.end_mark)
token = self.peek_token()
event = SequenceEndEvent(token.start_mark, token.start_mark)
self.state = self.states.pop()
return event
# block_mapping ::= BLOCK-MAPPING_START
# ((KEY block_node_or_indentless_sequence?)?
# (VALUE block_node_or_indentless_sequence?)?)*
# BLOCK-END
def parse_block_mapping_first_key(self):
token = self.get_token()
self.marks.append(token.start_mark)
return self.parse_block_mapping_key()
def parse_block_mapping_key(self):
if self.check_token(KeyToken):
token = self.get_token()
if not self.check_token(KeyToken, ValueToken, BlockEndToken):
self.states.append(self.parse_block_mapping_value)
return self.parse_block_node_or_indentless_sequence()
else:
self.state = self.parse_block_mapping_value
return self.process_empty_scalar(token.end_mark)
if not self.check_token(BlockEndToken):
token = self.peek_token()
raise ParserError("while parsing a block mapping", self.marks[-1],
"expected <block end>, but found %r" % token.id, token.start_mark)
token = self.get_token()
event = MappingEndEvent(token.start_mark, token.end_mark)
self.state = self.states.pop()
self.marks.pop()
return event
def parse_block_mapping_value(self):
if self.check_token(ValueToken):
token = self.get_token()
if not self.check_token(KeyToken, ValueToken, BlockEndToken):
self.states.append(self.parse_block_mapping_key)
return self.parse_block_node_or_indentless_sequence()
else:
self.state = self.parse_block_mapping_key
return self.process_empty_scalar(token.end_mark)
else:
self.state = self.parse_block_mapping_key
token = self.peek_token()
return self.process_empty_scalar(token.start_mark)
# flow_sequence ::= FLOW-SEQUENCE-START
# (flow_sequence_entry FLOW-ENTRY)*
# flow_sequence_entry?
# FLOW-SEQUENCE-END
# flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
#
# Note that while production rules for both flow_sequence_entry and
# flow_mapping_entry are equal, their interpretations are different.
# For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?`
# generate an inline mapping (set syntax).
def parse_flow_sequence_first_entry(self):
token = self.get_token()
self.marks.append(token.start_mark)
return self.parse_flow_sequence_entry(first=True)
def parse_flow_sequence_entry(self, first=False):
if not self.check_token(FlowSequenceEndToken):
if not first:
if self.check_token(FlowEntryToken):
self.get_token()
else:
token = self.peek_token()
raise ParserError("while parsing a flow sequence", self.marks[-1],
"expected ',' or ']', but got %r" % token.id, token.start_mark)
if self.check_token(KeyToken):
token = self.peek_token()
event = MappingStartEvent(None, None, True,
token.start_mark, token.end_mark,
flow_style=True)
self.state = self.parse_flow_sequence_entry_mapping_key
return event
elif not self.check_token(FlowSequenceEndToken):
self.states.append(self.parse_flow_sequence_entry)
return self.parse_flow_node()
token = self.get_token()
event = SequenceEndEvent(token.start_mark, token.end_mark)
self.state = self.states.pop()
self.marks.pop()
return event
def parse_flow_sequence_entry_mapping_key(self):
token = self.get_token()
if not self.check_token(ValueToken,
FlowEntryToken, FlowSequenceEndToken):
self.states.append(self.parse_flow_sequence_entry_mapping_value)
return self.parse_flow_node()
else:
self.state = self.parse_flow_sequence_entry_mapping_value
return self.process_empty_scalar(token.end_mark)
def parse_flow_sequence_entry_mapping_value(self):
if self.check_token(ValueToken):
token = self.get_token()
if not self.check_token(FlowEntryToken, FlowSequenceEndToken):
self.states.append(self.parse_flow_sequence_entry_mapping_end)
return self.parse_flow_node()
else:
self.state = self.parse_flow_sequence_entry_mapping_end
return self.process_empty_scalar(token.end_mark)
else:
self.state = self.parse_flow_sequence_entry_mapping_end
token = self.peek_token()
return self.process_empty_scalar(token.start_mark)
def parse_flow_sequence_entry_mapping_end(self):
self.state = self.parse_flow_sequence_entry
token = self.peek_token()
return MappingEndEvent(token.start_mark, token.start_mark)
# flow_mapping ::= FLOW-MAPPING-START
# (flow_mapping_entry FLOW-ENTRY)*
# flow_mapping_entry?
# FLOW-MAPPING-END
# flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
def parse_flow_mapping_first_key(self):
token = self.get_token()
self.marks.append(token.start_mark)
return self.parse_flow_mapping_key(first=True)
def parse_flow_mapping_key(self, first=False):
if not self.check_token(FlowMappingEndToken):
if not first:
if self.check_token(FlowEntryToken):
self.get_token()
else:
token = self.peek_token()
raise ParserError("while parsing a flow mapping", self.marks[-1],
"expected ',' or '}', but got %r" % token.id, token.start_mark)
if self.check_token(KeyToken):
token = self.get_token()
if not self.check_token(ValueToken,
FlowEntryToken, FlowMappingEndToken):
self.states.append(self.parse_flow_mapping_value)
return self.parse_flow_node()
else:
self.state = self.parse_flow_mapping_value
return self.process_empty_scalar(token.end_mark)
elif not self.check_token(FlowMappingEndToken):
self.states.append(self.parse_flow_mapping_empty_value)
return self.parse_flow_node()
token = self.get_token()
event = MappingEndEvent(token.start_mark, token.end_mark)
self.state = self.states.pop()
self.marks.pop()
return event
def parse_flow_mapping_value(self):
if self.check_token(ValueToken):
token = self.get_token()
if not self.check_token(FlowEntryToken, FlowMappingEndToken):
self.states.append(self.parse_flow_mapping_key)
return self.parse_flow_node()
else:
self.state = self.parse_flow_mapping_key
return self.process_empty_scalar(token.end_mark)
else:
self.state = self.parse_flow_mapping_key
token = self.peek_token()
return self.process_empty_scalar(token.start_mark)
def parse_flow_mapping_empty_value(self):
self.state = self.parse_flow_mapping_key
return self.process_empty_scalar(self.peek_token().start_mark)
def process_empty_scalar(self, mark):
return ScalarEvent(None, None, (True, False), u'', mark, mark)

@ -1,188 +0,0 @@
# This module contains abstractions for the input stream. You don't have to
# looks further, there are no pretty code.
#
# We define two classes here.
#
# Mark(source, line, column)
# It's just a record and its only use is producing nice error messages.
# Parser does not use it for any other purposes.
#
# Reader(source, data)
# Reader determines the encoding of `data` and converts it to unicode.
# Reader provides the following methods and attributes:
# reader.peek(length=1) - return the next `length` characters
# reader.forward(length=1) - move the current position to `length` characters.
# reader.index - the number of the current character.
# reader.line, stream.column - the line and the column of the current character.
__all__ = ['Reader', 'ReaderError']
from error import YAMLError, Mark
import codecs, re, sys
has_ucs4 = sys.maxunicode > 0xffff
class ReaderError(YAMLError):
def __init__(self, name, position, character, encoding, reason):
self.name = name
self.character = character
self.position = position
self.encoding = encoding
self.reason = reason
def __str__(self):
if isinstance(self.character, str):
return "'%s' codec can't decode byte #x%02x: %s\n" \
" in \"%s\", position %d" \
% (self.encoding, ord(self.character), self.reason,
self.name, self.position)
else:
return "unacceptable character #x%04x: %s\n" \
" in \"%s\", position %d" \
% (self.character, self.reason,
self.name, self.position)
class Reader(object):
# Reader:
# - determines the data encoding and converts it to unicode,
# - checks if characters are in allowed range,
# - adds '\0' to the end.
# Reader accepts
# - a `str` object,
# - a `unicode` object,
# - a file-like object with its `read` method returning `str`,
# - a file-like object with its `read` method returning `unicode`.
# Yeah, it's ugly and slow.
def __init__(self, stream):
self.name = None
self.stream = None
self.stream_pointer = 0
self.eof = True
self.buffer = u''
self.pointer = 0
self.raw_buffer = None
self.raw_decode = None
self.encoding = None
self.index = 0
self.line = 0
self.column = 0
if isinstance(stream, unicode):
self.name = "<unicode string>"
self.check_printable(stream)
self.buffer = stream+u'\0'
elif isinstance(stream, str):
self.name = "<string>"
self.raw_buffer = stream
self.determine_encoding()
else:
self.stream = stream
self.name = getattr(stream, 'name', "<file>")
self.eof = False
self.raw_buffer = ''
self.determine_encoding()
def peek(self, index=0):
try:
return self.buffer[self.pointer+index]
except IndexError:
self.update(index+1)
return self.buffer[self.pointer+index]
def prefix(self, length=1):
if self.pointer+length >= len(self.buffer):
self.update(length)
return self.buffer[self.pointer:self.pointer+length]
def forward(self, length=1):
if self.pointer+length+1 >= len(self.buffer):
self.update(length+1)
while length:
ch = self.buffer[self.pointer]
self.pointer += 1
self.index += 1
if ch in u'\n\x85\u2028\u2029' \
or (ch == u'\r' and self.buffer[self.pointer] != u'\n'):
self.line += 1
self.column = 0
elif ch != u'\uFEFF':
self.column += 1
length -= 1
def get_mark(self):
if self.stream is None:
return Mark(self.name, self.index, self.line, self.column,
self.buffer, self.pointer)
else:
return Mark(self.name, self.index, self.line, self.column,
None, None)
def determine_encoding(self):
while not self.eof and len(self.raw_buffer) < 2:
self.update_raw()
if not isinstance(self.raw_buffer, unicode):
if self.raw_buffer.startswith(codecs.BOM_UTF16_LE):
self.raw_decode = codecs.utf_16_le_decode
self.encoding = 'utf-16-le'
elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE):
self.raw_decode = codecs.utf_16_be_decode
self.encoding = 'utf-16-be'
else:
self.raw_decode = codecs.utf_8_decode
self.encoding = 'utf-8'
self.update(1)
if has_ucs4:
NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]')
else:
NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]')
def check_printable(self, data):
match = self.NON_PRINTABLE.search(data)
if match:
character = match.group()
position = self.index+(len(self.buffer)-self.pointer)+match.start()
raise ReaderError(self.name, position, ord(character),
'unicode', "special characters are not allowed")
def update(self, length):
if self.raw_buffer is None:
return
self.buffer = self.buffer[self.pointer:]
self.pointer = 0
while len(self.buffer) < length:
if not self.eof:
self.update_raw()
if self.raw_decode is not None:
try:
data, converted = self.raw_decode(self.raw_buffer,
'strict', self.eof)
except UnicodeDecodeError, exc:
character = exc.object[exc.start]
if self.stream is not None:
position = self.stream_pointer-len(self.raw_buffer)+exc.start
else:
position = exc.start
raise ReaderError(self.name, position, character,
exc.encoding, exc.reason)
else:
data = self.raw_buffer
converted = len(data)
self.check_printable(data)
self.buffer += data
self.raw_buffer = self.raw_buffer[converted:]
if self.eof:
self.buffer += u'\0'
self.raw_buffer = None
break
def update_raw(self, size=1024):
data = self.stream.read(size)
if data:
self.raw_buffer += data
self.stream_pointer += len(data)
else:
self.eof = True

@ -1,488 +0,0 @@
__all__ = ['BaseRepresenter', 'SafeRepresenter', 'Representer',
'RepresenterError']
from error import *
from nodes import *
import datetime
import sys, copy_reg, types
class RepresenterError(YAMLError):
pass
class BaseRepresenter(object):
yaml_representers = {}
yaml_multi_representers = {}
def __init__(self, default_style=None, default_flow_style=False, sort_keys=True):
self.default_style = default_style
self.default_flow_style = default_flow_style
self.sort_keys = sort_keys
self.represented_objects = {}
self.object_keeper = []
self.alias_key = None
def represent(self, data):
node = self.represent_data(data)
self.serialize(node)
self.represented_objects = {}
self.object_keeper = []
self.alias_key = None
def get_classobj_bases(self, cls):
bases = [cls]
for base in cls.__bases__:
bases.extend(self.get_classobj_bases(base))
return bases
def represent_data(self, data):
if self.ignore_aliases(data):
self.alias_key = None
else:
self.alias_key = id(data)
if self.alias_key is not None:
if self.alias_key in self.represented_objects:
node = self.represented_objects[self.alias_key]
#if node is None:
# raise RepresenterError("recursive objects are not allowed: %r" % data)
return node
#self.represented_objects[alias_key] = None
self.object_keeper.append(data)
data_types = type(data).__mro__
if type(data) is types.InstanceType:
data_types = self.get_classobj_bases(data.__class__)+list(data_types)
if data_types[0] in self.yaml_representers:
node = self.yaml_representers[data_types[0]](self, data)
else:
for data_type in data_types:
if data_type in self.yaml_multi_representers:
node = self.yaml_multi_representers[data_type](self, data)
break
else:
if None in self.yaml_multi_representers:
node = self.yaml_multi_representers[None](self, data)
elif None in self.yaml_representers:
node = self.yaml_representers[None](self, data)
else:
node = ScalarNode(None, unicode(data))
#if alias_key is not None:
# self.represented_objects[alias_key] = node
return node
def add_representer(cls, data_type, representer):
if not 'yaml_representers' in cls.__dict__:
cls.yaml_representers = cls.yaml_representers.copy()
cls.yaml_representers[data_type] = representer
add_representer = classmethod(add_representer)
def add_multi_representer(cls, data_type, representer):
if not 'yaml_multi_representers' in cls.__dict__:
cls.yaml_multi_representers = cls.yaml_multi_representers.copy()
cls.yaml_multi_representers[data_type] = representer
add_multi_representer = classmethod(add_multi_representer)
def represent_scalar(self, tag, value, style=None):
if style is None:
style = self.default_style
node = ScalarNode(tag, value, style=style)
if self.alias_key is not None:
self.represented_objects[self.alias_key] = node
return node
def represent_sequence(self, tag, sequence, flow_style=None):
value = []
node = SequenceNode(tag, value, flow_style=flow_style)
if self.alias_key is not None:
self.represented_objects[self.alias_key] = node
best_style = True
for item in sequence:
node_item = self.represent_data(item)
if not (isinstance(node_item, ScalarNode) and not node_item.style):
best_style = False
value.append(node_item)
if flow_style is None:
if self.default_flow_style is not None:
node.flow_style = self.default_flow_style
else:
node.flow_style = best_style
return node
def represent_mapping(self, tag, mapping, flow_style=None):
value = []
node = MappingNode(tag, value, flow_style=flow_style)
if self.alias_key is not None:
self.represented_objects[self.alias_key] = node
best_style = True
if hasattr(mapping, 'items'):
mapping = mapping.items()
if self.sort_keys:
mapping.sort()
for item_key, item_value in mapping:
node_key = self.represent_data(item_key)
node_value = self.represent_data(item_value)
if not (isinstance(node_key, ScalarNode) and not node_key.style):
best_style = False
if not (isinstance(node_value, ScalarNode) and not node_value.style):
best_style = False
value.append((node_key, node_value))
if flow_style is None:
if self.default_flow_style is not None:
node.flow_style = self.default_flow_style
else:
node.flow_style = best_style
return node
def ignore_aliases(self, data):
return False
class SafeRepresenter(BaseRepresenter):
def ignore_aliases(self, data):
if data is None:
return True
if isinstance(data, tuple) and data == ():
return True
if isinstance(data, (str, unicode, bool, int, float)):
return True
def represent_none(self, data):
return self.represent_scalar(u'tag:yaml.org,2002:null',
u'null')
def represent_str(self, data):
tag = None
style = None
try:
data = unicode(data, 'ascii')
tag = u'tag:yaml.org,2002:str'
except UnicodeDecodeError:
try:
data = unicode(data, 'utf-8')
tag = u'tag:yaml.org,2002:str'
except UnicodeDecodeError:
data = data.encode('base64')
tag = u'tag:yaml.org,2002:binary'
style = '|'
return self.represent_scalar(tag, data, style=style)
def represent_unicode(self, data):
return self.represent_scalar(u'tag:yaml.org,2002:str', data)
def represent_bool(self, data):
if data:
value = u'true'
else:
value = u'false'
return self.represent_scalar(u'tag:yaml.org,2002:bool', value)
def represent_int(self, data):
return self.represent_scalar(u'tag:yaml.org,2002:int', unicode(data))
def represent_long(self, data):
return self.represent_scalar(u'tag:yaml.org,2002:int', unicode(data))
inf_value = 1e300
while repr(inf_value) != repr(inf_value*inf_value):
inf_value *= inf_value
def represent_float(self, data):
if data != data or (data == 0.0 and data == 1.0):
value = u'.nan'
elif data == self.inf_value:
value = u'.inf'
elif data == -self.inf_value:
value = u'-.inf'
else:
value = unicode(repr(data)).lower()
# Note that in some cases `repr(data)` represents a float number
# without the decimal parts. For instance:
# >>> repr(1e17)
# '1e17'
# Unfortunately, this is not a valid float representation according
# to the definition of the `!!float` tag. We fix this by adding
# '.0' before the 'e' symbol.
if u'.' not in value and u'e' in value:
value = value.replace(u'e', u'.0e', 1)
return self.represent_scalar(u'tag:yaml.org,2002:float', value)
def represent_list(self, data):
#pairs = (len(data) > 0 and isinstance(data, list))
#if pairs:
# for item in data:
# if not isinstance(item, tuple) or len(item) != 2:
# pairs = False
# break
#if not pairs:
return self.represent_sequence(u'tag:yaml.org,2002:seq', data)
#value = []
#for item_key, item_value in data:
# value.append(self.represent_mapping(u'tag:yaml.org,2002:map',
# [(item_key, item_value)]))
#return SequenceNode(u'tag:yaml.org,2002:pairs', value)
def represent_dict(self, data):
return self.represent_mapping(u'tag:yaml.org,2002:map', data)
def represent_set(self, data):
value = {}
for key in data:
value[key] = None
return self.represent_mapping(u'tag:yaml.org,2002:set', value)
def represent_date(self, data):
value = unicode(data.isoformat())
return self.represent_scalar(u'tag:yaml.org,2002:timestamp', value)
def represent_datetime(self, data):
value = unicode(data.isoformat(' '))
return self.represent_scalar(u'tag:yaml.org,2002:timestamp', value)
def represent_yaml_object(self, tag, data, cls, flow_style=None):
if hasattr(data, '__getstate__'):
state = data.__getstate__()
else:
state = data.__dict__.copy()
return self.represent_mapping(tag, state, flow_style=flow_style)
def represent_undefined(self, data):
raise RepresenterError("cannot represent an object", data)
SafeRepresenter.add_representer(type(None),
SafeRepresenter.represent_none)
SafeRepresenter.add_representer(str,
SafeRepresenter.represent_str)
SafeRepresenter.add_representer(unicode,
SafeRepresenter.represent_unicode)
SafeRepresenter.add_representer(bool,
SafeRepresenter.represent_bool)
SafeRepresenter.add_representer(int,
SafeRepresenter.represent_int)
SafeRepresenter.add_representer(long,
SafeRepresenter.represent_long)
SafeRepresenter.add_representer(float,
SafeRepresenter.represent_float)
SafeRepresenter.add_representer(list,
SafeRepresenter.represent_list)
SafeRepresenter.add_representer(tuple,
SafeRepresenter.represent_list)
SafeRepresenter.add_representer(dict,
SafeRepresenter.represent_dict)
SafeRepresenter.add_representer(set,
SafeRepresenter.represent_set)
SafeRepresenter.add_representer(datetime.date,
SafeRepresenter.represent_date)
SafeRepresenter.add_representer(datetime.datetime,
SafeRepresenter.represent_datetime)
SafeRepresenter.add_representer(None,
SafeRepresenter.represent_undefined)
class Representer(SafeRepresenter):
def represent_str(self, data):
tag = None
style = None
try:
data = unicode(data, 'ascii')
tag = u'tag:yaml.org,2002:str'
except UnicodeDecodeError:
try:
data = unicode(data, 'utf-8')
tag = u'tag:yaml.org,2002:python/str'
except UnicodeDecodeError:
data = data.encode('base64')
tag = u'tag:yaml.org,2002:binary'
style = '|'
return self.represent_scalar(tag, data, style=style)
def represent_unicode(self, data):
tag = None
try:
data.encode('ascii')
tag = u'tag:yaml.org,2002:python/unicode'
except UnicodeEncodeError:
tag = u'tag:yaml.org,2002:str'
return self.represent_scalar(tag, data)
def represent_long(self, data):
tag = u'tag:yaml.org,2002:int'
if int(data) is not data:
tag = u'tag:yaml.org,2002:python/long'
return self.represent_scalar(tag, unicode(data))
def represent_complex(self, data):
if data.imag == 0.0:
data = u'%r' % data.real
elif data.real == 0.0:
data = u'%rj' % data.imag
elif data.imag > 0:
data = u'%r+%rj' % (data.real, data.imag)
else:
data = u'%r%rj' % (data.real, data.imag)
return self.represent_scalar(u'tag:yaml.org,2002:python/complex', data)
def represent_tuple(self, data):
return self.represent_sequence(u'tag:yaml.org,2002:python/tuple', data)
def represent_name(self, data):
name = u'%s.%s' % (data.__module__, data.__name__)
return self.represent_scalar(u'tag:yaml.org,2002:python/name:'+name, u'')
def represent_module(self, data):
return self.represent_scalar(
u'tag:yaml.org,2002:python/module:'+data.__name__, u'')
def represent_instance(self, data):
# For instances of classic classes, we use __getinitargs__ and
# __getstate__ to serialize the data.
# If data.__getinitargs__ exists, the object must be reconstructed by
# calling cls(**args), where args is a tuple returned by
# __getinitargs__. Otherwise, the cls.__init__ method should never be
# called and the class instance is created by instantiating a trivial
# class and assigning to the instance's __class__ variable.
# If data.__getstate__ exists, it returns the state of the object.
# Otherwise, the state of the object is data.__dict__.
# We produce either a !!python/object or !!python/object/new node.
# If data.__getinitargs__ does not exist and state is a dictionary, we
# produce a !!python/object node . Otherwise we produce a
# !!python/object/new node.
cls = data.__class__
class_name = u'%s.%s' % (cls.__module__, cls.__name__)
args = None
state = None
if hasattr(data, '__getinitargs__'):
args = list(data.__getinitargs__())
if hasattr(data, '__getstate__'):
state = data.__getstate__()
else:
state = data.__dict__
if args is None and isinstance(state, dict):
return self.represent_mapping(
u'tag:yaml.org,2002:python/object:'+class_name, state)
if isinstance(state, dict) and not state:
return self.represent_sequence(
u'tag:yaml.org,2002:python/object/new:'+class_name, args)
value = {}
if args:
value['args'] = args
value['state'] = state
return self.represent_mapping(
u'tag:yaml.org,2002:python/object/new:'+class_name, value)
def represent_object(self, data):
# We use __reduce__ API to save the data. data.__reduce__ returns
# a tuple of length 2-5:
# (function, args, state, listitems, dictitems)
# For reconstructing, we calls function(*args), then set its state,
# listitems, and dictitems if they are not None.
# A special case is when function.__name__ == '__newobj__'. In this
# case we create the object with args[0].__new__(*args).
# Another special case is when __reduce__ returns a string - we don't
# support it.
# We produce a !!python/object, !!python/object/new or
# !!python/object/apply node.
cls = type(data)
if cls in copy_reg.dispatch_table:
reduce = copy_reg.dispatch_table[cls](data)
elif hasattr(data, '__reduce_ex__'):
reduce = data.__reduce_ex__(2)
elif hasattr(data, '__reduce__'):
reduce = data.__reduce__()
else:
raise RepresenterError("cannot represent an object", data)
reduce = (list(reduce)+[None]*5)[:5]
function, args, state, listitems, dictitems = reduce
args = list(args)
if state is None:
state = {}
if listitems is not None:
listitems = list(listitems)
if dictitems is not None:
dictitems = dict(dictitems)
if function.__name__ == '__newobj__':
function = args[0]
args = args[1:]
tag = u'tag:yaml.org,2002:python/object/new:'
newobj = True
else:
tag = u'tag:yaml.org,2002:python/object/apply:'
newobj = False
function_name = u'%s.%s' % (function.__module__, function.__name__)
if not args and not listitems and not dictitems \
and isinstance(state, dict) and newobj:
return self.represent_mapping(
u'tag:yaml.org,2002:python/object:'+function_name, state)
if not listitems and not dictitems \
and isinstance(state, dict) and not state:
return self.represent_sequence(tag+function_name, args)
value = {}
if args:
value['args'] = args
if state or not isinstance(state, dict):
value['state'] = state
if listitems:
value['listitems'] = listitems
if dictitems:
value['dictitems'] = dictitems
return self.represent_mapping(tag+function_name, value)
Representer.add_representer(str,
Representer.represent_str)
Representer.add_representer(unicode,
Representer.represent_unicode)
Representer.add_representer(long,
Representer.represent_long)
Representer.add_representer(complex,
Representer.represent_complex)
Representer.add_representer(tuple,
Representer.represent_tuple)
Representer.add_representer(type,
Representer.represent_name)
Representer.add_representer(types.ClassType,
Representer.represent_name)
Representer.add_representer(types.FunctionType,
Representer.represent_name)
Representer.add_representer(types.BuiltinFunctionType,
Representer.represent_name)
Representer.add_representer(types.ModuleType,
Representer.represent_module)
Representer.add_multi_representer(types.InstanceType,
Representer.represent_instance)
Representer.add_multi_representer(object,
Representer.represent_object)

@ -1,227 +0,0 @@
__all__ = ['BaseResolver', 'Resolver']
from error import *
from nodes import *
import re
class ResolverError(YAMLError):
pass
class BaseResolver(object):
DEFAULT_SCALAR_TAG = u'tag:yaml.org,2002:str'
DEFAULT_SEQUENCE_TAG = u'tag:yaml.org,2002:seq'
DEFAULT_MAPPING_TAG = u'tag:yaml.org,2002:map'
yaml_implicit_resolvers = {}
yaml_path_resolvers = {}
def __init__(self):
self.resolver_exact_paths = []
self.resolver_prefix_paths = []
def add_implicit_resolver(cls, tag, regexp, first):
if not 'yaml_implicit_resolvers' in cls.__dict__:
implicit_resolvers = {}
for key in cls.yaml_implicit_resolvers:
implicit_resolvers[key] = cls.yaml_implicit_resolvers[key][:]
cls.yaml_implicit_resolvers = implicit_resolvers
if first is None:
first = [None]
for ch in first:
cls.yaml_implicit_resolvers.setdefault(ch, []).append((tag, regexp))
add_implicit_resolver = classmethod(add_implicit_resolver)
def add_path_resolver(cls, tag, path, kind=None):
# Note: `add_path_resolver` is experimental. The API could be changed.
# `new_path` is a pattern that is matched against the path from the
# root to the node that is being considered. `node_path` elements are
# tuples `(node_check, index_check)`. `node_check` is a node class:
# `ScalarNode`, `SequenceNode`, `MappingNode` or `None`. `None`
# matches any kind of a node. `index_check` could be `None`, a boolean
# value, a string value, or a number. `None` and `False` match against
# any _value_ of sequence and mapping nodes. `True` matches against
# any _key_ of a mapping node. A string `index_check` matches against
# a mapping value that corresponds to a scalar key which content is
# equal to the `index_check` value. An integer `index_check` matches
# against a sequence value with the index equal to `index_check`.
if not 'yaml_path_resolvers' in cls.__dict__:
cls.yaml_path_resolvers = cls.yaml_path_resolvers.copy()
new_path = []
for element in path:
if isinstance(element, (list, tuple)):
if len(element) == 2:
node_check, index_check = element
elif len(element) == 1:
node_check = element[0]
index_check = True
else:
raise ResolverError("Invalid path element: %s" % element)
else:
node_check = None
index_check = element
if node_check is str:
node_check = ScalarNode
elif node_check is list:
node_check = SequenceNode
elif node_check is dict:
node_check = MappingNode
elif node_check not in [ScalarNode, SequenceNode, MappingNode] \
and not isinstance(node_check, basestring) \
and node_check is not None:
raise ResolverError("Invalid node checker: %s" % node_check)
if not isinstance(index_check, (basestring, int)) \
and index_check is not None:
raise ResolverError("Invalid index checker: %s" % index_check)
new_path.append((node_check, index_check))
if kind is str:
kind = ScalarNode
elif kind is list:
kind = SequenceNode
elif kind is dict:
kind = MappingNode
elif kind not in [ScalarNode, SequenceNode, MappingNode] \
and kind is not None:
raise ResolverError("Invalid node kind: %s" % kind)
cls.yaml_path_resolvers[tuple(new_path), kind] = tag
add_path_resolver = classmethod(add_path_resolver)
def descend_resolver(self, current_node, current_index):
if not self.yaml_path_resolvers:
return
exact_paths = {}
prefix_paths = []
if current_node:
depth = len(self.resolver_prefix_paths)
for path, kind in self.resolver_prefix_paths[-1]:
if self.check_resolver_prefix(depth, path, kind,
current_node, current_index):
if len(path) > depth:
prefix_paths.append((path, kind))
else:
exact_paths[kind] = self.yaml_path_resolvers[path, kind]
else:
for path, kind in self.yaml_path_resolvers:
if not path:
exact_paths[kind] = self.yaml_path_resolvers[path, kind]
else:
prefix_paths.append((path, kind))
self.resolver_exact_paths.append(exact_paths)
self.resolver_prefix_paths.append(prefix_paths)
def ascend_resolver(self):
if not self.yaml_path_resolvers:
return
self.resolver_exact_paths.pop()
self.resolver_prefix_paths.pop()
def check_resolver_prefix(self, depth, path, kind,
current_node, current_index):
node_check, index_check = path[depth-1]
if isinstance(node_check, basestring):
if current_node.tag != node_check:
return
elif node_check is not None:
if not isinstance(current_node, node_check):
return
if index_check is True and current_index is not None:
return
if (index_check is False or index_check is None) \
and current_index is None:
return
if isinstance(index_check, basestring):
if not (isinstance(current_index, ScalarNode)
and index_check == current_index.value):
return
elif isinstance(index_check, int) and not isinstance(index_check, bool):
if index_check != current_index:
return
return True
def resolve(self, kind, value, implicit):
if kind is ScalarNode and implicit[0]:
if value == u'':
resolvers = self.yaml_implicit_resolvers.get(u'', [])
else:
resolvers = self.yaml_implicit_resolvers.get(value[0], [])
resolvers += self.yaml_implicit_resolvers.get(None, [])
for tag, regexp in resolvers:
if regexp.match(value):
return tag
implicit = implicit[1]
if self.yaml_path_resolvers:
exact_paths = self.resolver_exact_paths[-1]
if kind in exact_paths:
return exact_paths[kind]
if None in exact_paths:
return exact_paths[None]
if kind is ScalarNode:
return self.DEFAULT_SCALAR_TAG
elif kind is SequenceNode:
return self.DEFAULT_SEQUENCE_TAG
elif kind is MappingNode:
return self.DEFAULT_MAPPING_TAG
class Resolver(BaseResolver):
pass
Resolver.add_implicit_resolver(
u'tag:yaml.org,2002:bool',
re.compile(ur'''^(?:yes|Yes|YES|no|No|NO
|true|True|TRUE|false|False|FALSE
|on|On|ON|off|Off|OFF)$''', re.X),
list(u'yYnNtTfFoO'))
Resolver.add_implicit_resolver(
u'tag:yaml.org,2002:float',
re.compile(ur'''^(?:[-+]?(?:[0-9][0-9_]*)\.[0-9_]*(?:[eE][-+][0-9]+)?
|\.[0-9_]+(?:[eE][-+][0-9]+)?
|[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\.[0-9_]*
|[-+]?\.(?:inf|Inf|INF)
|\.(?:nan|NaN|NAN))$''', re.X),
list(u'-+0123456789.'))
Resolver.add_implicit_resolver(
u'tag:yaml.org,2002:int',
re.compile(ur'''^(?:[-+]?0b[0-1_]+
|[-+]?0[0-7_]+
|[-+]?(?:0|[1-9][0-9_]*)
|[-+]?0x[0-9a-fA-F_]+
|[-+]?[1-9][0-9_]*(?::[0-5]?[0-9])+)$''', re.X),
list(u'-+0123456789'))
Resolver.add_implicit_resolver(
u'tag:yaml.org,2002:merge',
re.compile(ur'^(?:<<)$'),
[u'<'])
Resolver.add_implicit_resolver(
u'tag:yaml.org,2002:null',
re.compile(ur'''^(?: ~
|null|Null|NULL
| )$''', re.X),
[u'~', u'n', u'N', u''])
Resolver.add_implicit_resolver(
u'tag:yaml.org,2002:timestamp',
re.compile(ur'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]
|[0-9][0-9][0-9][0-9] -[0-9][0-9]? -[0-9][0-9]?
(?:[Tt]|[ \t]+)[0-9][0-9]?
:[0-9][0-9] :[0-9][0-9] (?:\.[0-9]*)?
(?:[ \t]*(?:Z|[-+][0-9][0-9]?(?::[0-9][0-9])?))?)$''', re.X),
list(u'0123456789'))
Resolver.add_implicit_resolver(
u'tag:yaml.org,2002:value',
re.compile(ur'^(?:=)$'),
[u'='])
# The following resolver is only for documentation purposes. It cannot work
# because plain scalars cannot start with '!', '&', or '*'.
Resolver.add_implicit_resolver(
u'tag:yaml.org,2002:yaml',
re.compile(ur'^(?:!|&|\*)$'),
list(u'!&*'))

File diff suppressed because it is too large Load Diff

@ -1,111 +0,0 @@
__all__ = ['Serializer', 'SerializerError']
from error import YAMLError
from events import *
from nodes import *
class SerializerError(YAMLError):
pass
class Serializer(object):
ANCHOR_TEMPLATE = u'id%03d'
def __init__(self, encoding=None,
explicit_start=None, explicit_end=None, version=None, tags=None):
self.use_encoding = encoding
self.use_explicit_start = explicit_start
self.use_explicit_end = explicit_end
self.use_version = version
self.use_tags = tags
self.serialized_nodes = {}
self.anchors = {}
self.last_anchor_id = 0
self.closed = None
def open(self):
if self.closed is None:
self.emit(StreamStartEvent(encoding=self.use_encoding))
self.closed = False
elif self.closed:
raise SerializerError("serializer is closed")
else:
raise SerializerError("serializer is already opened")
def close(self):
if self.closed is None:
raise SerializerError("serializer is not opened")
elif not self.closed:
self.emit(StreamEndEvent())
self.closed = True
#def __del__(self):
# self.close()
def serialize(self, node):
if self.closed is None:
raise SerializerError("serializer is not opened")
elif self.closed:
raise SerializerError("serializer is closed")
self.emit(DocumentStartEvent(explicit=self.use_explicit_start,
version=self.use_version, tags=self.use_tags))
self.anchor_node(node)
self.serialize_node(node, None, None)
self.emit(DocumentEndEvent(explicit=self.use_explicit_end))
self.serialized_nodes = {}
self.anchors = {}
self.last_anchor_id = 0
def anchor_node(self, node):
if node in self.anchors:
if self.anchors[node] is None:
self.anchors[node] = self.generate_anchor(node)
else:
self.anchors[node] = None
if isinstance(node, SequenceNode):
for item in node.value:
self.anchor_node(item)
elif isinstance(node, MappingNode):
for key, value in node.value:
self.anchor_node(key)
self.anchor_node(value)
def generate_anchor(self, node):
self.last_anchor_id += 1
return self.ANCHOR_TEMPLATE % self.last_anchor_id
def serialize_node(self, node, parent, index):
alias = self.anchors[node]
if node in self.serialized_nodes:
self.emit(AliasEvent(alias))
else:
self.serialized_nodes[node] = True
self.descend_resolver(parent, index)
if isinstance(node, ScalarNode):
detected_tag = self.resolve(ScalarNode, node.value, (True, False))
default_tag = self.resolve(ScalarNode, node.value, (False, True))
implicit = (node.tag == detected_tag), (node.tag == default_tag)
self.emit(ScalarEvent(alias, node.tag, implicit, node.value,
style=node.style))
elif isinstance(node, SequenceNode):
implicit = (node.tag
== self.resolve(SequenceNode, node.value, True))
self.emit(SequenceStartEvent(alias, node.tag, implicit,
flow_style=node.flow_style))
index = 0
for item in node.value:
self.serialize_node(item, node, index)
index += 1
self.emit(SequenceEndEvent())
elif isinstance(node, MappingNode):
implicit = (node.tag
== self.resolve(MappingNode, node.value, True))
self.emit(MappingStartEvent(alias, node.tag, implicit,
flow_style=node.flow_style))
for key, value in node.value:
self.serialize_node(key, node, None)
self.serialize_node(value, node, key)
self.emit(MappingEndEvent())
self.ascend_resolver()

@ -1,104 +0,0 @@
class Token(object):
def __init__(self, start_mark, end_mark):
self.start_mark = start_mark
self.end_mark = end_mark
def __repr__(self):
attributes = [key for key in self.__dict__
if not key.endswith('_mark')]
attributes.sort()
arguments = ', '.join(['%s=%r' % (key, getattr(self, key))
for key in attributes])
return '%s(%s)' % (self.__class__.__name__, arguments)
#class BOMToken(Token):
# id = '<byte order mark>'
class DirectiveToken(Token):
id = '<directive>'
def __init__(self, name, value, start_mark, end_mark):
self.name = name
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
class DocumentStartToken(Token):
id = '<document start>'
class DocumentEndToken(Token):
id = '<document end>'
class StreamStartToken(Token):
id = '<stream start>'
def __init__(self, start_mark=None, end_mark=None,
encoding=None):
self.start_mark = start_mark
self.end_mark = end_mark
self.encoding = encoding
class StreamEndToken(Token):
id = '<stream end>'
class BlockSequenceStartToken(Token):
id = '<block sequence start>'
class BlockMappingStartToken(Token):
id = '<block mapping start>'
class BlockEndToken(Token):
id = '<block end>'
class FlowSequenceStartToken(Token):
id = '['
class FlowMappingStartToken(Token):
id = '{'
class FlowSequenceEndToken(Token):
id = ']'
class FlowMappingEndToken(Token):
id = '}'
class KeyToken(Token):
id = '?'
class ValueToken(Token):
id = ':'
class BlockEntryToken(Token):
id = '-'
class FlowEntryToken(Token):
id = ','
class AliasToken(Token):
id = '<alias>'
def __init__(self, value, start_mark, end_mark):
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
class AnchorToken(Token):
id = '<anchor>'
def __init__(self, value, start_mark, end_mark):
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
class TagToken(Token):
id = '<tag>'
def __init__(self, value, start_mark, end_mark):
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
class ScalarToken(Token):
id = '<scalar>'
def __init__(self, value, plain, start_mark, end_mark, style=None):
self.value = value
self.plain = plain
self.start_mark = start_mark
self.end_mark = end_mark
self.style = style
Loading…
Cancel
Save