Upgraded some embedded dependencies to be ready for Python 3.10. This doesn't mean that it's fully supported right now.

2 years ago · 402c82d84f
parent 2d214bfbd5
commit 402c82d84f
244 changed files with 8217 additions and 96583 deletions
--- a/bazarr.py
+++ b/bazarr.py
@@ -20,8 +20,8 @@ def check_python_version():
        print("Python " + minimum_py3_str + " or greater required. "
              "Current version is " + platform.python_version() + ". Please upgrade Python.")
        sys.exit(1)
-    elif int(python_version[0]) == 3 and int(python_version[1]) == 9:
-        print("Python 3.9.x is unsupported. Current version is " + platform.python_version() +
+    elif int(python_version[0]) == 3 and int(python_version[1]) > 8:
+        print("Python version greater than 3.8.x is unsupported. Current version is " + platform.python_version() +
              ". Keep in mind that even if it works, you're on your own.")
    elif (int(python_version[0]) == minimum_py3_tuple[0] and int(python_version[1]) < minimum_py3_tuple[1]) or \
            (int(python_version[0]) != minimum_py3_tuple[0]):
--- a/libs/babelfish/__init__.py
+++ b/libs/babelfish/__init__.py
@@ -4,12 +4,6 @@
 # Use of this source code is governed by the 3-clause BSD license
 # that can be found in the LICENSE file.
 #
-__title__ = 'babelfish'
-__version__ = '0.5.5-dev'
-__author__ = 'Antoine Bertin'
-__license__ = 'BSD'
-__copyright__ = 'Copyright 2015 the BabelFish authors'
-
 import sys

 if sys.version_info[0] >= 3:
--- a/libs/babelfish/converters/__init__.py
+++ b/libs/babelfish/converters/__init__.py
@@ -2,17 +2,22 @@
 # Use of this source code is governed by the 3-clause BSD license
 # that can be found in the LICENSE file.
 #
-import collections
 from pkg_resources import iter_entry_points, EntryPoint
 from ..exceptions import LanguageConvertError, LanguageReverseError

+try:
+    # Python 3.3+
+    from collections.abc import Mapping, MutableMapping
+except ImportError:
+    from collections import Mapping, MutableMapping
+

 # from https://github.com/kennethreitz/requests/blob/master/requests/structures.py
-class CaseInsensitiveDict(collections.MutableMapping):
+class CaseInsensitiveDict(MutableMapping):
    """A case-insensitive ``dict``-like object.

    Implements all methods and operations of
-    ``collections.MutableMapping`` as well as dict's ``copy``. Also
+    ``collections.abc.MutableMapping`` as well as dict's ``copy``. Also
    provides ``lower_items``.

    All keys are expected to be strings. The structure remembers the
@@ -63,7 +68,7 @@ class CaseInsensitiveDict(collections.MutableMapping):
        )

    def __eq__(self, other):
-        if isinstance(other, collections.Mapping):
+        if isinstance(other, Mapping):
            other = CaseInsensitiveDict(other)
        else:
            return NotImplemented
--- a/libs/babelfish/converters/opensubtitles.py
+++ b/libs/babelfish/converters/opensubtitles.py
@@ -14,9 +14,9 @@ class OpenSubtitlesConverter(LanguageReverseConverter):
    def __init__(self):
        self.alpha3b_converter = language_converters['alpha3b']
        self.alpha2_converter = language_converters['alpha2']
-        self.to_opensubtitles = {('por', 'BR'): 'pob', ('gre', None): 'ell', ('srp', None): 'scc', ('srp', 'ME'): 'mne'}
+        self.to_opensubtitles = {('por', 'BR'): 'pob', ('gre', None): 'ell', ('srp', None): 'scc', ('srp', 'ME'): 'mne', ('chi', 'TW'): 'zht'}
        self.from_opensubtitles = CaseInsensitiveDict({'pob': ('por', 'BR'), 'pb': ('por', 'BR'), 'ell': ('ell', None),
-                                                       'scc': ('srp', None), 'mne': ('srp', 'ME')})
+                                                       'scc': ('srp', None), 'mne': ('srp', 'ME'), 'zht': ('zho', 'TW')})
        self.codes = (self.alpha2_converter.codes | self.alpha3b_converter.codes | set(self.from_opensubtitles.keys()))

    def convert(self, alpha3, country=None, script=None):
--- a/libs/babelfish/country.py
+++ b/libs/babelfish/country.py
@@ -4,6 +4,7 @@
 # Use of this source code is governed by the 3-clause BSD license
 # that can be found in the LICENSE file.
 #
+from __future__ import unicode_literals
 from collections import namedtuple
 from functools import partial
 from pkg_resources import resource_stream  # @UnresolvedImport
--- a/libs/babelfish/data/get_files.py
+++ b/libs/babelfish/data/get_files.py
@@ -1,45 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Copyright (c) 2013 the BabelFish authors. All rights reserved.
-# Use of this source code is governed by the 3-clause BSD license
-# that can be found in the LICENSE file.
-#
-from __future__ import unicode_literals
-import os.path
-import tempfile
-import zipfile
-import requests
-
-
-DATA_DIR = os.path.dirname(__file__)
-
-# iso-3166-1.txt
-print('Downloading ISO-3166-1 standard (ISO country codes)...')
-with open(os.path.join(DATA_DIR, 'iso-3166-1.txt'), 'w') as f:
-    r = requests.get('http://www.iso.org/iso/home/standards/country_codes/country_names_and_code_elements_txt.htm')
-    f.write(r.content.strip())
-
-# iso-639-3.tab
-print('Downloading ISO-639-3 standard (ISO language codes)...')
-with tempfile.TemporaryFile() as f:
-    r = requests.get('http://www-01.sil.org/iso639-3/iso-639-3_Code_Tables_20130531.zip')
-    f.write(r.content)
-    with zipfile.ZipFile(f) as z:
-        z.extract('iso-639-3.tab', DATA_DIR)
-
-# iso-15924
-print('Downloading ISO-15924 standard (ISO script codes)...')
-with tempfile.TemporaryFile() as f:
-    r = requests.get('http://www.unicode.org/iso15924/iso15924.txt.zip')
-    f.write(r.content)
-    with zipfile.ZipFile(f) as z:
-        z.extract('iso15924-utf8-20131012.txt', DATA_DIR)
-
-# opensubtitles supported languages
-print('Downloading OpenSubtitles supported languages...')
-with open(os.path.join(DATA_DIR, 'opensubtitles_languages.txt'), 'w') as f:
-    r = requests.get('http://www.opensubtitles.org/addons/export_languages.php')
-    f.write(r.content)
-
-print('Done!')
--- a/libs/babelfish/language.py
+++ b/libs/babelfish/language.py
@@ -4,6 +4,7 @@
 # Use of this source code is governed by the 3-clause BSD license
 # that can be found in the LICENSE file.
 #
+from __future__ import unicode_literals
 from collections import namedtuple
 from functools import partial
 from pkg_resources import resource_stream  # @UnresolvedImport
--- a/libs/babelfish/script.py
+++ b/libs/babelfish/script.py
@@ -4,6 +4,7 @@
 # Use of this source code is governed by the 3-clause BSD license
 # that can be found in the LICENSE file.
 #
+from __future__ import unicode_literals
 from collections import namedtuple
 from pkg_resources import resource_stream  # @UnresolvedImport
 from . import basestr
--- a/libs/babelfish/tests.py
+++ b/libs/babelfish/tests.py
@@ -1,377 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Copyright (c) 2013 the BabelFish authors. All rights reserved.
-# Use of this source code is governed by the 3-clause BSD license
-# that can be found in the LICENSE file.
-#
-from __future__ import unicode_literals
-import re
-import sys
-import pickle
-from unittest import TestCase, TestSuite, TestLoader, TextTestRunner
-from pkg_resources import resource_stream  # @UnresolvedImport
-from babelfish import (LANGUAGES, Language, Country, Script, language_converters, country_converters,
-    LanguageReverseConverter, LanguageConvertError, LanguageReverseError, CountryReverseError)
-
-
-if sys.version_info[:2] <= (2, 6):
-    _MAX_LENGTH = 80
-
-    def safe_repr(obj, short=False):
-        try:
-            result = repr(obj)
-        except Exception:
-            result = object.__repr__(obj)
-        if not short or len(result) < _MAX_LENGTH:
-            return result
-        return result[:_MAX_LENGTH] + ' [truncated]...'
-
-    class _AssertRaisesContext(object):
-        """A context manager used to implement TestCase.assertRaises* methods."""
-
-        def __init__(self, expected, test_case, expected_regexp=None):
-            self.expected = expected
-            self.failureException = test_case.failureException
-            self.expected_regexp = expected_regexp
-
-        def __enter__(self):
-            return self
-
-        def __exit__(self, exc_type, exc_value, tb):
-            if exc_type is None:
-                try:
-                    exc_name = self.expected.__name__
-                except AttributeError:
-                    exc_name = str(self.expected)
-                raise self.failureException(
-                    "{0} not raised".format(exc_name))
-            if not issubclass(exc_type, self.expected):
-                # let unexpected exceptions pass through
-                return False
-            self.exception = exc_value  # store for later retrieval
-            if self.expected_regexp is None:
-                return True
-
-            expected_regexp = self.expected_regexp
-            if isinstance(expected_regexp, basestring):
-                expected_regexp = re.compile(expected_regexp)
-            if not expected_regexp.search(str(exc_value)):
-                raise self.failureException('"%s" does not match "%s"' %
-                         (expected_regexp.pattern, str(exc_value)))
-            return True
-
-    class _Py26FixTestCase(object):
-        def assertIsNone(self, obj, msg=None):
-            """Same as self.assertTrue(obj is None), with a nicer default message."""
-            if obj is not None:
-                standardMsg = '%s is not None' % (safe_repr(obj),)
-                self.fail(self._formatMessage(msg, standardMsg))
-
-        def assertIsNotNone(self, obj, msg=None):
-            """Included for symmetry with assertIsNone."""
-            if obj is None:
-                standardMsg = 'unexpectedly None'
-                self.fail(self._formatMessage(msg, standardMsg))
-
-        def assertIn(self, member, container, msg=None):
-            """Just like self.assertTrue(a in b), but with a nicer default message."""
-            if member not in container:
-                standardMsg = '%s not found in %s' % (safe_repr(member),
-                                                      safe_repr(container))
-                self.fail(self._formatMessage(msg, standardMsg))
-
-        def assertNotIn(self, member, container, msg=None):
-            """Just like self.assertTrue(a not in b), but with a nicer default message."""
-            if member in container:
-                standardMsg = '%s unexpectedly found in %s' % (safe_repr(member),
-                                                            safe_repr(container))
-                self.fail(self._formatMessage(msg, standardMsg))
-
-        def assertIs(self, expr1, expr2, msg=None):
-            """Just like self.assertTrue(a is b), but with a nicer default message."""
-            if expr1 is not expr2:
-                standardMsg = '%s is not %s' % (safe_repr(expr1),
-                                                 safe_repr(expr2))
-                self.fail(self._formatMessage(msg, standardMsg))
-
-        def assertIsNot(self, expr1, expr2, msg=None):
-            """Just like self.assertTrue(a is not b), but with a nicer default message."""
-            if expr1 is expr2:
-                standardMsg = 'unexpectedly identical: %s' % (safe_repr(expr1),)
-                self.fail(self._formatMessage(msg, standardMsg))
-
-else:
-    class _Py26FixTestCase(object):
-        pass
-
-
-class TestScript(TestCase, _Py26FixTestCase):
-    def test_wrong_script(self):
-        self.assertRaises(ValueError, lambda: Script('Azer'))
-
-    def test_eq(self):
-        self.assertEqual(Script('Latn'), Script('Latn'))
-
-    def test_ne(self):
-        self.assertNotEqual(Script('Cyrl'), Script('Latn'))
-
-    def test_hash(self):
-        self.assertEqual(hash(Script('Hira')), hash('Hira'))
-
-    def test_pickle(self):
-        self.assertEqual(pickle.loads(pickle.dumps(Script('Latn'))), Script('Latn'))
-
-
-class TestCountry(TestCase, _Py26FixTestCase):
-    def test_wrong_country(self):
-        self.assertRaises(ValueError, lambda: Country('ZZ'))
-
-    def test_eq(self):
-        self.assertEqual(Country('US'), Country('US'))
-
-    def test_ne(self):
-        self.assertNotEqual(Country('GB'), Country('US'))
-        self.assertIsNotNone(Country('US'))
-
-    def test_hash(self):
-        self.assertEqual(hash(Country('US')), hash('US'))
-
-    def test_pickle(self):
-        for country in [Country('GB'), Country('US')]:
-            self.assertEqual(pickle.loads(pickle.dumps(country)), country)
-
-    def test_converter_name(self):
-        self.assertEqual(Country('US').name, 'UNITED STATES')
-        self.assertEqual(Country.fromname('UNITED STATES'), Country('US'))
-        self.assertEqual(Country.fromcode('UNITED STATES', 'name'), Country('US'))
-        self.assertRaises(CountryReverseError, lambda: Country.fromname('ZZZZZ'))
-        self.assertEqual(len(country_converters['name'].codes), 249)
-
-
-class TestLanguage(TestCase, _Py26FixTestCase):
-    def test_languages(self):
-        self.assertEqual(len(LANGUAGES), 7874)
-
-    def test_wrong_language(self):
-        self.assertRaises(ValueError, lambda: Language('zzz'))
-
-    def test_unknown_language(self):
-        self.assertEqual(Language('zzzz', unknown='und'), Language('und'))
-
-    def test_converter_alpha2(self):
-        self.assertEqual(Language('eng').alpha2, 'en')
-        self.assertEqual(Language.fromalpha2('en'), Language('eng'))
-        self.assertEqual(Language.fromcode('en', 'alpha2'), Language('eng'))
-        self.assertRaises(LanguageReverseError, lambda: Language.fromalpha2('zz'))
-        self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha2)
-        self.assertEqual(len(language_converters['alpha2'].codes), 184)
-
-    def test_converter_alpha3b(self):
-        self.assertEqual(Language('fra').alpha3b, 'fre')
-        self.assertEqual(Language.fromalpha3b('fre'), Language('fra'))
-        self.assertEqual(Language.fromcode('fre', 'alpha3b'), Language('fra'))
-        self.assertRaises(LanguageReverseError, lambda: Language.fromalpha3b('zzz'))
-        self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha3b)
-        self.assertEqual(len(language_converters['alpha3b'].codes), 418)
-
-    def test_converter_alpha3t(self):
-        self.assertEqual(Language('fra').alpha3t, 'fra')
-        self.assertEqual(Language.fromalpha3t('fra'), Language('fra'))
-        self.assertEqual(Language.fromcode('fra', 'alpha3t'), Language('fra'))
-        self.assertRaises(LanguageReverseError, lambda: Language.fromalpha3t('zzz'))
-        self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha3t)
-        self.assertEqual(len(language_converters['alpha3t'].codes), 418)
-
-    def test_converter_name(self):
-        self.assertEqual(Language('eng').name, 'English')
-        self.assertEqual(Language.fromname('English'), Language('eng'))
-        self.assertEqual(Language.fromcode('English', 'name'), Language('eng'))
-        self.assertRaises(LanguageReverseError, lambda: Language.fromname('Zzzzzzzzz'))
-        self.assertEqual(len(language_converters['name'].codes), 7874)
-
-    def test_converter_scope(self):
-        self.assertEqual(language_converters['scope'].codes, set(['I', 'S', 'M']))
-        self.assertEqual(Language('eng').scope, 'individual')
-        self.assertEqual(Language('und').scope, 'special')
-
-    def test_converter_type(self):
-        self.assertEqual(language_converters['type'].codes, set(['A', 'C', 'E', 'H', 'L', 'S']))
-        self.assertEqual(Language('eng').type, 'living')
-        self.assertEqual(Language('und').type, 'special')
-
-    def test_converter_opensubtitles(self):
-        self.assertEqual(Language('fra').opensubtitles, Language('fra').alpha3b)
-        self.assertEqual(Language('por', 'BR').opensubtitles, 'pob')
-        self.assertEqual(Language.fromopensubtitles('fre'), Language('fra'))
-        self.assertEqual(Language.fromopensubtitles('pob'), Language('por', 'BR'))
-        self.assertEqual(Language.fromopensubtitles('pb'), Language('por', 'BR'))
-        # Montenegrin is not recognized as an ISO language (yet?) but for now it is
-        # unofficially accepted as Serbian from Montenegro
-        self.assertEqual(Language.fromopensubtitles('mne'), Language('srp', 'ME'))
-        self.assertEqual(Language.fromcode('pob', 'opensubtitles'), Language('por', 'BR'))
-        self.assertRaises(LanguageReverseError, lambda: Language.fromopensubtitles('zzz'))
-        self.assertRaises(LanguageConvertError, lambda: Language('aaa').opensubtitles)
-        self.assertEqual(len(language_converters['opensubtitles'].codes), 607)
-
-        # test with all the LANGUAGES from the opensubtitles api
-        # downloaded from: http://www.opensubtitles.org/addons/export_languages.php
-        f = resource_stream('babelfish', 'data/opensubtitles_languages.txt')
-        f.readline()
-        for l in f:
-            idlang, alpha2, _, upload_enabled, web_enabled = l.decode('utf-8').strip().split('\t')
-            if not int(upload_enabled) and not int(web_enabled):
-                # do not test LANGUAGES that are too esoteric / not widely available
-                continue
-            self.assertEqual(Language.fromopensubtitles(idlang).opensubtitles, idlang)
-            if alpha2:
-                self.assertEqual(Language.fromopensubtitles(idlang), Language.fromopensubtitles(alpha2))
-        f.close()
-
-    def test_converter_opensubtitles_codes(self):
-        for code in language_converters['opensubtitles'].from_opensubtitles.keys():
-            self.assertIn(code, language_converters['opensubtitles'].codes)
-
-    def test_fromietf_country_script(self):
-        language = Language.fromietf('fra-FR-Latn')
-        self.assertEqual(language.alpha3, 'fra')
-        self.assertEqual(language.country, Country('FR'))
-        self.assertEqual(language.script, Script('Latn'))
-
-    def test_fromietf_country_no_script(self):
-        language = Language.fromietf('fra-FR')
-        self.assertEqual(language.alpha3, 'fra')
-        self.assertEqual(language.country, Country('FR'))
-        self.assertIsNone(language.script)
-
-    def test_fromietf_no_country_no_script(self):
-        language = Language.fromietf('fra-FR')
-        self.assertEqual(language.alpha3, 'fra')
-        self.assertEqual(language.country, Country('FR'))
-        self.assertIsNone(language.script)
-
-    def test_fromietf_no_country_script(self):
-        language = Language.fromietf('fra-Latn')
-        self.assertEqual(language.alpha3, 'fra')
-        self.assertIsNone(language.country)
-        self.assertEqual(language.script, Script('Latn'))
-
-    def test_fromietf_alpha2_language(self):
-        language = Language.fromietf('fr-Latn')
-        self.assertEqual(language.alpha3, 'fra')
-        self.assertIsNone(language.country)
-        self.assertEqual(language.script, Script('Latn'))
-
-    def test_fromietf_wrong_language(self):
-        self.assertRaises(ValueError, lambda: Language.fromietf('xyz-FR'))
-
-    def test_fromietf_wrong_country(self):
-        self.assertRaises(ValueError, lambda: Language.fromietf('fra-YZ'))
-
-    def test_fromietf_wrong_script(self):
-        self.assertRaises(ValueError, lambda: Language.fromietf('fra-FR-Wxyz'))
-
-    def test_eq(self):
-        self.assertEqual(Language('eng'), Language('eng'))
-
-    def test_ne(self):
-        self.assertNotEqual(Language('fra'), Language('eng'))
-        self.assertIsNotNone(Language('fra'))
-
-    def test_nonzero(self):
-        self.assertFalse(bool(Language('und')))
-        self.assertTrue(bool(Language('eng')))
-
-    def test_language_hasattr(self):
-        self.assertTrue(hasattr(Language('fra'), 'alpha3'))
-        self.assertTrue(hasattr(Language('fra'), 'alpha2'))
-        self.assertFalse(hasattr(Language('bej'), 'alpha2'))
-
-    def test_country_hasattr(self):
-        self.assertTrue(hasattr(Country('US'), 'name'))
-        self.assertTrue(hasattr(Country('FR'), 'alpha2'))
-        self.assertFalse(hasattr(Country('BE'), 'none'))
-
-    def test_country(self):
-        self.assertEqual(Language('por', 'BR').country, Country('BR'))
-        self.assertEqual(Language('eng', Country('US')).country, Country('US'))
-
-    def test_eq_with_country(self):
-        self.assertEqual(Language('eng', 'US'), Language('eng', Country('US')))
-
-    def test_ne_with_country(self):
-        self.assertNotEqual(Language('eng', 'US'), Language('eng', Country('GB')))
-
-    def test_script(self):
-        self.assertEqual(Language('srp', script='Latn').script, Script('Latn'))
-        self.assertEqual(Language('srp', script=Script('Cyrl')).script, Script('Cyrl'))
-
-    def test_eq_with_script(self):
-        self.assertEqual(Language('srp', script='Latn'), Language('srp', script=Script('Latn')))
-
-    def test_ne_with_script(self):
-        self.assertNotEqual(Language('srp', script='Latn'), Language('srp', script=Script('Cyrl')))
-
-    def test_eq_with_country_and_script(self):
-        self.assertEqual(Language('srp', 'SR', 'Latn'), Language('srp', Country('SR'), Script('Latn')))
-
-    def test_ne_with_country_and_script(self):
-        self.assertNotEqual(Language('srp', 'SR', 'Latn'), Language('srp', Country('SR'), Script('Cyrl')))
-
-    def test_hash(self):
-        self.assertEqual(hash(Language('fra')), hash('fr'))
-        self.assertEqual(hash(Language('ace')), hash('ace'))
-        self.assertEqual(hash(Language('por', 'BR')), hash('pt-BR'))
-        self.assertEqual(hash(Language('srp', script='Cyrl')), hash('sr-Cyrl'))
-        self.assertEqual(hash(Language('eng', 'US', 'Latn')), hash('en-US-Latn'))
-
-    def test_pickle(self):
-        for lang in [Language('fra'),
-                     Language('eng', 'US'),
-                     Language('srp', script='Latn'),
-                     Language('eng', 'US', 'Latn')]:
-            self.assertEqual(pickle.loads(pickle.dumps(lang)), lang)
-
-    def test_str(self):
-        self.assertEqual(Language.fromietf(str(Language('eng', 'US', 'Latn'))), Language('eng', 'US', 'Latn'))
-        self.assertEqual(Language.fromietf(str(Language('fra', 'FR'))), Language('fra', 'FR'))
-        self.assertEqual(Language.fromietf(str(Language('bel'))), Language('bel'))
-
-    def test_register_converter(self):
-        class TestConverter(LanguageReverseConverter):
-            def __init__(self):
-                self.to_test = {'fra': 'test1', 'eng': 'test2'}
-                self.from_test = {'test1': 'fra', 'test2': 'eng'}
-
-            def convert(self, alpha3, country=None, script=None):
-                if alpha3 not in self.to_test:
-                    raise LanguageConvertError(alpha3, country, script)
-                return self.to_test[alpha3]
-
-            def reverse(self, test):
-                if test not in self.from_test:
-                    raise LanguageReverseError(test)
-                return (self.from_test[test], None)
-        language = Language('fra')
-        self.assertFalse(hasattr(language, 'test'))
-        language_converters['test'] = TestConverter()
-        self.assertTrue(hasattr(language, 'test'))
-        self.assertIn('test', language_converters)
-        self.assertEqual(Language('fra').test, 'test1')
-        self.assertEqual(Language.fromtest('test2').alpha3, 'eng')
-        del language_converters['test']
-        self.assertNotIn('test', language_converters)
-        self.assertRaises(KeyError, lambda: Language.fromtest('test1'))
-        self.assertRaises(AttributeError, lambda: Language('fra').test)
-
-
-def suite():
-    suite = TestSuite()
-    suite.addTest(TestLoader().loadTestsFromTestCase(TestScript))
-    suite.addTest(TestLoader().loadTestsFromTestCase(TestCountry))
-    suite.addTest(TestLoader().loadTestsFromTestCase(TestLanguage))
-    return suite
-
-
-if __name__ == '__main__':
-    TextTestRunner().run(suite())
--- a/libs/flask_restful/__init__.py
+++ b/libs/flask_restful/__init__.py
@@ -11,10 +11,12 @@ from werkzeug.wrappers import Response as ResponseBase
 from flask_restful.utils import http_status_message, unpack, OrderedDict
 from flask_restful.representations.json import output_json
 import sys
-from flask.helpers import _endpoint_from_view_func
 from types import MethodType
 import operator
-from collections import Mapping
+try:
+    from collections.abc import Mapping
+except ImportError:
+    from collections import Mapping


 __all__ = ('Api', 'Resource', 'marshal', 'marshal_with', 'marshal_with_field', 'abort')
@@ -58,7 +60,7 @@ class Api(object):
        to handle 404 errors throughout your app
    :param serve_challenge_on_401: Whether to serve a challenge response to
        clients on receiving 401. This usually leads to a username/password
-        popup in web browers.
+        popup in web browsers.
    :param url_part_order: A string that controls the order that the pieces
        of the url are concatenated when the full url is constructed.  'b'
        is the blueprint (or blueprint registration) prefix, 'a' is the api
@@ -153,7 +155,7 @@ class Api(object):
            rule = blueprint_setup.url_prefix + rule
        options.setdefault('subdomain', blueprint_setup.subdomain)
        if endpoint is None:
-            endpoint = _endpoint_from_view_func(view_func)
+            endpoint = view_func.__name__
        defaults = blueprint_setup.url_defaults
        if 'defaults' in options:
            defaults = dict(defaults, **options.pop('defaults'))
@@ -287,6 +289,13 @@ class Api(object):

        headers = Headers()
        if isinstance(e, HTTPException):
+            if e.response is not None:
+                # If HTTPException is initialized with a response, then return e.get_response().
+                # This prevents specified error response from being overridden.
+                # eg. HTTPException(response=Response("Hello World"))
+                resp = e.get_response()
+                return resp
+
            code = e.code
            default_data = {
                'message': getattr(e, 'description', http_status_message(code))
--- a/libs/flask_restful/version.py
+++ b/libs/flask_restful/version.py
@@ -1,3 +1,3 @@
 #!/usr/bin/env python

-__version__ = '0.3.7'
+__version__ = '0.3.9'
--- a/libs/flask_restful/fields.py
+++ b/libs/flask_restful/fields.py
@@ -1,6 +1,4 @@
-from datetime import datetime
 from calendar import timegm
-import pytz
 from decimal import Decimal as MyDecimal, ROUND_HALF_EVEN
 from email.utils import formatdate
 import six
@@ -9,8 +7,7 @@ try:
 except ImportError:
    # python3
    from urllib.parse import urlparse, urlunparse
-
-from flask_restful import inputs, marshal
+from flask_restful import marshal
 from flask import url_for, request

 __all__ = ["String", "FormattedString", "Url", "DateTime", "Float",
--- a/libs/flask_restful/inputs.py
+++ b/libs/flask_restful/inputs.py
@@ -269,7 +269,7 @@ def datetime_from_rfc822(datetime_str):


 def datetime_from_iso8601(datetime_str):
-    """Turns an ISO8601 formatted date into a datetime object.
+    """Turns an ISO8601 formatted datetime into a datetime object.

    Example::

--- a/libs/flask_restful/reqparse.py
+++ b/libs/flask_restful/reqparse.py
@@ -1,6 +1,9 @@
 from copy import deepcopy

-import collections
+try:
+    from collections.abc import MutableSequence
+except ImportError:
+    from collections import MutableSequence
 from flask import current_app, request
 from werkzeug.datastructures import MultiDict, FileStorage
 from werkzeug import exceptions
@@ -146,7 +149,7 @@ class Argument(object):
        except TypeError:
            try:
                if self.type is decimal.Decimal:
-                    return self.type(str(value), self.name)
+                    return self.type(str(value))
                else:
                    return self.type(value, self.name)
            except TypeError:
@@ -194,7 +197,7 @@ class Argument(object):
                    values = source.getlist(name)
                else:
                    values = source.get(name)
-                    if not (isinstance(values, collections.MutableSequence) and self.action == 'append'):
+                    if not (isinstance(values, MutableSequence) and self.action == 'append'):
                        values = [values]

                for value in values:
--- a/libs/flask_restful/utils/__init__.py
+++ b/libs/flask_restful/utils/__init__.py
@@ -1,9 +1,9 @@
 import sys

 try:
-    from collections import OrderedDict
+    from collections.abc import OrderedDict
 except ImportError:
-    from ordereddict import OrderedDict
+    from collections import OrderedDict

 from werkzeug.http import HTTP_STATUS_CODES

--- a/libs/future/__init__.py
+++ b/libs/future/__init__.py
@@ -68,7 +68,7 @@ See: http://python-future.org
 Credits
 -------

-:Author:  Ed Schofield
+:Author:  Ed Schofield, Jordan M. Adler, et al
 :Sponsor: Python Charmers Pty Ltd, Australia, and Python Charmers Pte
          Ltd, Singapore. http://pythoncharmers.com
 :Others:  See docs/credits.rst or http://python-future.org/credits.html
@@ -76,7 +76,7 @@ Credits

 Licensing
 ---------
-Copyright 2013-2018 Python Charmers Pty Ltd, Australia.
+Copyright 2013-2019 Python Charmers Pty Ltd, Australia.
 The software is distributed under an MIT licence. See LICENSE.txt.

 """
@@ -84,10 +84,10 @@ The software is distributed under an MIT licence. See LICENSE.txt.
 __title__ = 'future'
 __author__ = 'Ed Schofield'
 __license__ = 'MIT'
-__copyright__ = 'Copyright 2013-2018 Python Charmers Pty Ltd'
+__copyright__ = 'Copyright 2013-2019 Python Charmers Pty Ltd'
 __ver_major__ = 0
-__ver_minor__ = 17
-__ver_patch__ = 0
+__ver_minor__ = 18
+__ver_patch__ = 2
 __ver_sub__ = ''
 __version__ = "%d.%d.%d%s" % (__ver_major__, __ver_minor__,
                              __ver_patch__, __ver_sub__)
--- a/libs/future/backports/__init__.py
+++ b/libs/future/backports/__init__.py
@@ -10,7 +10,7 @@ __future_module__ = True
 from future.standard_library import import_top_level_modules


-if sys.version_info[0] == 3:
+if sys.version_info[0] >= 3:
    import_top_level_modules()


--- a/libs/future/backports/email/message.py
+++ b/libs/future/backports/email/message.py
@@ -800,7 +800,7 @@ class Message(object):
            # There was no Content-Type header, and we don't know what type
            # to set it to, so raise an exception.
            raise errors.HeaderParseError('No Content-Type header found')
-        newparams = []
+        newparams = list()
        foundp = False
        for pk, pv in params:
            if pk.lower() == 'boundary':
@@ -814,10 +814,10 @@ class Message(object):
            # instead???
            newparams.append(('boundary', '"%s"' % boundary))
        # Replace the existing Content-Type header with the new value
-        newheaders = []
+        newheaders = list()
        for h, v in self._headers:
            if h.lower() == 'content-type':
-                parts = []
+                parts = list()
                for k, v in newparams:
                    if v == '':
                        parts.append(k)
--- a/libs/future/backports/http/client.py
+++ b/libs/future/backports/http/client.py
@@ -79,11 +79,15 @@ from future.backports.misc import create_connection as socket_create_connection
 import io
 import os
 import socket
-import collections
 from future.backports.urllib.parse import urlsplit
 import warnings
 from array import array

+if PY2:
+    from collections import Iterable
+else:
+    from collections.abc import Iterable
+
 __all__ = ["HTTPResponse", "HTTPConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
@ -696,9 +700,19 @@ class HTTPResponse(io.RawIOBase):
        while total_bytes < len(b):
            if MAXAMOUNT < len(mvb):
                temp_mvb = mvb[0:MAXAMOUNT]
-                n = self.fp.readinto(temp_mvb)
+                if PY2:
+                    data = self.fp.read(len(temp_mvb))
+                    n = len(data)
+                    temp_mvb[:n] = data
+                else:
+                    n = self.fp.readinto(temp_mvb)
            else:
-                n = self.fp.readinto(mvb)
+                if PY2:
+                    data = self.fp.read(len(mvb))
+                    n = len(data)
+                    mvb[:n] = data
+                else:
+                    n = self.fp.readinto(mvb)
            if not n:
                raise IncompleteRead(bytes(mvb[0:total_bytes]), len(b))
            mvb = mvb[n:]
@ -892,7 +906,7 @@ class HTTPConnection(object):
        try:
            self.sock.sendall(data)
        except TypeError:
-            if isinstance(data, collections.Iterable):
+            if isinstance(data, Iterable):
                for d in data:
                    self.sock.sendall(d)
            else:
--- a/libs/future/backports/http/cookiejar.py
+++ b/libs/future/backports/http/cookiejar.py
@ -33,7 +33,7 @@ from __future__ import print_function
 from __future__ import division
 from __future__ import absolute_import
 from future.builtins import filter, int, map, open, str
-from future.utils import as_native_str
+from future.utils import as_native_str, PY2

 __all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
           'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar']
@ -41,7 +41,8 @@ __all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
 import copy
 import datetime
 import re
-re.ASCII = 0
+if PY2:
+    re.ASCII = 0
 import time
 from future.backports.urllib.parse import urlparse, urlsplit, quote
 from future.backports.http.client import HTTP_PORT
--- a/libs/future/backports/http/cookies.py
+++ b/libs/future/backports/http/cookies.py
@ -138,7 +138,8 @@ from future.utils import PY2, as_native_str
 # Import our required modules
 #
 import re
-re.ASCII = 0    # for py2 compatibility
+if PY2:
+    re.ASCII = 0    # for py2 compatibility
 import string

 __all__ = ["CookieError", "BaseCookie", "SimpleCookie"]
--- a/libs/future/backports/misc.py
+++ b/libs/future/backports/misc.py
@ -16,7 +16,6 @@ from __future__ import absolute_import

 import subprocess
 from math import ceil as oldceil
-from collections import Mapping, MutableMapping

 from operator import itemgetter as _itemgetter, eq as _eq
 import sys
@ -25,7 +24,12 @@ from _weakref import proxy as _proxy
 from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
 from socket import getaddrinfo, SOCK_STREAM, error, socket

-from future.utils import iteritems, itervalues, PY26, PY3
+from future.utils import iteritems, itervalues, PY2, PY26, PY3
+
+if PY2:
+    from collections import Mapping, MutableMapping
+else:
+    from collections.abc import Mapping, MutableMapping


 def ceil(x):
--- a/libs/future/backports/test/pystone.py
+++ b/libs/future/backports/test/pystone.py
--- a/libs/future/backports/urllib/request.py
+++ b/libs/future/backports/urllib/request.py
@ -109,11 +109,17 @@ import re
 import socket
 import sys
 import time
-import collections
 import tempfile
 import contextlib
 import warnings

+from future.utils import PY2
+
+if PY2:
+    from collections import Iterable
+else:
+    from collections.abc import Iterable
+
 # check for SSL
 try:
    import ssl
@ -1221,7 +1227,7 @@ class AbstractHTTPHandler(BaseHandler):
                        mv = memoryview(data)
                        size = len(mv) * mv.itemsize
                except TypeError:
-                    if isinstance(data, collections.Iterable):
+                    if isinstance(data, Iterable):
                        raise ValueError("Content-Length should be specified "
                                "for iterable data of type %r %r" % (type(data),
                                data))
--- a/libs/future/builtins/init.py
+++ b/libs/future/builtins/init.py
@ -11,7 +11,7 @@ from future.builtins.iterators import (filter, map, zip)
 # The isinstance import is no longer needed. We provide it only for
 # backward-compatibility with future v0.8.2. It will be removed in future v1.0.
 from future.builtins.misc import (ascii, chr, hex, input, isinstance, next,
-                                  oct, open, pow, round, super)
+                                  oct, open, pow, round, super, max, min)
 from future.utils import PY3

 if PY3:
@ -43,7 +43,7 @@ if not utils.PY3:
    __all__ = ['filter', 'map', 'zip',
               'ascii', 'chr', 'hex', 'input', 'next', 'oct', 'open', 'pow',
               'round', 'super',
-               'bytes', 'dict', 'int', 'list', 'object', 'range', 'str',
+               'bytes', 'dict', 'int', 'list', 'object', 'range', 'str', 'max', 'min'
              ]

 else:
--- a/libs/future/builtins/misc.py
+++ b/libs/future/builtins/misc.py
@ -13,6 +13,8 @@ The builtin functions are:
 - ``open`` (equivalent to io.open on Py2)
 - ``super`` (backport of Py3's magic zero-argument super() function
 - ``round`` (new "Banker's Rounding" behaviour from Py3)
+- ``max`` (new default option from Py3.4)
+- ``min`` (new default option from Py3.4)

 ``isinstance`` is also currently exported for backwards compatibility
 with v0.8.2, although this has been deprecated since v0.9.
@ -59,6 +61,8 @@ if utils.PY2:
    from future.builtins.newnext import newnext as next
    from future.builtins.newround import newround as round
    from future.builtins.newsuper import newsuper as super
+    from future.builtins.new_min_max import newmax as max
+    from future.builtins.new_min_max import newmin as min
    from future.types.newint import newint

    _SENTINEL = object()
@ -89,11 +93,12 @@ if utils.PY2:
            else:
                return _builtin_pow(x+0j, y, z)

+
    # ``future`` doesn't support Py3.0/3.1. If we ever did, we'd add this:
    #     callable = __builtin__.callable

    __all__ = ['ascii', 'chr', 'hex', 'input', 'isinstance', 'next', 'oct',
-               'open', 'pow', 'round', 'super']
+               'open', 'pow', 'round', 'super', 'max', 'min']

 else:
    import builtins
@ -109,8 +114,14 @@ else:
    pow = builtins.pow
    round = builtins.round
    super = builtins.super
-
-    __all__ = []
+    if utils.PY34_PLUS:
+        max = builtins.max
+        min = builtins.min
+        __all__ = []
+    else:
+        from future.builtins.new_min_max import newmax as max
+        from future.builtins.new_min_max import newmin as min
+        __all__ = ['min', 'max']

    # The callable() function was removed from Py3.0 and 3.1 and
    # reintroduced into Py3.2+. ``future`` doesn't support Py3.0/3.1. If we ever
--- a/libs/future/builtins/new_min_max.py
+++ b/libs/future/builtins/new_min_max.py
@ -0,0 +1,67 @
+import itertools
+
+from future import utils
+if utils.PY2:
+    from __builtin__ import max as _builtin_max, min as _builtin_min
+else:
+    from builtins import max as _builtin_max, min as _builtin_min
+
+_SENTINEL = object()
+_ALLOWED_KEYWORDS = frozenset(['key', 'default'])
+
+
+def newmin(*args, **kwargs):
+    """min() accepting the ``default`` keyword introduced in Python 3.4."""
+    return new_min_max(_builtin_min, *args, **kwargs)
+
+
+def newmax(*args, **kwargs):
+    """max() accepting the ``default`` keyword introduced in Python 3.4."""
+    return new_min_max(_builtin_max, *args, **kwargs)
+
+
+def new_min_max(_builtin_func, *args, **kwargs):
+    """
+    To support the argument "default" introduced in python 3.4 for min and max
+
+    :param _builtin_func: builtin min or builtin max
+    :param args: a single iterable, or two or more values to compare
+    :param kwargs: optional ``key`` function and, for the single-iterable
+        form only, a ``default`` returned when the iterable is empty
+    :return: returns the min or max based on the arguments passed
+    :raises TypeError: unknown keyword argument, no positional argument, or
+        ``default`` combined with several positional arguments
+    :raises ValueError: empty iterable and no ``default`` given
+    """
+    for keyword in kwargs:
+        if keyword not in _ALLOWED_KEYWORDS:
+            # Interpolate here: passed as a second argument, the name would
+            # never be substituted into the '%s' placeholder.
+            raise TypeError('Illegal argument %s' % keyword)
+
+    if not args:
+        raise TypeError('%s expected at least 1 argument, got 0'
+                        % _builtin_func.__name__)
+
+    default = kwargs.get('default', _SENTINEL)
+    key = kwargs.get('key')
+
+    if len(args) == 1:
+        iterator = iter(args[0])
+        try:
+            first = next(iterator)
+        except StopIteration:
+            if default is not _SENTINEL:
+                return default
+            raise ValueError('{}() arg is an empty sequence'.format(_builtin_func.__name__))
+        # Put the probed element back so the builtin sees every item.
+        candidates = itertools.chain([first], iterator)
+    elif default is not _SENTINEL:
+        raise TypeError('Cannot specify a default for %s() with multiple '
+                        'positional arguments' % _builtin_func.__name__)
+    else:
+        candidates = args
+
+    if key is not None:
+        return _builtin_func(candidates, key=key)
+    return _builtin_func(candidates)
--- a/libs/future/builtins/newround.py
+++ b/libs/future/builtins/newround.py
@ -38,11 +38,14 @@ def newround(number, ndigits=None):
        if 'numpy' in repr(type(number)):
            number = float(number)

-    if not PY26:
-        d = Decimal.from_float(number).quantize(exponent,
-                                            rounding=ROUND_HALF_EVEN)
+    if isinstance(number, Decimal):
+        d = number
    else:
-        d = from_float_26(number).quantize(exponent, rounding=ROUND_HALF_EVEN)
+        if not PY26:
+            d = Decimal.from_float(number).quantize(exponent,
+                                                rounding=ROUND_HALF_EVEN)
+        else:
+            d = from_float_26(number).quantize(exponent, rounding=ROUND_HALF_EVEN)

    if return_int:
        return int(d)
--- a/libs/future/moves/init.py
+++ b/libs/future/moves/init.py
@ -4,5 +4,5 @@ import sys
 __future_module__ = True
 from future.standard_library import import_top_level_modules

-if sys.version_info[0] == 3:
+if sys.version_info[0] >= 3:
    import_top_level_modules()
--- a/libs/future/moves/copyreg.py
+++ b/libs/future/moves/copyreg.py
@ -2,7 +2,11 @@ from __future__ import absolute_import
 from future.utils import PY3

 if PY3:
-    from copyreg import *
+    import copyreg, sys
+    # A "*" import uses Python 3's copyreg.__all__ which does not include
+    # all public names in the API surface for copyreg, this avoids that
+    # problem by just making our module _be_ a reference to the actual module.
+    sys.modules['future.moves.copyreg'] = copyreg
 else:
    __future_module__ = True
    from copy_reg import *
--- a/libs/future/moves/urllib/request.py
+++ b/libs/future/moves/urllib/request.py
@ -11,19 +11,8 @@ if PY3:
                                proxy_bypass,
                                quote,
                                request_host,
-                                splitattr,
-                                splithost,
-                                splitpasswd,
-                                splitport,
-                                splitquery,
-                                splittag,
-                                splittype,
-                                splituser,
-                                splitvalue,
                                thishost,
-                                to_bytes,
                                unquote,
-                                unwrap,
                                url2pathname,
                                urlcleanup,
                                urljoin,
@ -32,6 +21,18 @@ if PY3:
                                urlretrieve,
                                urlsplit,
                                urlunparse)
+
+    from urllib.parse import (splitattr,
+                              splithost,
+                              splitpasswd,
+                              splitport,
+                              splitquery,
+                              splittag,
+                              splittype,
+                              splituser,
+                              splitvalue,
+                              to_bytes,
+                              unwrap)
 else:
    __future_module__ = True
    with suspend_hooks():
--- a/libs/future/tests/base.py
+++ b/libs/future/tests/base.py
@ -272,7 +272,11 @@ class CodeHandler(unittest.TestCase):
        else:
            headers = ''

-        self.compare(output, headers + reformat_code(expected),
+        reformatted = reformat_code(expected)
+        if headers in reformatted:
+            headers = ''
+
+        self.compare(output, headers + reformatted,
                     ignore_imports=ignore_imports)

    def unchanged(self, code, **kwargs):
@ -338,6 +342,10 @@ class CodeHandler(unittest.TestCase):
                        '----\n%s\n----' % f.read(),
                    )
            ErrorClass = (FuturizeError if 'futurize' in script else PasteurizeError)
+
+            if not hasattr(e, 'output'):
+                # The attribute CalledProcessError.output doesn't exist on Py2.6
+                e.output = None
            raise ErrorClass(msg, e.returncode, e.cmd, output=e.output)
        return output

--- a/libs/future/types/newbytes.py
+++ b/libs/future/types/newbytes.py
@ -5,15 +5,19 @@ Why do this? Without it, the Python 2 bytes object is a very, very
 different beast to the Python 3 bytes object.
 """

-from collections import Iterable
 from numbers import Integral
 import string
 import copy

-from future.utils import istext, isbytes, PY3, with_metaclass
+from future.utils import istext, isbytes, PY2, PY3, with_metaclass
 from future.types import no, issubset
 from future.types.newobject import newobject

+if PY2:
+    from collections import Iterable
+else:
+    from collections.abc import Iterable
+

 _builtin_bytes = bytes

--- a/libs/future/types/newint.py
+++ b/libs/future/types/newint.py
@ -8,7 +8,6 @@ They are very similar. The most notable difference is:
 from __future__ import division

 import struct
-import collections

 from future.types.newbytes import newbytes
 from future.types.newobject import newobject
@ -17,6 +16,9 @@ from future.utils import PY3, isint, istext, isbytes, with_metaclass, native

 if PY3:
    long = int
+    from collections.abc import Iterable
+else:
+    from collections import Iterable


 class BaseNewInt(type):
@ -356,7 +358,7 @@ class newint(with_metaclass(BaseNewInt, long)):
            raise TypeError("cannot convert unicode objects to bytes")
        # mybytes can also be passed as a sequence of integers on Py3.
        # Test for this:
-        elif isinstance(mybytes, collections.Iterable):
+        elif isinstance(mybytes, Iterable):
            mybytes = newbytes(mybytes)
        b = mybytes if byteorder == 'big' else mybytes[::-1]
        if len(b) == 0:
--- a/libs/future/types/newmemoryview.py
+++ b/libs/future/types/newmemoryview.py
@ -1,14 +1,16 @@
 """
 A pretty lame implementation of a memoryview object for Python 2.6.
 """
-
-from collections import Iterable
 from numbers import Integral
 import string

-from future.utils import istext, isbytes, PY3, with_metaclass
+from future.utils import istext, isbytes, PY2, with_metaclass
 from future.types import no, issubset

+if PY2:
+    from collections import Iterable
+else:
+    from collections.abc import Iterable

 # class BaseNewBytes(type):
 #     def __instancecheck__(cls, instance):
--- a/libs/future/types/newobject.py
+++ b/libs/future/types/newobject.py
@ -112,5 +112,6 @@ class newobject(object):
        """
        return object(self)

+    __slots__ = []

 __all__ = ['newobject']
--- a/libs/future/types/newrange.py
+++ b/libs/future/types/newrange.py
@ -19,7 +19,12 @@ From Dan Crosta's README:
 """
 from __future__ import absolute_import

-from collections import Sequence, Iterator
+from future.utils import PY2
+
+if PY2:
+    from collections import Sequence, Iterator
+else:
+    from collections.abc import Sequence, Iterator
 from itertools import islice

 from future.backports.misc import count   # with step parameter on Py2.6
--- a/libs/future/types/newstr.py
+++ b/libs/future/types/newstr.py
@ -40,7 +40,6 @@ representations of your objects portably across Py3 and Py2, use the

 """

-from collections import Iterable
 from numbers import Number

 from future.utils import PY3, istext, with_metaclass, isnewbytes
@ -51,6 +50,9 @@ from future.types.newobject import newobject
 if PY3:
    # We'll probably never use newstr on Py3 anyway...
    unicode = str
+    from collections.abc import Iterable
+else:
+    from collections import Iterable


 class BaseNewStr(type):
@ -105,6 +107,7 @@ class newstr(with_metaclass(BaseNewStr, unicode)):
        """
        Without the u prefix
        """
+
        value = super(newstr, self).__repr__()
        # assert value[0] == u'u'
        return value[1:]
@ -290,7 +293,14 @@ class newstr(with_metaclass(BaseNewStr, unicode)):
            isinstance(other, bytes) and not isnewbytes(other)):
            return super(newstr, self).__eq__(other)
        else:
-            return False
+            return NotImplemented
+
+    def __hash__(self):
+        # ``self`` is always a ``unicode`` instance because newstr subclasses
+        # ``unicode``, so no type test is needed before delegating to the
+        # native hash; an always-true test would only leave a dead error
+        # branch behind. This keeps hashing consistent with ``__eq__``.
+        return super(newstr, self).__hash__()

    def __ne__(self, other):
        if (isinstance(other, unicode) or
--- a/libs/future/utils/init.py
+++ b/libs/future/utils/init.py
@ -18,8 +18,10 @@ This module exports useful functions for 2/3 compatible code:
    * types:

        * text_type: unicode in Python 2, str in Python 3
-        * binary_type: str in Python 2, bytes in Python 3
        * string_types: basestring in Python 2, str in Python 3
+        * binary_type: str in Python 2, bytes in Python 3
+        * integer_types: (int, long) in Python 2, int in Python 3
+        * class_types: (type, types.ClassType) in Python 2, type in Python 3

    * bchr(c):
        Take an integer and make a 1-character byte string
@ -55,7 +57,8 @@ import copy
 import inspect


-PY3 = sys.version_info[0] == 3
+PY3 = sys.version_info[0] >= 3
+PY34_PLUS = sys.version_info[0:2] >= (3, 4)
 PY35_PLUS = sys.version_info[0:2] >= (3, 5)
 PY36_PLUS = sys.version_info[0:2] >= (3, 6)
 PY2 = sys.version_info[0] == 2
@ -405,12 +408,34 @@ if PY3:
        allows re-raising exceptions with the cls value and traceback on
        Python 2 and 3.
        """
-        if value is not None and isinstance(tp, Exception):
-            raise TypeError("instance exception may not have a separate value")
-        if value is not None:
-            exc = tp(value)
-        else:
+        if isinstance(tp, BaseException):
+            # If the first object is an instance, the type of the exception
+            # is the class of the instance, the instance itself is the value,
+            # and the second object must be None.
+            if value is not None:
+                raise TypeError("instance exception may not have a separate value")
            exc = tp
+        elif isinstance(tp, type) and not issubclass(tp, BaseException):
+            # If the first object is a class, it becomes the type of the
+            # exception.
+            raise TypeError("class must derive from BaseException, not %s" % tp.__name__)
+        else:
+            # The second object is used to determine the exception value: If it
+            # is an instance of the class, the instance becomes the exception
+            # value. If the second object is a tuple, it is used as the argument
+            # list for the class constructor; if it is None, an empty argument
+            # list is used, and any other object is treated as a single argument
+            # to the constructor. The instance so created by calling the
+            # constructor is used as the exception value.
+            if isinstance(value, tp):
+                exc = value
+            elif isinstance(value, tuple):
+                exc = tp(*value)
+            elif value is None:
+                exc = tp()
+            else:
+                exc = tp(value)
+
        if exc.__traceback__ is not tb:
            raise exc.with_traceback(tb)
        raise exc
@ -443,12 +468,14 @@ else:
        e.__suppress_context__ = False
        if isinstance(cause, type) and issubclass(cause, Exception):
            e.__cause__ = cause()
+            e.__cause__.__traceback__ = sys.exc_info()[2]
            e.__suppress_context__ = True
        elif cause is None:
            e.__cause__ = None
            e.__suppress_context__ = True
        elif isinstance(cause, BaseException):
            e.__cause__ = cause
+            object.__setattr__(e.__cause__,  '__traceback__', sys.exc_info()[2])
            e.__suppress_context__ = True
        else:
            raise TypeError("exception causes must derive from BaseException")
@ -552,15 +579,14 @@ def isbytes(obj):

 def isnewbytes(obj):
     """
-    Equivalent to the result of ``isinstance(obj, newbytes)`` were
-    ``__instancecheck__`` not overridden on the newbytes subclass. In
-    other words, it is REALLY a newbytes instance, not a Py2 native str
+    Roughly equivalent to ``type(obj) == newbytes`` (matched by class name):
+    in other words, it is REALLY a newbytes instance, not a Py2 native str
     object?
+
+    Note that this does not cover subclasses of newbytes, and it is not
+    equivalent to isinstance(obj, newbytes)
     """
-    # TODO: generalize this so that it works with subclasses of newbytes
-    # Import is here to avoid circular imports:
-    from future.types.newbytes import newbytes
-    return type(obj) == newbytes
+    return type(obj).__name__ == 'newbytes'


 def isint(obj):
@ -726,16 +752,16 @@ else:


 __all__ = ['PY2', 'PY26', 'PY3', 'PYPY',
-           'as_native_str', 'bind_method', 'bord', 'bstr',
-           'bytes_to_native_str', 'encode_filename', 'ensure_new_type',
-           'exec_', 'get_next', 'getexception', 'implements_iterator',
-           'is_new_style', 'isbytes', 'isidentifier', 'isint',
-           'isnewbytes', 'istext', 'iteritems', 'iterkeys', 'itervalues',
-           'lfilter', 'listitems', 'listvalues', 'lmap', 'lrange',
-           'lzip', 'native', 'native_bytes', 'native_str',
+           'as_native_str', 'binary_type', 'bind_method', 'bord', 'bstr',
+           'bytes_to_native_str', 'class_types', 'encode_filename',
+           'ensure_new_type', 'exec_', 'get_next', 'getexception',
+           'implements_iterator', 'integer_types', 'is_new_style', 'isbytes',
+           'isidentifier', 'isint', 'isnewbytes', 'istext', 'iteritems',
+           'iterkeys', 'itervalues', 'lfilter', 'listitems', 'listvalues',
+           'lmap', 'lrange', 'lzip', 'native', 'native_bytes', 'native_str',
           'native_str_to_bytes', 'old_div',
           'python_2_unicode_compatible', 'raise_',
-           'raise_with_traceback', 'reraise', 'text_to_native_str',
-           'tobytes', 'viewitems', 'viewkeys', 'viewvalues',
-           'with_metaclass'
-          ]
+           'raise_with_traceback', 'reraise', 'string_types',
+           'text_to_native_str', 'text_type', 'tobytes', 'viewitems',
+           'viewkeys', 'viewvalues', 'with_metaclass'
+           ]
--- a/libs/html5lib/init.py
+++ b/libs/html5lib/init.py
@ -32,4 +32,4 @@ __all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",

 # this has to be at the top level, see how setup.py parses this
 #: Distribution version number.
-__version__ = "1.0.1"
+__version__ = "1.1"
--- a/libs/html5lib/_ihatexml.py
+++ b/libs/html5lib/_ihatexml.py
@ -136,6 +136,7 @@ def normaliseCharList(charList):
        i += j
    return rv

+
 # We don't really support characters above the BMP :(
 max_unicode = int("FFFF", 16)

@ -254,7 +255,7 @@ class InfosetFilter(object):
        nameRest = name[1:]
        m = nonXmlNameFirstBMPRegexp.match(nameFirst)
        if m:
-            warnings.warn("Coercing non-XML name", DataLossWarning)
+            warnings.warn("Coercing non-XML name: %s" % name, DataLossWarning)
            nameFirstOutput = self.getReplacementCharacter(nameFirst)
        else:
            nameFirstOutput = nameFirst
@ -262,7 +263,7 @@ class InfosetFilter(object):
        nameRestOutput = nameRest
        replaceChars = set(nonXmlNameBMPRegexp.findall(nameRest))
        for char in replaceChars:
-            warnings.warn("Coercing non-XML name", DataLossWarning)
+            warnings.warn("Coercing non-XML name: %s" % name, DataLossWarning)
            replacement = self.getReplacementCharacter(char)
            nameRestOutput = nameRestOutput.replace(char, replacement)
        return nameFirstOutput + nameRestOutput
--- a/libs/html5lib/_inputstream.py
+++ b/libs/html5lib/_inputstream.py
@ -1,10 +1,11 @@
 from __future__ import absolute_import, division, unicode_literals

-from six import text_type, binary_type
+from six import text_type
 from six.moves import http_client, urllib

 import codecs
 import re
+from io import BytesIO, StringIO

 import webencodings

@ -12,13 +13,6 @@ from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
 from .constants import _ReparseException
 from . import _utils

-from io import StringIO
-
-try:
-    from io import BytesIO
-except ImportError:
-    BytesIO = StringIO
-
 # Non-unicode versions of constants for use in the pre-parser
 spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters])
 asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters])
@ -40,13 +34,13 @@ if _utils.supports_lone_surrogates:
 else:
    invalid_unicode_re = re.compile(invalid_unicode_no_surrogate)

-non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
-                                  0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
-                                  0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE,
-                                  0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF,
-                                  0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
-                                  0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
-                                  0x10FFFE, 0x10FFFF])
+non_bmp_invalid_codepoints = {0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
+                              0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
+                              0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE,
+                              0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF,
+                              0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
+                              0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
+                              0x10FFFE, 0x10FFFF}

 ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005C\u005B-\u0060\u007B-\u007E]")

@ -367,7 +361,7 @@ class HTMLUnicodeInputStream(object):
    def unget(self, char):
        # Only one character is allowed to be ungotten at once - it must
        # be consumed again before any further call to unget
-        if char is not None:
+        if char is not EOF:
            if self.chunkOffset == 0:
                # unget is called quite rarely, so it's a good idea to do
                # more work here if it saves a bit of work in the frequently
@ -449,7 +443,7 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):

        try:
            stream.seek(stream.tell())
-        except:  # pylint:disable=bare-except
+        except Exception:
            stream = BufferedStream(stream)

        return stream
@ -461,7 +455,7 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
        if charEncoding[0] is not None:
            return charEncoding

-        # If we've been overriden, we've been overriden
+        # If we've been overridden, we've been overridden
        charEncoding = lookupEncoding(self.override_encoding), "certain"
        if charEncoding[0] is not None:
            return charEncoding
@ -664,9 +658,7 @@ class EncodingBytes(bytes):
        """Look for a sequence of bytes at the start of a string. If the bytes
        are found return True and advance the position to the byte after the
        match. Otherwise return False and leave the position alone"""
-        p = self.position
-        data = self[p:p + len(bytes)]
-        rv = data.startswith(bytes)
+        rv = self.startswith(bytes, self.position)
        if rv:
            self.position += len(bytes)
        return rv
@ -674,15 +666,11 @@ class EncodingBytes(bytes):
    def jumpTo(self, bytes):
        """Look for the next sequence of bytes matching a given sequence. If
        a match is found advance the position to the last byte of the match"""
-        newPosition = self[self.position:].find(bytes)
-        if newPosition > -1:
-            # XXX: This is ugly, but I can't see a nicer way to fix this.
-            if self._position == -1:
-                self._position = 0
-            self._position += (newPosition + len(bytes) - 1)
-            return True
-        else:
+        try:
+            self._position = self.index(bytes, self.position) + len(bytes) - 1
+        except ValueError:
            raise StopIteration
+        return True


 class EncodingParser(object):
@ -694,6 +682,9 @@ class EncodingParser(object):
        self.encoding = None

    def getEncoding(self):
+        if b"<meta" not in self.data:
+            return None
+
        methodDispatch = (
            (b"<!--", self.handleComment),
            (b"<meta", self.handleMeta),
@ -703,6 +694,10 @@ class EncodingParser(object):
            (b"<", self.handlePossibleStartTag))
        for _ in self.data:
            keepParsing = True
+            try:
+                self.data.jumpTo(b"<")
+            except StopIteration:
+                break
            for key, method in methodDispatch:
                if self.data.matchBytes(key):
                    try:
@ -908,7 +903,7 @@ class ContentAttrParser(object):
 def lookupEncoding(encoding):
    """Return the python codec name corresponding to an encoding or None if the
    string doesn't correspond to a valid encoding."""
-    if isinstance(encoding, binary_type):
+    if isinstance(encoding, bytes):
        try:
            encoding = encoding.decode("ascii")
        except UnicodeDecodeError:
--- a/libs/html5lib/_tokenizer.py
+++ b/libs/html5lib/_tokenizer.py
@ -2,7 +2,8 @@ from __future__ import absolute_import, division, unicode_literals

 from six import unichr as chr

-from collections import deque
+from collections import deque, OrderedDict
+from sys import version_info

 from .constants import spaceCharacters
 from .constants import entities
@ -17,6 +18,11 @@ from ._trie import Trie

 entitiesTrie = Trie(entities)

+if version_info >= (3, 7):
+    attributeMap = dict
+else:
+    attributeMap = OrderedDict
+

 class HTMLTokenizer(object):
    """ This class takes care of tokenizing HTML.
@ -228,6 +234,14 @@ class HTMLTokenizer(object):
        # Add token to the queue to be yielded
        if (token["type"] in tagTokenTypes):
            token["name"] = token["name"].translate(asciiUpper2Lower)
+            if token["type"] == tokenTypes["StartTag"]:
+                raw = token["data"]
+                data = attributeMap(raw)
+                if len(raw) > len(data):
+                    # we had some duplicated attribute, fix so first wins
+                    data.update(raw[::-1])
+                token["data"] = data
+
            if token["type"] == tokenTypes["EndTag"]:
                if token["data"]:
                    self.tokenQueue.append({"type": tokenTypes["ParseError"],
--- a/libs/html5lib/_trie/init.py
+++ b/libs/html5lib/_trie/init.py
@ -1,14 +1,5 @@
 from __future__ import absolute_import, division, unicode_literals

-from .py import Trie as PyTrie
+from .py import Trie

-Trie = PyTrie
-
-# pylint:disable=wrong-import-position
-try:
-    from .datrie import Trie as DATrie
-except ImportError:
-    pass
-else:
-    Trie = DATrie
-# pylint:enable=wrong-import-position
+__all__ = ["Trie"]
--- a/libs/html5lib/_trie/_base.py
+++ b/libs/html5lib/_trie/_base.py
@ -1,6 +1,9 @@
 from __future__ import absolute_import, division, unicode_literals

-from collections import Mapping
+try:
+    from collections.abc import Mapping
+except ImportError:  # Python 2.7
+    from collections import Mapping


 class Trie(Mapping):
--- a/libs/html5lib/_trie/datrie.py
+++ b/libs/html5lib/_trie/datrie.py
@ -1,44 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from datrie import Trie as DATrie
-from six import text_type
-
-from ._base import Trie as ABCTrie
-
-
-class Trie(ABCTrie):
-    def __init__(self, data):
-        chars = set()
-        for key in data.keys():
-            if not isinstance(key, text_type):
-                raise TypeError("All keys must be strings")
-            for char in key:
-                chars.add(char)
-
-        self._data = DATrie("".join(chars))
-        for key, value in data.items():
-            self._data[key] = value
-
-    def __contains__(self, key):
-        return key in self._data
-
-    def __len__(self):
-        return len(self._data)
-
-    def __iter__(self):
-        raise NotImplementedError()
-
-    def __getitem__(self, key):
-        return self._data[key]
-
-    def keys(self, prefix=None):
-        return self._data.keys(prefix)
-
-    def has_keys_with_prefix(self, prefix):
-        return self._data.has_keys_with_prefix(prefix)
-
-    def longest_prefix(self, prefix):
-        return self._data.longest_prefix(prefix)
-
-    def longest_prefix_item(self, prefix):
-        return self._data.longest_prefix_item(prefix)
--- a/libs/html5lib/_utils.py
+++ b/libs/html5lib/_utils.py
@ -2,12 +2,20 @@ from __future__ import absolute_import, division, unicode_literals

 from types import ModuleType

-from six import text_type
-
 try:
-    import xml.etree.cElementTree as default_etree
+    from collections.abc import Mapping
 except ImportError:
+    from collections import Mapping
+
+from six import text_type, PY3
+
+if PY3:
    import xml.etree.ElementTree as default_etree
+else:
+    try:
+        import xml.etree.cElementTree as default_etree
+    except ImportError:
+        import xml.etree.ElementTree as default_etree


 __all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
@ -27,7 +35,7 @@ try:
        # We need this with u"" because of http://bugs.jython.org/issue2039
        _x = eval('u"\\uD800"')  # pylint:disable=eval-used
        assert isinstance(_x, text_type)
-except:  # pylint:disable=bare-except
+except Exception:
    supports_lone_surrogates = False
 else:
    supports_lone_surrogates = True
@ -47,9 +55,6 @@ class MethodDispatcher(dict):
    """

    def __init__(self, items=()):
-        # Using _dictEntries instead of directly assigning to self is about
-        # twice as fast. Please do careful performance testing before changing
-        # anything here.
        _dictEntries = []
        for name, value in items:
            if isinstance(name, (list, tuple, frozenset, set)):
@ -64,6 +69,36 @@ class MethodDispatcher(dict):
    def __getitem__(self, key):
        return dict.get(self, key, self.default)

+    def __get__(self, instance, owner=None):
+        return BoundMethodDispatcher(instance, self)
+
+
+class BoundMethodDispatcher(Mapping):
+    """Read-only view of a MethodDispatcher whose handlers are bound to `instance`"""
+    def __init__(self, instance, dispatcher):
+        self.instance = instance
+        self.dispatcher = dispatcher
+
+    def __getitem__(self, key):
+        # Functions are descriptors: calling __get__ with an instance returns the bound method
+        # (see https://docs.python.org/3/reference/datamodel.html#object.__get__)
+        return self.dispatcher[key].__get__(self.instance)
+
+    def get(self, key, default=None):
+        """Return the bound handler for `key`, or `default` (None if omitted) when absent"""
+        if key in self.dispatcher:
+            return self[key]
+        return default
+
+    def __iter__(self):
+        return iter(self.dispatcher)
+
+    def __len__(self):
+        return len(self.dispatcher)
+
+    def __contains__(self, key):
+        return key in self.dispatcher
+

 # Some utility functions to deal with weirdness around UCS2 vs UCS4
 # python builds
--- a/libs/html5lib/constants.py
+++ b/libs/html5lib/constants.py
@ -519,8 +519,8 @@ adjustForeignAttributes = {
    "xmlns:xlink": ("xmlns", "xlink", namespaces["xmlns"])
 }

-unadjustForeignAttributes = dict([((ns, local), qname) for qname, (prefix, local, ns) in
-                                  adjustForeignAttributes.items()])
+unadjustForeignAttributes = {(ns, local): qname for qname, (prefix, local, ns) in
+                             adjustForeignAttributes.items()}

 spaceCharacters = frozenset([
    "\t",
@ -544,8 +544,7 @@ asciiLetters = frozenset(string.ascii_letters)
 digits = frozenset(string.digits)
 hexDigits = frozenset(string.hexdigits)

-asciiUpper2Lower = dict([(ord(c), ord(c.lower()))
-                         for c in string.ascii_uppercase])
+asciiUpper2Lower = {ord(c): ord(c.lower()) for c in string.ascii_uppercase}

 # Heading elements need to be ordered
 headingElements = (
@ -2934,7 +2933,7 @@ tagTokenTypes = frozenset([tokenTypes["StartTag"], tokenTypes["EndTag"],
                           tokenTypes["EmptyTag"]])


-prefixes = dict([(v, k) for k, v in namespaces.items()])
+prefixes = {v: k for k, v in namespaces.items()}
 prefixes["http://www.w3.org/1998/Math/MathML"] = "math"


--- a/libs/html5lib/filters/sanitizer.py
+++ b/libs/html5lib/filters/sanitizer.py
@ -1,6 +1,15 @@
+"""Deprecated from html5lib 1.1.
+
+See `here <https://github.com/html5lib/html5lib-python/issues/443>`_ for
+information about its deprecation; `Bleach <https://github.com/mozilla/bleach>`_
+is recommended as a replacement. Please let us know in the aforementioned issue
+if Bleach is unsuitable for your needs.
+
+"""
 from __future__ import absolute_import, division, unicode_literals

 import re
+import warnings
 from xml.sax.saxutils import escape, unescape

 from six.moves import urllib_parse as urlparse
@ -11,6 +20,14 @@ from ..constants import namespaces, prefixes
 __all__ = ["Filter"]


+_deprecation_msg = (
+    "html5lib's sanitizer is deprecated; see " +
+    "https://github.com/html5lib/html5lib-python/issues/443 and please let " +
+    "us know if Bleach is unsuitable for your needs"
+)
+
+warnings.warn(_deprecation_msg, DeprecationWarning)
+
 allowed_elements = frozenset((
    (namespaces['html'], 'a'),
    (namespaces['html'], 'abbr'),
@ -750,6 +767,9 @@ class Filter(base.Filter):

        """
        super(Filter, self).__init__(source)
+
+        warnings.warn(_deprecation_msg, DeprecationWarning)
+
        self.allowed_elements = allowed_elements
        self.allowed_attributes = allowed_attributes
        self.allowed_css_properties = allowed_css_properties
--- a/libs/html5lib/html5parser.py
+++ b/libs/html5lib/html5parser.py
@ -2,7 +2,6 @@ from __future__ import absolute_import, division, unicode_literals
 from six import with_metaclass, viewkeys

 import types
-from collections import OrderedDict

 from . import _inputstream
 from . import _tokenizer
@ -119,8 +118,8 @@ class HTMLParser(object):
        self.tree = tree(namespaceHTMLElements)
        self.errors = []

-        self.phases = dict([(name, cls(self, self.tree)) for name, cls in
-                            getPhases(debug).items()])
+        self.phases = {name: cls(self, self.tree) for name, cls in
+                       getPhases(debug).items()}

    def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs):

@ -202,7 +201,7 @@ class HTMLParser(object):
        DoctypeToken = tokenTypes["Doctype"]
        ParseErrorToken = tokenTypes["ParseError"]

-        for token in self.normalizedTokens():
+        for token in self.tokenizer:
            prev_token = None
            new_token = token
            while new_token is not None:
@ -260,10 +259,6 @@ class HTMLParser(object):
            if reprocess:
                assert self.phase not in phases

-    def normalizedTokens(self):
-        for token in self.tokenizer:
-            yield self.normalizeToken(token)
-
    def parse(self, stream, *args, **kwargs):
        """Parse a HTML document into a well-formed tree

@ -325,17 +320,6 @@ class HTMLParser(object):
        if self.strict:
            raise ParseError(E[errorcode] % datavars)

-    def normalizeToken(self, token):
-        # HTML5 specific normalizations to the token stream
-        if token["type"] == tokenTypes["StartTag"]:
-            raw = token["data"]
-            token["data"] = OrderedDict(raw)
-            if len(raw) > len(token["data"]):
-                # we had some duplicated attribute, fix so first wins
-                token["data"].update(raw[::-1])
-
-        return token
-
    def adjustMathMLAttributes(self, token):
        adjust_attributes(token, adjustMathMLAttributes)

@ -413,16 +397,12 @@ class HTMLParser(object):
 def getPhases(debug):
    def log(function):
        """Logger that records which phase processes each token"""
-        type_names = dict((value, key) for key, value in
-                          tokenTypes.items())
+        type_names = {value: key for key, value in tokenTypes.items()}

        def wrapped(self, *args, **kwargs):
            if function.__name__.startswith("process") and len(args) > 0:
                token = args[0]
-                try:
-                    info = {"type": type_names[token['type']]}
-                except:
-                    raise
+                info = {"type": type_names[token['type']]}
                if token['type'] in tagTokenTypes:
                    info["name"] = token['name']

@ -446,10 +426,13 @@ def getPhases(debug):
    class Phase(with_metaclass(getMetaclass(debug, log))):
        """Base class for helper object that implements each phase of processing
        """
+        __slots__ = ("parser", "tree", "__startTagCache", "__endTagCache")

        def __init__(self, parser, tree):
            self.parser = parser
            self.tree = tree
+            self.__startTagCache = {}
+            self.__endTagCache = {}

        def processEOF(self):
            raise NotImplementedError
@ -469,7 +452,21 @@ def getPhases(debug):
            self.tree.insertText(token["data"])

        def processStartTag(self, token):
-            return self.startTagHandler[token["name"]](token)
+            # Bound handlers are memoised on the phase itself; caching inside
+            # BoundMethodDispatcher would need a reference cycle back to the Phase,
+            # which costs noticeable GC time (CPython 2.7, 3.8) on many short inputs.
+            name = token["name"]
+            cache = self.__startTagCache
+            # A membership test beats try/except KeyError when cache hits are rare.
+            if name in cache:
+                return cache[name](token)
+            handlers = self.startTagHandler
+            func = cache[name] = handlers[name]
+            # Keep the cache near the dispatcher's size in case of many unknown tags;
+            # eviction is FIFO on Python >= 3.7 and arbitrary on older versions.
+            while len(cache) > len(handlers) * 1.1:
+                cache.pop(next(iter(cache)))
+            return func(token)

        def startTagHtml(self, token):
            if not self.parser.firstStartTag and token["name"] == "html":
@ -482,9 +479,25 @@ def getPhases(debug):
            self.parser.firstStartTag = False

        def processEndTag(self, token):
-            return self.endTagHandler[token["name"]](token)
+            # Bound handlers are memoised on the phase itself; caching inside
+            # BoundMethodDispatcher would need a reference cycle back to the Phase,
+            # which costs noticeable GC time (CPython 2.7, 3.8) on many short inputs.
+            name = token["name"]
+            cache = self.__endTagCache
+            # A membership test beats try/except KeyError when cache hits are rare.
+            if name in cache:
+                return cache[name](token)
+            handlers = self.endTagHandler
+            func = cache[name] = handlers[name]
+            # Keep the cache near the dispatcher's size in case of many unknown tags;
+            # eviction is FIFO on Python >= 3.7 and arbitrary on older versions.
+            while len(cache) > len(handlers) * 1.1:
+                cache.pop(next(iter(cache)))
+            return func(token)

    class InitialPhase(Phase):
+        __slots__ = tuple()
+
        def processSpaceCharacters(self, token):
            pass

@ -613,6 +626,8 @@ def getPhases(debug):
            return True

    class BeforeHtmlPhase(Phase):
+        __slots__ = tuple()
+
        # helper methods
        def insertHtmlElement(self):
            self.tree.insertRoot(impliedTagToken("html", "StartTag"))
@ -648,19 +663,7 @@ def getPhases(debug):
                return token

    class BeforeHeadPhase(Phase):
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
-
-            self.startTagHandler = _utils.MethodDispatcher([
-                ("html", self.startTagHtml),
-                ("head", self.startTagHead)
-            ])
-            self.startTagHandler.default = self.startTagOther
-
-            self.endTagHandler = _utils.MethodDispatcher([
-                (("head", "body", "html", "br"), self.endTagImplyHead)
-            ])
-            self.endTagHandler.default = self.endTagOther
+        __slots__ = tuple()

        def processEOF(self):
            self.startTagHead(impliedTagToken("head", "StartTag"))
@ -693,28 +696,19 @@ def getPhases(debug):
            self.parser.parseError("end-tag-after-implied-root",
                                   {"name": token["name"]})

+        startTagHandler = _utils.MethodDispatcher([
+            ("html", startTagHtml),
+            ("head", startTagHead)
+        ])
+        startTagHandler.default = startTagOther
+
+        endTagHandler = _utils.MethodDispatcher([
+            (("head", "body", "html", "br"), endTagImplyHead)
+        ])
+        endTagHandler.default = endTagOther
+
    class InHeadPhase(Phase):
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
-
-            self.startTagHandler = _utils.MethodDispatcher([
-                ("html", self.startTagHtml),
-                ("title", self.startTagTitle),
-                (("noframes", "style"), self.startTagNoFramesStyle),
-                ("noscript", self.startTagNoscript),
-                ("script", self.startTagScript),
-                (("base", "basefont", "bgsound", "command", "link"),
-                 self.startTagBaseLinkCommand),
-                ("meta", self.startTagMeta),
-                ("head", self.startTagHead)
-            ])
-            self.startTagHandler.default = self.startTagOther
-
-            self.endTagHandler = _utils.MethodDispatcher([
-                ("head", self.endTagHead),
-                (("br", "html", "body"), self.endTagHtmlBodyBr)
-            ])
-            self.endTagHandler.default = self.endTagOther
+        __slots__ = tuple()

        # the real thing
        def processEOF(self):
@ -796,22 +790,27 @@ def getPhases(debug):
        def anythingElse(self):
            self.endTagHead(impliedTagToken("head"))

-    class InHeadNoscriptPhase(Phase):
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
+        startTagHandler = _utils.MethodDispatcher([
+            ("html", startTagHtml),
+            ("title", startTagTitle),
+            (("noframes", "style"), startTagNoFramesStyle),
+            ("noscript", startTagNoscript),
+            ("script", startTagScript),
+            (("base", "basefont", "bgsound", "command", "link"),
+             startTagBaseLinkCommand),
+            ("meta", startTagMeta),
+            ("head", startTagHead)
+        ])
+        startTagHandler.default = startTagOther
+
+        endTagHandler = _utils.MethodDispatcher([
+            ("head", endTagHead),
+            (("br", "html", "body"), endTagHtmlBodyBr)
+        ])
+        endTagHandler.default = endTagOther

-            self.startTagHandler = _utils.MethodDispatcher([
-                ("html", self.startTagHtml),
-                (("basefont", "bgsound", "link", "meta", "noframes", "style"), self.startTagBaseLinkCommand),
-                (("head", "noscript"), self.startTagHeadNoscript),
-            ])
-            self.startTagHandler.default = self.startTagOther
-
-            self.endTagHandler = _utils.MethodDispatcher([
-                ("noscript", self.endTagNoscript),
-                ("br", self.endTagBr),
-            ])
-            self.endTagHandler.default = self.endTagOther
+    class InHeadNoscriptPhase(Phase):
+        __slots__ = tuple()

        def processEOF(self):
            self.parser.parseError("eof-in-head-noscript")
@ -860,23 +859,21 @@ def getPhases(debug):
            # Caller must raise parse error first!
            self.endTagNoscript(impliedTagToken("noscript"))

+        startTagHandler = _utils.MethodDispatcher([
+            ("html", startTagHtml),
+            (("basefont", "bgsound", "link", "meta", "noframes", "style"), startTagBaseLinkCommand),
+            (("head", "noscript"), startTagHeadNoscript),
+        ])
+        startTagHandler.default = startTagOther
+
+        endTagHandler = _utils.MethodDispatcher([
+            ("noscript", endTagNoscript),
+            ("br", endTagBr),
+        ])
+        endTagHandler.default = endTagOther
+
    class AfterHeadPhase(Phase):
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
-
-            self.startTagHandler = _utils.MethodDispatcher([
-                ("html", self.startTagHtml),
-                ("body", self.startTagBody),
-                ("frameset", self.startTagFrameset),
-                (("base", "basefont", "bgsound", "link", "meta", "noframes", "script",
-                  "style", "title"),
-                 self.startTagFromHead),
-                ("head", self.startTagHead)
-            ])
-            self.startTagHandler.default = self.startTagOther
-            self.endTagHandler = _utils.MethodDispatcher([(("body", "html", "br"),
-                                                           self.endTagHtmlBodyBr)])
-            self.endTagHandler.default = self.endTagOther
+        __slots__ = tuple()

        def processEOF(self):
            self.anythingElse()
@ -927,80 +924,30 @@ def getPhases(debug):
            self.parser.phase = self.parser.phases["inBody"]
            self.parser.framesetOK = True

+        startTagHandler = _utils.MethodDispatcher([
+            ("html", startTagHtml),
+            ("body", startTagBody),
+            ("frameset", startTagFrameset),
+            (("base", "basefont", "bgsound", "link", "meta", "noframes", "script",
+              "style", "title"),
+             startTagFromHead),
+            ("head", startTagHead)
+        ])
+        startTagHandler.default = startTagOther
+        endTagHandler = _utils.MethodDispatcher([(("body", "html", "br"),
+                                                  endTagHtmlBodyBr)])
+        endTagHandler.default = endTagOther
+
    class InBodyPhase(Phase):
        # http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody
        # the really-really-really-very crazy mode
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
+        __slots__ = ("processSpaceCharacters",)

+        def __init__(self, *args, **kwargs):
+            super(InBodyPhase, self).__init__(*args, **kwargs)
            # Set this to the default handler
            self.processSpaceCharacters = self.processSpaceCharactersNonPre

-            self.startTagHandler = _utils.MethodDispatcher([
-                ("html", self.startTagHtml),
-                (("base", "basefont", "bgsound", "command", "link", "meta",
-                  "script", "style", "title"),
-                 self.startTagProcessInHead),
-                ("body", self.startTagBody),
-                ("frameset", self.startTagFrameset),
-                (("address", "article", "aside", "blockquote", "center", "details",
-                  "dir", "div", "dl", "fieldset", "figcaption", "figure",
-                  "footer", "header", "hgroup", "main", "menu", "nav", "ol", "p",
-                  "section", "summary", "ul"),
-                 self.startTagCloseP),
-                (headingElements, self.startTagHeading),
-                (("pre", "listing"), self.startTagPreListing),
-                ("form", self.startTagForm),
-                (("li", "dd", "dt"), self.startTagListItem),
-                ("plaintext", self.startTagPlaintext),
-                ("a", self.startTagA),
-                (("b", "big", "code", "em", "font", "i", "s", "small", "strike",
-                  "strong", "tt", "u"), self.startTagFormatting),
-                ("nobr", self.startTagNobr),
-                ("button", self.startTagButton),
-                (("applet", "marquee", "object"), self.startTagAppletMarqueeObject),
-                ("xmp", self.startTagXmp),
-                ("table", self.startTagTable),
-                (("area", "br", "embed", "img", "keygen", "wbr"),
-                 self.startTagVoidFormatting),
-                (("param", "source", "track"), self.startTagParamSource),
-                ("input", self.startTagInput),
-                ("hr", self.startTagHr),
-                ("image", self.startTagImage),
-                ("isindex", self.startTagIsIndex),
-                ("textarea", self.startTagTextarea),
-                ("iframe", self.startTagIFrame),
-                ("noscript", self.startTagNoscript),
-                (("noembed", "noframes"), self.startTagRawtext),
-                ("select", self.startTagSelect),
-                (("rp", "rt"), self.startTagRpRt),
-                (("option", "optgroup"), self.startTagOpt),
-                (("math"), self.startTagMath),
-                (("svg"), self.startTagSvg),
-                (("caption", "col", "colgroup", "frame", "head",
-                  "tbody", "td", "tfoot", "th", "thead",
-                  "tr"), self.startTagMisplaced)
-            ])
-            self.startTagHandler.default = self.startTagOther
-
-            self.endTagHandler = _utils.MethodDispatcher([
-                ("body", self.endTagBody),
-                ("html", self.endTagHtml),
-                (("address", "article", "aside", "blockquote", "button", "center",
-                  "details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure",
-                  "footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre",
-                  "section", "summary", "ul"), self.endTagBlock),
-                ("form", self.endTagForm),
-                ("p", self.endTagP),
-                (("dd", "dt", "li"), self.endTagListItem),
-                (headingElements, self.endTagHeading),
-                (("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
-                  "strike", "strong", "tt", "u"), self.endTagFormatting),
-                (("applet", "marquee", "object"), self.endTagAppletMarqueeObject),
-                ("br", self.endTagBr),
-            ])
-            self.endTagHandler.default = self.endTagOther
-
        def isMatchingFormattingElement(self, node1, node2):
            return (node1.name == node2.name and
                    node1.namespace == node2.namespace and
@ -1650,14 +1597,73 @@ def getPhases(debug):
                        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
                        break

+        startTagHandler = _utils.MethodDispatcher([
+            ("html", Phase.startTagHtml),
+            (("base", "basefont", "bgsound", "command", "link", "meta",
+              "script", "style", "title"),
+             startTagProcessInHead),
+            ("body", startTagBody),
+            ("frameset", startTagFrameset),
+            (("address", "article", "aside", "blockquote", "center", "details",
+              "dir", "div", "dl", "fieldset", "figcaption", "figure",
+              "footer", "header", "hgroup", "main", "menu", "nav", "ol", "p",
+              "section", "summary", "ul"),
+             startTagCloseP),
+            (headingElements, startTagHeading),
+            (("pre", "listing"), startTagPreListing),
+            ("form", startTagForm),
+            (("li", "dd", "dt"), startTagListItem),
+            ("plaintext", startTagPlaintext),
+            ("a", startTagA),
+            (("b", "big", "code", "em", "font", "i", "s", "small", "strike",
+              "strong", "tt", "u"), startTagFormatting),
+            ("nobr", startTagNobr),
+            ("button", startTagButton),
+            (("applet", "marquee", "object"), startTagAppletMarqueeObject),
+            ("xmp", startTagXmp),
+            ("table", startTagTable),
+            (("area", "br", "embed", "img", "keygen", "wbr"),
+             startTagVoidFormatting),
+            (("param", "source", "track"), startTagParamSource),
+            ("input", startTagInput),
+            ("hr", startTagHr),
+            ("image", startTagImage),
+            ("isindex", startTagIsIndex),
+            ("textarea", startTagTextarea),
+            ("iframe", startTagIFrame),
+            ("noscript", startTagNoscript),
+            (("noembed", "noframes"), startTagRawtext),
+            ("select", startTagSelect),
+            (("rp", "rt"), startTagRpRt),
+            (("option", "optgroup"), startTagOpt),
+            (("math"), startTagMath),
+            (("svg"), startTagSvg),
+            (("caption", "col", "colgroup", "frame", "head",
+              "tbody", "td", "tfoot", "th", "thead",
+              "tr"), startTagMisplaced)
+        ])
+        startTagHandler.default = startTagOther
+
+        endTagHandler = _utils.MethodDispatcher([
+            ("body", endTagBody),
+            ("html", endTagHtml),
+            (("address", "article", "aside", "blockquote", "button", "center",
+              "details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure",
+              "footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre",
+              "section", "summary", "ul"), endTagBlock),
+            ("form", endTagForm),
+            ("p", endTagP),
+            (("dd", "dt", "li"), endTagListItem),
+            (headingElements, endTagHeading),
+            (("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
+              "strike", "strong", "tt", "u"), endTagFormatting),
+            (("applet", "marquee", "object"), endTagAppletMarqueeObject),
+            ("br", endTagBr),
+        ])
+        endTagHandler.default = endTagOther
+
    class TextPhase(Phase):
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
-            self.startTagHandler = _utils.MethodDispatcher([])
-            self.startTagHandler.default = self.startTagOther
-            self.endTagHandler = _utils.MethodDispatcher([
-                ("script", self.endTagScript)])
-            self.endTagHandler.default = self.endTagOther
+        __slots__ = tuple()

        def processCharacters(self, token):
            self.tree.insertText(token["data"])
@ -1683,30 +1689,15 @@ def getPhases(debug):
            self.tree.openElements.pop()
            self.parser.phase = self.parser.originalPhase

+        startTagHandler = _utils.MethodDispatcher([])
+        startTagHandler.default = startTagOther
+        endTagHandler = _utils.MethodDispatcher([
+            ("script", endTagScript)])
+        endTagHandler.default = endTagOther
+
    class InTablePhase(Phase):
        # http://www.whatwg.org/specs/web-apps/current-work/#in-table
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
-            self.startTagHandler = _utils.MethodDispatcher([
-                ("html", self.startTagHtml),
-                ("caption", self.startTagCaption),
-                ("colgroup", self.startTagColgroup),
-                ("col", self.startTagCol),
-                (("tbody", "tfoot", "thead"), self.startTagRowGroup),
-                (("td", "th", "tr"), self.startTagImplyTbody),
-                ("table", self.startTagTable),
-                (("style", "script"), self.startTagStyleScript),
-                ("input", self.startTagInput),
-                ("form", self.startTagForm)
-            ])
-            self.startTagHandler.default = self.startTagOther
-
-            self.endTagHandler = _utils.MethodDispatcher([
-                ("table", self.endTagTable),
-                (("body", "caption", "col", "colgroup", "html", "tbody", "td",
-                  "tfoot", "th", "thead", "tr"), self.endTagIgnore)
-            ])
-            self.endTagHandler.default = self.endTagOther
+        __slots__ = tuple()

        # helper methods
        def clearStackToTableContext(self):
@ -1828,9 +1819,32 @@ def getPhases(debug):
            self.parser.phases["inBody"].processEndTag(token)
            self.tree.insertFromTable = False

+        startTagHandler = _utils.MethodDispatcher([
+            ("html", Phase.startTagHtml),
+            ("caption", startTagCaption),
+            ("colgroup", startTagColgroup),
+            ("col", startTagCol),
+            (("tbody", "tfoot", "thead"), startTagRowGroup),
+            (("td", "th", "tr"), startTagImplyTbody),
+            ("table", startTagTable),
+            (("style", "script"), startTagStyleScript),
+            ("input", startTagInput),
+            ("form", startTagForm)
+        ])
+        startTagHandler.default = startTagOther
+
+        endTagHandler = _utils.MethodDispatcher([
+            ("table", endTagTable),
+            (("body", "caption", "col", "colgroup", "html", "tbody", "td",
+              "tfoot", "th", "thead", "tr"), endTagIgnore)
+        ])
+        endTagHandler.default = endTagOther
+
    class InTableTextPhase(Phase):
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
+        __slots__ = ("originalPhase", "characterTokens")
+
+        def __init__(self, *args, **kwargs):
+            super(InTableTextPhase, self).__init__(*args, **kwargs)
            self.originalPhase = None
            self.characterTokens = []

@ -1875,23 +1889,7 @@ def getPhases(debug):

    class InCaptionPhase(Phase):
        # http://www.whatwg.org/specs/web-apps/current-work/#in-caption
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
-
-            self.startTagHandler = _utils.MethodDispatcher([
-                ("html", self.startTagHtml),
-                (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
-                  "thead", "tr"), self.startTagTableElement)
-            ])
-            self.startTagHandler.default = self.startTagOther
-
-            self.endTagHandler = _utils.MethodDispatcher([
-                ("caption", self.endTagCaption),
-                ("table", self.endTagTable),
-                (("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th",
-                  "thead", "tr"), self.endTagIgnore)
-            ])
-            self.endTagHandler.default = self.endTagOther
+        __slots__ = tuple()

        def ignoreEndTagCaption(self):
            return not self.tree.elementInScope("caption", variant="table")
@ -1944,23 +1942,24 @@ def getPhases(debug):
        def endTagOther(self, token):
            return self.parser.phases["inBody"].processEndTag(token)

+        startTagHandler = _utils.MethodDispatcher([
+            ("html", Phase.startTagHtml),
+            (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
+              "thead", "tr"), startTagTableElement)
+        ])
+        startTagHandler.default = startTagOther
+
+        endTagHandler = _utils.MethodDispatcher([
+            ("caption", endTagCaption),
+            ("table", endTagTable),
+            (("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th",
+              "thead", "tr"), endTagIgnore)
+        ])
+        endTagHandler.default = endTagOther
+
    class InColumnGroupPhase(Phase):
        # http://www.whatwg.org/specs/web-apps/current-work/#in-column
-
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
-
-            self.startTagHandler = _utils.MethodDispatcher([
-                ("html", self.startTagHtml),
-                ("col", self.startTagCol)
-            ])
-            self.startTagHandler.default = self.startTagOther
-
-            self.endTagHandler = _utils.MethodDispatcher([
-                ("colgroup", self.endTagColgroup),
-                ("col", self.endTagCol)
-            ])
-            self.endTagHandler.default = self.endTagOther
+        __slots__ = tuple()

        def ignoreEndTagColgroup(self):
            return self.tree.openElements[-1].name == "html"
@ -2010,26 +2009,21 @@ def getPhases(debug):
            if not ignoreEndTag:
                return token

+        startTagHandler = _utils.MethodDispatcher([
+            ("html", Phase.startTagHtml),
+            ("col", startTagCol)
+        ])
+        startTagHandler.default = startTagOther
+
+        endTagHandler = _utils.MethodDispatcher([
+            ("colgroup", endTagColgroup),
+            ("col", endTagCol)
+        ])
+        endTagHandler.default = endTagOther
+
    class InTableBodyPhase(Phase):
        # http://www.whatwg.org/specs/web-apps/current-work/#in-table0
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
-            self.startTagHandler = _utils.MethodDispatcher([
-                ("html", self.startTagHtml),
-                ("tr", self.startTagTr),
-                (("td", "th"), self.startTagTableCell),
-                (("caption", "col", "colgroup", "tbody", "tfoot", "thead"),
-                 self.startTagTableOther)
-            ])
-            self.startTagHandler.default = self.startTagOther
-
-            self.endTagHandler = _utils.MethodDispatcher([
-                (("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
-                ("table", self.endTagTable),
-                (("body", "caption", "col", "colgroup", "html", "td", "th",
-                  "tr"), self.endTagIgnore)
-            ])
-            self.endTagHandler.default = self.endTagOther
+        __slots__ = tuple()

        # helper methods
        def clearStackToTableBodyContext(self):
@ -2108,26 +2102,26 @@ def getPhases(debug):
        def endTagOther(self, token):
            return self.parser.phases["inTable"].processEndTag(token)

+        startTagHandler = _utils.MethodDispatcher([
+            ("html", Phase.startTagHtml),
+            ("tr", startTagTr),
+            (("td", "th"), startTagTableCell),
+            (("caption", "col", "colgroup", "tbody", "tfoot", "thead"),
+             startTagTableOther)
+        ])
+        startTagHandler.default = startTagOther
+
+        endTagHandler = _utils.MethodDispatcher([
+            (("tbody", "tfoot", "thead"), endTagTableRowGroup),
+            ("table", endTagTable),
+            (("body", "caption", "col", "colgroup", "html", "td", "th",
+              "tr"), endTagIgnore)
+        ])
+        endTagHandler.default = endTagOther
+
    class InRowPhase(Phase):
        # http://www.whatwg.org/specs/web-apps/current-work/#in-row
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
-            self.startTagHandler = _utils.MethodDispatcher([
-                ("html", self.startTagHtml),
-                (("td", "th"), self.startTagTableCell),
-                (("caption", "col", "colgroup", "tbody", "tfoot", "thead",
-                  "tr"), self.startTagTableOther)
-            ])
-            self.startTagHandler.default = self.startTagOther
-
-            self.endTagHandler = _utils.MethodDispatcher([
-                ("tr", self.endTagTr),
-                ("table", self.endTagTable),
-                (("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
-                (("body", "caption", "col", "colgroup", "html", "td", "th"),
-                 self.endTagIgnore)
-            ])
-            self.endTagHandler.default = self.endTagOther
+        __slots__ = tuple()

        # helper methods (XXX unify this with other table helper methods)
        def clearStackToTableRowContext(self):
@ -2197,23 +2191,26 @@ def getPhases(debug):
        def endTagOther(self, token):
            return self.parser.phases["inTable"].processEndTag(token)

+        startTagHandler = _utils.MethodDispatcher([
+            ("html", Phase.startTagHtml),
+            (("td", "th"), startTagTableCell),
+            (("caption", "col", "colgroup", "tbody", "tfoot", "thead",
+              "tr"), startTagTableOther)
+        ])
+        startTagHandler.default = startTagOther
+
+        endTagHandler = _utils.MethodDispatcher([
+            ("tr", endTagTr),
+            ("table", endTagTable),
+            (("tbody", "tfoot", "thead"), endTagTableRowGroup),
+            (("body", "caption", "col", "colgroup", "html", "td", "th"),
+             endTagIgnore)
+        ])
+        endTagHandler.default = endTagOther
+
    class InCellPhase(Phase):
        # http://www.whatwg.org/specs/web-apps/current-work/#in-cell
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
-            self.startTagHandler = _utils.MethodDispatcher([
-                ("html", self.startTagHtml),
-                (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
-                  "thead", "tr"), self.startTagTableOther)
-            ])
-            self.startTagHandler.default = self.startTagOther
-
-            self.endTagHandler = _utils.MethodDispatcher([
-                (("td", "th"), self.endTagTableCell),
-                (("body", "caption", "col", "colgroup", "html"), self.endTagIgnore),
-                (("table", "tbody", "tfoot", "thead", "tr"), self.endTagImply)
-            ])
-            self.endTagHandler.default = self.endTagOther
+        __slots__ = tuple()

        # helper
        def closeCell(self):
@ -2273,26 +2270,22 @@ def getPhases(debug):
        def endTagOther(self, token):
            return self.parser.phases["inBody"].processEndTag(token)

+        startTagHandler = _utils.MethodDispatcher([
+            ("html", Phase.startTagHtml),
+            (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
+              "thead", "tr"), startTagTableOther)
+        ])
+        startTagHandler.default = startTagOther
+
+        endTagHandler = _utils.MethodDispatcher([
+            (("td", "th"), endTagTableCell),
+            (("body", "caption", "col", "colgroup", "html"), endTagIgnore),
+            (("table", "tbody", "tfoot", "thead", "tr"), endTagImply)
+        ])
+        endTagHandler.default = endTagOther
+
    class InSelectPhase(Phase):
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
-
-            self.startTagHandler = _utils.MethodDispatcher([
-                ("html", self.startTagHtml),
-                ("option", self.startTagOption),
-                ("optgroup", self.startTagOptgroup),
-                ("select", self.startTagSelect),
-                (("input", "keygen", "textarea"), self.startTagInput),
-                ("script", self.startTagScript)
-            ])
-            self.startTagHandler.default = self.startTagOther
-
-            self.endTagHandler = _utils.MethodDispatcher([
-                ("option", self.endTagOption),
-                ("optgroup", self.endTagOptgroup),
-                ("select", self.endTagSelect)
-            ])
-            self.endTagHandler.default = self.endTagOther
+        __slots__ = tuple()

        # http://www.whatwg.org/specs/web-apps/current-work/#in-select
        def processEOF(self):
@ -2373,21 +2366,25 @@ def getPhases(debug):
            self.parser.parseError("unexpected-end-tag-in-select",
                                   {"name": token["name"]})

-    class InSelectInTablePhase(Phase):
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
-
-            self.startTagHandler = _utils.MethodDispatcher([
-                (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
-                 self.startTagTable)
-            ])
-            self.startTagHandler.default = self.startTagOther
+        startTagHandler = _utils.MethodDispatcher([
+            ("html", Phase.startTagHtml),
+            ("option", startTagOption),
+            ("optgroup", startTagOptgroup),
+            ("select", startTagSelect),
+            (("input", "keygen", "textarea"), startTagInput),
+            ("script", startTagScript)
+        ])
+        startTagHandler.default = startTagOther
+
+        endTagHandler = _utils.MethodDispatcher([
+            ("option", endTagOption),
+            ("optgroup", endTagOptgroup),
+            ("select", endTagSelect)
+        ])
+        endTagHandler.default = endTagOther

-            self.endTagHandler = _utils.MethodDispatcher([
-                (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
-                 self.endTagTable)
-            ])
-            self.endTagHandler.default = self.endTagOther
+    class InSelectInTablePhase(Phase):
+        __slots__ = tuple()

        def processEOF(self):
            self.parser.phases["inSelect"].processEOF()
@ -2412,7 +2409,21 @@ def getPhases(debug):
        def endTagOther(self, token):
            return self.parser.phases["inSelect"].processEndTag(token)

+        startTagHandler = _utils.MethodDispatcher([
+            (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
+             startTagTable)
+        ])
+        startTagHandler.default = startTagOther
+
+        endTagHandler = _utils.MethodDispatcher([
+            (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
+             endTagTable)
+        ])
+        endTagHandler.default = endTagOther
+
    class InForeignContentPhase(Phase):
+        __slots__ = tuple()
+
        breakoutElements = frozenset(["b", "big", "blockquote", "body", "br",
                                      "center", "code", "dd", "div", "dl", "dt",
                                      "em", "embed", "h1", "h2", "h3",
@ -2422,9 +2433,6 @@ def getPhases(debug):
                                      "span", "strong", "strike", "sub", "sup",
                                      "table", "tt", "u", "ul", "var"])

-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
-
        def adjustSVGTagNames(self, token):
            replacements = {"altglyph": "altGlyph",
                            "altglyphdef": "altGlyphDef",
@ -2478,7 +2486,7 @@ def getPhases(debug):
            currentNode = self.tree.openElements[-1]
            if (token["name"] in self.breakoutElements or
                (token["name"] == "font" and
-                 set(token["data"].keys()) & set(["color", "face", "size"]))):
+                 set(token["data"].keys()) & {"color", "face", "size"})):
                self.parser.parseError("unexpected-html-element-in-foreign-content",
                                       {"name": token["name"]})
                while (self.tree.openElements[-1].namespace !=
@ -2528,16 +2536,7 @@ def getPhases(debug):
            return new_token

    class AfterBodyPhase(Phase):
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
-
-            self.startTagHandler = _utils.MethodDispatcher([
-                ("html", self.startTagHtml)
-            ])
-            self.startTagHandler.default = self.startTagOther
-
-            self.endTagHandler = _utils.MethodDispatcher([("html", self.endTagHtml)])
-            self.endTagHandler.default = self.endTagOther
+        __slots__ = tuple()

        def processEOF(self):
            # Stop parsing
@ -2574,23 +2573,17 @@ def getPhases(debug):
            self.parser.phase = self.parser.phases["inBody"]
            return token

-    class InFramesetPhase(Phase):
-        # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
+        startTagHandler = _utils.MethodDispatcher([
+            ("html", startTagHtml)
+        ])
+        startTagHandler.default = startTagOther

-            self.startTagHandler = _utils.MethodDispatcher([
-                ("html", self.startTagHtml),
-                ("frameset", self.startTagFrameset),
-                ("frame", self.startTagFrame),
-                ("noframes", self.startTagNoframes)
-            ])
-            self.startTagHandler.default = self.startTagOther
+        endTagHandler = _utils.MethodDispatcher([("html", endTagHtml)])
+        endTagHandler.default = endTagOther

-            self.endTagHandler = _utils.MethodDispatcher([
-                ("frameset", self.endTagFrameset)
-            ])
-            self.endTagHandler.default = self.endTagOther
+    class InFramesetPhase(Phase):
+        # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
+        __slots__ = tuple()

        def processEOF(self):
            if self.tree.openElements[-1].name != "html":
@ -2631,21 +2624,22 @@ def getPhases(debug):
            self.parser.parseError("unexpected-end-tag-in-frameset",
                                   {"name": token["name"]})

-    class AfterFramesetPhase(Phase):
-        # http://www.whatwg.org/specs/web-apps/current-work/#after3
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
+        startTagHandler = _utils.MethodDispatcher([
+            ("html", Phase.startTagHtml),
+            ("frameset", startTagFrameset),
+            ("frame", startTagFrame),
+            ("noframes", startTagNoframes)
+        ])
+        startTagHandler.default = startTagOther

-            self.startTagHandler = _utils.MethodDispatcher([
-                ("html", self.startTagHtml),
-                ("noframes", self.startTagNoframes)
-            ])
-            self.startTagHandler.default = self.startTagOther
+        endTagHandler = _utils.MethodDispatcher([
+            ("frameset", endTagFrameset)
+        ])
+        endTagHandler.default = endTagOther

-            self.endTagHandler = _utils.MethodDispatcher([
-                ("html", self.endTagHtml)
-            ])
-            self.endTagHandler.default = self.endTagOther
+    class AfterFramesetPhase(Phase):
+        # http://www.whatwg.org/specs/web-apps/current-work/#after3
+        __slots__ = tuple()

        def processEOF(self):
            # Stop parsing
@ -2668,14 +2662,19 @@ def getPhases(debug):
            self.parser.parseError("unexpected-end-tag-after-frameset",
                                   {"name": token["name"]})

-    class AfterAfterBodyPhase(Phase):
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
+        startTagHandler = _utils.MethodDispatcher([
+            ("html", Phase.startTagHtml),
+            ("noframes", startTagNoframes)
+        ])
+        startTagHandler.default = startTagOther

-            self.startTagHandler = _utils.MethodDispatcher([
-                ("html", self.startTagHtml)
-            ])
-            self.startTagHandler.default = self.startTagOther
+        endTagHandler = _utils.MethodDispatcher([
+            ("html", endTagHtml)
+        ])
+        endTagHandler.default = endTagOther
+
+    class AfterAfterBodyPhase(Phase):
+        __slots__ = tuple()

        def processEOF(self):
            pass
@ -2706,15 +2705,13 @@ def getPhases(debug):
            self.parser.phase = self.parser.phases["inBody"]
            return token

-    class AfterAfterFramesetPhase(Phase):
-        def __init__(self, parser, tree):
-            Phase.__init__(self, parser, tree)
+        startTagHandler = _utils.MethodDispatcher([
+            ("html", startTagHtml)
+        ])
+        startTagHandler.default = startTagOther

-            self.startTagHandler = _utils.MethodDispatcher([
-                ("html", self.startTagHtml),
-                ("noframes", self.startTagNoFrames)
-            ])
-            self.startTagHandler.default = self.startTagOther
+    class AfterAfterFramesetPhase(Phase):
+        __slots__ = tuple()

        def processEOF(self):
            pass
@ -2741,6 +2738,13 @@ def getPhases(debug):
        def processEndTag(self, token):
            self.parser.parseError("expected-eof-but-got-end-tag",
                                   {"name": token["name"]})
+
+        startTagHandler = _utils.MethodDispatcher([
+            ("html", startTagHtml),
+            ("noframes", startTagNoFrames)
+        ])
+        startTagHandler.default = startTagOther
+
    # pylint:enable=unused-argument

    return {
@ -2774,8 +2778,8 @@ def getPhases(debug):
 def adjust_attributes(token, replacements):
    needs_adjustment = viewkeys(token['data']) & viewkeys(replacements)
    if needs_adjustment:
-        token['data'] = OrderedDict((replacements.get(k, k), v)
-                                    for k, v in token['data'].items())
+        token['data'] = type(token['data'])((replacements.get(k, k), v)
+                                            for k, v in token['data'].items())


 def impliedTagToken(name, type="EndTag", attributes=None,
--- a/libs/html5lib/serializer.py
+++ b/libs/html5lib/serializer.py
@ -274,7 +274,7 @@ class HTMLSerializer(object):
                if token["systemId"]:
                    if token["systemId"].find('"') >= 0:
                        if token["systemId"].find("'") >= 0:
-                            self.serializeError("System identifer contains both single and double quote characters")
+                            self.serializeError("System identifier contains both single and double quote characters")
                        quote_char = "'"
                    else:
                        quote_char = '"'
--- a/libs/html5lib/tests/sanitizer-testdata/tests1.dat
+++ b/libs/html5lib/tests/sanitizer-testdata/tests1.dat
@ -0,0 +1,433 @@
+[
+  {
+    "name": "IE_Comments",
+    "input": "<!--[if gte IE 4]><script>alert('XSS');</script><![endif]-->",
+    "output": ""
+  },
+
+  {
+    "name": "IE_Comments_2",
+    "input": "<![if !IE 5]><script>alert('XSS');</script><![endif]>",
+    "output": "&lt;script&gt;alert('XSS');&lt;/script&gt;"
+  },
+
+  {
+    "name": "allow_colons_in_path_component",
+    "input": "<a href=\"./this:that\">foo</a>",
+    "output": "<a href='./this:that'>foo</a>"
+  },
+
+  {
+    "name": "background_attribute",
+    "input": "<div background=\"javascript:alert('XSS')\"></div>",
+    "output": "<div></div>"
+  },
+
+  {
+    "name": "bgsound",
+    "input": "<bgsound src=\"javascript:alert('XSS');\" />",
+    "output": "&lt;bgsound src=\"javascript:alert('XSS');\"&gt;&lt;/bgsound&gt;"
+  },
+
+  {
+    "name": "div_background_image_unicode_encoded",
+    "input": "<div style=\"background-image:\u00a5\u00a2\u006C\u0028'\u006a\u0061\u00a6\u0061\u00a3\u0063\u00a2\u0069\u00a0\u00a4\u003a\u0061\u006c\u0065\u00a2\u00a4\u0028.1027\u0058.1053\u0053\u0027\u0029'\u0029\">foo</div>",
+    "output": "<div style=''>foo</div>"
+  },
+
+  {
+    "name": "div_expression",
+    "input": "<div style=\"width: expression(alert('XSS'));\">foo</div>",
+    "output": "<div style=''>foo</div>"
+  },
+
+  {
+    "name": "double_open_angle_brackets",
+    "input": "<img src=http://ha.ckers.org/scriptlet.html <",
+    "output": ""
+  },
+
+  {
+    "name": "double_open_angle_brackets_2",
+    "input": "<script src=http://ha.ckers.org/scriptlet.html <",
+    "output": ""
+  },
+
+  {
+    "name": "grave_accents",
+    "input": "<img src=`javascript:alert('XSS')` />",
+    "output": "<img/>"
+  },
+
+  {
+    "name": "img_dynsrc_lowsrc",
+    "input": "<img dynsrc=\"javascript:alert('XSS')\" />",
+    "output": "<img/>"
+  },
+
+  {
+    "name": "img_vbscript",
+    "input": "<img src='vbscript:msgbox(\"XSS\")' />",
+    "output": "<img/>"
+  },
+
+  {
+    "name": "input_image",
+    "input": "<input type=\"image\" src=\"javascript:alert('XSS');\" />",
+    "output": "<input type='image'/>"
+  },
+
+  {
+    "name": "link_stylesheets",
+    "input": "<link rel=\"stylesheet\" href=\"javascript:alert('XSS');\" />",
+    "output": "&lt;link href=\"javascript:alert('XSS');\" rel=\"stylesheet\"&gt;"
+  },
+
+  {
+    "name": "link_stylesheets_2",
+    "input": "<link rel=\"stylesheet\" href=\"http://ha.ckers.org/xss.css\" />",
+    "output": "&lt;link href=\"http://ha.ckers.org/xss.css\" rel=\"stylesheet\"&gt;"
+  },
+
+  {
+    "name": "list_style_image",
+    "input": "<li style=\"list-style-image: url(javascript:alert('XSS'))\">foo</li>",
+    "output": "<li style=''>foo</li>"
+  },
+
+  {
+    "name": "no_closing_script_tags",
+    "input": "<script src=http://ha.ckers.org/xss.js?<b>",
+    "output": "&lt;script src=\"http://ha.ckers.org/xss.js?&amp;lt;b\"&gt;&lt;/script&gt;"
+  },
+
+  {
+    "name": "non_alpha_non_digit",
+    "input": "<script/XSS src=\"http://ha.ckers.org/xss.js\"></script>",
+    "output": "&lt;script src=\"http://ha.ckers.org/xss.js\" xss=\"\"&gt;&lt;/script&gt;"
+  },
+
+  {
+    "name": "non_alpha_non_digit_2",
+    "input": "<a onclick!\\#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>foo</a>",
+    "output": "<a>foo</a>"
+  },
+
+  {
+    "name": "non_alpha_non_digit_3",
+    "input": "<img/src=\"http://ha.ckers.org/xss.js\"/>",
+    "output": "<img src='http://ha.ckers.org/xss.js'/>"
+  },
+
+  {
+    "name": "non_alpha_non_digit_II",
+    "input": "<a href!\\#$%&()*~+-_.,:;?@[/|]^`=alert('XSS')>foo</a>",
+    "output": "<a>foo</a>"
+  },
+
+  {
+    "name": "non_alpha_non_digit_III",
+    "input": "<a/href=\"javascript:alert('XSS');\">foo</a>",
+    "output": "<a>foo</a>"
+  },
+
+  {
+    "name": "platypus",
+    "input": "<a href=\"http://www.ragingplatypus.com/\" style=\"display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;\">never trust your upstream platypus</a>",
+    "output": "<a href='http://www.ragingplatypus.com/' style='display: block; width: 100%; height: 100%; background-color: black; background-x: center; background-y: center;'>never trust your upstream platypus</a>"
+  },
+
+  {
+    "name": "protocol_resolution_in_script_tag",
+    "input": "<script src=//ha.ckers.org/.j></script>",
+    "output": "&lt;script src=\"//ha.ckers.org/.j\"&gt;&lt;/script&gt;"
+  },
+
+  {
+    "name": "should_allow_anchors",
+    "input": "<a href='foo' onclick='bar'><script>baz</script></a>",
+    "output": "<a href='foo'>&lt;script&gt;baz&lt;/script&gt;</a>"
+  },
+
+  {
+    "name": "should_allow_image_alt_attribute",
+    "input": "<img alt='foo' onclick='bar' />",
+    "output": "<img alt='foo'/>"
+  },
+
+  {
+    "name": "should_allow_image_height_attribute",
+    "input": "<img height='foo' onclick='bar' />",
+    "output": "<img height='foo'/>"
+  },
+
+  {
+    "name": "should_allow_image_src_attribute",
+    "input": "<img src='foo' onclick='bar' />",
+    "output": "<img src='foo'/>"
+  },
+
+  {
+    "name": "should_allow_image_width_attribute",
+    "input": "<img width='foo' onclick='bar' />",
+    "output": "<img width='foo'/>"
+  },
+
+  {
+    "name": "should_handle_blank_text",
+    "input": "",
+    "output": ""
+  },
+
+  {
+    "name": "should_handle_malformed_image_tags",
+    "input": "<img \"\"\"><script>alert(\"XSS\")</script>\">",
+    "output": "<img/>&lt;script&gt;alert(\"XSS\")&lt;/script&gt;\"&gt;"
+  },
+
+  {
+    "name": "should_handle_non_html",
+    "input": "abc",
+    "output": "abc"
+  },
+
+  {
+    "name": "should_not_fall_for_ridiculous_hack",
+    "input": "<img\nsrc\n=\n\"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n\"\n />",
+    "output": "<img/>"
+  },
+
+  {
+    "name": "should_not_fall_for_xss_image_hack_0",
+    "input": "<img src=\"javascript:alert('XSS');\" />",
+    "output": "<img/>"
+  },
+
+  {
+    "name": "should_not_fall_for_xss_image_hack_1",
+    "input": "<img src=javascript:alert('XSS') />",
+    "output": "<img/>"
+  },
+
+  {
+    "name": "should_not_fall_for_xss_image_hack_10",
+    "input": "<img src=\"jav&#x0A;ascript:alert('XSS');\" />",
+    "output": "<img/>"
+  },
+
+  {
+    "name": "should_not_fall_for_xss_image_hack_11",
+    "input": "<img src=\"jav&#x0D;ascript:alert('XSS');\" />",
+    "output": "<img/>"
+  },
+
+  {
+    "name": "should_not_fall_for_xss_image_hack_12",
+    "input": "<img src=\" &#14;  javascript:alert('XSS');\" />",
+    "output": "<img/>"
+  },
+
+  {
+    "name": "should_not_fall_for_xss_image_hack_13",
+    "input": "<img src=\"&#x20;javascript:alert('XSS');\" />",
+    "output": "<img/>"
+  },
+
+  {
+    "name": "should_not_fall_for_xss_image_hack_14",
+    "input": "<img src=\"&#xA0;javascript:alert('XSS');\" />",
+    "output": "<img/>"
+  },
+
+  {
+    "name": "should_not_fall_for_xss_image_hack_2",
+    "input": "<img src=\"JaVaScRiPt:alert('XSS')\" />",
+    "output": "<img/>"
+  },
+
+  {
+    "name": "should_not_fall_for_xss_image_hack_3",
+    "input": "<img src='javascript:alert(&quot;XSS&quot;)' />",
+    "output": "<img/>"
+  },
+
+  {
+    "name": "should_not_fall_for_xss_image_hack_4",
+    "input": "<img src='javascript:alert(String.fromCharCode(88,83,83))' />",
+    "output": "<img/>"
+  },
+
+  {
+    "name": "should_not_fall_for_xss_image_hack_5",
+    "input": "<img src='&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;' />",
+    "output": "<img/>"
+  },
+
+  {
+    "name": "should_not_fall_for_xss_image_hack_6",
+    "input": "<img src='&#0000106;&#0000097;&#0000118;&#0000097;&#0000115;&#0000099;&#0000114;&#0000105;&#0000112;&#0000116;&#0000058;&#0000097;&#0000108;&#0000101;&#0000114;&#0000116;&#0000040;&#0000039;&#0000088;&#0000083;&#0000083;&#0000039;&#0000041' />",
+    "output": "<img/>"
+  },
+
+  {
+    "name": "should_not_fall_for_xss_image_hack_7",
+    "input": "<img src='&#x6A;&#x61;&#x76;&#x61;&#x73;&#x63;&#x72;&#x69;&#x70;&#x74;&#x3A;&#x61;&#x6C;&#x65;&#x72;&#x74;&#x28;&#x27;&#x58;&#x53;&#x53;&#x27;&#x29' />",
+    "output": "<img/>"
+  },
+
+  {
+    "name": "should_not_fall_for_xss_image_hack_8",
+    "input": "<img src=\"jav\tascript:alert('XSS');\" />",
+    "output": "<img/>"
+  },
+
+  {
+    "name": "should_not_fall_for_xss_image_hack_9",
+    "input": "<img src=\"jav&#x09;ascript:alert('XSS');\" />",
+    "output": "<img/>"
+  },
+
+  {
+    "name": "should_sanitize_half_open_scripts",
+    "input": "<img src=\"javascript:alert('XSS')\"",
+    "output": ""
+  },
+
+  {
+    "name": "should_sanitize_invalid_script_tag",
+    "input": "<script/XSS SRC=\"http://ha.ckers.org/xss.js\"></script>",
+    "output": "&lt;script src=\"http://ha.ckers.org/xss.js\" xss=\"\"&gt;&lt;/script&gt;"
+  },
+
+  {
+    "name": "should_sanitize_script_tag_with_multiple_open_brackets",
+    "input": "<<script>alert(\"XSS\");//<</script>",
+    "output": "&lt;&lt;script&gt;alert(\"XSS\");//&lt;&lt;/script&gt;"
+  },
+
+  {
+    "name": "should_sanitize_script_tag_with_multiple_open_brackets_2",
+    "input": "<iframe src=http://ha.ckers.org/scriptlet.html\n<",
+    "output": ""
+  },
+
+  {
+    "name": "should_sanitize_tag_broken_up_by_null",
+    "input": "<scr\u0000ipt>alert(\"XSS\")</scr\u0000ipt>",
+    "output": "&lt;scr\ufffdipt&gt;alert(\"XSS\")&lt;/scr\ufffdipt&gt;"
+  },
+
+  {
+    "name": "should_sanitize_unclosed_script",
+    "input": "<script src=http://ha.ckers.org/xss.js?<b>",
+    "output": "&lt;script src=\"http://ha.ckers.org/xss.js?&amp;lt;b\"&gt;&lt;/script&gt;"
+  },
+
+  {
+    "name": "should_strip_href_attribute_in_a_with_bad_protocols",
+    "input": "<a href=\"javascript:XSS\" title=\"1\">boo</a>",
+    "output": "<a title='1'>boo</a>"
+  },
+
+  {
+    "name": "should_strip_href_attribute_in_a_with_bad_protocols_and_whitespace",
+    "input": "<a href=\" javascript:XSS\" title=\"1\">boo</a>",
+    "output": "<a title='1'>boo</a>"
+  },
+
+  {
+    "name": "should_strip_src_attribute_in_img_with_bad_protocols",
+    "input": "<img src=\"javascript:XSS\" title=\"1\">boo</img>",
+    "output": "<img title='1'/>boo"
+  },
+
+  {
+    "name": "should_strip_src_attribute_in_img_with_bad_protocols_and_whitespace",
+    "input": "<img src=\" javascript:XSS\" title=\"1\">boo</img>",
+    "output": "<img title='1'/>boo"
+  },
+
+  {
+    "name": "xml_base",
+    "input": "<div xml:base=\"javascript:alert('XSS');//\">foo</div>",
+    "output": "<div>foo</div>"
+  },
+
+  {
+    "name": "xul",
+    "input": "<p style=\"-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss')\">fubar</p>",
+    "output": "<p style=''>fubar</p>"
+  },
+
+  {
+    "name": "quotes_in_attributes",
+    "input": "<img src='foo' title='\"foo\" bar' />",
+    "output": "<img src='foo' title='\"foo\" bar'/>"
+  },
+
+  {
+    "name": "uri_refs_in_svg_attributes",
+    "input": "<svg><rect fill='url(#foo)' />",
+    "output": "<svg><rect fill='url(#foo)'></rect></svg>"
+  },
+
+  {
+    "name": "absolute_uri_refs_in_svg_attributes",
+    "input": "<svg><rect fill='url(http://bad.com/) #fff' />",
+    "output": "<svg><rect fill='  #fff'></rect></svg>"
+  },
+
+  {
+    "name": "uri_ref_with_space_in svg_attribute",
+    "input": "<svg><rect fill='url(\n#foo)' />",
+    "output": "<svg><rect fill='url(\n#foo)'></rect></svg>"
+  },
+
+  {
+    "name": "absolute_uri_ref_with_space_in svg_attribute",
+    "input": "<svg><rect fill=\"url(\nhttp://bad.com/)\" />",
+    "output": "<svg><rect fill=' '></rect></svg>"
+  },
+
+  {
+    "name": "allow_html5_image_tag",
+    "input": "<image src='foo' />",
+    "output": "<img src='foo'/>"
+  },
+
+  {
+    "name": "style_attr_end_with_nothing",
+    "input": "<div style=\"color: blue\" />",
+    "output": "<div style='color: blue;'></div>"
+  },
+
+  {
+    "name": "style_attr_end_with_space",
+    "input": "<div style=\"color: blue \" />",
+    "output": "<div style='color: blue ;'></div>"
+  },
+
+  {
+    "name": "style_attr_end_with_semicolon",
+    "input": "<div style=\"color: blue;\" />",
+    "output": "<div style='color: blue;'></div>"
+  },
+
+  {
+    "name": "style_attr_end_with_semicolon_space",
+    "input": "<div style=\"color: blue; \" />",
+    "output": "<div style='color: blue;'></div>"
+  },
+  
+  {
+   "name": "attributes_with_embedded_quotes",
+   "input": "<img src=doesntexist.jpg\"'onerror=\"alert(1) />",
+   "output": "<img src='doesntexist.jpg\"&#39;onerror=\"alert(1)'/>"
+  },
+  
+  {
+   "name": "attributes_with_embedded_quotes_II",
+   "input": "<img src=notthere.jpg\"\"onerror=\"alert(2) />",
+   "output": "<img src='notthere.jpg\"\"onerror=\"alert(2)'/>"
+  }
+]
--- a/libs/html5lib/tests/sanitizer.py
+++ b/libs/html5lib/tests/sanitizer.py
@ -27,14 +27,15 @@ class SanitizerTest(pytest.Item):
        expected = self.test["output"]

        parsed = parseFragment(input)
-        serialized = serialize(parsed,
-                               sanitize=True,
-                               omit_optional_tags=False,
-                               use_trailing_solidus=True,
-                               space_before_trailing_solidus=False,
-                               quote_attr_values="always",
-                               quote_char="'",
-                               alphabetical_attributes=True)
+        with pytest.deprecated_call():
+            serialized = serialize(parsed,
+                                   sanitize=True,
+                                   omit_optional_tags=False,
+                                   use_trailing_solidus=True,
+                                   space_before_trailing_solidus=False,
+                                   quote_attr_values="always",
+                                   quote_char="'",
+                                   alphabetical_attributes=True)
        errorMsg = "\n".join(["\n\nInput:", input,
                              "\nExpected:", expected,
                              "\nReceived:", serialized])
--- a/libs/html5lib/tests/serializer-testdata/core.test
+++ b/libs/html5lib/tests/serializer-testdata/core.test
@ -0,0 +1,395 @@
+{
+    "tests": [
+        {
+            "expected": [
+                "<span title='test \"with\" &amp;quot;'>"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "span",
+                    [
+                        {
+                            "namespace": null,
+                            "name": "title",
+                            "value": "test \"with\" &quot;"
+                        }
+                    ]
+                ]
+            ],
+            "description": "proper attribute value escaping"
+        },
+        {
+            "expected": [
+                "<span title=foo>"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "span",
+                    [
+                        {
+                            "namespace": null,
+                            "name": "title",
+                            "value": "foo"
+                        }
+                    ]
+                ]
+            ],
+            "description": "proper attribute value non-quoting"
+        },
+        {
+            "expected": [
+                "<span title=\"foo<bar\">"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "span",
+                    [
+                        {
+                            "namespace": null,
+                            "name": "title",
+                            "value": "foo<bar"
+                        }
+                    ]
+                ]
+            ],
+            "description": "proper attribute value quoting (with <)"
+        },
+        {
+            "expected": [
+                "<span title=\"foo=bar\">"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "span",
+                    [
+                        {
+                            "namespace": null,
+                            "name": "title",
+                            "value": "foo=bar"
+                        }
+                    ]
+                ]
+            ],
+            "description": "proper attribute value quoting (with =)"
+        },
+        {
+            "expected": [
+                "<span title=\"foo>bar\">"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "span",
+                    [
+                        {
+                            "namespace": null,
+                            "name": "title",
+                            "value": "foo>bar"
+                        }
+                    ]
+                ]
+            ],
+            "description": "proper attribute value quoting (with >)"
+        },
+        {
+            "expected": [
+                "<span title='foo\"bar'>"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "span",
+                    [
+                        {
+                            "namespace": null,
+                            "name": "title",
+                            "value": "foo\"bar"
+                        }
+                    ]
+                ]
+            ],
+            "description": "proper attribute value quoting (with \")"
+        },
+        {
+            "expected": [
+                "<span title=\"foo'bar\">"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "span",
+                    [
+                        {
+                            "namespace": null,
+                            "name": "title",
+                            "value": "foo'bar"
+                        }
+                    ]
+                ]
+            ],
+            "description": "proper attribute value quoting (with ')"
+        },
+        {
+            "expected": [
+                "<span title=\"foo'bar&quot;baz\">"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "span",
+                    [
+                        {
+                            "namespace": null,
+                            "name": "title",
+                            "value": "foo'bar\"baz"
+                        }
+                    ]
+                ]
+            ],
+            "description": "proper attribute value quoting (with both \" and ')"
+        },
+        {
+            "expected": [
+                "<span title=\"foo bar\">"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "span",
+                    [
+                        {
+                            "namespace": null,
+                            "name": "title",
+                            "value": "foo bar"
+                        }
+                    ]
+                ]
+            ],
+            "description": "proper attribute value quoting (with space)"
+        },
+        {
+            "expected": [
+                "<span title=\"foo\tbar\">"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "span",
+                    [
+                        {
+                            "namespace": null,
+                            "name": "title",
+                            "value": "foo\tbar"
+                        }
+                    ]
+                ]
+            ],
+            "description": "proper attribute value quoting (with tab)"
+        },
+        {
+            "expected": [
+                "<span title=\"foo\nbar\">"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "span",
+                    [
+                        {
+                            "namespace": null,
+                            "name": "title",
+                            "value": "foo\nbar"
+                        }
+                    ]
+                ]
+            ],
+            "description": "proper attribute value quoting (with LF)"
+        },
+        {
+            "expected": [
+                "<span title=\"foo\rbar\">"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "span",
+                    [
+                        {
+                            "namespace": null,
+                            "name": "title",
+                            "value": "foo\rbar"
+                        }
+                    ]
+                ]
+            ],
+            "description": "proper attribute value quoting (with CR)"
+        },
+        {
+            "expected": [
+                "<span title=\"foo\u000bbar\">"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "span",
+                    [
+                        {
+                            "namespace": null,
+                            "name": "title",
+                            "value": "foo\u000bbar"
+                        }
+                    ]
+                ]
+            ],
+            "description": "proper attribute value quoting (with linetab)"
+        },
+        {
+            "expected": [
+                "<span title=\"foo\fbar\">"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "span",
+                    [
+                        {
+                            "namespace": null,
+                            "name": "title",
+                            "value": "foo\fbar"
+                        }
+                    ]
+                ]
+            ],
+            "description": "proper attribute value quoting (with form feed)"
+        },
+        {
+            "expected": [
+                "<img>"
+            ],
+            "input": [
+                [
+                    "EmptyTag",
+                    "img",
+                    {}
+                ]
+            ],
+            "description": "void element (as EmptyTag token)"
+        },
+        {
+            "expected": [
+                "<!DOCTYPE foo>"
+            ],
+            "input": [
+                [
+                    "Doctype",
+                    "foo"
+                ]
+            ],
+            "description": "doctype in error"
+        },
+        {
+            "expected": [
+                "a&lt;b&gt;c&amp;d"
+            ],
+            "input": [
+                [
+                    "Characters",
+                    "a<b>c&d"
+                ]
+            ],
+            "description": "character data",
+            "options": {
+                "encoding": "utf-8"
+            }
+        },
+        {
+            "expected": [
+                "<script>a<b>c&d"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "script",
+                    {}
+                ],
+                [
+                    "Characters",
+                    "a<b>c&d"
+                ]
+            ],
+            "description": "rcdata"
+        },
+        {
+            "expected": [
+                "<!DOCTYPE HTML>"
+            ],
+            "input": [
+                [
+                    "Doctype",
+                    "HTML"
+                ]
+            ],
+            "description": "doctype"
+        },
+        {
+            "expected": [
+                "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">"
+            ],
+            "input": [
+                [
+                    "Doctype",
+                    "HTML",
+                    "-//W3C//DTD HTML 4.01//EN",
+                    "http://www.w3.org/TR/html4/strict.dtd"
+                ]
+            ],
+            "description": "HTML 4.01 DOCTYPE"
+        },
+        {
+            "expected": [
+                "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\">"
+            ],
+            "input": [
+                [
+                    "Doctype",
+                    "HTML",
+                    "-//W3C//DTD HTML 4.01//EN"
+                ]
+            ],
+            "description": "HTML 4.01 DOCTYPE without system identifier"
+        },
+        {
+            "expected": [
+                "<!DOCTYPE html SYSTEM \"http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd\">"
+            ],
+            "input": [
+                [
+                    "Doctype",
+                    "html",
+                    "",
+                    "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"
+                ]
+            ],
+            "description": "IBM DOCTYPE without public identifier"
+        }
+    ]
+}
--- a/libs/html5lib/tests/serializer-testdata/injectmeta.test
+++ b/libs/html5lib/tests/serializer-testdata/injectmeta.test
@ -0,0 +1,350 @@
+{
+    "tests": [
+        {
+            "expected": [
+                ""
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "head",
+                    {}
+                ],
+                [
+                    "EndTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "head"
+                ]
+            ],
+            "description": "no encoding",
+            "options": {
+                "inject_meta_charset": true
+            }
+        },
+        {
+            "expected": [
+                "<meta charset=utf-8>"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "head",
+                    {}
+                ],
+                [
+                    "EndTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "head"
+                ]
+            ],
+            "description": "empty head",
+            "options": {
+                "encoding": "utf-8",
+                "inject_meta_charset": true
+            }
+        },
+        {
+            "expected": [
+                "<meta charset=utf-8><title>foo</title>"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "head",
+                    {}
+                ],
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "title",
+                    {}
+                ],
+                [
+                    "Characters",
+                    "foo"
+                ],
+                [
+                    "EndTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "title"
+                ],
+                [
+                    "EndTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "head"
+                ]
+            ],
+            "description": "head w/title",
+            "options": {
+                "encoding": "utf-8",
+                "inject_meta_charset": true
+            }
+        },
+        {
+            "expected": [
+                "<meta charset=utf-8>"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "head",
+                    {}
+                ],
+                [
+                    "EmptyTag",
+                    "meta",
+                    [
+                        {
+                            "namespace": null,
+                            "name": "charset",
+                            "value": "ascii"
+                        }
+                    ]
+                ],
+                [
+                    "EndTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "head"
+                ]
+            ],
+            "description": "head w/meta-charset",
+            "options": {
+                "encoding": "utf-8",
+                "inject_meta_charset": true
+            }
+        },
+        {
+            "expected": [
+                "<meta charset=utf-8><meta charset=utf-8>",
+                "<head><meta charset=utf-8><meta charset=ascii>"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "head",
+                    {}
+                ],
+                [
+                    "EmptyTag",
+                    "meta",
+                    [
+                        {
+                            "namespace": null,
+                            "name": "charset",
+                            "value": "ascii"
+                        }
+                    ]
+                ],
+                [
+                    "EmptyTag",
+                    "meta",
+                    [
+                        {
+                            "namespace": null,
+                            "name": "charset",
+                            "value": "ascii"
+                        }
+                    ]
+                ],
+                [
+                    "EndTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "head"
+                ]
+            ],
+            "description": "head w/ two meta-charset",
+            "options": {
+                "encoding": "utf-8",
+                "inject_meta_charset": true
+            }
+        },
+        {
+            "expected": [
+                "<meta charset=utf-8><meta content=noindex name=robots>"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "head",
+                    {}
+                ],
+                [
+                    "EmptyTag",
+                    "meta",
+                    [
+                        {
+                            "namespace": null,
+                            "name": "name",
+                            "value": "robots"
+                        },
+                        {
+                            "namespace": null,
+                            "name": "content",
+                            "value": "noindex"
+                        }
+                    ]
+                ],
+                [
+                    "EndTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "head"
+                ]
+            ],
+            "description": "head w/robots",
+            "options": {
+                "encoding": "utf-8",
+                "inject_meta_charset": true
+            }
+        },
+        {
+            "expected": [
+                "<meta content=noindex name=robots><meta charset=utf-8>"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "head",
+                    {}
+                ],
+                [
+                    "EmptyTag",
+                    "meta",
+                    [
+                        {
+                            "namespace": null,
+                            "name": "name",
+                            "value": "robots"
+                        },
+                        {
+                            "namespace": null,
+                            "name": "content",
+                            "value": "noindex"
+                        }
+                    ]
+                ],
+                [
+                    "EmptyTag",
+                    "meta",
+                    [
+                        {
+                            "namespace": null,
+                            "name": "charset",
+                            "value": "ascii"
+                        }
+                    ]
+                ],
+                [
+                    "EndTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "head"
+                ]
+            ],
+            "description": "head w/robots & charset",
+            "options": {
+                "encoding": "utf-8",
+                "inject_meta_charset": true
+            }
+        },
+        {
+            "expected": [
+                "<meta content=\"text/html; charset=utf-8\" http-equiv=content-type>"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "head",
+                    {}
+                ],
+                [
+                    "EmptyTag",
+                    "meta",
+                    [
+                        {
+                            "namespace": null,
+                            "name": "http-equiv",
+                            "value": "content-type"
+                        },
+                        {
+                            "namespace": null,
+                            "name": "content",
+                            "value": "text/html; charset=ascii"
+                        }
+                    ]
+                ],
+                [
+                    "EndTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "head"
+                ]
+            ],
+            "description": "head w/ charset in http-equiv content-type",
+            "options": {
+                "encoding": "utf-8",
+                "inject_meta_charset": true
+            }
+        },
+        {
+            "expected": [
+                "<meta content=noindex name=robots><meta content=\"text/html; charset=utf-8\" http-equiv=content-type>"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "head",
+                    {}
+                ],
+                [
+                    "EmptyTag",
+                    "meta",
+                    [
+                        {
+                            "namespace": null,
+                            "name": "name",
+                            "value": "robots"
+                        },
+                        {
+                            "namespace": null,
+                            "name": "content",
+                            "value": "noindex"
+                        }
+                    ]
+                ],
+                [
+                    "EmptyTag",
+                    "meta",
+                    [
+                        {
+                            "namespace": null,
+                            "name": "http-equiv",
+                            "value": "content-type"
+                        },
+                        {
+                            "namespace": null,
+                            "name": "content",
+                            "value": "text/html; charset=ascii"
+                        }
+                    ]
+                ],
+                [
+                    "EndTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "head"
+                ]
+            ],
+            "description": "head w/robots & charset in http-equiv content-type",
+            "options": {
+                "encoding": "utf-8",
+                "inject_meta_charset": true
+            }
+        }
+    ]
+}
--- a/libs/html5lib/tests/serializer-testdata/optionaltags.test
+++ b/libs/html5lib/tests/serializer-testdata/optionaltags.test
--- a/libs/html5lib/tests/serializer-testdata/options.test
+++ b/libs/html5lib/tests/serializer-testdata/options.test
@ -0,0 +1,334 @@
+{
+    "tests": [
+        {
+            "expected": [
+                "<span title='test &#39;with&#39; quote_char'>"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "span",
+                    [
+                        {
+                            "namespace": null,
+                            "name": "title",
+                            "value": "test 'with' quote_char"
+                        }
+                    ]
+                ]
+            ],
+            "description": "quote_char=\"'\"",
+            "options": {
+                "quote_char": "'"
+            }
+        },
+        {
+            "expected": [
+                "<button disabled>"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "button",
+                    [
+                        {
+                            "namespace": null,
+                            "name": "disabled",
+                            "value": "disabled"
+                        }
+                    ]
+                ]
+            ],
+            "description": "quote_attr_values='always'",
+            "options": {
+                "quote_attr_values": "always"
+            }
+        },
+        {
+            "expected": [
+                "<div itemscope>"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "div",
+                    [
+                        {
+                            "namespace": null,
+                            "name": "itemscope",
+                            "value": "itemscope"
+                        }
+                    ]
+                ]
+            ],
+            "description": "quote_attr_values='always' with itemscope",
+            "options": {
+                "quote_attr_values": "always"
+            }
+        },
+        {
+            "expected": [
+                "<div irrelevant>"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "div",
+                    [
+                        {
+                            "namespace": null,
+                            "name": "irrelevant",
+                            "value": "irrelevant"
+                        }
+                    ]
+                ]
+            ],
+            "description": "quote_attr_values='always' with irrelevant",
+            "options": {
+                "quote_attr_values": "always"
+            }
+        },
+        {
+            "expected": [
+                "<div class=\"foo\">"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "div",
+                    [
+                        {
+                            "namespace": null,
+                            "name": "class",
+                            "value": "foo"
+                        }
+                    ]
+                ]
+            ],
+            "description": "non-minimized quote_attr_values='always'",
+            "options": {
+                "quote_attr_values": "always"
+            }
+        },
+        {
+            "expected": [
+                "<div class=foo>"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "div",
+                    [
+                        {
+                            "namespace": null,
+                            "name": "class",
+                            "value": "foo"
+                        }
+                    ]
+                ]
+            ],
+            "description": "non-minimized quote_attr_values='legacy'",
+            "options": {
+                "quote_attr_values": "legacy"
+            }
+        },
+        {
+            "expected": [
+                "<div class=foo>"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "div",
+                    [
+                        {
+                            "namespace": null,
+                            "name": "class",
+                            "value": "foo"
+                        }
+                    ]
+                ]
+            ],
+            "description": "non-minimized quote_attr_values='spec'",
+            "options": {
+                "quote_attr_values": "spec"
+            }
+        },
+        {
+            "expected": [
+                "<img />"
+            ],
+            "input": [
+                [
+                    "EmptyTag",
+                    "img",
+                    {}
+                ]
+            ],
+            "description": "use_trailing_solidus=true with void element",
+            "options": {
+                "use_trailing_solidus": true
+            }
+        },
+        {
+            "expected": [
+                "<div>"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "div",
+                    {}
+                ]
+            ],
+            "description": "use_trailing_solidus=true with non-void element",
+            "options": {
+                "use_trailing_solidus": true
+            }
+        },
+        {
+            "expected": [
+                "<div itemscope=itemscope>"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "div",
+                    [
+                        {
+                            "namespace": null,
+                            "name": "itemscope",
+                            "value": "itemscope"
+                        }
+                    ]
+                ]
+            ],
+            "description": "minimize_boolean_attributes=false",
+            "options": {
+                "minimize_boolean_attributes": false
+            }
+        },
+        {
+            "expected": [
+                "<div irrelevant=irrelevant>"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "div",
+                    [
+                        {
+                            "namespace": null,
+                            "name": "irrelevant",
+                            "value": "irrelevant"
+                        }
+                    ]
+                ]
+            ],
+            "description": "minimize_boolean_attributes=false",
+            "options": {
+                "minimize_boolean_attributes": false
+            }
+        },
+        {
+            "expected": [
+                "<div itemscope=\"\">"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "div",
+                    [
+                        {
+                            "namespace": null,
+                            "name": "itemscope",
+                            "value": ""
+                        }
+                    ]
+                ]
+            ],
+            "description": "minimize_boolean_attributes=false with empty value",
+            "options": {
+                "minimize_boolean_attributes": false
+            }
+        },
+        {
+            "expected": [
+                "<div irrelevant=\"\">"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "div",
+                    [
+                        {
+                            "namespace": null,
+                            "name": "irrelevant",
+                            "value": ""
+                        }
+                    ]
+                ]
+            ],
+            "description": "minimize_boolean_attributes=false with empty value",
+            "options": {
+                "minimize_boolean_attributes": false
+            }
+        },
+        {
+            "expected": [
+                "<a title=\"a&lt;b>c&amp;d\">"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "a",
+                    [
+                        {
+                            "namespace": null,
+                            "name": "title",
+                            "value": "a<b>c&d"
+                        }
+                    ]
+                ]
+            ],
+            "description": "escape less than signs in attribute values",
+            "options": {
+                "escape_lt_in_attrs": true
+            }
+        },
+        {
+            "expected": [
+                "<script>a&lt;b&gt;c&amp;d"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "script",
+                    {}
+                ],
+                [
+                    "Characters",
+                    "a<b>c&d"
+                ]
+            ],
+            "description": "rcdata",
+            "options": {
+                "escape_rcdata": true
+            }
+        }
+    ]
+}
--- a/libs/html5lib/tests/serializer-testdata/whitespace.test
+++ b/libs/html5lib/tests/serializer-testdata/whitespace.test
@ -0,0 +1,198 @@
+{
+    "tests": [
+        {
+            "expected": [
+                " foo"
+            ],
+            "input": [
+                [
+                    "Characters",
+                    "\t\r\n\f foo"
+                ]
+            ],
+            "description": "bare text with leading spaces",
+            "options": {
+                "strip_whitespace": true
+            }
+        },
+        {
+            "expected": [
+                "foo "
+            ],
+            "input": [
+                [
+                    "Characters",
+                    "foo \t\r\n\f"
+                ]
+            ],
+            "description": "bare text with trailing spaces",
+            "options": {
+                "strip_whitespace": true
+            }
+        },
+        {
+            "expected": [
+                "foo bar"
+            ],
+            "input": [
+                [
+                    "Characters",
+                    "foo \t\r\n\f bar"
+                ]
+            ],
+            "description": "bare text with inner spaces",
+            "options": {
+                "strip_whitespace": true
+            }
+        },
+        {
+            "expected": [
+                "<pre>\t\r\n\f foo \t\r\n\f bar \t\r\n\f</pre>"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "pre",
+                    {}
+                ],
+                [
+                    "Characters",
+                    "\t\r\n\f foo \t\r\n\f bar \t\r\n\f"
+                ],
+                [
+                    "EndTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "pre"
+                ]
+            ],
+            "description": "text within <pre>",
+            "options": {
+                "strip_whitespace": true
+            }
+        },
+        {
+            "expected": [
+                "<pre>\t\r\n\f fo<span>o \t\r\n\f b</span>ar \t\r\n\f</pre>"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "pre",
+                    {}
+                ],
+                [
+                    "Characters",
+                    "\t\r\n\f fo"
+                ],
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "span",
+                    {}
+                ],
+                [
+                    "Characters",
+                    "o \t\r\n\f b"
+                ],
+                [
+                    "EndTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "span"
+                ],
+                [
+                    "Characters",
+                    "ar \t\r\n\f"
+                ],
+                [
+                    "EndTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "pre"
+                ]
+            ],
+            "description": "text within <pre>, with inner markup",
+            "options": {
+                "strip_whitespace": true
+            }
+        },
+        {
+            "expected": [
+                "<textarea>\t\r\n\f foo \t\r\n\f bar \t\r\n\f</textarea>"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "textarea",
+                    {}
+                ],
+                [
+                    "Characters",
+                    "\t\r\n\f foo \t\r\n\f bar \t\r\n\f"
+                ],
+                [
+                    "EndTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "textarea"
+                ]
+            ],
+            "description": "text within <textarea>",
+            "options": {
+                "strip_whitespace": true
+            }
+        },
+        {
+            "expected": [
+                "<script>\t\r\n\f foo \t\r\n\f bar \t\r\n\f</script>"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "script",
+                    {}
+                ],
+                [
+                    "Characters",
+                    "\t\r\n\f foo \t\r\n\f bar \t\r\n\f"
+                ],
+                [
+                    "EndTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "script"
+                ]
+            ],
+            "description": "text within <script>",
+            "options": {
+                "strip_whitespace": true
+            }
+        },
+        {
+            "expected": [
+                "<style>\t\r\n\f foo \t\r\n\f bar \t\r\n\f</style>"
+            ],
+            "input": [
+                [
+                    "StartTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "style",
+                    {}
+                ],
+                [
+                    "Characters",
+                    "\t\r\n\f foo \t\r\n\f bar \t\r\n\f"
+                ],
+                [
+                    "EndTag",
+                    "http://www.w3.org/1999/xhtml",
+                    "style"
+                ]
+            ],
+            "description": "text within <style>",
+            "options": {
+                "strip_whitespace": true
+            }
+        }
+    ]
+}
--- a/libs/html5lib/tests/support.py
+++ b/libs/html5lib/tests/support.py
@ -143,11 +143,12 @@ def convert(stripChars):
        return "\n".join(rv)
    return convertData

+
 convertExpected = convert(2)


 def errorMessage(input, expected, actual):
-    msg = ("Input:\n%s\nExpected:\n%s\nRecieved\n%s\n" %
+    msg = ("Input:\n%s\nExpected:\n%s\nReceived\n%s\n" %
           (repr(input), repr(expected), repr(actual)))
    if sys.version_info[0] == 2:
        msg = msg.encode("ascii", "backslashreplace")
--- a/libs/html5lib/tests/test_encoding.py
+++ b/libs/html5lib/tests/test_encoding.py
@ -75,7 +75,15 @@ def test_parser_args_raises(kwargs):
    assert exc_info.value.args[0].startswith("Cannot set an encoding with a unicode input")


-def runParserEncodingTest(data, encoding):
+def param_encoding():
+    for filename in get_data_files("encoding"):
+        tests = _TestData(filename, b"data", encoding=None)
+        for test in tests:
+            yield test[b'data'], test[b'encoding']
+
+
+@pytest.mark.parametrize("data, encoding", param_encoding())
+def test_parser_encoding(data, encoding):
    p = HTMLParser()
    assert p.documentEncoding is None
    p.parse(data, useChardet=False)
@ -84,7 +92,8 @@ def runParserEncodingTest(data, encoding):
    assert encoding == p.documentEncoding, errorMessage(data, encoding, p.documentEncoding)


-def runPreScanEncodingTest(data, encoding):
+@pytest.mark.parametrize("data, encoding", param_encoding())
+def test_prescan_encoding(data, encoding):
    stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
    encoding = encoding.lower().decode("ascii")

@ -95,14 +104,6 @@ def runPreScanEncodingTest(data, encoding):
    assert encoding == stream.charEncoding[0].name, errorMessage(data, encoding, stream.charEncoding[0].name)


-def test_encoding():
-    for filename in get_data_files("encoding"):
-        tests = _TestData(filename, b"data", encoding=None)
-        for test in tests:
-            yield (runParserEncodingTest, test[b'data'], test[b'encoding'])
-            yield (runPreScanEncodingTest, test[b'data'], test[b'encoding'])
-
-
 # pylint:disable=wrong-import-position
 try:
    import chardet  # noqa
--- a/libs/html5lib/tests/test_meta.py
+++ b/libs/html5lib/tests/test_meta.py
@ -28,10 +28,10 @@ def test_errorMessage():

    # Assertions!
    if six.PY2:
-        assert b"Input:\n1\nExpected:\n2\nRecieved\n3\n" == r
+        assert b"Input:\n1\nExpected:\n2\nReceived\n3\n" == r
    else:
        assert six.PY3
-        assert "Input:\n1\nExpected:\n2\nRecieved\n3\n" == r
+        assert "Input:\n1\nExpected:\n2\nReceived\n3\n" == r

    assert input.__repr__.call_count == 1
    assert expected.__repr__.call_count == 1
--- a/libs/html5lib/tests/test_parser2.py
+++ b/libs/html5lib/tests/test_parser2.py
@ -1,12 +1,12 @@
 from __future__ import absolute_import, division, unicode_literals

-from six import PY2, text_type, unichr
+from six import PY2, text_type

 import io

 from . import support  # noqa

-from html5lib.constants import namespaces, tokenTypes
+from html5lib.constants import namespaces
 from html5lib import parse, parseFragment, HTMLParser


@ -53,42 +53,6 @@ def test_unicode_file():
    assert parse(io.StringIO("a")) is not None


-def test_maintain_attribute_order():
-    # This is here because we impl it in parser and not tokenizer
-    p = HTMLParser()
-    # generate loads to maximize the chance a hash-based mutation will occur
-    attrs = [(unichr(x), i) for i, x in enumerate(range(ord('a'), ord('z')))]
-    token = {'name': 'html',
-             'selfClosing': False,
-             'selfClosingAcknowledged': False,
-             'type': tokenTypes["StartTag"],
-             'data': attrs}
-    out = p.normalizeToken(token)
-    attr_order = list(out["data"].keys())
-    assert attr_order == [x for x, i in attrs]
-
-
-def test_duplicate_attribute():
-    # This is here because we impl it in parser and not tokenizer
-    doc = parse('<p class=a class=b>')
-    el = doc[1][0]
-    assert el.get("class") == "a"
-
-
-def test_maintain_duplicate_attribute_order():
-    # This is here because we impl it in parser and not tokenizer
-    p = HTMLParser()
-    attrs = [(unichr(x), i) for i, x in enumerate(range(ord('a'), ord('z')))]
-    token = {'name': 'html',
-             'selfClosing': False,
-             'selfClosingAcknowledged': False,
-             'type': tokenTypes["StartTag"],
-             'data': attrs + [('a', len(attrs))]}
-    out = p.normalizeToken(token)
-    attr_order = list(out["data"].keys())
-    assert attr_order == [x for x, i in attrs]
-
-
 def test_debug_log():
    parser = HTMLParser(debug=True)
    parser.parse("<!doctype html><title>a</title><p>b<script>c</script>d</p>e")
--- a/libs/html5lib/tests/test_sanitizer.py
+++ b/libs/html5lib/tests/test_sanitizer.py
@ -1,31 +1,22 @@
 from __future__ import absolute_import, division, unicode_literals

+import pytest
+
 from html5lib import constants, parseFragment, serialize
 from html5lib.filters import sanitizer


-def runSanitizerTest(_, expected, input):
-    parsed = parseFragment(expected)
-    expected = serialize(parsed,
-                         omit_optional_tags=False,
-                         use_trailing_solidus=True,
-                         space_before_trailing_solidus=False,
-                         quote_attr_values="always",
-                         quote_char='"',
-                         alphabetical_attributes=True)
-    assert expected == sanitize_html(input)
-
-
 def sanitize_html(stream):
    parsed = parseFragment(stream)
-    serialized = serialize(parsed,
-                           sanitize=True,
-                           omit_optional_tags=False,
-                           use_trailing_solidus=True,
-                           space_before_trailing_solidus=False,
-                           quote_attr_values="always",
-                           quote_char='"',
-                           alphabetical_attributes=True)
+    with pytest.deprecated_call():
+        serialized = serialize(parsed,
+                               sanitize=True,
+                               omit_optional_tags=False,
+                               use_trailing_solidus=True,
+                               space_before_trailing_solidus=False,
+                               quote_attr_values="always",
+                               quote_char='"',
+                               alphabetical_attributes=True)
    return serialized


@ -59,7 +50,7 @@ def test_data_uri_disallowed_type():
    assert expected == sanitized


-def test_sanitizer():
+def param_sanitizer():
    for ns, tag_name in sanitizer.allowed_elements:
        if ns != constants.namespaces["html"]:
            continue
@ -67,19 +58,19 @@ def test_sanitizer():
                        'tfoot', 'th', 'thead', 'tr', 'select']:
            continue  # TODO
        if tag_name == 'image':
-            yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
+            yield ("test_should_allow_%s_tag" % tag_name,
                   "<img title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz",
                   "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
        elif tag_name == 'br':
-            yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
+            yield ("test_should_allow_%s_tag" % tag_name,
                   "<br title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz<br/>",
                   "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
        elif tag_name in constants.voidElements:
-            yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
+            yield ("test_should_allow_%s_tag" % tag_name,
                   "<%s title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz" % tag_name,
                   "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
        else:
-            yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
+            yield ("test_should_allow_%s_tag" % tag_name,
                   "<%s title=\"1\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</%s>" % (tag_name, tag_name),
                   "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))

@ -93,7 +84,7 @@ def test_sanitizer():
        attribute_value = 'foo'
        if attribute_name in sanitizer.attr_val_is_uri:
            attribute_value = '%s://sub.domain.tld/path/object.ext' % sanitizer.allowed_protocols[0]
-        yield (runSanitizerTest, "test_should_allow_%s_attribute" % attribute_name,
+        yield ("test_should_allow_%s_attribute" % attribute_name,
               "<p %s=\"%s\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>" % (attribute_name, attribute_value),
               "<p %s='%s'>foo <bad>bar</bad> baz</p>" % (attribute_name, attribute_value))

@ -101,7 +92,7 @@ def test_sanitizer():
        rest_of_uri = '//sub.domain.tld/path/object.ext'
        if protocol == 'data':
            rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ='
-        yield (runSanitizerTest, "test_should_allow_uppercase_%s_uris" % protocol,
+        yield ("test_should_allow_uppercase_%s_uris" % protocol,
               "<img src=\"%s:%s\">foo</a>" % (protocol, rest_of_uri),
               """<img src="%s:%s">foo</a>""" % (protocol, rest_of_uri))

@ -110,11 +101,26 @@ def test_sanitizer():
        if protocol == 'data':
            rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ='
        protocol = protocol.upper()
-        yield (runSanitizerTest, "test_should_allow_uppercase_%s_uris" % protocol,
+        yield ("test_should_allow_uppercase_%s_uris" % protocol,
               "<img src=\"%s:%s\">foo</a>" % (protocol, rest_of_uri),
               """<img src="%s:%s">foo</a>""" % (protocol, rest_of_uri))


+@pytest.mark.parametrize("expected, input",
+                         (pytest.param(expected, input, id=id)
+                          for id, expected, input in param_sanitizer()))
+def test_sanitizer(expected, input):
+    parsed = parseFragment(expected)
+    expected = serialize(parsed,
+                         omit_optional_tags=False,
+                         use_trailing_solidus=True,
+                         space_before_trailing_solidus=False,
+                         quote_attr_values="always",
+                         quote_char='"',
+                         alphabetical_attributes=True)
+    assert expected == sanitize_html(input)
+
+
 def test_lowercase_color_codes_in_style():
    sanitized = sanitize_html("<p style=\"border: 1px solid #a2a2a2;\"></p>")
    expected = '<p style=\"border: 1px solid #a2a2a2;\"></p>'
--- a/libs/html5lib/tests/test_serializer.py
+++ b/libs/html5lib/tests/test_serializer.py
@ -80,7 +80,7 @@ class JsonWalker(TreeWalker):


 def serialize_html(input, options):
-    options = dict([(str(k), v) for k, v in options.items()])
+    options = {str(k): v for k, v in options.items()}
    encoding = options.get("encoding", None)
    if "encoding" in options:
        del options["encoding"]
@ -89,19 +89,6 @@ def serialize_html(input, options):
    return serializer.render(stream, encoding)


-def runSerializerTest(input, expected, options):
-    encoding = options.get("encoding", None)
-
-    if encoding:
-        expected = list(map(lambda x: x.encode(encoding), expected))
-
-    result = serialize_html(input, options)
-    if len(expected) == 1:
-        assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s" % (expected[0], result, str(options))
-    elif result not in expected:
-        assert False, "Expected: %s, Received: %s" % (expected, result)
-
-
 def throwsWithLatin1(input):
    with pytest.raises(UnicodeEncodeError):
        serialize_html(input, {"encoding": "iso-8859-1"})
@ -120,13 +107,13 @@ def testDoctypeSystemId():


 def testCdataCharacters():
-    runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\u0101"]],
-                      ["<style>&amacr;"], {"encoding": "iso-8859-1"})
+    test_serializer([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\u0101"]],
+                    ["<style>&amacr;"], {"encoding": "iso-8859-1"})


 def testCharacters():
-    runSerializerTest([["Characters", "\u0101"]],
-                      ["&amacr;"], {"encoding": "iso-8859-1"})
+    test_serializer([["Characters", "\u0101"]],
+                    ["&amacr;"], {"encoding": "iso-8859-1"})


 def testStartTagName():
@ -138,9 +125,9 @@ def testAttributeName():


 def testAttributeValue():
-    runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "span",
-                        [{"namespace": None, "name": "potato", "value": "\u0101"}]]],
-                      ["<span potato=&amacr;>"], {"encoding": "iso-8859-1"})
+    test_serializer([["StartTag", "http://www.w3.org/1999/xhtml", "span",
+                      [{"namespace": None, "name": "potato", "value": "\u0101"}]]],
+                    ["<span potato=&amacr;>"], {"encoding": "iso-8859-1"})


 def testEndTagName():
@ -165,7 +152,7 @@ def testSpecQuoteAttribute(c):
    else:
        output_ = ['<span foo="%s">' % c]
    options_ = {"quote_attr_values": "spec"}
-    runSerializerTest(input_, output_, options_)
+    test_serializer(input_, output_, options_)


@pytest.mark.parametrize("c", list("\t\n\u000C\x20\r\"'=<>`"
@ -184,7 +171,7 @@ def testLegacyQuoteAttribute(c):
    else:
        output_ = ['<span foo="%s">' % c]
    options_ = {"quote_attr_values": "legacy"}
-    runSerializerTest(input_, output_, options_)
+    test_serializer(input_, output_, options_)


@pytest.fixture
@ -217,9 +204,23 @@ def testEntityNoResolve(lxml_parser):
    assert result == '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>'


-def test_serializer():
+def param_serializer():
    for filename in get_data_files('serializer-testdata', '*.test', os.path.dirname(__file__)):
        with open(filename) as fp:
            tests = json.load(fp)
            for test in tests['tests']:
-                yield runSerializerTest, test["input"], test["expected"], test.get("options", {})
+                yield test["input"], test["expected"], test.get("options", {})
+
+
+@pytest.mark.parametrize("input, expected, options", param_serializer())
+def test_serializer(input, expected, options):
+    encoding = options.get("encoding", None)
+
+    if encoding:
+        expected = list(map(lambda x: x.encode(encoding), expected))
+
+    result = serialize_html(input, options)
+    if len(expected) == 1:
+        assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s" % (expected[0], result, str(options))
+    elif result not in expected:
+        assert False, "Expected: %s, Received: %s" % (expected, result)
--- a/libs/html5lib/tests/test_stream.py
+++ b/libs/html5lib/tests/test_stream.py
@ -308,9 +308,11 @@ def test_invalid_codepoints(inp, num):
                          ("'\\uD800\\uD800\\uD800'", 3),
                          ("'a\\uD800a\\uD800a\\uD800a'", 3),
                          ("'\\uDFFF\\uDBFF'", 2),
-                          pytest.mark.skipif(sys.maxunicode == 0xFFFF,
-                                             ("'\\uDBFF\\uDFFF'", 2),
-                                             reason="narrow Python")])
+                          pytest.param(
+                              "'\\uDBFF\\uDFFF'", 2,
+                              marks=pytest.mark.skipif(
+                                  sys.maxunicode == 0xFFFF,
+                                  reason="narrow Python"))])
 def test_invalid_codepoints_surrogates(inp, num):
    inp = eval(inp)  # pylint:disable=eval-used
    fp = StringIO(inp)
--- a/libs/html5lib/tests/test_tokenizer2.py
+++ b/libs/html5lib/tests/test_tokenizer2.py
@ -0,0 +1,66 @@
+from __future__ import absolute_import, division, unicode_literals
+
+import io
+
+from six import unichr, text_type
+
+from html5lib._tokenizer import HTMLTokenizer
+from html5lib.constants import tokenTypes
+
+
+def ignore_parse_errors(toks):
+    for tok in toks:
+        if tok['type'] != tokenTypes['ParseError']:
+            yield tok
+
+
+def test_maintain_attribute_order():
+    # generate loads to maximize the chance a hash-based mutation will occur
+    attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
+    stream = io.StringIO("<span " + " ".join("%s='%s'" % (x, i) for x, i in attrs) + ">")
+
+    toks = HTMLTokenizer(stream)
+    out = list(ignore_parse_errors(toks))
+
+    assert len(out) == 1
+    assert out[0]['type'] == tokenTypes['StartTag']
+
+    attrs_tok = out[0]['data']
+    assert len(attrs_tok) == len(attrs)
+
+    for (in_name, in_value), (out_name, out_value) in zip(attrs, attrs_tok.items()):
+        assert in_name == out_name
+        assert in_value == out_value
+
+
+def test_duplicate_attribute():
+    stream = io.StringIO("<span a=1 a=2 a=3>")
+
+    toks = HTMLTokenizer(stream)
+    out = list(ignore_parse_errors(toks))
+
+    assert len(out) == 1
+    assert out[0]['type'] == tokenTypes['StartTag']
+
+    attrs_tok = out[0]['data']
+    assert len(attrs_tok) == 1
+    assert list(attrs_tok.items()) == [('a', '1')]
+
+
+def test_maintain_duplicate_attribute_order():
+    # generate loads to maximize the chance a hash-based mutation will occur
+    attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
+    stream = io.StringIO("<span " + " ".join("%s='%s'" % (x, i) for x, i in attrs) + " a=100>")
+
+    toks = HTMLTokenizer(stream)
+    out = list(ignore_parse_errors(toks))
+
+    assert len(out) == 1
+    assert out[0]['type'] == tokenTypes['StartTag']
+
+    attrs_tok = out[0]['data']
+    assert len(attrs_tok) == len(attrs)
+
+    for (in_name, in_value), (out_name, out_value) in zip(attrs, attrs_tok.items()):
+        assert in_name == out_name
+        assert in_value == out_value
--- a/libs/html5lib/tests/test_treewalkers.py
+++ b/libs/html5lib/tests/test_treewalkers.py
@ -1,7 +1,9 @@
 from __future__ import absolute_import, division, unicode_literals

 import itertools
+import sys

+from six import unichr, text_type
 import pytest

 try:
@ -61,24 +63,7 @@ def set_attribute_on_first_child(docfrag, name, value, treeName):
        setter['ElementTree'](docfrag)(name, value)


-def runTreewalkerEditTest(intext, expected, attrs_to_add, tree):
-    """tests what happens when we add attributes to the intext"""
-    treeName, treeClass = tree
-    if treeClass is None:
-        pytest.skip("Treebuilder not loaded")
-    parser = html5parser.HTMLParser(tree=treeClass["builder"])
-    document = parser.parseFragment(intext)
-    for nom, val in attrs_to_add:
-        set_attribute_on_first_child(document, nom, val, treeName)
-
-    document = treeClass.get("adapter", lambda x: x)(document)
-    output = treewalkers.pprint(treeClass["walker"](document))
-    output = attrlist.sub(sortattrs, output)
-    if output not in expected:
-        raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output))
-
-
-def test_treewalker_six_mix():
+def param_treewalker_six_mix():
    """Str/Unicode mix. If str attrs added to tree"""

    # On Python 2.x string literals are of type str. Unless, like this
@ -99,7 +84,25 @@ def test_treewalker_six_mix():

    for tree in sorted(treeTypes.items()):
        for intext, attrs, expected in sm_tests:
-            yield runTreewalkerEditTest, intext, expected, attrs, tree
+            yield intext, expected, attrs, tree
+
+
+@pytest.mark.parametrize("intext, expected, attrs_to_add, tree", param_treewalker_six_mix())
+def test_treewalker_six_mix(intext, expected, attrs_to_add, tree):
+    """tests what happens when we add attributes to the intext"""
+    treeName, treeClass = tree
+    if treeClass is None:
+        pytest.skip("Treebuilder not loaded")
+    parser = html5parser.HTMLParser(tree=treeClass["builder"])
+    document = parser.parseFragment(intext)
+    for nom, val in attrs_to_add:
+        set_attribute_on_first_child(document, nom, val, treeName)
+
+    document = treeClass.get("adapter", lambda x: x)(document)
+    output = treewalkers.pprint(treeClass["walker"](document))
+    output = attrlist.sub(sortattrs, output)
+    if output not in expected:
+        raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output))


@pytest.mark.parametrize("tree,char", itertools.product(sorted(treeTypes.items()), ["x", "\u1234"]))
@ -134,3 +137,69 @@ def test_lxml_xml():
    output = Lint(walker(lxmltree))

    assert list(output) == expected
+
+
+@pytest.mark.parametrize("treeName",
+                         [pytest.param(treeName, marks=[getattr(pytest.mark, treeName),
+                                                        pytest.mark.skipif(
+                                                            treeName != "lxml" or
+                                                            sys.version_info < (3, 7), reason="dict order undef")])
+                          for treeName in sorted(treeTypes.keys())])
+def test_maintain_attribute_order(treeName):
+    treeAPIs = treeTypes[treeName]
+    if treeAPIs is None:
+        pytest.skip("Treebuilder not loaded")
+
+    # generate loads to maximize the chance a hash-based mutation will occur
+    attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
+    data = "<span " + " ".join("%s='%s'" % (x, i) for x, i in attrs) + ">"
+
+    parser = html5parser.HTMLParser(tree=treeAPIs["builder"])
+    document = parser.parseFragment(data)
+
+    document = treeAPIs.get("adapter", lambda x: x)(document)
+    output = list(Lint(treeAPIs["walker"](document)))
+
+    assert len(output) == 2
+    assert output[0]['type'] == 'StartTag'
+    assert output[1]['type'] == "EndTag"
+
+    attrs_out = output[0]['data']
+    assert len(attrs) == len(attrs_out)
+
+    for (in_name, in_value), (out_name, out_value) in zip(attrs, attrs_out.items()):
+        assert (None, in_name) == out_name
+        assert in_value == out_value
+
+
+@pytest.mark.parametrize("treeName",
+                         [pytest.param(treeName, marks=[getattr(pytest.mark, treeName),
+                                                        pytest.mark.skipif(
+                                                            treeName != "lxml" or
+                                                            sys.version_info < (3, 7), reason="dict order undef")])
+                          for treeName in sorted(treeTypes.keys())])
+def test_maintain_attribute_order_adjusted(treeName):
+    treeAPIs = treeTypes[treeName]
+    if treeAPIs is None:
+        pytest.skip("Treebuilder not loaded")
+
+    # generate loads to maximize the chance a hash-based mutation will occur
+    data = "<svg a=1 refx=2 b=3 xml:lang=4 c=5>"
+
+    parser = html5parser.HTMLParser(tree=treeAPIs["builder"])
+    document = parser.parseFragment(data)
+
+    document = treeAPIs.get("adapter", lambda x: x)(document)
+    output = list(Lint(treeAPIs["walker"](document)))
+
+    assert len(output) == 2
+    assert output[0]['type'] == 'StartTag'
+    assert output[1]['type'] == "EndTag"
+
+    attrs_out = output[0]['data']
+
+    assert list(attrs_out.items()) == [((None, 'a'), '1'),
+                                       ((None, 'refX'), '2'),
+                                       ((None, 'b'), '3'),
+                                       (('http://www.w3.org/XML/1998/namespace', 'lang'), '4'),
+                                       ((None, 'c'), '5')]
--- a/libs/html5lib/tests/testdata/AUTHORS.rst
+++ b/libs/html5lib/tests/testdata/AUTHORS.rst
@ -1,34 +0,0 @@
-Credits
-=======
-
-The ``html5lib`` test data is maintained by:
-
- James Graham
- Geoffrey Sneddon
-
-
-Contributors
------------
-
- Adam Barth
- Andi Sidwell
- Anne van Kesteren
- David Flanagan
- Edward Z. Yang
- Geoffrey Sneddon
- Henri Sivonen
- Ian Hickson
- Jacques Distler
- James Graham
- Lachlan Hunt
- lantis63
- Mark Pilgrim
- Mats Palmgren
- Ms2ger
- Nolan Waite
- Philip Taylor
- Rafael Weinstein
- Ryan King
- Sam Ruby
- Simon Pieters
- Thomas Broyer
--- a/libs/html5lib/tests/testdata/LICENSE
+++ b/libs/html5lib/tests/testdata/LICENSE
@ -1,21 +0,0 @@
-Copyright (c) 2006-2013 James Graham, Geoffrey Sneddon, and
-other contributors
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice shall be
-included in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--- a/libs/html5lib/tests/testdata/encoding/chardet/test_big5.txt
+++ b/libs/html5lib/tests/testdata/encoding/chardet/test_big5.txt
@ -1,51 +0,0 @@
-老子《道德經》 第一~四十章
-
-老子道經
-
-第一章
-
-道可道，非常道。名可名，非常名。無，名天地之始﹔有，名萬物之母。
-故常無，欲以觀其妙；常有，欲以觀其徼。此兩者，同出而異名，同謂之
-玄。玄之又玄，眾妙之門。
-
-第二章
-
-天下皆知美之為美，斯惡矣﹔皆知善之為善，斯不善矣。故有無相生，難
-易相成，長短相形，高下相傾，音聲相和，前後相隨。是以聖人處「無為
-」之事，行「不言」之教。萬物作焉而不辭，生而不有，為而不恃，功成
-而弗居。夫唯弗居，是以不去。
-
-第三章
-
-不尚賢，使民不爭﹔不貴難得之貨，使民不為盜﹔不見可欲，使民心不亂
-。是以「聖人」之治，虛其心，實其腹，弱其志，強其骨。常使民無知無
-欲。使夫智者不敢為也。為「無為」，則無不治。
-
-第四章
-
-「道」沖，而用之或不盈。淵兮，似萬物之宗﹔挫其銳，解其紛，和其光
-，同其塵﹔湛兮似或存。吾不知誰之子？象帝之先。
-
-第五章
-
-天地不仁，以萬物為芻狗﹔聖人不仁，以百姓為芻狗。天地之間，其猶橐
-蘥乎？虛而不屈，動而愈出。多言數窮，不如守中。
-
-第六章
-
-谷神不死，是謂玄牝。玄牝之門，是謂天地根。綿綿若存，用之不勤。
-
-第七章
-
-天長地久。天地所以能長且久者，以其不自生，故能長久。是以聖人後其
-身而身先，外其身而身存。非以其無私邪？故能成其私。
-
-第八章
-
-上善若水。水善利萬物而不爭。處眾人之所惡，故幾於道。居善地，心善
-淵，與善仁，言善信，政善治，事善能，動善時。夫唯不爭，故無尤。
-
-第九章
-
-持而盈之，不如其已﹔揣而銳之，不可長保。金玉滿堂，莫之能守﹔富貴
-而驕，自遺其咎。功遂身退，天之道。
--- a/libs/html5lib/tests/testdata/encoding/test-yahoo-jp.dat
+++ b/libs/html5lib/tests/testdata/encoding/test-yahoo-jp.dat
@ -1,10 +0,0 @@
-#data
-<html>
-<head>
-<meta http-equiv="Content-Type" content="text/html; charset=euc-jp">
-<!--京-->
-<title>Yahoo! JAPAN</title>
-<meta name="description" content="日本最大級のポータルサイト。検索、オークション、ニュース、メール、コミュニティ、ショッピング、など80以上のサービスを展開。あなたの生活をより豊かにする「ライフ・エンジン」を目指していきます。">
-<style type="text/css" media="all">
-#encoding
-euc-jp
--- a/libs/html5lib/tests/testdata/encoding/tests1.dat
+++ b/libs/html5lib/tests/testdata/encoding/tests1.dat
--- a/libs/html5lib/tests/testdata/encoding/tests2.dat
+++ b/libs/html5lib/tests/testdata/encoding/tests2.dat
@ -1,115 +0,0 @@
-#data
-<meta
-#encoding
-windows-1252
-
-#data
-<
-#encoding
-windows-1252
-
-#data
-<!
-#encoding
-windows-1252
-
-#data
-<meta charset = "
-#encoding
-windows-1252
-
-#data
-<meta charset=euc-jp
-#encoding
-windows-1252
-
-#data
-<meta <meta charset='euc-jp'>
-#encoding
-euc-jp
-
-#data
-<meta       charset    =     'euc-jp'>
-#encoding
-euc-jp
-
-#data
-<!-- -->
-<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
-#encoding
-utf-8
-
-#data
-<!-- -->
-<meta http-equiv="Content-Type" content="text/html; charset=utf
-#encoding
-windows-1252
-
-#data
-<meta http-equiv="Content-Type<meta charset="utf-8">
-#encoding
-windows-1252
-
-#data
-<meta http-equiv="Content-Type" content="text/html; charset='utf-8'">
-#encoding
-utf-8
-
-#data
-<meta http-equiv="Content-Type" content="text/html; charset='utf-8">
-#encoding
-windows-1252
-
-#data
-<meta                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 
-#encoding
-windows-1252
-
-#data
-<meta charset                    =                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
-#encoding
-windows-1252
-
-#data
-<meta charset=                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            utf-8
->
-#encoding
-utf-8
-
-#data
-<meta content = "text/html;
-#encoding
-windows-1252
-
-#data
-<meta charset="UTF-16">
-#encoding
-utf-8
-
-#data
-<meta charset="UTF-16LE">
-#encoding
-utf-8
-
-#data
-<meta charset="UTF-16BE">
-#encoding
-utf-8
-
-#data
-<html a=ñ>
-<meta charset="utf-8">
-#encoding
-utf-8
-
-#data
-<html ñ>
-<meta charset="utf-8">
-#encoding
-utf-8
-
-#data
-<html>ñ
-<meta charset="utf-8">
-#encoding
-utf-8
--- a/libs/html5lib/tests/testdata/serializer/core.test
+++ b/libs/html5lib/tests/testdata/serializer/core.test
@ -1,125 +0,0 @@
-{"tests": [
-
-{"description": "proper attribute value escaping",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "test \"with\" &quot;"}]]],
- "expected": ["<span title='test \"with\" &amp;quot;'>"]
-},
-
-{"description": "proper attribute value non-quoting",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo"}]]],
- "expected": ["<span title=foo>"],
- "xhtml":    ["<span title=\"foo\">"]
-},
-
-{"description": "proper attribute value non-quoting (with <)",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo<bar"}]]],
- "expected": ["<span title=foo<bar>"],
- "xhtml":    ["<span title=\"foo&lt;bar\">"]
-},
-
-{"description": "proper attribute value quoting (with =)",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo=bar"}]]],
- "expected": ["<span title=\"foo=bar\">"]
-},
-
-{"description": "proper attribute value quoting (with >)",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo>bar"}]]],
- "expected": ["<span title=\"foo>bar\">"]
-},
-
-{"description": "proper attribute value quoting (with \")",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\"bar"}]]],
- "expected": ["<span title='foo\"bar'>"]
-},
-
-{"description": "proper attribute value quoting (with ')",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo'bar"}]]],
- "expected": ["<span title=\"foo'bar\">"]
-},
-
-{"description": "proper attribute value quoting (with both \" and ')",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo'bar\"baz"}]]],
- "expected": ["<span title=\"foo'bar&quot;baz\">"]
-},
-
-{"description": "proper attribute value quoting (with space)",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo bar"}]]],
- "expected": ["<span title=\"foo bar\">"]
-},
-
-{"description": "proper attribute value quoting (with tab)",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\tbar"}]]],
- "expected": ["<span title=\"foo\tbar\">"]
-},
-
-{"description": "proper attribute value quoting (with LF)",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\nbar"}]]],
- "expected": ["<span title=\"foo\nbar\">"]
-},
-
-{"description": "proper attribute value quoting (with CR)",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\rbar"}]]],
- "expected": ["<span title=\"foo\rbar\">"]
-},
-
-{"description": "proper attribute value non-quoting (with linetab)",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\u000Bbar"}]]],
- "expected": ["<span title=foo\u000Bbar>"],
- "xhtml": ["<span title=\"foo\u000Bbar\">"]
-},
-
-{"description": "proper attribute value quoting (with form feed)",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\u000Cbar"}]]],
- "expected": ["<span title=\"foo\u000Cbar\">"]
-},
-
-{"description": "void element (as EmptyTag token)",
- "input": [["EmptyTag", "img", {}]],
- "expected": ["<img>"],
- "xhtml":    ["<img />"]
-},
-
-{"description": "void element (as StartTag token)",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "img", {}]],
- "expected": ["<img>"],
- "xhtml":    ["<img />"]
-},
-
-{"description": "doctype in error",
- "input": [["Doctype", "foo"]],
- "expected": ["<!DOCTYPE foo>"]
-},
-
-{"description": "character data",
- "options": {"encoding":"utf-8"},
- "input": [["Characters", "a<b>c&d"]],
- "expected": ["a&lt;b&gt;c&amp;d"]
-},
-
-{"description": "rcdata",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "script", {}], ["Characters", "a<b>c&d"]],
- "expected": ["<script>a<b>c&d"],
- "xhtml": ["<script>a&lt;b&gt;c&amp;d"]
-},
-
-{"description": "doctype",
- "input": [["Doctype", "HTML"]],
- "expected": ["<!DOCTYPE HTML>"]
-},
-
-{"description": "HTML 4.01 DOCTYPE",
- "input": [["Doctype", "HTML",  "-//W3C//DTD HTML 4.01//EN", "http://www.w3.org/TR/html4/strict.dtd"]],
- "expected": ["<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">"]
-},
-
-{"description": "HTML 4.01 DOCTYPE without system identifer",
- "input": [["Doctype", "HTML",  "-//W3C//DTD HTML 4.01//EN"]],
- "expected": ["<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\">"]
-},
-
-{"description": "IBM DOCTYPE without public identifer",
- "input": [["Doctype", "html",  "", "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"]],
- "expected": ["<!DOCTYPE html SYSTEM \"http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd\">"]
-}
-
-]}
--- a/libs/html5lib/tests/testdata/serializer/injectmeta.test
+++ b/libs/html5lib/tests/testdata/serializer/injectmeta.test
@ -1,66 +0,0 @@
-{"tests": [
-
-{"description": "no encoding",
- "options": {"inject_meta_charset": true},
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
- "expected": [""],
- "xhtml": ["<head></head>"]
-},
-
-{"description": "empytag head",
- "options": {"inject_meta_charset": true, "encoding":"utf-8"},
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
- "expected": ["<meta charset=utf-8>"],
- "xhtml":    ["<head><meta charset=\"utf-8\" /></head>"]
-},
-
-{"description": "head w/title",
- "options": {"inject_meta_charset": true, "encoding":"utf-8"},
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["StartTag", "http://www.w3.org/1999/xhtml","title",{}], ["Characters", "foo"],["EndTag", "http://www.w3.org/1999/xhtml", "title"], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
- "expected": ["<meta charset=utf-8><title>foo</title>"],
- "xhtml":    ["<head><meta charset=\"utf-8\" /><title>foo</title></head>"]
-},
-
-{"description": "head w/meta-charset",
- "options": {"inject_meta_charset": true, "encoding":"utf-8"},
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
- "expected": ["<meta charset=utf-8>"],
- "xhtml":    ["<head><meta charset=\"utf-8\" /></head>"]
-},
-
-{"description": "head w/ two meta-charset",
- "options": {"inject_meta_charset": true, "encoding":"utf-8"},
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
- "expected": ["<meta charset=utf-8><meta charset=utf-8>", "<head><meta charset=utf-8><meta charset=ascii>"],
- "xhtml": ["<head><meta charset=\"utf-8\" /><meta charset=\"utf-8\" /></head>", "<head><meta charset=\"utf-8\" /><meta charset=\"ascii\" /></head>"]
-},
-
-{"description": "head w/robots",
- "options": {"inject_meta_charset": true, "encoding":"utf-8"},
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "name", "value": "robots"},{"namespace": null, "name": "content", "value": "noindex"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
- "expected": ["<meta charset=utf-8><meta content=noindex name=robots>"],
- "xhtml":    ["<head><meta charset=\"utf-8\" /><meta content=\"noindex\" name=\"robots\" /></head>"]
-},
-
-{"description": "head w/robots & charset",
- "options": {"inject_meta_charset": true, "encoding":"utf-8"},
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "name", "value": "robots"},{"namespace": null, "name": "content", "value": "noindex"}]], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
- "expected": ["<meta content=noindex name=robots><meta charset=utf-8>"],
- "xhtml":    ["<head><meta content=\"noindex\" name=\"robots\" /><meta charset=\"utf-8\" /></head>"]
-},
-
-{"description": "head w/ charset in http-equiv content-type",
- "options": {"inject_meta_charset": true, "encoding":"utf-8"},
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "http-equiv", "value": "content-type"}, {"namespace": null, "name": "content", "value": "text/html; charset=ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
- "expected": ["<meta content=\"text/html; charset=utf-8\" http-equiv=content-type>"],
- "xhtml":    ["<head><meta content=\"text/html; charset=utf-8\" http-equiv=\"content-type\" /></head>"]
-},
-
-{"description": "head w/robots & charset in http-equiv content-type",
- "options": {"inject_meta_charset": true, "encoding":"utf-8"},
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "name", "value": "robots"},{"namespace": null, "name": "content", "value": "noindex"}]], ["EmptyTag","meta",[{"namespace": null, "name": "http-equiv", "value": "content-type"}, {"namespace": null, "name": "content", "value": "text/html; charset=ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
- "expected": ["<meta content=noindex name=robots><meta content=\"text/html; charset=utf-8\" http-equiv=content-type>"],
- "xhtml": ["<head><meta content=\"noindex\" name=\"robots\" /><meta content=\"text/html; charset=utf-8\" http-equiv=\"content-type\" /></head>"]
-}
-
-]}
--- a/libs/html5lib/tests/testdata/serializer/optionaltags.test
+++ b/libs/html5lib/tests/testdata/serializer/optionaltags.test
@ -1,965 +0,0 @@
-{"tests": [
-
-{"description": "html start-tag followed by text, with attributes",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", [{"namespace": null, "name": "lang", "value": "en"}]], ["Characters", "foo"]],
- "expected": ["<html lang=en>foo"]
-},
-
-
-
-{"description": "html start-tag followed by comment",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["Comment", "foo"]],
- "expected": ["<html><!--foo-->"]
-},
-
-{"description": "html start-tag followed by space character",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["Characters", " foo"]],
- "expected": ["<html> foo"]
-},
-
-{"description": "html start-tag followed by text",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["Characters", "foo"]],
- "expected": ["foo"]
-},
-
-{"description": "html start-tag followed by start-tag",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
- "expected": ["<foo>"]
-},
-
-{"description": "html start-tag followed by end-tag",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
- "expected": ["</foo>"]
-},
-
-{"description": "html start-tag at EOF (shouldn't ever happen?!)",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}]],
- "expected": [""]
-},
-
-
-
-{"description": "html end-tag followed by comment",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["Comment", "foo"]],
- "expected": ["</html><!--foo-->"]
-},
-
-{"description": "html end-tag followed by space character",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["Characters", " foo"]],
- "expected": ["</html> foo"]
-},
-
-{"description": "html end-tag followed by text",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["Characters", "foo"]],
- "expected": ["foo"]
-},
-
-{"description": "html end-tag followed by start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
- "expected": ["<foo>"]
-},
-
-{"description": "html end-tag followed by end-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
- "expected": ["</foo>"]
-},
-
-{"description": "html end-tag at EOF",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"]],
- "expected": [""]
-},
-
-
-
-
-{"description": "head start-tag followed by comment",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["Comment", "foo"]],
- "expected": ["<head><!--foo-->"]
-},
-
-{"description": "head start-tag followed by space character",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["Characters", " foo"]],
- "expected": ["<head> foo"]
-},
-
-{"description": "head start-tag followed by text",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["Characters", "foo"]],
- "expected": ["<head>foo"]
-},
-
-{"description": "head start-tag followed by start-tag",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
- "expected": ["<foo>"]
-},
-
-{"description": "head start-tag followed by end-tag (shouldn't ever happen?!)",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
- "expected": ["<head></foo>", "</foo>"]
-},
-
-{"description": "empty head element",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
- "expected": [""]
-},
-
-{"description": "head start-tag followed by empty-tag",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag", "foo", {}]],
- "expected": ["<foo>"]
-},
-
-{"description": "head start-tag at EOF (shouldn't ever happen?!)",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}]],
- "expected": ["<head>", ""]
-},
-
-
-
-{"description": "head end-tag followed by comment",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["Comment", "foo"]],
- "expected": ["</head><!--foo-->"]
-},
-
-{"description": "head end-tag followed by space character",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["Characters", " foo"]],
- "expected": ["</head> foo"]
-},
-
-{"description": "head end-tag followed by text",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["Characters", "foo"]],
- "expected": ["foo"]
-},
-
-{"description": "head end-tag followed by start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
- "expected": ["<foo>"]
-},
-
-{"description": "head end-tag followed by end-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
- "expected": ["</foo>"]
-},
-
-{"description": "head end-tag at EOF",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
- "expected": [""]
-},
-
-
-
-
-{"description": "body start-tag followed by comment",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["Comment", "foo"]],
- "expected": ["<body><!--foo-->"]
-},
-
-{"description": "body start-tag followed by space character",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["Characters", " foo"]],
- "expected": ["<body> foo"]
-},
-
-{"description": "body start-tag followed by text",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["Characters", "foo"]],
- "expected": ["foo"]
-},
-
-{"description": "body start-tag followed by start-tag",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
- "expected": ["<foo>"]
-},
-
-{"description": "body start-tag followed by end-tag",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
- "expected": ["</foo>"]
-},
-
-{"description": "body start-tag at EOF (shouldn't ever happen?!)",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}]],
- "expected": [""]
-},
-
-
-
-{"description": "body end-tag followed by comment",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["Comment", "foo"]],
- "expected": ["</body><!--foo-->"]
-},
-
-{"description": "body end-tag followed by space character",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["Characters", " foo"]],
- "expected": ["</body> foo"]
-},
-
-{"description": "body end-tag followed by text",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["Characters", "foo"]],
- "expected": ["foo"]
-},
-
-{"description": "body end-tag followed by start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
- "expected": ["<foo>"]
-},
-
-{"description": "body end-tag followed by end-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
- "expected": ["</foo>"]
-},
-
-{"description": "body end-tag at EOF",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"]],
- "expected": [""]
-},
-
-
-
-
-{"description": "li end-tag followed by comment",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["Comment", "foo"]],
- "expected": ["</li><!--foo-->"]
-},
-
-{"description": "li end-tag followed by space character",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["Characters", " foo"]],
- "expected": ["</li> foo"]
-},
-
-{"description": "li end-tag followed by text",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["Characters", "foo"]],
- "expected": ["</li>foo"]
-},
-
-{"description": "li end-tag followed by start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
- "expected": ["</li><foo>"]
-},
-
-{"description": "li end-tag followed by li start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["StartTag", "http://www.w3.org/1999/xhtml", "li", {}]],
- "expected": ["<li>"]
-},
-
-{"description": "li end-tag followed by end-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
- "expected": ["</foo>"]
-},
-
-{"description": "li end-tag at EOF",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"]],
- "expected": [""]
-},
-
-
-
-
-{"description": "dt end-tag followed by comment",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["Comment", "foo"]],
- "expected": ["</dt><!--foo-->"]
-},
-
-{"description": "dt end-tag followed by space character",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["Characters", " foo"]],
- "expected": ["</dt> foo"]
-},
-
-{"description": "dt end-tag followed by text",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["Characters", "foo"]],
- "expected": ["</dt>foo"]
-},
-
-{"description": "dt end-tag followed by start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
- "expected": ["</dt><foo>"]
-},
-
-{"description": "dt end-tag followed by dt start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["StartTag", "http://www.w3.org/1999/xhtml", "dt", {}]],
- "expected": ["<dt>"]
-},
-
-{"description": "dt end-tag followed by dd start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["StartTag", "http://www.w3.org/1999/xhtml", "dd", {}]],
- "expected": ["<dd>"]
-},
-
-{"description": "dt end-tag followed by end-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
- "expected": ["</dt></foo>"]
-},
-
-{"description": "dt end-tag at EOF",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"]],
- "expected": ["</dt>"]
-},
-
-
-
-
-{"description": "dd end-tag followed by comment",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["Comment", "foo"]],
- "expected": ["</dd><!--foo-->"]
-},
-
-{"description": "dd end-tag followed by space character",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["Characters", " foo"]],
- "expected": ["</dd> foo"]
-},
-
-{"description": "dd end-tag followed by text",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["Characters", "foo"]],
- "expected": ["</dd>foo"]
-},
-
-{"description": "dd end-tag followed by start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
- "expected": ["</dd><foo>"]
-},
-
-{"description": "dd end-tag followed by dd start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["StartTag", "http://www.w3.org/1999/xhtml", "dd", {}]],
- "expected": ["<dd>"]
-},
-
-{"description": "dd end-tag followed by dt start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["StartTag", "http://www.w3.org/1999/xhtml", "dt", {}]],
- "expected": ["<dt>"]
-},
-
-{"description": "dd end-tag followed by end-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
- "expected": ["</foo>"]
-},
-
-{"description": "dd end-tag at EOF",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"]],
- "expected": [""]
-},
-
-
-
-
-{"description": "p end-tag followed by comment",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["Comment", "foo"]],
- "expected": ["</p><!--foo-->"]
-},
-
-{"description": "p end-tag followed by space character",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["Characters", " foo"]],
- "expected": ["</p> foo"]
-},
-
-{"description": "p end-tag followed by text",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["Characters", "foo"]],
- "expected": ["</p>foo"]
-},
-
-{"description": "p end-tag followed by start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
- "expected": ["</p><foo>"]
-},
-
-{"description": "p end-tag followed by address start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "address", {}]],
- "expected": ["<address>"]
-},
-
-{"description": "p end-tag followed by article start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "article", {}]],
- "expected": ["<article>"]
-},
-
-{"description": "p end-tag followed by aside start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "aside", {}]],
- "expected": ["<aside>"]
-},
-
-{"description": "p end-tag followed by blockquote start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "blockquote", {}]],
- "expected": ["<blockquote>"]
-},
-
-{"description": "p end-tag followed by datagrid start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "datagrid", {}]],
- "expected": ["<datagrid>"]
-},
-
-{"description": "p end-tag followed by dialog start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "dialog", {}]],
- "expected": ["<dialog>"]
-},
-
-{"description": "p end-tag followed by dir start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "dir", {}]],
- "expected": ["<dir>"]
-},
-
-{"description": "p end-tag followed by div start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "div", {}]],
- "expected": ["<div>"]
-},
-
-{"description": "p end-tag followed by dl start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "dl", {}]],
- "expected": ["<dl>"]
-},
-
-{"description": "p end-tag followed by fieldset start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "fieldset", {}]],
- "expected": ["<fieldset>"]
-},
-
-{"description": "p end-tag followed by footer start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "footer", {}]],
- "expected": ["<footer>"]
-},
-
-{"description": "p end-tag followed by form start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "form", {}]],
- "expected": ["<form>"]
-},
-
-{"description": "p end-tag followed by h1 start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h1", {}]],
- "expected": ["<h1>"]
-},
-
-{"description": "p end-tag followed by h2 start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h2", {}]],
- "expected": ["<h2>"]
-},
-
-{"description": "p end-tag followed by h3 start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h3", {}]],
- "expected": ["<h3>"]
-},
-
-{"description": "p end-tag followed by h4 start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h4", {}]],
- "expected": ["<h4>"]
-},
-
-{"description": "p end-tag followed by h5 start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h5", {}]],
- "expected": ["<h5>"]
-},
-
-{"description": "p end-tag followed by h6 start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h6", {}]],
- "expected": ["<h6>"]
-},
-
-{"description": "p end-tag followed by header start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "header", {}]],
- "expected": ["<header>"]
-},
-
-{"description": "p end-tag followed by hr empty-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["EmptyTag", "hr", {}]],
- "expected": ["<hr>"]
-},
-
-{"description": "p end-tag followed by menu start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "menu", {}]],
- "expected": ["<menu>"]
-},
-
-{"description": "p end-tag followed by nav start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "nav", {}]],
- "expected": ["<nav>"]
-},
-
-{"description": "p end-tag followed by ol start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "ol", {}]],
- "expected": ["<ol>"]
-},
-
-{"description": "p end-tag followed by p start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "p", {}]],
- "expected": ["<p>"]
-},
-
-{"description": "p end-tag followed by pre start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "pre", {}]],
- "expected": ["<pre>"]
-},
-
-{"description": "p end-tag followed by section start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "section", {}]],
- "expected": ["<section>"]
-},
-
-{"description": "p end-tag followed by table start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "table", {}]],
- "expected": ["<table>"]
-},
-
-{"description": "p end-tag followed by ul start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "ul", {}]],
- "expected": ["<ul>"]
-},
-
-{"description": "p end-tag followed by end-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
- "expected": ["</foo>"]
-},
-
-{"description": "p end-tag at EOF",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"]],
- "expected": [""]
-},
-
-
-
-
-{"description": "optgroup end-tag followed by comment",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["Comment", "foo"]],
- "expected": ["</optgroup><!--foo-->"]
-},
-
-{"description": "optgroup end-tag followed by space character",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["Characters", " foo"]],
- "expected": ["</optgroup> foo"]
-},
-
-{"description": "optgroup end-tag followed by text",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["Characters", "foo"]],
- "expected": ["</optgroup>foo"]
-},
-
-{"description": "optgroup end-tag followed by start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
- "expected": ["</optgroup><foo>"]
-},
-
-{"description": "optgroup end-tag followed by optgroup start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "optgroup", {}]],
- "expected": ["<optgroup>"]
-},
-
-{"description": "optgroup end-tag followed by end-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
- "expected": ["</foo>"]
-},
-
-{"description": "optgroup end-tag at EOF",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"]],
- "expected": [""]
-},
-
-
-
-
-{"description": "option end-tag followed by comment",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["Comment", "foo"]],
- "expected": ["</option><!--foo-->"]
-},
-
-{"description": "option end-tag followed by space character",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["Characters", " foo"]],
- "expected": ["</option> foo"]
-},
-
-{"description": "option end-tag followed by text",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["Characters", "foo"]],
- "expected": ["</option>foo"]
-},
-
-{"description": "option end-tag followed by optgroup start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["StartTag", "http://www.w3.org/1999/xhtml", "optgroup", {}]],
- "expected": ["<optgroup>"]
-},
-
-{"description": "option end-tag followed by start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
- "expected": ["</option><foo>"]
-},
-
-{"description": "option end-tag followed by option start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["StartTag", "http://www.w3.org/1999/xhtml", "option", {}]],
- "expected": ["<option>"]
-},
-
-{"description": "option end-tag followed by end-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
- "expected": ["</foo>"]
-},
-
-{"description": "option end-tag at EOF",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"]],
- "expected": [""]
-},
-
-
-
-
-{"description": "colgroup start-tag followed by comment",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["Comment", "foo"]],
- "expected": ["<colgroup><!--foo-->"]
-},
-
-{"description": "colgroup start-tag followed by space character",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["Characters", " foo"]],
- "expected": ["<colgroup> foo"]
-},
-
-{"description": "colgroup start-tag followed by text",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["Characters", "foo"]],
- "expected": ["<colgroup>foo"]
-},
-
-{"description": "colgroup start-tag followed by start-tag",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
- "expected": ["<colgroup><foo>"]
-},
-
-{"description": "first colgroup in a table with a col child",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "table", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["EmptyTag", "col", {}]],
- "expected": ["<table><col>"]
-},
-
-{"description": "colgroup with a col child, following another colgroup",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "col", {}]],
- "expected": ["</colgroup><col>", "<colgroup><col>"]
-},
-
-{"description": "colgroup start-tag followed by end-tag",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
- "expected": ["<colgroup></foo>"]
-},
-
-{"description": "colgroup start-tag at EOF",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}]],
- "expected": ["<colgroup>"]
-},
-
-
-
-{"description": "colgroup end-tag followed by comment",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["Comment", "foo"]],
- "expected": ["</colgroup><!--foo-->"]
-},
-
-{"description": "colgroup end-tag followed by space character",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["Characters", " foo"]],
- "expected": ["</colgroup> foo"]
-},
-
-{"description": "colgroup end-tag followed by text",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["Characters", "foo"]],
- "expected": ["foo"]
-},
-
-{"description": "colgroup end-tag followed by start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
- "expected": ["<foo>"]
-},
-
-{"description": "colgroup end-tag followed by end-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
- "expected": ["</foo>"]
-},
-
-{"description": "colgroup end-tag at EOF",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"]],
- "expected": [""]
-},
-
-
-
-
-{"description": "thead end-tag followed by comment",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["Comment", "foo"]],
- "expected": ["</thead><!--foo-->"]
-},
-
-{"description": "thead end-tag followed by space character",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["Characters", " foo"]],
- "expected": ["</thead> foo"]
-},
-
-{"description": "thead end-tag followed by text",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["Characters", "foo"]],
- "expected": ["</thead>foo"]
-},
-
-{"description": "thead end-tag followed by start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
- "expected": ["</thead><foo>"]
-},
-
-{"description": "thead end-tag followed by tbody start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
- "expected": ["<tbody>"]
-},
-
-{"description": "thead end-tag followed by tfoot start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "tfoot", {}]],
- "expected": ["<tfoot>"]
-},
-
-{"description": "thead end-tag followed by end-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
- "expected": ["</thead></foo>"]
-},
-
-{"description": "thead end-tag at EOF",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"]],
- "expected": ["</thead>"]
-},
-
-
-
-
-{"description": "tbody start-tag followed by comment",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["Comment", "foo"]],
- "expected": ["<tbody><!--foo-->"]
-},
-
-{"description": "tbody start-tag followed by space character",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["Characters", " foo"]],
- "expected": ["<tbody> foo"]
-},
-
-{"description": "tbody start-tag followed by text",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["Characters", "foo"]],
- "expected": ["<tbody>foo"]
-},
-
-{"description": "tbody start-tag followed by start-tag",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
- "expected": ["<tbody><foo>"]
-},
-
-{"description": "first tbody in a table with a tr child",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "table", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
- "expected": ["<table><tr>"]
-},
-
-{"description": "tbody with a tr child, following another tbody",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
- "expected": ["<tbody><tr>", "</tbody><tr>"]
-},
-
-{"description": "tbody with a tr child, following a thead",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
- "expected": ["<tbody><tr>", "</thead><tr>"]
-},
-
-{"description": "tbody with a tr child, following a tfoot",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
- "expected": ["<tbody><tr>", "</tfoot><tr>"]
-},
-
-{"description": "tbody start-tag followed by end-tag",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
- "expected": ["<tbody></foo>"]
-},
-
-{"description": "tbody start-tag at EOF",
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
- "expected": ["<tbody>"]
-},
-
-
-
-{"description": "tbody end-tag followed by comment",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["Comment", "foo"]],
- "expected": ["</tbody><!--foo-->"]
-},
-
-{"description": "tbody end-tag followed by space character",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["Characters", " foo"]],
- "expected": ["</tbody> foo"]
-},
-
-{"description": "tbody end-tag followed by text",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["Characters", "foo"]],
- "expected": ["</tbody>foo"]
-},
-
-{"description": "tbody end-tag followed by start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
- "expected": ["</tbody><foo>"]
-},
-
-{"description": "tbody end-tag followed by tbody start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
- "expected": ["<tbody>", "</tbody>"]
-},
-
-{"description": "tbody end-tag followed by tfoot start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "tfoot", {}]],
- "expected": ["<tfoot>"]
-},
-
-{"description": "tbody end-tag followed by end-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
- "expected": ["</foo>"]
-},
-
-{"description": "tbody end-tag at EOF",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"]],
- "expected": [""]
-},
-
-
-
-
-{"description": "tfoot end-tag followed by comment",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["Comment", "foo"]],
- "expected": ["</tfoot><!--foo-->"]
-},
-
-{"description": "tfoot end-tag followed by space character",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["Characters", " foo"]],
- "expected": ["</tfoot> foo"]
-},
-
-{"description": "tfoot end-tag followed by text",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["Characters", "foo"]],
- "expected": ["</tfoot>foo"]
-},
-
-{"description": "tfoot end-tag followed by start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
- "expected": ["</tfoot><foo>"]
-},
-
-{"description": "tfoot end-tag followed by tbody start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
- "expected": ["<tbody>", "</tfoot>"]
-},
-
-{"description": "tfoot end-tag followed by end-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
- "expected": ["</foo>"]
-},
-
-{"description": "tfoot end-tag at EOF",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"]],
- "expected": [""]
-},
-
-
-
-
-{"description": "tr end-tag followed by comment",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["Comment", "foo"]],
- "expected": ["</tr><!--foo-->"]
-},
-
-{"description": "tr end-tag followed by space character",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["Characters", " foo"]],
- "expected": ["</tr> foo"]
-},
-
-{"description": "tr end-tag followed by text",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["Characters", "foo"]],
- "expected": ["</tr>foo"]
-},
-
-{"description": "tr end-tag followed by start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
- "expected": ["</tr><foo>"]
-},
-
-{"description": "tr end-tag followed by tr start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
- "expected": ["<tr>", "</tr>"]
-},
-
-{"description": "tr end-tag followed by end-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
- "expected": ["</foo>"]
-},
-
-{"description": "tr end-tag at EOF",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"]],
- "expected": [""]
-},
-
-
-
-
-{"description": "td end-tag followed by comment",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["Comment", "foo"]],
- "expected": ["</td><!--foo-->"]
-},
-
-{"description": "td end-tag followed by space character",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["Characters", " foo"]],
- "expected": ["</td> foo"]
-},
-
-{"description": "td end-tag followed by text",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["Characters", "foo"]],
- "expected": ["</td>foo"]
-},
-
-{"description": "td end-tag followed by start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
- "expected": ["</td><foo>"]
-},
-
-{"description": "td end-tag followed by td start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["StartTag", "http://www.w3.org/1999/xhtml", "td", {}]],
- "expected": ["<td>", "</td>"]
-},
-
-{"description": "td end-tag followed by th start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["StartTag", "http://www.w3.org/1999/xhtml", "th", {}]],
- "expected": ["<th>", "</td>"]
-},
-
-{"description": "td end-tag followed by end-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
- "expected": ["</foo>"]
-},
-
-{"description": "td end-tag at EOF",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"]],
- "expected": [""]
-},
-
-
-
-
-{"description": "th end-tag followed by comment",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["Comment", "foo"]],
- "expected": ["</th><!--foo-->"]
-},
-
-{"description": "th end-tag followed by space character",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["Characters", " foo"]],
- "expected": ["</th> foo"]
-},
-
-{"description": "th end-tag followed by text",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["Characters", "foo"]],
- "expected": ["</th>foo"]
-},
-
-{"description": "th end-tag followed by start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
- "expected": ["</th><foo>"]
-},
-
-{"description": "th end-tag followed by th start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["StartTag", "http://www.w3.org/1999/xhtml", "th", {}]],
- "expected": ["<th>", "</th>"]
-},
-
-{"description": "th end-tag followed by td start-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["StartTag", "http://www.w3.org/1999/xhtml", "td", {}]],
- "expected": ["<td>", "</th>"]
-},
-
-{"description": "th end-tag followed by end-tag",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
- "expected": ["</foo>"]
-},
-
-{"description": "th end-tag at EOF",
- "input": [["EndTag", "http://www.w3.org/1999/xhtml"    , "th"]],
- "expected": [""]
-}
-
-]}
--- a/libs/html5lib/tests/testdata/serializer/options.test
+++ b/libs/html5lib/tests/testdata/serializer/options.test
@ -1,60 +0,0 @@
-{"tests":[
-
-{"description": "quote_char=\"'\"",
- "options": {"quote_char": "'"},
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "test 'with' quote_char"}]]],
- "expected": ["<span title='test &#39;with&#39; quote_char'>"]
-},
-
-{"description": "quote_attr_values=true",
- "options": {"quote_attr_values": true},
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "button", [{"namespace": null, "name": "disabled", "value" :"disabled"}]]],
- "expected": ["<button disabled>"],
- "xhtml":    ["<button disabled=\"disabled\">"]
-},
-
-{"description": "quote_attr_values=true with irrelevant",
- "options": {"quote_attr_values": true},
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :"irrelevant"}]]],
- "expected": ["<div irrelevant>"],
- "xhtml":    ["<div irrelevant=\"irrelevant\">"]
-},
-
-{"description": "use_trailing_solidus=true with void element",
- "options": {"use_trailing_solidus": true},
- "input": [["EmptyTag", "img", {}]],
- "expected": ["<img />"]
-},
-
-{"description": "use_trailing_solidus=true with non-void element",
- "options": {"use_trailing_solidus": true},
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", {}]],
- "expected": ["<div>"]
-},
-
-{"description": "minimize_boolean_attributes=false",
- "options": {"minimize_boolean_attributes": false},
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :"irrelevant"}]]],
- "expected": ["<div irrelevant=irrelevant>"],
- "xhtml":    ["<div irrelevant=\"irrelevant\">"]
-},
-
-{"description": "minimize_boolean_attributes=false with empty value",
- "options": {"minimize_boolean_attributes": false},
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :""}]]],
- "expected": ["<div irrelevant=\"\">"]
-},
-
-{"description": "escape less than signs in attribute values",
- "options": {"escape_lt_in_attrs": true},
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "a", [{"namespace": null, "name": "title", "value": "a<b>c&d"}]]],
- "expected": ["<a title=\"a&lt;b>c&amp;d\">"]
-},
-
-{"description": "rcdata",
- "options": {"escape_rcdata": true},
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "script", {}], ["Characters", "a<b>c&d"]],
- "expected": ["<script>a&lt;b&gt;c&amp;d"]
-}
-
-]}
--- a/libs/html5lib/tests/testdata/serializer/whitespace.test
+++ b/libs/html5lib/tests/testdata/serializer/whitespace.test
@ -1,51 +0,0 @@
-{"tests": [
-
-{"description": "bare text with leading spaces",
- "options": {"strip_whitespace": true},
- "input": [["Characters", "\t\r\n\u000C foo"]],
- "expected": [" foo"]
-},
-
-{"description": "bare text with trailing spaces",
- "options": {"strip_whitespace": true},
- "input": [["Characters", "foo \t\r\n\u000C"]],
- "expected": ["foo "]
-},
-
-{"description": "bare text with inner spaces",
- "options": {"strip_whitespace": true},
- "input": [["Characters", "foo \t\r\n\u000C bar"]],
- "expected": ["foo bar"]
-},
-
-{"description": "text within <pre>",
- "options": {"strip_whitespace": true},
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "pre", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "pre"]],
- "expected": ["<pre>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</pre>"]
-},
-
-{"description": "text within <pre>, with inner markup",
- "options": {"strip_whitespace": true},
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "pre", {}], ["Characters", "\t\r\n\u000C fo"], ["StartTag", "http://www.w3.org/1999/xhtml", "span", {}], ["Characters", "o \t\r\n\u000C b"], ["EndTag", "http://www.w3.org/1999/xhtml", "span"], ["Characters", "ar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "pre"]],
- "expected": ["<pre>\t\r\n\u000C fo<span>o \t\r\n\u000C b</span>ar \t\r\n\u000C</pre>"]
-},
-
-{"description": "text within <textarea>",
- "options": {"strip_whitespace": true},
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "textarea", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "textarea"]],
- "expected": ["<textarea>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</textarea>"]
-},
-
-{"description": "text within <script>",
- "options": {"strip_whitespace": true},
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "script", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "script"]],
- "expected": ["<script>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</script>"]
-},
-
-{"description": "text within <style>",
- "options": {"strip_whitespace": true},
- "input": [["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "style"]],
- "expected": ["<style>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</style>"]
-}
-
-]}
--- a/libs/html5lib/tests/testdata/tokenizer/README.md
+++ b/libs/html5lib/tests/testdata/tokenizer/README.md
@ -1,104 +0,0 @@
-Tokenizer tests
-===============
-
-The test format is [JSON](http://www.json.org/). This has the advantage
-that the syntax allows backward-compatible extensions to the tests and
-the disadvantage that it is relatively verbose.
-
-Basic Structure
---------------
-
-    {"tests": [
-        {"description": "Test description",
-        "input": "input_string",
-        "output": [expected_output_tokens],
-        "initialStates": [initial_states],
-        "lastStartTag": last_start_tag,
-        "ignoreErrorOrder": ignore_error_order
-        }
-    ]}
-
-Multiple tests per file are allowed simply by adding more objects to the
-"tests" list.
-
-`description`, `input` and `output` are always present. The other values
-are optional.
-
-### Test set-up
-
-`test.input` is a string containing the characters to pass to the
-tokenizer. Specifically, it represents the characters of the **input
-stream**, and so implementations are expected to perform the processing
-described in the spec's **Preprocessing the input stream** section
-before feeding the result to the tokenizer.
-
-If `test.doubleEscaped` is present and `true`, then `test.input` is not
-quite as described above. Instead, it must first be subjected to another
-round of unescaping (i.e., in addition to any unescaping involved in the
-JSON import), and the result of *that* represents the characters of the
-input stream. Currently, the only unescaping required by this option is
-to convert each sequence of the form \\uHHHH (where H is a hex digit)
-into the corresponding Unicode code point. (Note that this option also
-affects the interpretation of `test.output`.)
-
-`test.initialStates` is a list of strings, each being the name of a
-tokenizer state. The test should be run once for each string, using it
-to set the tokenizer's initial state for that run. If
-`test.initialStates` is omitted, it defaults to `["data state"]`.
-
-`test.lastStartTag` is a lowercase string that should be used as "the
-tag name of the last start tag to have been emitted from this
-tokenizer", referenced in the spec's definition of **appropriate end tag
-token**. If it is omitted, it is treated as if "no start tag has been
-emitted from this tokenizer".
-
-### Test results
-
-`test.output` is a list of tokens, ordered with the first produced by
-the tokenizer the first (leftmost) in the list. The list must match the
-**complete** list of tokens that the tokenizer should produce. Valid
-tokens are:
-
-    ["DOCTYPE", name, public_id, system_id, correctness]
-    ["StartTag", name, {attributes}*, true*]
-    ["StartTag", name, {attributes}]
-    ["EndTag", name]
-    ["Comment", data]
-    ["Character", data]
-    "ParseError"
-
-`public_id` and `system_id` are either strings or `null`. `correctness`
-is either `true` or `false`; `true` corresponds to the force-quirks flag
-being false, and vice-versa.
-
-When the self-closing flag is set, the `StartTag` array has `true` as
-its fourth entry. When the flag is not set, the array has only three
-entries for backwards compatibility.
-
-All adjacent character tokens are coalesced into a single
-`["Character", data]` token.
-
-If `test.doubleEscaped` is present and `true`, then every string within
-`test.output` must be further unescaped (as described above) before
-comparing with the tokenizer's output.
-
-`test.ignoreErrorOrder` is a boolean value indicating that the order of
-`ParseError` tokens relative to other tokens in the output stream is
-unimportant, and implementations should ignore such differences between
-their output and `expected_output_tokens`. (This is used for errors
-emitted by the input stream preprocessing stage, since it is useful to
-test that code but it is undefined when the errors occur). If it is
-omitted, it defaults to `false`.
-
-xmlViolation tests
------------------
-
-`tokenizer/xmlViolation.test` differs from the above in a couple of
-ways:
-
--   The name of the single member of the top-level JSON object is
-    "xmlViolationTests" instead of "tests".
--   Each test's expected output assumes that the implementation is applying
-    the tweaks given in the spec's "Coercing an HTML DOM into an
-    infoset" section.
-
--- a/libs/html5lib/tests/testdata/tokenizer/contentModelFlags.test
+++ b/libs/html5lib/tests/testdata/tokenizer/contentModelFlags.test
@ -1,81 +0,0 @@
-{"tests": [
-
-{"description":"PLAINTEXT content model flag",
-"initialStates":["PLAINTEXT state"],
-"lastStartTag":"plaintext",
-"input":"<head>&body;",
-"output":[["Character", "<head>&body;"]]},
-
-{"description":"End tag closing RCDATA or RAWTEXT",
-"initialStates":["RCDATA state", "RAWTEXT state"],
-"lastStartTag":"xmp",
-"input":"foo</xmp>",
-"output":[["Character", "foo"], ["EndTag", "xmp"]]},
-
-{"description":"End tag closing RCDATA or RAWTEXT (case-insensitivity)",
-"initialStates":["RCDATA state", "RAWTEXT state"],
-"lastStartTag":"xmp",
-"input":"foo</xMp>",
-"output":[["Character", "foo"], ["EndTag", "xmp"]]},
-
-{"description":"End tag closing RCDATA or RAWTEXT (ending with space)",
-"initialStates":["RCDATA state", "RAWTEXT state"],
-"lastStartTag":"xmp",
-"input":"foo</xmp ",
-"output":[["Character", "foo"], "ParseError"]},
-
-{"description":"End tag closing RCDATA or RAWTEXT (ending with EOF)",
-"initialStates":["RCDATA state", "RAWTEXT state"],
-"lastStartTag":"xmp",
-"input":"foo</xmp",
-"output":[["Character", "foo</xmp"]]},
-
-{"description":"End tag closing RCDATA or RAWTEXT (ending with slash)",
-"initialStates":["RCDATA state", "RAWTEXT state"],
-"lastStartTag":"xmp",
-"input":"foo</xmp/",
-"output":[["Character", "foo"], "ParseError"]},
-
-{"description":"End tag not closing RCDATA or RAWTEXT (ending with left-angle-bracket)",
-"initialStates":["RCDATA state", "RAWTEXT state"],
-"lastStartTag":"xmp",
-"input":"foo</xmp<",
-"output":[["Character", "foo</xmp<"]]},
-
-{"description":"End tag with incorrect name in RCDATA or RAWTEXT",
-"initialStates":["RCDATA state", "RAWTEXT state"],
-"lastStartTag":"xmp",
-"input":"</foo>bar</xmp>",
-"output":[["Character", "</foo>bar"], ["EndTag", "xmp"]]},
-
-{"description":"Partial end tags leading straight into partial end tags",
-"initialStates":["RCDATA state", "RAWTEXT state"],
-"lastStartTag":"xmp",
-"input":"</xmp</xmp</xmp>",
-"output":[["Character", "</xmp</xmp"], ["EndTag", "xmp"]]},
-
-{"description":"End tag with incorrect name in RCDATA or RAWTEXT (starting like correct name)",
-"initialStates":["RCDATA state", "RAWTEXT state"],
-"lastStartTag":"xmp",
-"input":"</foo>bar</xmpaar>",
-"output":[["Character", "</foo>bar</xmpaar>"]]},
-
-{"description":"End tag closing RCDATA or RAWTEXT, switching back to PCDATA",
-"initialStates":["RCDATA state", "RAWTEXT state"],
-"lastStartTag":"xmp",
-"input":"foo</xmp></baz>",
-"output":[["Character", "foo"], ["EndTag", "xmp"], ["EndTag", "baz"]]},
-
-{"description":"RAWTEXT w/ something looking like an entity",
-"initialStates":["RAWTEXT state"],
-"lastStartTag":"xmp",
-"input":"&foo;",
-"output":[["Character", "&foo;"]]},
-
-{"description":"RCDATA w/ an entity",
-"initialStates":["RCDATA state"],
-"lastStartTag":"textarea",
-"input":"&lt;",
-"output":[["Character", "<"]]}
-
-]}
--- a/libs/html5lib/tests/testdata/tokenizer/domjs.test
+++ b/libs/html5lib/tests/testdata/tokenizer/domjs.test
@ -1,96 +0,0 @@
-{
-    "tests": [
-        {
-            "description":"CR in bogus comment state",
-            "input":"<?\u000d",
-            "output":["ParseError", ["Comment", "?\u000a"]]
-        },
-        {
-            "description":"CRLF in bogus comment state",
-            "input":"<?\u000d\u000a",
-            "output":["ParseError", ["Comment", "?\u000a"]]
-        },
-        {
-            "description":"CRLFLF in bogus comment state",
-            "input":"<?\u000d\u000a\u000a",
-            "output":["ParseError", ["Comment", "?\u000a\u000a"]]
-        },
-        {
-            "description":"NUL in RCDATA and RAWTEXT",
-            "doubleEscaped":true,
-            "initialStates":["RCDATA state", "RAWTEXT state"],
-            "input":"\\u0000",
-            "output":["ParseError", ["Character", "\\uFFFD"]]
-        },
-        {
-            "description":"leading U+FEFF must pass through",
-            "doubleEscaped":true,
-            "input":"\\uFEFFfoo\\uFEFFbar",
-            "output":[["Character", "\\uFEFFfoo\\uFEFFbar"]]
-        },
-        {
-            "description":"Non BMP-charref in RCDATA",
-            "initialStates":["RCDATA state"],
-            "input":"&NotEqualTilde;",
-            "output":[["Character", "\u2242\u0338"]]
-        },
-        {
-            "description":"Bad charref in RCDATA",
-            "initialStates":["RCDATA state"],
-            "input":"&NotEqualTild;",
-            "output":["ParseError", ["Character", "&NotEqualTild;"]]
-        },
-        {
-            "description":"lowercase endtags in RCDATA and RAWTEXT",
-            "initialStates":["RCDATA state", "RAWTEXT state"],
-            "lastStartTag":"xmp",
-            "input":"</XMP>",
-            "output":[["EndTag","xmp"]]
-        },
-        {
-            "description":"bad endtag in RCDATA and RAWTEXT",
-            "initialStates":["RCDATA state", "RAWTEXT state"],
-            "lastStartTag":"xmp",
-            "input":"</ XMP>",
-            "output":[["Character","</ XMP>"]]
-        },
-        {
-            "description":"bad endtag in RCDATA and RAWTEXT",
-            "initialStates":["RCDATA state", "RAWTEXT state"],
-            "lastStartTag":"xmp",
-            "input":"</xm>",
-            "output":[["Character","</xm>"]]
-        },
-        {
-            "description":"bad endtag in RCDATA and RAWTEXT",
-            "initialStates":["RCDATA state", "RAWTEXT state"],
-            "lastStartTag":"xmp",
-            "input":"</xm ",
-            "output":[["Character","</xm "]]
-        },
-        {
-            "description":"bad endtag in RCDATA and RAWTEXT",
-            "initialStates":["RCDATA state", "RAWTEXT state"],
-            "lastStartTag":"xmp",
-            "input":"</xm/",
-            "output":[["Character","</xm/"]]
-        },
-        {
-            "description":"Non BMP-charref in attribute",
-            "input":"<p id=\"&NotEqualTilde;\">",
-            "output":[["StartTag", "p", {"id":"\u2242\u0338"}]]
-        },
-        {
-            "description":"--!NUL in comment ",
-            "doubleEscaped":true,
-            "input":"<!----!\\u0000-->",
-            "output":["ParseError", "ParseError", ["Comment", "--!\\uFFFD"]]
-        },
-        {
-            "description":"space EOF after doctype ",
-            "input":"<!DOCTYPE html ",
-            "output":["ParseError", ["DOCTYPE", "html", null, null , false]]
-        }
-
-    ]
-}
--- a/libs/html5lib/tests/testdata/tokenizer/entities.test
+++ b/libs/html5lib/tests/testdata/tokenizer/entities.test
@ -1,283 +0,0 @@
-{"tests": [
-
-{"description": "Undefined named entity in attribute value ending in semicolon and whose name starts with a known entity name.",
-"input":"<h a='&noti;'>",
-"output": [["StartTag", "h", {"a": "&noti;"}]]},
-
-{"description": "Entity name followed by the equals sign in an attribute value.",
-"input":"<h a='&lang='>",
-"output": [["StartTag", "h", {"a": "&lang="}]]},
-
-{"description": "CR as numeric entity",
-"input":"&#013;",
-"output": ["ParseError", ["Character", "\r"]]},
-
-{"description": "CR as hexadecimal numeric entity",
-"input":"&#x00D;",
-"output": ["ParseError", ["Character", "\r"]]},
-
-{"description": "Windows-1252 EURO SIGN numeric entity.",
-"input":"&#0128;",
-"output": ["ParseError", ["Character", "\u20AC"]]},
-
-{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
-"input":"&#0129;",
-"output": ["ParseError", ["Character", "\u0081"]]},
-
-{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK numeric entity.",
-"input":"&#0130;",
-"output": ["ParseError", ["Character", "\u201A"]]},
-
-{"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK numeric entity.",
-"input":"&#0131;",
-"output": ["ParseError", ["Character", "\u0192"]]},
-
-{"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK numeric entity.",
-"input":"&#0132;",
-"output": ["ParseError", ["Character", "\u201E"]]},
-
-{"description": "Windows-1252 HORIZONTAL ELLIPSIS numeric entity.",
-"input":"&#0133;",
-"output": ["ParseError", ["Character", "\u2026"]]},
-
-{"description": "Windows-1252 DAGGER numeric entity.",
-"input":"&#0134;",
-"output": ["ParseError", ["Character", "\u2020"]]},
-
-{"description": "Windows-1252 DOUBLE DAGGER numeric entity.",
-"input":"&#0135;",
-"output": ["ParseError", ["Character", "\u2021"]]},
-
-{"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT numeric entity.",
-"input":"&#0136;",
-"output": ["ParseError", ["Character", "\u02C6"]]},
-
-{"description": "Windows-1252 PER MILLE SIGN numeric entity.",
-"input":"&#0137;",
-"output": ["ParseError", ["Character", "\u2030"]]},
-
-{"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON numeric entity.",
-"input":"&#0138;",
-"output": ["ParseError", ["Character", "\u0160"]]},
-
-{"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK numeric entity.",
-"input":"&#0139;",
-"output": ["ParseError", ["Character", "\u2039"]]},
-
-{"description": "Windows-1252 LATIN CAPITAL LIGATURE OE numeric entity.",
-"input":"&#0140;",
-"output": ["ParseError", ["Character", "\u0152"]]},
-
-{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
-"input":"&#0141;",
-"output": ["ParseError", ["Character", "\u008D"]]},
-
-{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON numeric entity.",
-"input":"&#0142;",
-"output": ["ParseError", ["Character", "\u017D"]]},
-
-{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
-"input":"&#0143;",
-"output": ["ParseError", ["Character", "\u008F"]]},
-
-{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
-"input":"&#0144;",
-"output": ["ParseError", ["Character", "\u0090"]]},
-
-{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK numeric entity.",
-"input":"&#0145;",
-"output": ["ParseError", ["Character", "\u2018"]]},
-
-{"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK numeric entity.",
-"input":"&#0146;",
-"output": ["ParseError", ["Character", "\u2019"]]},
-
-{"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK numeric entity.",
-"input":"&#0147;",
-"output": ["ParseError", ["Character", "\u201C"]]},
-
-{"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK numeric entity.",
-"input":"&#0148;",
-"output": ["ParseError", ["Character", "\u201D"]]},
-
-{"description": "Windows-1252 BULLET numeric entity.",
-"input":"&#0149;",
-"output": ["ParseError", ["Character", "\u2022"]]},
-
-{"description": "Windows-1252 EN DASH numeric entity.",
-"input":"&#0150;",
-"output": ["ParseError", ["Character", "\u2013"]]},
-
-{"description": "Windows-1252 EM DASH numeric entity.",
-"input":"&#0151;",
-"output": ["ParseError", ["Character", "\u2014"]]},
-
-{"description": "Windows-1252 SMALL TILDE numeric entity.",
-"input":"&#0152;",
-"output": ["ParseError", ["Character", "\u02DC"]]},
-
-{"description": "Windows-1252 TRADE MARK SIGN numeric entity.",
-"input":"&#0153;",
-"output": ["ParseError", ["Character", "\u2122"]]},
-
-{"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON numeric entity.",
-"input":"&#0154;",
-"output": ["ParseError", ["Character", "\u0161"]]},
-
-{"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK numeric entity.",
-"input":"&#0155;",
-"output": ["ParseError", ["Character", "\u203A"]]},
-
-{"description": "Windows-1252 LATIN SMALL LIGATURE OE numeric entity.",
-"input":"&#0156;",
-"output": ["ParseError", ["Character", "\u0153"]]},
-
-{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
-"input":"&#0157;",
-"output": ["ParseError", ["Character", "\u009D"]]},
-
-{"description": "Windows-1252 EURO SIGN hexadecimal numeric entity.",
-"input":"&#x080;",
-"output": ["ParseError", ["Character", "\u20AC"]]},
-
-{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
-"input":"&#x081;",
-"output": ["ParseError", ["Character", "\u0081"]]},
-
-{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK hexadecimal numeric entity.",
-"input":"&#x082;",
-"output": ["ParseError", ["Character", "\u201A"]]},
-
-{"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK hexadecimal numeric entity.",
-"input":"&#x083;",
-"output": ["ParseError", ["Character", "\u0192"]]},
-
-{"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK hexadecimal numeric entity.",
-"input":"&#x084;",
-"output": ["ParseError", ["Character", "\u201E"]]},
-
-{"description": "Windows-1252 HORIZONTAL ELLIPSIS hexadecimal numeric entity.",
-"input":"&#x085;",
-"output": ["ParseError", ["Character", "\u2026"]]},
-
-{"description": "Windows-1252 DAGGER hexadecimal numeric entity.",
-"input":"&#x086;",
-"output": ["ParseError", ["Character", "\u2020"]]},
-
-{"description": "Windows-1252 DOUBLE DAGGER hexadecimal numeric entity.",
-"input":"&#x087;",
-"output": ["ParseError", ["Character", "\u2021"]]},
-
-{"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT hexadecimal numeric entity.",
-"input":"&#x088;",
-"output": ["ParseError", ["Character", "\u02C6"]]},
-
-{"description": "Windows-1252 PER MILLE SIGN hexadecimal numeric entity.",
-"input":"&#x089;",
-"output": ["ParseError", ["Character", "\u2030"]]},
-
-{"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON hexadecimal numeric entity.",
-"input":"&#x08A;",
-"output": ["ParseError", ["Character", "\u0160"]]},
-
-{"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.",
-"input":"&#x08B;",
-"output": ["ParseError", ["Character", "\u2039"]]},
-
-{"description": "Windows-1252 LATIN CAPITAL LIGATURE OE hexadecimal numeric entity.",
-"input":"&#x08C;",
-"output": ["ParseError", ["Character", "\u0152"]]},
-
-{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
-"input":"&#x08D;",
-"output": ["ParseError", ["Character", "\u008D"]]},
-
-{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON hexadecimal numeric entity.",
-"input":"&#x08E;",
-"output": ["ParseError", ["Character", "\u017D"]]},
-
-{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
-"input":"&#x08F;",
-"output": ["ParseError", ["Character", "\u008F"]]},
-
-{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
-"input":"&#x090;",
-"output": ["ParseError", ["Character", "\u0090"]]},
-
-{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK hexadecimal numeric entity.",
-"input":"&#x091;",
-"output": ["ParseError", ["Character", "\u2018"]]},
-
-{"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK hexadecimal numeric entity.",
-"input":"&#x092;",
-"output": ["ParseError", ["Character", "\u2019"]]},
-
-{"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK hexadecimal numeric entity.",
-"input":"&#x093;",
-"output": ["ParseError", ["Character", "\u201C"]]},
-
-{"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK hexadecimal numeric entity.",
-"input":"&#x094;",
-"output": ["ParseError", ["Character", "\u201D"]]},
-
-{"description": "Windows-1252 BULLET hexadecimal numeric entity.",
-"input":"&#x095;",
-"output": ["ParseError", ["Character", "\u2022"]]},
-
-{"description": "Windows-1252 EN DASH hexadecimal numeric entity.",
-"input":"&#x096;",
-"output": ["ParseError", ["Character", "\u2013"]]},
-
-{"description": "Windows-1252 EM DASH hexadecimal numeric entity.",
-"input":"&#x097;",
-"output": ["ParseError", ["Character", "\u2014"]]},
-
-{"description": "Windows-1252 SMALL TILDE hexadecimal numeric entity.",
-"input":"&#x098;",
-"output": ["ParseError", ["Character", "\u02DC"]]},
-
-{"description": "Windows-1252 TRADE MARK SIGN hexadecimal numeric entity.",
-"input":"&#x099;",
-"output": ["ParseError", ["Character", "\u2122"]]},
-
-{"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON hexadecimal numeric entity.",
-"input":"&#x09A;",
-"output": ["ParseError", ["Character", "\u0161"]]},
-
-{"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.",
-"input":"&#x09B;",
-"output": ["ParseError", ["Character", "\u203A"]]},
-
-{"description": "Windows-1252 LATIN SMALL LIGATURE OE hexadecimal numeric entity.",
-"input":"&#x09C;",
-"output": ["ParseError", ["Character", "\u0153"]]},
-
-{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
-"input":"&#x09D;",
-"output": ["ParseError", ["Character", "\u009D"]]},
-
-{"description": "Windows-1252 LATIN SMALL LETTER Z WITH CARON hexadecimal numeric entity.",
-"input":"&#x09E;",
-"output": ["ParseError", ["Character", "\u017E"]]},
-
-{"description": "Windows-1252 LATIN CAPITAL LETTER Y WITH DIAERESIS hexadecimal numeric entity.",
-"input":"&#x09F;",
-"output": ["ParseError", ["Character", "\u0178"]]},
-
-{"description": "Decimal numeric entity followed by hex character a.",
-"input":"&#97a",
-"output": ["ParseError", ["Character", "aa"]]},
-
-{"description": "Decimal numeric entity followed by hex character A.",
-"input":"&#97A",
-"output": ["ParseError", ["Character", "aA"]]},
-
-{"description": "Decimal numeric entity followed by hex character f.",
-"input":"&#97f",
-"output": ["ParseError", ["Character", "af"]]},
-
-{"description": "Decimal numeric entity followed by hex character A.",
-"input":"&#97F",
-"output": ["ParseError", ["Character", "aF"]]}
-
-]}
--- a/libs/html5lib/tests/testdata/tokenizer/escapeFlag.test
+++ b/libs/html5lib/tests/testdata/tokenizer/escapeFlag.test
@ -1,33 +0,0 @@
-{"tests": [
-
-{"description":"Commented close tag in RCDATA or RAWTEXT",
-"initialStates":["RCDATA state", "RAWTEXT state"],
-"lastStartTag":"xmp",
-"input":"foo<!--</xmp>--></xmp>",
-"output":[["Character", "foo<!--"], ["EndTag", "xmp"], ["Character", "-->"], ["EndTag", "xmp"]]},
-
-{"description":"Bogus comment in RCDATA or RAWTEXT",
-"initialStates":["RCDATA state", "RAWTEXT state"],
-"lastStartTag":"xmp",
-"input":"foo<!-->baz</xmp>",
-"output":[["Character", "foo<!-->baz"], ["EndTag", "xmp"]]},
-
-{"description":"End tag surrounded by bogus comment in RCDATA or RAWTEXT",
-"initialStates":["RCDATA state", "RAWTEXT state"],
-"lastStartTag":"xmp",
-"input":"foo<!--></xmp><!-->baz</xmp>",
-"output":[["Character", "foo<!-->"], ["EndTag", "xmp"], "ParseError", ["Comment", ""], ["Character", "baz"], ["EndTag", "xmp"]]},
-
-{"description":"Commented entities in RCDATA",
-"initialStates":["RCDATA state"],
-"lastStartTag":"xmp",
-"input":" &amp; <!-- &amp; --> &amp; </xmp>",
-"output":[["Character", " & <!-- & --> & "], ["EndTag", "xmp"]]},
-
-{"description":"Incorrect comment ending sequences in RCDATA or RAWTEXT",
-"initialStates":["RCDATA state", "RAWTEXT state"],
-"lastStartTag":"xmp",
-"input":"foo<!-- x --x>x-- >x--!>x--<></xmp>",
-"output":[["Character", "foo<!-- x --x>x-- >x--!>x--<>"], ["EndTag", "xmp"]]}
-
-]}
--- a/libs/html5lib/tests/testdata/tokenizer/namedEntities.test
+++ b/libs/html5lib/tests/testdata/tokenizer/namedEntities.test
--- a/libs/html5lib/tests/testdata/tokenizer/numericEntities.test
+++ b/libs/html5lib/tests/testdata/tokenizer/numericEntities.test
--- a/libs/html5lib/tests/testdata/tokenizer/pendingSpecChanges.test
+++ b/libs/html5lib/tests/testdata/tokenizer/pendingSpecChanges.test
@ -1,7 +0,0 @@
-{"tests": [
-
-{"description":"<!---- >",
-"input":"<!---- >",
-"output":["ParseError", "ParseError", ["Comment","-- >"]]}
-
-]}
--- a/libs/html5lib/tests/testdata/tokenizer/test1.test
+++ b/libs/html5lib/tests/testdata/tokenizer/test1.test
@ -1,196 +0,0 @@
-{"tests": [
-
-{"description":"Correct Doctype lowercase",
-"input":"<!DOCTYPE html>",
-"output":[["DOCTYPE", "html", null, null, true]]},
-
-{"description":"Correct Doctype uppercase",
-"input":"<!DOCTYPE HTML>",
-"output":[["DOCTYPE", "html", null, null, true]]},
-
-{"description":"Correct Doctype mixed case",
-"input":"<!DOCTYPE HtMl>", 
-"output":[["DOCTYPE", "html", null, null, true]]},
-
-{"description":"Correct Doctype case with EOF",
-"input":"<!DOCTYPE HtMl", 
-"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
-
-{"description":"Truncated doctype start",
-"input":"<!DOC>", 
-"output":["ParseError", ["Comment", "DOC"]]},
-
-{"description":"Doctype in error",
-"input":"<!DOCTYPE foo>", 
-"output":[["DOCTYPE", "foo", null, null, true]]},
-
-{"description":"Single Start Tag",
-"input":"<h>",
-"output":[["StartTag", "h", {}]]},
-
-{"description":"Empty end tag",
-"input":"</>",
-"output":["ParseError"]},
-
-{"description":"Empty start tag",
-"input":"<>",
-"output":["ParseError", ["Character", "<>"]]},
-
-{"description":"Start Tag w/attribute",
-"input":"<h a='b'>",
-"output":[["StartTag", "h", {"a":"b"}]]},
-
-{"description":"Start Tag w/attribute no quotes",
-"input":"<h a=b>",
-"output":[["StartTag", "h", {"a":"b"}]]},
-
-{"description":"Start/End Tag",
-"input":"<h></h>",
-"output":[["StartTag", "h", {}], ["EndTag", "h"]]},
-
-{"description":"Two unclosed start tags",
-"input":"<p>One<p>Two",
-"output":[["StartTag", "p", {}], ["Character", "One"], ["StartTag", "p", {}], ["Character", "Two"]]},
-
-{"description":"End Tag w/attribute",
-"input":"<h></h a='b'>",
-"output":[["StartTag", "h", {}], "ParseError", ["EndTag", "h"]]},
-
-{"description":"Multiple atts",
-"input":"<h a='b' c='d'>",
-"output":[["StartTag", "h", {"a":"b", "c":"d"}]]},
-
-{"description":"Multiple atts no space",
-"input":"<h a='b'c='d'>",
-"output":["ParseError", ["StartTag", "h", {"a":"b", "c":"d"}]]},
-
-{"description":"Repeated attr",
- "input":"<h a='b' a='d'>",
- "output":["ParseError", ["StartTag", "h", {"a":"b"}]]},
-
-{"description":"Simple comment",
- "input":"<!--comment-->",
- "output":[["Comment", "comment"]]},
-
-{"description":"Comment, Central dash no space",
- "input":"<!----->",
- "output":["ParseError", ["Comment", "-"]]},
-
-{"description":"Comment, two central dashes",
-"input":"<!-- --comment -->",
-"output":["ParseError", ["Comment", " --comment "]]},
-
-{"description":"Unfinished comment",
-"input":"<!--comment",
-"output":["ParseError", ["Comment", "comment"]]},
-
-{"description":"Start of a comment",
-"input":"<!-",
-"output":["ParseError", ["Comment", "-"]]},
-
-{"description":"Short comment",
- "input":"<!-->",
- "output":["ParseError", ["Comment", ""]]},
-
-{"description":"Short comment two",
- "input":"<!--->",
- "output":["ParseError", ["Comment", ""]]},
-
-{"description":"Short comment three",
- "input":"<!---->",
- "output":[["Comment", ""]]},
-
-
-{"description":"Ampersand EOF",
-"input":"&",
-"output":[["Character", "&"]]},
-
-{"description":"Ampersand ampersand EOF",
-"input":"&&",
-"output":[["Character", "&&"]]},
-
-{"description":"Ampersand space EOF",
-"input":"& ",
-"output":[["Character", "& "]]},
-
-{"description":"Unfinished entity",
-"input":"&f",
-"output":[["Character", "&f"]]},
-
-{"description":"Ampersand, number sign",
-"input":"&#",
-"output":["ParseError", ["Character", "&#"]]},
-
-{"description":"Unfinished numeric entity",
-"input":"&#x",
-"output":["ParseError", ["Character", "&#x"]]},
-
-{"description":"Entity with trailing semicolon (1)",
-"input":"I'm &not;it",
-"output":[["Character","I'm \u00ACit"]]},
-
-{"description":"Entity with trailing semicolon (2)",
-"input":"I'm &notin;",
-"output":[["Character","I'm \u2209"]]},
-
-{"description":"Entity without trailing semicolon (1)",
-"input":"I'm &notit",
-"output":[["Character","I'm "], "ParseError", ["Character", "\u00ACit"]]},
-
-{"description":"Entity without trailing semicolon (2)",
-"input":"I'm &notin",
-"output":[["Character","I'm "], "ParseError", ["Character", "\u00ACin"]]},
-
-{"description":"Partial entity match at end of file",
-"input":"I'm &no",
-"output":[["Character","I'm &no"]]},
-
-{"description":"Non-ASCII character reference name",
-"input":"&\u00AC;",
-"output":[["Character", "&\u00AC;"]]},
-
-{"description":"ASCII decimal entity",
-"input":"&#0036;",
-"output":[["Character","$"]]},
-
-{"description":"ASCII hexadecimal entity",
-"input":"&#x3f;",
-"output":[["Character","?"]]},
-
-{"description":"Hexadecimal entity in attribute",
-"input":"<h a='&#x3f;'></h>",
-"output":[["StartTag", "h", {"a":"?"}], ["EndTag", "h"]]},
-
-{"description":"Entity in attribute without semicolon ending in x",
-"input":"<h a='&notx'>",
-"output":[["StartTag", "h", {"a":"&notx"}]]},
-
-{"description":"Entity in attribute without semicolon ending in 1",
-"input":"<h a='&not1'>",
-"output":[["StartTag", "h", {"a":"&not1"}]]},
-
-{"description":"Entity in attribute without semicolon ending in i",
-"input":"<h a='&noti'>",
-"output":[["StartTag", "h", {"a":"&noti"}]]},
-
-{"description":"Entity in attribute without semicolon",
-"input":"<h a='&COPY'>",
-"output":["ParseError", ["StartTag", "h", {"a":"\u00A9"}]]},
-
-{"description":"Unquoted attribute ending in ampersand",
-"input":"<s o=& t>",
-"output":[["StartTag","s",{"o":"&","t":""}]]},
-
-{"description":"Unquoted attribute at end of tag with final character of &, with tag followed by characters",
-"input":"<a a=a&>foo",
-"output":[["StartTag", "a", {"a":"a&"}], ["Character", "foo"]]},
-
-{"description":"plaintext element",
- "input":"<plaintext>foobar",
- "output":[["StartTag","plaintext",{}], ["Character","foobar"]]},
-
-{"description":"Open angled bracket in unquoted attribute value state",
- "input":"<a a=f<>",
- "output":["ParseError", ["StartTag", "a", {"a":"f<"}]]}
-
-]}
--- a/libs/html5lib/tests/testdata/tokenizer/test2.test
+++ b/libs/html5lib/tests/testdata/tokenizer/test2.test
@ -1,179 +0,0 @@
-{"tests": [
-
-{"description":"DOCTYPE without name",
-"input":"<!DOCTYPE>",
-"output":["ParseError", "ParseError", ["DOCTYPE", null, null, null, false]]},
-
-{"description":"DOCTYPE without space before name",
-"input":"<!DOCTYPEhtml>",
-"output":["ParseError", ["DOCTYPE", "html", null, null, true]]},
-
-{"description":"Incorrect DOCTYPE without a space before name",
-"input":"<!DOCTYPEfoo>",
-"output":["ParseError", ["DOCTYPE", "foo", null, null, true]]},
-
-{"description":"DOCTYPE with publicId",
-"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\">",
-"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", null, true]]},
-
-{"description":"DOCTYPE with EOF after PUBLIC",
-"input":"<!DOCTYPE html PUBLIC",
-"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
-
-{"description":"DOCTYPE with EOF after PUBLIC '",
-"input":"<!DOCTYPE html PUBLIC '",
-"output":["ParseError", ["DOCTYPE", "html", "", null, false]]},
-
-{"description":"DOCTYPE with EOF after PUBLIC 'x",
-"input":"<!DOCTYPE html PUBLIC 'x",
-"output":["ParseError", ["DOCTYPE", "html", "x", null, false]]},
-
-{"description":"DOCTYPE with systemId",
-"input":"<!DOCTYPE html SYSTEM \"-//W3C//DTD HTML Transitional 4.01//EN\">",
-"output":[["DOCTYPE", "html", null, "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
-
-{"description":"DOCTYPE with publicId and systemId",
-"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\" \"-//W3C//DTD HTML Transitional 4.01//EN\">",
-"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
-
-{"description":"DOCTYPE with > in double-quoted publicId",
-"input":"<!DOCTYPE html PUBLIC \">x",
-"output":["ParseError", ["DOCTYPE", "html", "", null, false], ["Character", "x"]]},
-
-{"description":"DOCTYPE with > in single-quoted publicId",
-"input":"<!DOCTYPE html PUBLIC '>x",
-"output":["ParseError", ["DOCTYPE", "html", "", null, false], ["Character", "x"]]},
-
-{"description":"DOCTYPE with > in double-quoted systemId",
-"input":"<!DOCTYPE html PUBLIC \"foo\" \">x",
-"output":["ParseError", ["DOCTYPE", "html", "foo", "", false], ["Character", "x"]]},
-
-{"description":"DOCTYPE with > in single-quoted systemId",
-"input":"<!DOCTYPE html PUBLIC 'foo' '>x",
-"output":["ParseError", ["DOCTYPE", "html", "foo", "", false], ["Character", "x"]]},
-
-{"description":"Incomplete doctype",
-"input":"<!DOCTYPE html ",
-"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
-
-{"description":"Numeric entity representing the NUL character",
-"input":"&#0000;",
-"output":["ParseError", ["Character", "\uFFFD"]]},
-
-{"description":"Hexadecimal entity representing the NUL character",
-"input":"&#x0000;",
-"output":["ParseError", ["Character", "\uFFFD"]]},
-
-{"description":"Numeric entity representing a codepoint after 1114111 (U+10FFFF)",
-"input":"&#2225222;",
-"output":["ParseError", ["Character", "\uFFFD"]]},
-
-{"description":"Hexadecimal entity representing a codepoint after 1114111 (U+10FFFF)",
-"input":"&#x1010FFFF;",
-"output":["ParseError", ["Character", "\uFFFD"]]},
-
-{"description":"Hexadecimal entity pair representing a surrogate pair",
-"input":"&#xD869;&#xDED6;",
-"output":["ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"]]},
-
-{"description":"Hexadecimal entity with mixed uppercase and lowercase",
-"input":"&#xaBcD;",
-"output":[["Character", "\uABCD"]]},
-
-{"description":"Entity without a name",
-"input":"&;",
-"output":[["Character", "&;"]]},
-
-{"description":"Unescaped ampersand in attribute value",
-"input":"<h a='&'>",
-"output":[["StartTag", "h", { "a":"&" }]]},
-
-{"description":"StartTag containing <",
-"input":"<a<b>",
-"output":[["StartTag", "a<b", { }]]},
-
-{"description":"Non-void element containing trailing /",
-"input":"<h/>",
-"output":[["StartTag","h",{},true]]},
-
-{"description":"Void element with permitted slash",
-"input":"<br/>",
-"output":[["StartTag","br",{},true]]},
-
-{"description":"Void element with permitted slash (with attribute)",
-"input":"<br foo='bar'/>",
-"output":[["StartTag","br",{"foo":"bar"},true]]},
-
-{"description":"StartTag containing /",
-"input":"<h/a='b'>",
-"output":["ParseError", ["StartTag", "h", { "a":"b" }]]},
-
-{"description":"Double-quoted attribute value",
-"input":"<h a=\"b\">",
-"output":[["StartTag", "h", { "a":"b" }]]},
-
-{"description":"Unescaped </",
-"input":"</",
-"output":["ParseError", ["Character", "</"]]},
-
-{"description":"Illegal end tag name",
-"input":"</1>",
-"output":["ParseError", ["Comment", "1"]]},
-
-{"description":"Simili processing instruction",
-"input":"<?namespace>",
-"output":["ParseError", ["Comment", "?namespace"]]},
-
-{"description":"A bogus comment stops at >, even if preceeded by two dashes",
-"input":"<?foo-->",
-"output":["ParseError", ["Comment", "?foo--"]]},
-
-{"description":"Unescaped <",
-"input":"foo < bar",
-"output":[["Character", "foo "], "ParseError", ["Character", "< bar"]]},
-
-{"description":"Null Byte Replacement",
-"input":"\u0000",
-"output":["ParseError", ["Character", "\u0000"]]},
-
-{"description":"Comment with dash",
-"input":"<!---x",
-"output":["ParseError", ["Comment", "-x"]]},
-
-{"description":"Entity + newline",
-"input":"\nx\n&gt;\n",
-"output":[["Character","\nx\n>\n"]]},
-
-{"description":"Start tag with no attributes but space before the greater-than sign",
-"input":"<h >",
-"output":[["StartTag", "h", {}]]},
-
-{"description":"Empty attribute followed by uppercase attribute",
-"input":"<h a B=''>",
-"output":[["StartTag", "h", {"a":"", "b":""}]]},
-
-{"description":"Double-quote after attribute name",
-"input":"<h a \">",
-"output":["ParseError", ["StartTag", "h", {"a":"", "\"":""}]]},
-
-{"description":"Single-quote after attribute name",
-"input":"<h a '>",
-"output":["ParseError", ["StartTag", "h", {"a":"", "'":""}]]},
-
-{"description":"Empty end tag with following characters",
-"input":"a</>bc",
-"output":[["Character", "a"], "ParseError", ["Character", "bc"]]},
-
-{"description":"Empty end tag with following tag",
-"input":"a</><b>c",
-"output":[["Character", "a"], "ParseError", ["StartTag", "b", {}], ["Character", "c"]]},
-
-{"description":"Empty end tag with following comment",
-"input":"a</><!--b-->c",
-"output":[["Character", "a"], "ParseError", ["Comment", "b"], ["Character", "c"]]},
-
-{"description":"Empty end tag with following end tag",
-"input":"a</></b>c",
-"output":[["Character", "a"], "ParseError", ["EndTag", "b"], ["Character", "c"]]}
-
-]}
--- a/libs/html5lib/tests/testdata/tokenizer/test3.test
+++ b/libs/html5lib/tests/testdata/tokenizer/test3.test
--- a/libs/html5lib/tests/testdata/tokenizer/test4.test
+++ b/libs/html5lib/tests/testdata/tokenizer/test4.test
@ -1,344 +0,0 @@
-{"tests": [
-
-{"description":"< in attribute name",
-"input":"<z/0  <>",
-"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "", "<": ""}]]},
-
-{"description":"< in attribute value",
-"input":"<z x=<>",
-"output":["ParseError", ["StartTag", "z", {"x": "<"}]]},
-
-{"description":"= in unquoted attribute value",
-"input":"<z z=z=z>",
-"output":["ParseError", ["StartTag", "z", {"z": "z=z"}]]},
-
-{"description":"= attribute",
-"input":"<z =>",
-"output":["ParseError", ["StartTag", "z", {"=": ""}]]},
-
-{"description":"== attribute",
-"input":"<z ==>",
-"output":["ParseError", "ParseError", ["StartTag", "z", {"=": ""}]]},
-
-{"description":"=== attribute",
-"input":"<z ===>",
-"output":["ParseError", "ParseError", ["StartTag", "z", {"=": "="}]]},
-
-{"description":"==== attribute",
-"input":"<z ====>",
-"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"=": "=="}]]},
-
-{"description":"\" after ampersand in double-quoted attribute value",
-"input":"<z z=\"&\">",
-"output":[["StartTag", "z", {"z": "&"}]]},
-
-{"description":"' after ampersand in double-quoted attribute value",
-"input":"<z z=\"&'\">",
-"output":[["StartTag", "z", {"z": "&'"}]]},
-
-{"description":"' after ampersand in single-quoted attribute value",
-"input":"<z z='&'>",
-"output":[["StartTag", "z", {"z": "&"}]]},
-
-{"description":"\" after ampersand in single-quoted attribute value",
-"input":"<z z='&\"'>",
-"output":[["StartTag", "z", {"z": "&\""}]]},
-
-{"description":"Text after bogus character reference",
-"input":"<z z='&xlink_xmlns;'>bar<z>",
-"output":[["StartTag","z",{"z":"&xlink_xmlns;"}],["Character","bar"],["StartTag","z",{}]]},
-
-{"description":"Text after hex character reference",
-"input":"<z z='&#x0020; foo'>bar<z>",
-"output":[["StartTag","z",{"z":"  foo"}],["Character","bar"],["StartTag","z",{}]]},
-
-{"description":"Attribute name starting with \"",
-"input":"<foo \"='bar'>",
-"output":["ParseError", ["StartTag", "foo", {"\"": "bar"}]]},
-
-{"description":"Attribute name starting with '",
-"input":"<foo '='bar'>",
-"output":["ParseError", ["StartTag", "foo", {"'": "bar"}]]},
-
-{"description":"Attribute name containing \"",
-"input":"<foo a\"b='bar'>",
-"output":["ParseError", ["StartTag", "foo", {"a\"b": "bar"}]]},
-
-{"description":"Attribute name containing '",
-"input":"<foo a'b='bar'>",
-"output":["ParseError", ["StartTag", "foo", {"a'b": "bar"}]]},
-
-{"description":"Unquoted attribute value containing '",
-"input":"<foo a=b'c>",
-"output":["ParseError", ["StartTag", "foo", {"a": "b'c"}]]},
-
-{"description":"Unquoted attribute value containing \"",
-"input":"<foo a=b\"c>",
-"output":["ParseError", ["StartTag", "foo", {"a": "b\"c"}]]},
-
-{"description":"Double-quoted attribute value not followed by whitespace",
-"input":"<foo a=\"b\"c>",
-"output":["ParseError", ["StartTag", "foo", {"a": "b", "c": ""}]]},
-
-{"description":"Single-quoted attribute value not followed by whitespace",
-"input":"<foo a='b'c>",
-"output":["ParseError", ["StartTag", "foo", {"a": "b", "c": ""}]]},
-
-{"description":"Quoted attribute followed by permitted /",
-"input":"<br a='b'/>",
-"output":[["StartTag","br",{"a":"b"},true]]},
-
-{"description":"Quoted attribute followed by non-permitted /",
-"input":"<bar a='b'/>",
-"output":[["StartTag","bar",{"a":"b"},true]]},
-
-{"description":"CR EOF after doctype name",
-"input":"<!doctype html \r",
-"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
-
-{"description":"CR EOF in tag name",
-"input":"<z\r",
-"output":["ParseError"]},
-
-{"description":"Slash EOF in tag name",
-"input":"<z/",
-"output":["ParseError"]},
-
-{"description":"Zero hex numeric entity",
-"input":"&#x0",
-"output":["ParseError", "ParseError", ["Character", "\uFFFD"]]},
-
-{"description":"Zero decimal numeric entity",
-"input":"&#0",
-"output":["ParseError", "ParseError", ["Character", "\uFFFD"]]},
-
-{"description":"Zero-prefixed hex numeric entity",
-"input":"&#x000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000041;",
-"output":[["Character", "A"]]},
-
-{"description":"Zero-prefixed decimal numeric entity",
-"input":"&#000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000065;",
-"output":[["Character", "A"]]},
-
-{"description":"Empty hex numeric entities",
-"input":"&#x &#X ",
-"output":["ParseError", ["Character", "&#x "], "ParseError", ["Character", "&#X "]]},
-
-{"description":"Empty decimal numeric entities",
-"input":"&# &#; ",
-"output":["ParseError", ["Character", "&# "], "ParseError", ["Character", "&#; "]]},
-
-{"description":"Non-BMP numeric entity",
-"input":"&#x10000;",
-"output":[["Character", "\uD800\uDC00"]]},
-
-{"description":"Maximum non-BMP numeric entity",
-"input":"&#X10FFFF;",
-"output":["ParseError", ["Character", "\uDBFF\uDFFF"]]},
-
-{"description":"Above maximum numeric entity",
-"input":"&#x110000;",
-"output":["ParseError", ["Character", "\uFFFD"]]},
-
-{"description":"32-bit hex numeric entity",
-"input":"&#x80000041;",
-"output":["ParseError", ["Character", "\uFFFD"]]},
-
-{"description":"33-bit hex numeric entity",
-"input":"&#x100000041;",
-"output":["ParseError", ["Character", "\uFFFD"]]},
-
-{"description":"33-bit decimal numeric entity",
-"input":"&#4294967361;",
-"output":["ParseError", ["Character", "\uFFFD"]]},
-
-{"description":"65-bit hex numeric entity",
-"input":"&#x10000000000000041;",
-"output":["ParseError", ["Character", "\uFFFD"]]},
-
-{"description":"65-bit decimal numeric entity",
-"input":"&#18446744073709551681;",
-"output":["ParseError", ["Character", "\uFFFD"]]},
-
-{"description":"Surrogate code point edge cases",
-"input":"&#xD7FF;&#xD800;&#xD801;&#xDFFE;&#xDFFF;&#xE000;",
-"output":[["Character", "\uD7FF"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD\uE000"]]},
-
-{"description":"Uppercase start tag name",
-"input":"<X>",
-"output":[["StartTag", "x", {}]]},
-
-{"description":"Uppercase end tag name",
-"input":"</X>",
-"output":[["EndTag", "x"]]},
-
-{"description":"Uppercase attribute name",
-"input":"<x X>",
-"output":[["StartTag", "x", { "x":"" }]]},
-
-{"description":"Tag/attribute name case edge values",
-"input":"<x@AZ[`az{ @AZ[`az{>",
-"output":[["StartTag", "x@az[`az{", { "@az[`az{":"" }]]},
-
-{"description":"Duplicate different-case attributes",
-"input":"<x x=1 x=2 X=3>",
-"output":["ParseError", "ParseError", ["StartTag", "x", { "x":"1" }]]},
-
-{"description":"Uppercase close tag attributes",
-"input":"</x X>",
-"output":["ParseError", ["EndTag", "x"]]},
-
-{"description":"Duplicate close tag attributes",
-"input":"</x x x>",
-"output":["ParseError", "ParseError", ["EndTag", "x"]]},
-
-{"description":"Permitted slash",
-"input":"<br/>",
-"output":[["StartTag","br",{},true]]},
-
-{"description":"Non-permitted slash",
-"input":"<xr/>",
-"output":[["StartTag","xr",{},true]]},
-
-{"description":"Permitted slash but in close tag",
-"input":"</br/>",
-"output":["ParseError", ["EndTag", "br"]]},
-
-{"description":"Doctype public case-sensitivity (1)",
-"input":"<!DoCtYpE HtMl PuBlIc \"AbC\" \"XyZ\">",
-"output":[["DOCTYPE", "html", "AbC", "XyZ", true]]},
-
-{"description":"Doctype public case-sensitivity (2)",
-"input":"<!dOcTyPe hTmL pUbLiC \"aBc\" \"xYz\">",
-"output":[["DOCTYPE", "html", "aBc", "xYz", true]]},
-
-{"description":"Doctype system case-sensitivity (1)",
-"input":"<!DoCtYpE HtMl SyStEm \"XyZ\">",
-"output":[["DOCTYPE", "html", null, "XyZ", true]]},
-
-{"description":"Doctype system case-sensitivity (2)",
-"input":"<!dOcTyPe hTmL sYsTeM \"xYz\">",
-"output":[["DOCTYPE", "html", null, "xYz", true]]},
-
-{"description":"U+0000 in lookahead region after non-matching character",
-"input":"<!doc>\u0000",
-"output":["ParseError", ["Comment", "doc"], "ParseError", ["Character", "\u0000"]],
-"ignoreErrorOrder":true},
-
-{"description":"U+0000 in lookahead region",
-"input":"<!doc\u0000",
-"output":["ParseError", ["Comment", "doc\uFFFD"]],
-"ignoreErrorOrder":true},
-
-{"description":"U+0080 in lookahead region",
-"input":"<!doc\u0080",
-"output":["ParseError", "ParseError", ["Comment", "doc\u0080"]],
-"ignoreErrorOrder":true},
-
-{"description":"U+FDD1 in lookahead region",
-"input":"<!doc\uFDD1",
-"output":["ParseError", "ParseError", ["Comment", "doc\uFDD1"]],
-"ignoreErrorOrder":true},
-
-{"description":"U+1FFFF in lookahead region",
-"input":"<!doc\uD83F\uDFFF",
-"output":["ParseError", "ParseError", ["Comment", "doc\uD83F\uDFFF"]],
-"ignoreErrorOrder":true},
-
-{"description":"CR followed by non-LF",
-"input":"\r?",
-"output":[["Character", "\n?"]]},
-
-{"description":"CR at EOF",
-"input":"\r",
-"output":[["Character", "\n"]]},
-
-{"description":"LF at EOF",
-"input":"\n",
-"output":[["Character", "\n"]]},
-
-{"description":"CR LF",
-"input":"\r\n",
-"output":[["Character", "\n"]]},
-
-{"description":"CR CR",
-"input":"\r\r",
-"output":[["Character", "\n\n"]]},
-
-{"description":"LF LF",
-"input":"\n\n",
-"output":[["Character", "\n\n"]]},
-
-{"description":"LF CR",
-"input":"\n\r",
-"output":[["Character", "\n\n"]]},
-
-{"description":"text CR CR CR text",
-"input":"text\r\r\rtext",
-"output":[["Character", "text\n\n\ntext"]]},
-
-{"description":"Doctype publik",
-"input":"<!DOCTYPE html PUBLIK \"AbC\" \"XyZ\">",
-"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
-
-{"description":"Doctype publi",
-"input":"<!DOCTYPE html PUBLI",
-"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
-
-{"description":"Doctype sistem",
-"input":"<!DOCTYPE html SISTEM \"AbC\">",
-"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
-
-{"description":"Doctype sys",
-"input":"<!DOCTYPE html SYS",
-"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
-
-{"description":"Doctype html x>text",
-"input":"<!DOCTYPE html x>text",
-"output":["ParseError", ["DOCTYPE", "html", null, null, false], ["Character", "text"]]},
-
-{"description":"Grave accent in unquoted attribute",
-"input":"<a a=aa`>",
-"output":["ParseError", ["StartTag", "a", {"a":"aa`"}]]},
-
-{"description":"EOF in tag name state ",
-"input":"<a",
-"output":["ParseError"]},
-
-{"description":"EOF in tag name state",
-"input":"<a",
-"output":["ParseError"]},
-
-{"description":"EOF in before attribute name state",
-"input":"<a ",
-"output":["ParseError"]},
-
-{"description":"EOF in attribute name state",
-"input":"<a a",
-"output":["ParseError"]},
-
-{"description":"EOF in after attribute name state",
-"input":"<a a ",
-"output":["ParseError"]},
-
-{"description":"EOF in before attribute value state",
-"input":"<a a =",
-"output":["ParseError"]},
-
-{"description":"EOF in attribute value (double quoted) state",
-"input":"<a a =\"a",
-"output":["ParseError"]},
-
-{"description":"EOF in attribute value (single quoted) state",
-"input":"<a a ='a",
-"output":["ParseError"]},
-
-{"description":"EOF in attribute value (unquoted) state",
-"input":"<a a =a",
-"output":["ParseError"]},
-
-{"description":"EOF in after attribute value state",
-"input":"<a a ='a'",
-"output":["ParseError"]}
-
-]}
--- a/libs/html5lib/tests/testdata/tokenizer/unicodeChars.test
+++ b/libs/html5lib/tests/testdata/tokenizer/unicodeChars.test
--- a/libs/html5lib/tests/testdata/tokenizer/unicodeCharsProblematic.test
+++ b/libs/html5lib/tests/testdata/tokenizer/unicodeCharsProblematic.test
@ -1,31 +0,0 @@
-{"tests" : [
-{"description": "Invalid Unicode character U+DFFF",
-"doubleEscaped":true,
-"input": "\\uDFFF",
-"output":["ParseError", ["Character", "\\uDFFF"]],
-"ignoreErrorOrder":true},
-
-{"description": "Invalid Unicode character U+D800",
-"doubleEscaped":true,
-"input": "\\uD800",
-"output":["ParseError", ["Character", "\\uD800"]],
-"ignoreErrorOrder":true},
-
-{"description": "Invalid Unicode character U+DFFF with valid preceding character",
-"doubleEscaped":true,
-"input": "a\\uDFFF",
-"output":[["Character", "a"], "ParseError", ["Character", "\\uDFFF"]],
-"ignoreErrorOrder":true},
-
-{"description": "Invalid Unicode character U+D800 with valid following character",
-"doubleEscaped":true,
-"input": "\\uD800a",
-"output":["ParseError", ["Character", "\\uD800a"]],
-"ignoreErrorOrder":true},
-
-{"description":"CR followed by U+0000",
-"input":"\r\u0000",
-"output":[["Character", "\n"], "ParseError", ["Character", "\u0000"]],
-"ignoreErrorOrder":true}
-]
-}
--- a/libs/html5lib/tests/testdata/tokenizer/xmlViolation.test
+++ b/libs/html5lib/tests/testdata/tokenizer/xmlViolation.test
@ -1,22 +0,0 @@
-{"xmlViolationTests": [
-
-{"description":"Non-XML character",
-"input":"a\uFFFFb",
-"ignoreErrorOrder":true,
-"output":["ParseError",["Character","a\uFFFDb"]]},
-
-{"description":"Non-XML space",
-"input":"a\u000Cb",
-"ignoreErrorOrder":true,
-"output":[["Character","a b"]]},
-
-{"description":"Double hyphen in comment",
-"input":"<!-- foo -- bar -->",
-"output":["ParseError",["Comment"," foo - - bar "]]},
-
-{"description":"FF between attributes",
-"input":"<a b=''\u000Cc=''>",
-"output":[["StartTag","a",{"b":"","c":""}]]}
-]}
-
-
--- a/libs/html5lib/tests/testdata/tree-construction/README.md
+++ b/libs/html5lib/tests/testdata/tree-construction/README.md
@ -1,104 +0,0 @@
-Tree Construction Tests
-=======================
-
-Each file containing tree construction tests consists of any number of
-tests separated by two newlines (LF) and a single newline before the end
-of the file. For instance:
-
-    [TEST]LF
-    LF
-    [TEST]LF
-    LF
-    [TEST]LF
-
-Where [TEST] is the following format:
-
-Each test must begin with a string "\#data" followed by a newline (LF).
-All subsequent lines until a line that says "\#errors" are the test data
-and must be passed to the system being tested unchanged, except with the
-final newline (on the last line) removed.
-
-Then there must be a line that says "\#errors". It must be followed by
-one line per parse error that a conformant checker would return. It
-doesn't matter what those lines are, although they can't be
-"\#document-fragment", "\#document", "\#script-off", "\#script-on", or
-empty; the only thing that matters is that there be the right number
-of parse errors.
-
-Then there \*may\* be a line that says "\#document-fragment", which must
-be followed by a newline (LF), followed by a string of characters that
-indicates the context element, followed by a newline (LF). If the string 
-of characters starts with "svg ", the context element is in the SVG
-namespace and the substring after "svg " is the local name. If the
-string of characters starts with "math ", the context element is in the
-MathML namespace and the substring after "math " is the local name.
-Otherwise, the context element is in the HTML namespace and the string
-is the local name. If this line is present the "\#data" must be parsed
-using the HTML fragment parsing algorithm with the context element as
-context.
-
-Then there \*may\* be a line that says "\#script-off" or
-"\#script-on". If a line that says "\#script-off" is present, the
-parser must set the scripting flag to disabled. If a line that says
-"\#script-on" is present, it must set it to enabled. Otherwise, the
-test should be run in both modes.
-
-Then there must be a line that says "\#document", which must be followed
-by a dump of the tree of the parsed DOM. Each node must be represented
-by a single line. Each line must start with "| ", followed by two spaces
-per parent node that the node has before the root document node.
-
-   Element nodes must be represented by a "`<`" then the *tag name
-    string* "`>`", and all the attributes must be given, sorted
-    lexicographically by UTF-16 code unit according to their *attribute
-    name string*, on subsequent lines, as if they were children of the
-    element node.
-   Attribute nodes must have the *attribute name string*, then an "="
-    sign, then the attribute value in double quotes (").
-   Text nodes must be the string, in double quotes. Newlines aren't
-    escaped.
-   Comments must be "`<`" then "`!-- `" then the data then "` -->`".
-   DOCTYPEs must be "`<!DOCTYPE `" then the name then, if either of the
-    system id or public id is non-empty, a space, the public id in
-    double-quotes, another space and the system id in double-quotes, and
-    then in any case "`>`".
-   Processing instructions must be "`<?`", then the target, then a
-    space, then the data and then "`>`". (The HTML parser cannot emit
-    processing instructions, but scripts can, and the WebVTT to DOM
-    rules can emit them.)
-   Template contents are represented by the string "content" with the
-    children below it.
-
-The *tag name string* is the local name prefixed by a namespace
-designator. For the HTML namespace, the namespace designator is the
-empty string, i.e. there's no prefix. For the SVG namespace, the
-namespace designator is "svg ". For the MathML namespace, the namespace
-designator is "math ".
-
-The *attribute name string* is the local name prefixed by a namespace
-designator. For no namespace, the namespace designator is the empty
-string, i.e. there's no prefix. For the XLink namespace, the namespace
-designator is "xlink ". For the XML namespace, the namespace designator
-is "xml ". For the XMLNS namespace, the namespace designator is "xmlns
-". Note the difference between "xlink:href" which is an attribute in no
-namespace with the local name "xlink:href" and "xlink href" which is an
-attribute in the xlink namespace with the local name "href".
-
-If there is also a "\#document-fragment" the bit following "\#document"
-must be a representation of the HTML fragment serialization for the
-context element given by "\#document-fragment".
-
-For example:
-
-    #data
-    <p>One<p>Two
-    #errors
-    3: Missing document type declaration
-    #document
-    | <html>
-    |   <head>
-    |   <body>
-    |     <p>
-    |       "One"
-    |     <p>
-    |       "Two"
--- a/libs/html5lib/tests/testdata/tree-construction/adoption01.dat
+++ b/libs/html5lib/tests/testdata/tree-construction/adoption01.dat
@ -1,354 +0,0 @@
-#data
-<a><p></a></p>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,10): adoption-agency-1.3
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|     <p>
-|       <a>
-
-#data
-<a>1<p>2</a>3</p>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,12): adoption-agency-1.3
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|       "1"
-|     <p>
-|       <a>
-|         "2"
-|       "3"
-
-#data
-<a>1<button>2</a>3</button>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,17): adoption-agency-1.3
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|       "1"
-|     <button>
-|       <a>
-|         "2"
-|       "3"
-
-#data
-<a>1<b>2</a>3</b>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,12): adoption-agency-1.3
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|       "1"
-|       <b>
-|         "2"
-|     <b>
-|       "3"
-
-#data
-<a>1<div>2<div>3</a>4</div>5</div>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,20): adoption-agency-1.3
-(1,20): adoption-agency-1.3
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|       "1"
-|     <div>
-|       <a>
-|         "2"
-|       <div>
-|         <a>
-|           "3"
-|         "4"
-|       "5"
-
-#data
-<table><a>1<p>2</a>3</p>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,10): unexpected-start-tag-implies-table-voodoo
-(1,11): unexpected-character-implies-table-voodoo
-(1,14): unexpected-start-tag-implies-table-voodoo
-(1,15): unexpected-character-implies-table-voodoo
-(1,19): unexpected-end-tag-implies-table-voodoo
-(1,19): adoption-agency-1.3
-(1,20): unexpected-character-implies-table-voodoo
-(1,24): unexpected-end-tag-implies-table-voodoo
-(1,24): eof-in-table
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|       "1"
-|     <p>
-|       <a>
-|         "2"
-|       "3"
-|     <table>
-
-#data
-<b><b><a><p></a>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,16): adoption-agency-1.3
-(1,16): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|       <b>
-|         <a>
-|         <p>
-|           <a>
-
-#data
-<b><a><b><p></a>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,16): adoption-agency-1.3
-(1,16): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|       <a>
-|         <b>
-|       <b>
-|         <p>
-|           <a>
-
-#data
-<a><b><b><p></a>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,16): adoption-agency-1.3
-(1,16): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|       <b>
-|         <b>
-|     <b>
-|       <b>
-|         <p>
-|           <a>
-
-#data
-<p>1<s id="A">2<b id="B">3</p>4</s>5</b>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,30): unexpected-end-tag
-(1,35): adoption-agency-1.3
-#document
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       "1"
-|       <s>
-|         id="A"
-|         "2"
-|         <b>
-|           id="B"
-|           "3"
-|     <s>
-|       id="A"
-|       <b>
-|         id="B"
-|         "4"
-|     <b>
-|       id="B"
-|       "5"
-
-#data
-<table><a>1<td>2</td>3</table>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,10): unexpected-start-tag-implies-table-voodoo
-(1,11): unexpected-character-implies-table-voodoo
-(1,15): unexpected-cell-in-table-body
-(1,30): unexpected-implied-end-tag-in-table-view
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|       "1"
-|     <a>
-|       "3"
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             "2"
-
-#data
-<table>A<td>B</td>C</table>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,8): unexpected-character-implies-table-voodoo
-(1,12): unexpected-cell-in-table-body
-(1,22): unexpected-character-implies-table-voodoo
-#document
-| <html>
-|   <head>
-|   <body>
-|     "AC"
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             "B"
-
-#data
-<a><svg><tr><input></a>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,23): unexpected-end-tag
-(1,23): adoption-agency-1.3
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|       <svg svg>
-|         <svg tr>
-|           <svg input>
-
-#data
-<div><a><b><div><div><div><div><div><div><div><div><div><div></a>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,65): adoption-agency-1.3
-(1,65): adoption-agency-1.3
-(1,65): adoption-agency-1.3
-(1,65): adoption-agency-1.3
-(1,65): adoption-agency-1.3
-(1,65): adoption-agency-1.3
-(1,65): adoption-agency-1.3
-(1,65): adoption-agency-1.3
-(1,65): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       <a>
-|         <b>
-|       <b>
-|         <div>
-|           <a>
-|           <div>
-|             <a>
-|             <div>
-|               <a>
-|               <div>
-|                 <a>
-|                 <div>
-|                   <a>
-|                   <div>
-|                     <a>
-|                     <div>
-|                       <a>
-|                       <div>
-|                         <a>
-|                           <div>
-|                             <div>
-
-#data
-<div><a><b><u><i><code><div></a>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,32): adoption-agency-1.3
-(1,32): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       <a>
-|         <b>
-|           <u>
-|             <i>
-|               <code>
-|       <u>
-|         <i>
-|           <code>
-|             <div>
-|               <a>
-
-#data
-<b><b><b><b>x</b></b></b></b>y
-#errors
-(1,3): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|       <b>
-|         <b>
-|           <b>
-|             "x"
-|     "y"
-
-#data
-<p><b><b><b><b><p>x
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,18): unexpected-end-tag
-(1,19): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <b>
-|         <b>
-|           <b>
-|             <b>
-|     <p>
-|       <b>
-|         <b>
-|           <b>
-|             "x"
-
-#data
-<b><em><foo><foob><fooc><aside></b></em>
-#errors
-(1,35): adoption-agency-1.3
-(1,40): adoption-agency-1.3
-(1,40): expected-closing-tag-but-got-eof
-#document-fragment
-div
-#document
-| <b>
-|   <em>
-|     <foo>
-|       <foob>
-|         <fooc>
-| <aside>
-|   <b>
--- a/libs/html5lib/tests/testdata/tree-construction/adoption02.dat
+++ b/libs/html5lib/tests/testdata/tree-construction/adoption02.dat
@ -1,39 +0,0 @@
-#data
-<b>1<i>2<p>3</b>4
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,16): adoption-agency-1.3
-(1,17): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|       "1"
-|       <i>
-|         "2"
-|     <i>
-|       <p>
-|         <b>
-|           "3"
-|         "4"
-
-#data
-<a><div><style></style><address><a>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,35): unexpected-start-tag-implies-end-tag
-(1,35): adoption-agency-1.3
-(1,35): adoption-agency-1.3
-(1,35): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|     <div>
-|       <a>
-|         <style>
-|       <address>
-|         <a>
-|         <a>
--- a/libs/html5lib/tests/testdata/tree-construction/comments01.dat
+++ b/libs/html5lib/tests/testdata/tree-construction/comments01.dat
@ -1,178 +0,0 @@
-#data
-FOO<!-- BAR -->BAZ
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <!--  BAR  -->
-|     "BAZ"
-
-#data
-FOO<!-- BAR --!>BAZ
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,15): unexpected-bang-after-double-dash-in-comment
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <!--  BAR  -->
-|     "BAZ"
-
-#data
-FOO<!-- BAR --   >BAZ
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,15): unexpected-char-in-comment
-(1,21): eof-in-comment
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <!--  BAR --   >BAZ -->
-
-#data
-FOO<!-- BAR -- <QUX> -- MUX -->BAZ
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,15): unexpected-char-in-comment
-(1,24): unexpected-char-in-comment
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <!--  BAR -- <QUX> -- MUX  -->
-|     "BAZ"
-
-#data
-FOO<!-- BAR -- <QUX> -- MUX --!>BAZ
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,15): unexpected-char-in-comment
-(1,24): unexpected-char-in-comment
-(1,31): unexpected-bang-after-double-dash-in-comment
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <!--  BAR -- <QUX> -- MUX  -->
-|     "BAZ"
-
-#data
-FOO<!-- BAR -- <QUX> -- MUX -- >BAZ
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,15): unexpected-char-in-comment
-(1,24): unexpected-char-in-comment
-(1,31): unexpected-char-in-comment
-(1,35): eof-in-comment
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <!--  BAR -- <QUX> -- MUX -- >BAZ -->
-
-#data
-FOO<!---->BAZ
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <!--  -->
-|     "BAZ"
-
-#data
-FOO<!--->BAZ
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,9): incorrect-comment
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <!--  -->
-|     "BAZ"
-
-#data
-FOO<!-->BAZ
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,8): incorrect-comment
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <!--  -->
-|     "BAZ"
-
-#data
-<?xml version="1.0">Hi
-#errors
-(1,1): expected-tag-name-but-got-question-mark
-(1,22): expected-doctype-but-got-chars
-#document
-| <!-- ?xml version="1.0" -->
-| <html>
-|   <head>
-|   <body>
-|     "Hi"
-
-#data
-<?xml version="1.0">
-#errors
-(1,1): expected-tag-name-but-got-question-mark
-(1,20): expected-doctype-but-got-eof
-#document
-| <!-- ?xml version="1.0" -->
-| <html>
-|   <head>
-|   <body>
-
-#data
-<?xml version
-#errors
-(1,1): expected-tag-name-but-got-question-mark
-(1,13): expected-doctype-but-got-eof
-#document
-| <!-- ?xml version -->
-| <html>
-|   <head>
-|   <body>
-
-#data
-FOO<!----->BAZ
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,10): unexpected-dash-after-double-dash-in-comment
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <!-- - -->
-|     "BAZ"
-
-#data
-<html><!-- comment --><title>Comment before head</title>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <!--  comment  -->
-|   <head>
-|     <title>
-|       "Comment before head"
-|   <body>
--- a/libs/html5lib/tests/testdata/tree-construction/doctype01.dat
+++ b/libs/html5lib/tests/testdata/tree-construction/doctype01.dat
@ -1,424 +0,0 @@
-#data
-<!DOCTYPE html>Hello
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!dOctYpE HtMl>Hello
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPEhtml>Hello
-#errors
-(1,9): need-space-after-doctype
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE>Hello
-#errors
-(1,9): need-space-after-doctype
-(1,10): expected-doctype-name-but-got-right-bracket
-(1,10): unknown-doctype
-#document
-| <!DOCTYPE >
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE >Hello
-#errors
-(1,11): expected-doctype-name-but-got-right-bracket
-(1,11): unknown-doctype
-#document
-| <!DOCTYPE >
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato>Hello
-#errors
-(1,17): unknown-doctype
-#document
-| <!DOCTYPE potato>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato >Hello
-#errors
-(1,18): unknown-doctype
-#document
-| <!DOCTYPE potato>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato taco>Hello
-#errors
-(1,17): expected-space-or-right-bracket-in-doctype
-(1,22): unknown-doctype
-#document
-| <!DOCTYPE potato>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato taco "ddd>Hello
-#errors
-(1,17): expected-space-or-right-bracket-in-doctype
-(1,27): unknown-doctype
-#document
-| <!DOCTYPE potato>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato sYstEM>Hello
-#errors
-(1,24): unexpected-char-in-doctype
-(1,24): unknown-doctype
-#document
-| <!DOCTYPE potato>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato sYstEM    >Hello
-#errors
-(1,28): unexpected-char-in-doctype
-(1,28): unknown-doctype
-#document
-| <!DOCTYPE potato>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE   potato       sYstEM  ggg>Hello
-#errors
-(1,34): unexpected-char-in-doctype
-(1,37): unknown-doctype
-#document
-| <!DOCTYPE potato>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato SYSTEM taco  >Hello
-#errors
-(1,25): unexpected-char-in-doctype
-(1,31): unknown-doctype
-#document
-| <!DOCTYPE potato>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato SYSTEM 'taco"'>Hello
-#errors
-(1,32): unknown-doctype
-#document
-| <!DOCTYPE potato "" "taco"">
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato SYSTEM "taco">Hello
-#errors
-(1,31): unknown-doctype
-#document
-| <!DOCTYPE potato "" "taco">
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato SYSTEM "tai'co">Hello
-#errors
-(1,33): unknown-doctype
-#document
-| <!DOCTYPE potato "" "tai'co">
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato SYSTEMtaco "ddd">Hello
-#errors
-(1,24): unexpected-char-in-doctype
-(1,34): unknown-doctype
-#document
-| <!DOCTYPE potato>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato grass SYSTEM taco>Hello
-#errors
-(1,17): expected-space-or-right-bracket-in-doctype
-(1,35): unknown-doctype
-#document
-| <!DOCTYPE potato>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato pUbLIc>Hello
-#errors
-(1,24): unexpected-end-of-doctype
-(1,24): unknown-doctype
-#document
-| <!DOCTYPE potato>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato pUbLIc >Hello
-#errors
-(1,25): unexpected-end-of-doctype
-(1,25): unknown-doctype
-#document
-| <!DOCTYPE potato>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato pUbLIcgoof>Hello
-#errors
-(1,24): unexpected-char-in-doctype
-(1,28): unknown-doctype
-#document
-| <!DOCTYPE potato>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato PUBLIC goof>Hello
-#errors
-(1,25): unexpected-char-in-doctype
-(1,29): unknown-doctype
-#document
-| <!DOCTYPE potato>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato PUBLIC "go'of">Hello
-#errors
-(1,32): unknown-doctype
-#document
-| <!DOCTYPE potato "go'of" "">
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato PUBLIC 'go'of'>Hello
-#errors
-(1,29): unexpected-char-in-doctype
-(1,32): unknown-doctype
-#document
-| <!DOCTYPE potato "go" "">
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato PUBLIC 'go:hh   of' >Hello
-#errors
-(1,38): unknown-doctype
-#document
-| <!DOCTYPE potato "go:hh   of" "">
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato PUBLIC "W3C-//dfdf" SYSTEM ggg>Hello
-#errors
-(1,38): unexpected-char-in-doctype
-(1,48): unknown-doctype
-#document
-| <!DOCTYPE potato "W3C-//dfdf" "">
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
-   "http://www.w3.org/TR/html4/strict.dtd">Hello
-#errors
-#document
-| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE ...>Hello
-#errors
-(1,14): unknown-doctype
-#document
-| <!DOCTYPE ...>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
-"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-#errors
-(2,58): unknown-doctype
-#document
-| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-| <html>
-|   <head>
-|   <body>
-
-#data
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
-"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
-#errors
-(2,54): unknown-doctype
-#document
-| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Frameset//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
-| <html>
-|   <head>
-|   <body>
-
-#data
-<!DOCTYPE root-element [SYSTEM OR PUBLIC FPI] "uri" [ 
-<!-- internal declarations -->
-]>
-#errors
-(1,23): expected-space-or-right-bracket-in-doctype
-(2,30): unknown-doctype
-#document
-| <!DOCTYPE root-element>
-| <html>
-|   <head>
-|   <body>
-|     "]>"
-
-#data
-<!DOCTYPE html PUBLIC
-  "-//WAPFORUM//DTD XHTML Mobile 1.0//EN"
-    "http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
-#errors
-(3,53): unknown-doctype
-#document
-| <!DOCTYPE html "-//WAPFORUM//DTD XHTML Mobile 1.0//EN" "http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
-| <html>
-|   <head>
-|   <body>
-
-#data
-<!DOCTYPE HTML SYSTEM "http://www.w3.org/DTD/HTML4-strict.dtd"><body><b>Mine!</b></body>
-#errors
-(1,63): unknown-doctype
-#document
-| <!DOCTYPE html "" "http://www.w3.org/DTD/HTML4-strict.dtd">
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|       "Mine!"
-
-#data
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN""http://www.w3.org/TR/html4/strict.dtd">
-#errors
-(1,50): unexpected-char-in-doctype
-#document
-| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
-| <html>
-|   <head>
-|   <body>
-
-#data
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'>
-#errors
-(1,50): unexpected-char-in-doctype
-#document
-| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
-| <html>
-|   <head>
-|   <body>
-
-#data
-<!DOCTYPE HTML PUBLIC"-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'>
-#errors
-(1,21): unexpected-char-in-doctype
-(1,49): unexpected-char-in-doctype
-#document
-| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
-| <html>
-|   <head>
-|   <body>
-
-#data
-<!DOCTYPE HTML PUBLIC'-//W3C//DTD HTML 4.01//EN''http://www.w3.org/TR/html4/strict.dtd'>
-#errors
-(1,21): unexpected-char-in-doctype
-(1,49): unexpected-char-in-doctype
-#document
-| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
-| <html>
-|   <head>
-|   <body>
--- a/libs/html5lib/tests/testdata/tree-construction/domjs-unsafe.dat
+++ b/libs/html5lib/tests/testdata/tree-construction/domjs-unsafe.dat
--- a/libs/html5lib/tests/testdata/tree-construction/entities01.dat
+++ b/libs/html5lib/tests/testdata/tree-construction/entities01.dat
@ -1,795 +0,0 @@
-#data
-FOO&gt;BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO>BAR"
-
-#data
-FOO&gtBAR
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,6): named-entity-without-semicolon
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO>BAR"
-
-#data
-FOO&gt BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,6): named-entity-without-semicolon
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO> BAR"
-
-#data
-FOO&gt;;;BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO>;;BAR"
-
-#data
-I'm &notit; I tell you
-#errors
-(1,4): expected-doctype-but-got-chars
-(1,9): named-entity-without-semicolon
-#document
-| <html>
-|   <head>
-|   <body>
-|     "I'm ¬it; I tell you"
-
-#data
-I'm &notin; I tell you
-#errors
-(1,4): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "I'm ∉ I tell you"
-
-#data
-FOO& BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO& BAR"
-
-#data
-FOO&<BAR>
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,9): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO&"
-|     <bar>
-
-#data
-FOO&&&&gt;BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO&&&>BAR"
-
-#data
-FOO&#41;BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO)BAR"
-
-#data
-FOO&#x41;BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOABAR"
-
-#data
-FOO&#X41;BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOABAR"
-
-#data
-FOO&#BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,5): expected-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO&#BAR"
-
-#data
-FOO&#ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,5): expected-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO&#ZOO"
-
-#data
-FOO&#xBAR
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,7): expected-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOºR"
-
-#data
-FOO&#xZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,6): expected-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO&#xZOO"
-
-#data
-FOO&#XZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,6): expected-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO&#XZOO"
-
-#data
-FOO&#41BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,7): numeric-entity-without-semicolon
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO)BAR"
-
-#data
-FOO&#x41BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,10): numeric-entity-without-semicolon
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO䆺R"
-
-#data
-FOO&#x41ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,8): numeric-entity-without-semicolon
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOAZOO"
-
-#data
-FOO&#x0000;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO<4F>ZOO"
-
-#data
-FOO&#x0078;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOxZOO"
-
-#data
-FOO&#x0079;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOyZOO"
-
-#data
-FOO&#x0080;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO€ZOO"
-
-#data
-FOO&#x0081;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOZOO"
-
-#data
-FOO&#x0082;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO‚ZOO"
-
-#data
-FOO&#x0083;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOƒZOO"
-
-#data
-FOO&#x0084;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO„ZOO"
-
-#data
-FOO&#x0085;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO…ZOO"
-
-#data
-FOO&#x0086;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO†ZOO"
-
-#data
-FOO&#x0087;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO‡ZOO"
-
-#data
-FOO&#x0088;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOˆZOO"
-
-#data
-FOO&#x0089;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO‰ZOO"
-
-#data
-FOO&#x008A;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOŠZOO"
-
-#data
-FOO&#x008B;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO‹ZOO"
-
-#data
-FOO&#x008C;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOŒZOO"
-
-#data
-FOO&#x008D;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOZOO"
-
-#data
-FOO&#x008E;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOŽZOO"
-
-#data
-FOO&#x008F;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOZOO"
-
-#data
-FOO&#x0090;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOZOO"
-
-#data
-FOO&#x0091;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO‘ZOO"
-
-#data
-FOO&#x0092;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO’ZOO"
-
-#data
-FOO&#x0093;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO“ZOO"
-
-#data
-FOO&#x0094;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO”ZOO"
-
-#data
-FOO&#x0095;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO•ZOO"
-
-#data
-FOO&#x0096;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO–ZOO"
-
-#data
-FOO&#x0097;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO—ZOO"
-
-#data
-FOO&#x0098;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO˜ZOO"
-
-#data
-FOO&#x0099;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO™ZOO"
-
-#data
-FOO&#x009A;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOšZOO"
-
-#data
-FOO&#x009B;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO›ZOO"
-
-#data
-FOO&#x009C;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOœZOO"
-
-#data
-FOO&#x009D;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOZOO"
-
-#data
-FOO&#x009E;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOžZOO"
-
-#data
-FOO&#x009F;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOŸZOO"
-
-#data
-FOO&#x00A0;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO ZOO"
-
-#data
-FOO&#xD7FF;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO퟿ZOO"
-
-#data
-FOO&#xD800;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO<4F>ZOO"
-
-#data
-FOO&#xD801;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO<4F>ZOO"
-
-#data
-FOO&#xDFFE;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO<4F>ZOO"
-
-#data
-FOO&#xDFFF;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO<4F>ZOO"
-
-#data
-FOO&#xE000;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOZOO"
-
-#data
-FOO&#x10FFFE;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,13): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO􏿾ZOO"
-
-#data
-FOO&#x1087D4;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO􈟔ZOO"
-
-#data
-FOO&#x10FFFF;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,13): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO􏿿ZOO"
-
-#data
-FOO&#x110000;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,13): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO<4F>ZOO"
-
-#data
-FOO&#xFFFFFF;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,13): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO<4F>ZOO"
-
-#data
-FOO&#11111111111
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,13): illegal-codepoint-for-numeric-entity
-(1,13): eof-in-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO<4F>"
-
-#data
-FOO&#1111111111
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,13): illegal-codepoint-for-numeric-entity
-(1,13): eof-in-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO<4F>"
-
-#data
-FOO&#111111111111
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,13): illegal-codepoint-for-numeric-entity
-(1,13): eof-in-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO<4F>"
-
-#data
-FOO&#11111111111ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,16): numeric-entity-without-semicolon
-(1,16): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO<4F>ZOO"
-
-#data
-FOO&#1111111111ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,15): numeric-entity-without-semicolon
-(1,15): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO<4F>ZOO"
-
-#data
-FOO&#111111111111ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,17): numeric-entity-without-semicolon
-(1,17): illegal-codepoint-for-numeric-entity
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO<4F>ZOO"
--- a/Show More
+++ b/Show More