mirror of
https://github.com/morpheus65535/bazarr.git
synced 2025-04-18 20:05:12 -04:00
Upgraded some embedded dependencies to be ready for Python 3.10. This doesn't mean that it's fully supported right now.
This commit is contained in:
parent
2d214bfbd5
commit
402c82d84f
244 changed files with 8217 additions and 96583 deletions
|
@ -20,8 +20,8 @@ def check_python_version():
|
|||
print("Python " + minimum_py3_str + " or greater required. "
|
||||
"Current version is " + platform.python_version() + ". Please upgrade Python.")
|
||||
sys.exit(1)
|
||||
elif int(python_version[0]) == 3 and int(python_version[1]) == 9:
|
||||
print("Python 3.9.x is unsupported. Current version is " + platform.python_version() +
|
||||
elif int(python_version[0]) == 3 and int(python_version[1]) > 8:
|
||||
print("Python version greater than 3.8.x is unsupported. Current version is " + platform.python_version() +
|
||||
". Keep in mind that even if it works, you're on your own.")
|
||||
elif (int(python_version[0]) == minimum_py3_tuple[0] and int(python_version[1]) < minimum_py3_tuple[1]) or \
|
||||
(int(python_version[0]) != minimum_py3_tuple[0]):
|
||||
|
|
|
@ -4,12 +4,6 @@
|
|||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
__title__ = 'babelfish'
|
||||
__version__ = '0.5.5-dev'
|
||||
__author__ = 'Antoine Bertin'
|
||||
__license__ = 'BSD'
|
||||
__copyright__ = 'Copyright 2015 the BabelFish authors'
|
||||
|
||||
import sys
|
||||
|
||||
if sys.version_info[0] >= 3:
|
||||
|
|
|
@ -2,17 +2,22 @@
|
|||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
import collections
|
||||
from pkg_resources import iter_entry_points, EntryPoint
|
||||
from ..exceptions import LanguageConvertError, LanguageReverseError
|
||||
|
||||
try:
|
||||
# Python 3.3+
|
||||
from collections.abc import Mapping, MutableMapping
|
||||
except ImportError:
|
||||
from collections import Mapping, MutableMapping
|
||||
|
||||
|
||||
# from https://github.com/kennethreitz/requests/blob/master/requests/structures.py
|
||||
class CaseInsensitiveDict(collections.MutableMapping):
|
||||
class CaseInsensitiveDict(MutableMapping):
|
||||
"""A case-insensitive ``dict``-like object.
|
||||
|
||||
Implements all methods and operations of
|
||||
``collections.MutableMapping`` as well as dict's ``copy``. Also
|
||||
``collections.abc.MutableMapping`` as well as dict's ``copy``. Also
|
||||
provides ``lower_items``.
|
||||
|
||||
All keys are expected to be strings. The structure remembers the
|
||||
|
@ -63,7 +68,7 @@ class CaseInsensitiveDict(collections.MutableMapping):
|
|||
)
|
||||
|
||||
def __eq__(self, other):
|
||||
if isinstance(other, collections.Mapping):
|
||||
if isinstance(other, Mapping):
|
||||
other = CaseInsensitiveDict(other)
|
||||
else:
|
||||
return NotImplemented
|
||||
|
|
|
@ -14,9 +14,9 @@ class OpenSubtitlesConverter(LanguageReverseConverter):
|
|||
def __init__(self):
|
||||
self.alpha3b_converter = language_converters['alpha3b']
|
||||
self.alpha2_converter = language_converters['alpha2']
|
||||
self.to_opensubtitles = {('por', 'BR'): 'pob', ('gre', None): 'ell', ('srp', None): 'scc', ('srp', 'ME'): 'mne'}
|
||||
self.to_opensubtitles = {('por', 'BR'): 'pob', ('gre', None): 'ell', ('srp', None): 'scc', ('srp', 'ME'): 'mne', ('chi', 'TW'): 'zht'}
|
||||
self.from_opensubtitles = CaseInsensitiveDict({'pob': ('por', 'BR'), 'pb': ('por', 'BR'), 'ell': ('ell', None),
|
||||
'scc': ('srp', None), 'mne': ('srp', 'ME')})
|
||||
'scc': ('srp', None), 'mne': ('srp', 'ME'), 'zht': ('zho', 'TW')})
|
||||
self.codes = (self.alpha2_converter.codes | self.alpha3b_converter.codes | set(self.from_opensubtitles.keys()))
|
||||
|
||||
def convert(self, alpha3, country=None, script=None):
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from collections import namedtuple
|
||||
from functools import partial
|
||||
from pkg_resources import resource_stream # @UnresolvedImport
|
||||
|
|
|
@ -1,45 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
import os.path
|
||||
import tempfile
|
||||
import zipfile
|
||||
import requests
|
||||
|
||||
|
||||
DATA_DIR = os.path.dirname(__file__)
|
||||
|
||||
# iso-3166-1.txt
|
||||
print('Downloading ISO-3166-1 standard (ISO country codes)...')
|
||||
with open(os.path.join(DATA_DIR, 'iso-3166-1.txt'), 'w') as f:
|
||||
r = requests.get('http://www.iso.org/iso/home/standards/country_codes/country_names_and_code_elements_txt.htm')
|
||||
f.write(r.content.strip())
|
||||
|
||||
# iso-639-3.tab
|
||||
print('Downloading ISO-639-3 standard (ISO language codes)...')
|
||||
with tempfile.TemporaryFile() as f:
|
||||
r = requests.get('http://www-01.sil.org/iso639-3/iso-639-3_Code_Tables_20130531.zip')
|
||||
f.write(r.content)
|
||||
with zipfile.ZipFile(f) as z:
|
||||
z.extract('iso-639-3.tab', DATA_DIR)
|
||||
|
||||
# iso-15924
|
||||
print('Downloading ISO-15924 standard (ISO script codes)...')
|
||||
with tempfile.TemporaryFile() as f:
|
||||
r = requests.get('http://www.unicode.org/iso15924/iso15924.txt.zip')
|
||||
f.write(r.content)
|
||||
with zipfile.ZipFile(f) as z:
|
||||
z.extract('iso15924-utf8-20131012.txt', DATA_DIR)
|
||||
|
||||
# opensubtitles supported languages
|
||||
print('Downloading OpenSubtitles supported languages...')
|
||||
with open(os.path.join(DATA_DIR, 'opensubtitles_languages.txt'), 'w') as f:
|
||||
r = requests.get('http://www.opensubtitles.org/addons/export_languages.php')
|
||||
f.write(r.content)
|
||||
|
||||
print('Done!')
|
|
@ -4,6 +4,7 @@
|
|||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from collections import namedtuple
|
||||
from functools import partial
|
||||
from pkg_resources import resource_stream # @UnresolvedImport
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from collections import namedtuple
|
||||
from pkg_resources import resource_stream # @UnresolvedImport
|
||||
from . import basestr
|
||||
|
|
|
@ -1,377 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
import re
|
||||
import sys
|
||||
import pickle
|
||||
from unittest import TestCase, TestSuite, TestLoader, TextTestRunner
|
||||
from pkg_resources import resource_stream # @UnresolvedImport
|
||||
from babelfish import (LANGUAGES, Language, Country, Script, language_converters, country_converters,
|
||||
LanguageReverseConverter, LanguageConvertError, LanguageReverseError, CountryReverseError)
|
||||
|
||||
|
||||
if sys.version_info[:2] <= (2, 6):
|
||||
_MAX_LENGTH = 80
|
||||
|
||||
def safe_repr(obj, short=False):
|
||||
try:
|
||||
result = repr(obj)
|
||||
except Exception:
|
||||
result = object.__repr__(obj)
|
||||
if not short or len(result) < _MAX_LENGTH:
|
||||
return result
|
||||
return result[:_MAX_LENGTH] + ' [truncated]...'
|
||||
|
||||
class _AssertRaisesContext(object):
|
||||
"""A context manager used to implement TestCase.assertRaises* methods."""
|
||||
|
||||
def __init__(self, expected, test_case, expected_regexp=None):
|
||||
self.expected = expected
|
||||
self.failureException = test_case.failureException
|
||||
self.expected_regexp = expected_regexp
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_value, tb):
|
||||
if exc_type is None:
|
||||
try:
|
||||
exc_name = self.expected.__name__
|
||||
except AttributeError:
|
||||
exc_name = str(self.expected)
|
||||
raise self.failureException(
|
||||
"{0} not raised".format(exc_name))
|
||||
if not issubclass(exc_type, self.expected):
|
||||
# let unexpected exceptions pass through
|
||||
return False
|
||||
self.exception = exc_value # store for later retrieval
|
||||
if self.expected_regexp is None:
|
||||
return True
|
||||
|
||||
expected_regexp = self.expected_regexp
|
||||
if isinstance(expected_regexp, basestring):
|
||||
expected_regexp = re.compile(expected_regexp)
|
||||
if not expected_regexp.search(str(exc_value)):
|
||||
raise self.failureException('"%s" does not match "%s"' %
|
||||
(expected_regexp.pattern, str(exc_value)))
|
||||
return True
|
||||
|
||||
class _Py26FixTestCase(object):
|
||||
def assertIsNone(self, obj, msg=None):
|
||||
"""Same as self.assertTrue(obj is None), with a nicer default message."""
|
||||
if obj is not None:
|
||||
standardMsg = '%s is not None' % (safe_repr(obj),)
|
||||
self.fail(self._formatMessage(msg, standardMsg))
|
||||
|
||||
def assertIsNotNone(self, obj, msg=None):
|
||||
"""Included for symmetry with assertIsNone."""
|
||||
if obj is None:
|
||||
standardMsg = 'unexpectedly None'
|
||||
self.fail(self._formatMessage(msg, standardMsg))
|
||||
|
||||
def assertIn(self, member, container, msg=None):
|
||||
"""Just like self.assertTrue(a in b), but with a nicer default message."""
|
||||
if member not in container:
|
||||
standardMsg = '%s not found in %s' % (safe_repr(member),
|
||||
safe_repr(container))
|
||||
self.fail(self._formatMessage(msg, standardMsg))
|
||||
|
||||
def assertNotIn(self, member, container, msg=None):
|
||||
"""Just like self.assertTrue(a not in b), but with a nicer default message."""
|
||||
if member in container:
|
||||
standardMsg = '%s unexpectedly found in %s' % (safe_repr(member),
|
||||
safe_repr(container))
|
||||
self.fail(self._formatMessage(msg, standardMsg))
|
||||
|
||||
def assertIs(self, expr1, expr2, msg=None):
|
||||
"""Just like self.assertTrue(a is b), but with a nicer default message."""
|
||||
if expr1 is not expr2:
|
||||
standardMsg = '%s is not %s' % (safe_repr(expr1),
|
||||
safe_repr(expr2))
|
||||
self.fail(self._formatMessage(msg, standardMsg))
|
||||
|
||||
def assertIsNot(self, expr1, expr2, msg=None):
|
||||
"""Just like self.assertTrue(a is not b), but with a nicer default message."""
|
||||
if expr1 is expr2:
|
||||
standardMsg = 'unexpectedly identical: %s' % (safe_repr(expr1),)
|
||||
self.fail(self._formatMessage(msg, standardMsg))
|
||||
|
||||
else:
|
||||
class _Py26FixTestCase(object):
|
||||
pass
|
||||
|
||||
|
||||
class TestScript(TestCase, _Py26FixTestCase):
|
||||
def test_wrong_script(self):
|
||||
self.assertRaises(ValueError, lambda: Script('Azer'))
|
||||
|
||||
def test_eq(self):
|
||||
self.assertEqual(Script('Latn'), Script('Latn'))
|
||||
|
||||
def test_ne(self):
|
||||
self.assertNotEqual(Script('Cyrl'), Script('Latn'))
|
||||
|
||||
def test_hash(self):
|
||||
self.assertEqual(hash(Script('Hira')), hash('Hira'))
|
||||
|
||||
def test_pickle(self):
|
||||
self.assertEqual(pickle.loads(pickle.dumps(Script('Latn'))), Script('Latn'))
|
||||
|
||||
|
||||
class TestCountry(TestCase, _Py26FixTestCase):
|
||||
def test_wrong_country(self):
|
||||
self.assertRaises(ValueError, lambda: Country('ZZ'))
|
||||
|
||||
def test_eq(self):
|
||||
self.assertEqual(Country('US'), Country('US'))
|
||||
|
||||
def test_ne(self):
|
||||
self.assertNotEqual(Country('GB'), Country('US'))
|
||||
self.assertIsNotNone(Country('US'))
|
||||
|
||||
def test_hash(self):
|
||||
self.assertEqual(hash(Country('US')), hash('US'))
|
||||
|
||||
def test_pickle(self):
|
||||
for country in [Country('GB'), Country('US')]:
|
||||
self.assertEqual(pickle.loads(pickle.dumps(country)), country)
|
||||
|
||||
def test_converter_name(self):
|
||||
self.assertEqual(Country('US').name, 'UNITED STATES')
|
||||
self.assertEqual(Country.fromname('UNITED STATES'), Country('US'))
|
||||
self.assertEqual(Country.fromcode('UNITED STATES', 'name'), Country('US'))
|
||||
self.assertRaises(CountryReverseError, lambda: Country.fromname('ZZZZZ'))
|
||||
self.assertEqual(len(country_converters['name'].codes), 249)
|
||||
|
||||
|
||||
class TestLanguage(TestCase, _Py26FixTestCase):
|
||||
def test_languages(self):
|
||||
self.assertEqual(len(LANGUAGES), 7874)
|
||||
|
||||
def test_wrong_language(self):
|
||||
self.assertRaises(ValueError, lambda: Language('zzz'))
|
||||
|
||||
def test_unknown_language(self):
|
||||
self.assertEqual(Language('zzzz', unknown='und'), Language('und'))
|
||||
|
||||
def test_converter_alpha2(self):
|
||||
self.assertEqual(Language('eng').alpha2, 'en')
|
||||
self.assertEqual(Language.fromalpha2('en'), Language('eng'))
|
||||
self.assertEqual(Language.fromcode('en', 'alpha2'), Language('eng'))
|
||||
self.assertRaises(LanguageReverseError, lambda: Language.fromalpha2('zz'))
|
||||
self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha2)
|
||||
self.assertEqual(len(language_converters['alpha2'].codes), 184)
|
||||
|
||||
def test_converter_alpha3b(self):
|
||||
self.assertEqual(Language('fra').alpha3b, 'fre')
|
||||
self.assertEqual(Language.fromalpha3b('fre'), Language('fra'))
|
||||
self.assertEqual(Language.fromcode('fre', 'alpha3b'), Language('fra'))
|
||||
self.assertRaises(LanguageReverseError, lambda: Language.fromalpha3b('zzz'))
|
||||
self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha3b)
|
||||
self.assertEqual(len(language_converters['alpha3b'].codes), 418)
|
||||
|
||||
def test_converter_alpha3t(self):
|
||||
self.assertEqual(Language('fra').alpha3t, 'fra')
|
||||
self.assertEqual(Language.fromalpha3t('fra'), Language('fra'))
|
||||
self.assertEqual(Language.fromcode('fra', 'alpha3t'), Language('fra'))
|
||||
self.assertRaises(LanguageReverseError, lambda: Language.fromalpha3t('zzz'))
|
||||
self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha3t)
|
||||
self.assertEqual(len(language_converters['alpha3t'].codes), 418)
|
||||
|
||||
def test_converter_name(self):
|
||||
self.assertEqual(Language('eng').name, 'English')
|
||||
self.assertEqual(Language.fromname('English'), Language('eng'))
|
||||
self.assertEqual(Language.fromcode('English', 'name'), Language('eng'))
|
||||
self.assertRaises(LanguageReverseError, lambda: Language.fromname('Zzzzzzzzz'))
|
||||
self.assertEqual(len(language_converters['name'].codes), 7874)
|
||||
|
||||
def test_converter_scope(self):
|
||||
self.assertEqual(language_converters['scope'].codes, set(['I', 'S', 'M']))
|
||||
self.assertEqual(Language('eng').scope, 'individual')
|
||||
self.assertEqual(Language('und').scope, 'special')
|
||||
|
||||
def test_converter_type(self):
|
||||
self.assertEqual(language_converters['type'].codes, set(['A', 'C', 'E', 'H', 'L', 'S']))
|
||||
self.assertEqual(Language('eng').type, 'living')
|
||||
self.assertEqual(Language('und').type, 'special')
|
||||
|
||||
def test_converter_opensubtitles(self):
|
||||
self.assertEqual(Language('fra').opensubtitles, Language('fra').alpha3b)
|
||||
self.assertEqual(Language('por', 'BR').opensubtitles, 'pob')
|
||||
self.assertEqual(Language.fromopensubtitles('fre'), Language('fra'))
|
||||
self.assertEqual(Language.fromopensubtitles('pob'), Language('por', 'BR'))
|
||||
self.assertEqual(Language.fromopensubtitles('pb'), Language('por', 'BR'))
|
||||
# Montenegrin is not recognized as an ISO language (yet?) but for now it is
|
||||
# unofficially accepted as Serbian from Montenegro
|
||||
self.assertEqual(Language.fromopensubtitles('mne'), Language('srp', 'ME'))
|
||||
self.assertEqual(Language.fromcode('pob', 'opensubtitles'), Language('por', 'BR'))
|
||||
self.assertRaises(LanguageReverseError, lambda: Language.fromopensubtitles('zzz'))
|
||||
self.assertRaises(LanguageConvertError, lambda: Language('aaa').opensubtitles)
|
||||
self.assertEqual(len(language_converters['opensubtitles'].codes), 607)
|
||||
|
||||
# test with all the LANGUAGES from the opensubtitles api
|
||||
# downloaded from: http://www.opensubtitles.org/addons/export_languages.php
|
||||
f = resource_stream('babelfish', 'data/opensubtitles_languages.txt')
|
||||
f.readline()
|
||||
for l in f:
|
||||
idlang, alpha2, _, upload_enabled, web_enabled = l.decode('utf-8').strip().split('\t')
|
||||
if not int(upload_enabled) and not int(web_enabled):
|
||||
# do not test LANGUAGES that are too esoteric / not widely available
|
||||
continue
|
||||
self.assertEqual(Language.fromopensubtitles(idlang).opensubtitles, idlang)
|
||||
if alpha2:
|
||||
self.assertEqual(Language.fromopensubtitles(idlang), Language.fromopensubtitles(alpha2))
|
||||
f.close()
|
||||
|
||||
def test_converter_opensubtitles_codes(self):
|
||||
for code in language_converters['opensubtitles'].from_opensubtitles.keys():
|
||||
self.assertIn(code, language_converters['opensubtitles'].codes)
|
||||
|
||||
def test_fromietf_country_script(self):
|
||||
language = Language.fromietf('fra-FR-Latn')
|
||||
self.assertEqual(language.alpha3, 'fra')
|
||||
self.assertEqual(language.country, Country('FR'))
|
||||
self.assertEqual(language.script, Script('Latn'))
|
||||
|
||||
def test_fromietf_country_no_script(self):
|
||||
language = Language.fromietf('fra-FR')
|
||||
self.assertEqual(language.alpha3, 'fra')
|
||||
self.assertEqual(language.country, Country('FR'))
|
||||
self.assertIsNone(language.script)
|
||||
|
||||
def test_fromietf_no_country_no_script(self):
|
||||
language = Language.fromietf('fra-FR')
|
||||
self.assertEqual(language.alpha3, 'fra')
|
||||
self.assertEqual(language.country, Country('FR'))
|
||||
self.assertIsNone(language.script)
|
||||
|
||||
def test_fromietf_no_country_script(self):
|
||||
language = Language.fromietf('fra-Latn')
|
||||
self.assertEqual(language.alpha3, 'fra')
|
||||
self.assertIsNone(language.country)
|
||||
self.assertEqual(language.script, Script('Latn'))
|
||||
|
||||
def test_fromietf_alpha2_language(self):
|
||||
language = Language.fromietf('fr-Latn')
|
||||
self.assertEqual(language.alpha3, 'fra')
|
||||
self.assertIsNone(language.country)
|
||||
self.assertEqual(language.script, Script('Latn'))
|
||||
|
||||
def test_fromietf_wrong_language(self):
|
||||
self.assertRaises(ValueError, lambda: Language.fromietf('xyz-FR'))
|
||||
|
||||
def test_fromietf_wrong_country(self):
|
||||
self.assertRaises(ValueError, lambda: Language.fromietf('fra-YZ'))
|
||||
|
||||
def test_fromietf_wrong_script(self):
|
||||
self.assertRaises(ValueError, lambda: Language.fromietf('fra-FR-Wxyz'))
|
||||
|
||||
def test_eq(self):
|
||||
self.assertEqual(Language('eng'), Language('eng'))
|
||||
|
||||
def test_ne(self):
|
||||
self.assertNotEqual(Language('fra'), Language('eng'))
|
||||
self.assertIsNotNone(Language('fra'))
|
||||
|
||||
def test_nonzero(self):
|
||||
self.assertFalse(bool(Language('und')))
|
||||
self.assertTrue(bool(Language('eng')))
|
||||
|
||||
def test_language_hasattr(self):
|
||||
self.assertTrue(hasattr(Language('fra'), 'alpha3'))
|
||||
self.assertTrue(hasattr(Language('fra'), 'alpha2'))
|
||||
self.assertFalse(hasattr(Language('bej'), 'alpha2'))
|
||||
|
||||
def test_country_hasattr(self):
|
||||
self.assertTrue(hasattr(Country('US'), 'name'))
|
||||
self.assertTrue(hasattr(Country('FR'), 'alpha2'))
|
||||
self.assertFalse(hasattr(Country('BE'), 'none'))
|
||||
|
||||
def test_country(self):
|
||||
self.assertEqual(Language('por', 'BR').country, Country('BR'))
|
||||
self.assertEqual(Language('eng', Country('US')).country, Country('US'))
|
||||
|
||||
def test_eq_with_country(self):
|
||||
self.assertEqual(Language('eng', 'US'), Language('eng', Country('US')))
|
||||
|
||||
def test_ne_with_country(self):
|
||||
self.assertNotEqual(Language('eng', 'US'), Language('eng', Country('GB')))
|
||||
|
||||
def test_script(self):
|
||||
self.assertEqual(Language('srp', script='Latn').script, Script('Latn'))
|
||||
self.assertEqual(Language('srp', script=Script('Cyrl')).script, Script('Cyrl'))
|
||||
|
||||
def test_eq_with_script(self):
|
||||
self.assertEqual(Language('srp', script='Latn'), Language('srp', script=Script('Latn')))
|
||||
|
||||
def test_ne_with_script(self):
|
||||
self.assertNotEqual(Language('srp', script='Latn'), Language('srp', script=Script('Cyrl')))
|
||||
|
||||
def test_eq_with_country_and_script(self):
|
||||
self.assertEqual(Language('srp', 'SR', 'Latn'), Language('srp', Country('SR'), Script('Latn')))
|
||||
|
||||
def test_ne_with_country_and_script(self):
|
||||
self.assertNotEqual(Language('srp', 'SR', 'Latn'), Language('srp', Country('SR'), Script('Cyrl')))
|
||||
|
||||
def test_hash(self):
|
||||
self.assertEqual(hash(Language('fra')), hash('fr'))
|
||||
self.assertEqual(hash(Language('ace')), hash('ace'))
|
||||
self.assertEqual(hash(Language('por', 'BR')), hash('pt-BR'))
|
||||
self.assertEqual(hash(Language('srp', script='Cyrl')), hash('sr-Cyrl'))
|
||||
self.assertEqual(hash(Language('eng', 'US', 'Latn')), hash('en-US-Latn'))
|
||||
|
||||
def test_pickle(self):
|
||||
for lang in [Language('fra'),
|
||||
Language('eng', 'US'),
|
||||
Language('srp', script='Latn'),
|
||||
Language('eng', 'US', 'Latn')]:
|
||||
self.assertEqual(pickle.loads(pickle.dumps(lang)), lang)
|
||||
|
||||
def test_str(self):
|
||||
self.assertEqual(Language.fromietf(str(Language('eng', 'US', 'Latn'))), Language('eng', 'US', 'Latn'))
|
||||
self.assertEqual(Language.fromietf(str(Language('fra', 'FR'))), Language('fra', 'FR'))
|
||||
self.assertEqual(Language.fromietf(str(Language('bel'))), Language('bel'))
|
||||
|
||||
def test_register_converter(self):
|
||||
class TestConverter(LanguageReverseConverter):
|
||||
def __init__(self):
|
||||
self.to_test = {'fra': 'test1', 'eng': 'test2'}
|
||||
self.from_test = {'test1': 'fra', 'test2': 'eng'}
|
||||
|
||||
def convert(self, alpha3, country=None, script=None):
|
||||
if alpha3 not in self.to_test:
|
||||
raise LanguageConvertError(alpha3, country, script)
|
||||
return self.to_test[alpha3]
|
||||
|
||||
def reverse(self, test):
|
||||
if test not in self.from_test:
|
||||
raise LanguageReverseError(test)
|
||||
return (self.from_test[test], None)
|
||||
language = Language('fra')
|
||||
self.assertFalse(hasattr(language, 'test'))
|
||||
language_converters['test'] = TestConverter()
|
||||
self.assertTrue(hasattr(language, 'test'))
|
||||
self.assertIn('test', language_converters)
|
||||
self.assertEqual(Language('fra').test, 'test1')
|
||||
self.assertEqual(Language.fromtest('test2').alpha3, 'eng')
|
||||
del language_converters['test']
|
||||
self.assertNotIn('test', language_converters)
|
||||
self.assertRaises(KeyError, lambda: Language.fromtest('test1'))
|
||||
self.assertRaises(AttributeError, lambda: Language('fra').test)
|
||||
|
||||
|
||||
def suite():
|
||||
suite = TestSuite()
|
||||
suite.addTest(TestLoader().loadTestsFromTestCase(TestScript))
|
||||
suite.addTest(TestLoader().loadTestsFromTestCase(TestCountry))
|
||||
suite.addTest(TestLoader().loadTestsFromTestCase(TestLanguage))
|
||||
return suite
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
TextTestRunner().run(suite())
|
|
@ -11,10 +11,12 @@ from werkzeug.wrappers import Response as ResponseBase
|
|||
from flask_restful.utils import http_status_message, unpack, OrderedDict
|
||||
from flask_restful.representations.json import output_json
|
||||
import sys
|
||||
from flask.helpers import _endpoint_from_view_func
|
||||
from types import MethodType
|
||||
import operator
|
||||
from collections import Mapping
|
||||
try:
|
||||
from collections.abc import Mapping
|
||||
except ImportError:
|
||||
from collections import Mapping
|
||||
|
||||
|
||||
__all__ = ('Api', 'Resource', 'marshal', 'marshal_with', 'marshal_with_field', 'abort')
|
||||
|
@ -58,7 +60,7 @@ class Api(object):
|
|||
to handle 404 errors throughout your app
|
||||
:param serve_challenge_on_401: Whether to serve a challenge response to
|
||||
clients on receiving 401. This usually leads to a username/password
|
||||
popup in web browers.
|
||||
popup in web browsers.
|
||||
:param url_part_order: A string that controls the order that the pieces
|
||||
of the url are concatenated when the full url is constructed. 'b'
|
||||
is the blueprint (or blueprint registration) prefix, 'a' is the api
|
||||
|
@ -153,7 +155,7 @@ class Api(object):
|
|||
rule = blueprint_setup.url_prefix + rule
|
||||
options.setdefault('subdomain', blueprint_setup.subdomain)
|
||||
if endpoint is None:
|
||||
endpoint = _endpoint_from_view_func(view_func)
|
||||
endpoint = view_func.__name__
|
||||
defaults = blueprint_setup.url_defaults
|
||||
if 'defaults' in options:
|
||||
defaults = dict(defaults, **options.pop('defaults'))
|
||||
|
@ -287,6 +289,13 @@ class Api(object):
|
|||
|
||||
headers = Headers()
|
||||
if isinstance(e, HTTPException):
|
||||
if e.response is not None:
|
||||
# If HTTPException is initialized with a response, then return e.get_response().
|
||||
# This prevents specified error response from being overridden.
|
||||
# eg. HTTPException(response=Response("Hello World"))
|
||||
resp = e.get_response()
|
||||
return resp
|
||||
|
||||
code = e.code
|
||||
default_data = {
|
||||
'message': getattr(e, 'description', http_status_message(code))
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
__version__ = '0.3.7'
|
||||
__version__ = '0.3.9'
|
||||
|
|
|
@ -1,6 +1,4 @@
|
|||
from datetime import datetime
|
||||
from calendar import timegm
|
||||
import pytz
|
||||
from decimal import Decimal as MyDecimal, ROUND_HALF_EVEN
|
||||
from email.utils import formatdate
|
||||
import six
|
||||
|
@ -9,8 +7,7 @@ try:
|
|||
except ImportError:
|
||||
# python3
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
|
||||
from flask_restful import inputs, marshal
|
||||
from flask_restful import marshal
|
||||
from flask import url_for, request
|
||||
|
||||
__all__ = ["String", "FormattedString", "Url", "DateTime", "Float",
|
||||
|
|
|
@ -269,7 +269,7 @@ def datetime_from_rfc822(datetime_str):
|
|||
|
||||
|
||||
def datetime_from_iso8601(datetime_str):
|
||||
"""Turns an ISO8601 formatted date into a datetime object.
|
||||
"""Turns an ISO8601 formatted datetime into a datetime object.
|
||||
|
||||
Example::
|
||||
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
from copy import deepcopy
|
||||
|
||||
import collections
|
||||
try:
|
||||
from collections.abc import MutableSequence
|
||||
except ImportError:
|
||||
from collections import MutableSequence
|
||||
from flask import current_app, request
|
||||
from werkzeug.datastructures import MultiDict, FileStorage
|
||||
from werkzeug import exceptions
|
||||
|
@ -146,7 +149,7 @@ class Argument(object):
|
|||
except TypeError:
|
||||
try:
|
||||
if self.type is decimal.Decimal:
|
||||
return self.type(str(value), self.name)
|
||||
return self.type(str(value))
|
||||
else:
|
||||
return self.type(value, self.name)
|
||||
except TypeError:
|
||||
|
@ -194,7 +197,7 @@ class Argument(object):
|
|||
values = source.getlist(name)
|
||||
else:
|
||||
values = source.get(name)
|
||||
if not (isinstance(values, collections.MutableSequence) and self.action == 'append'):
|
||||
if not (isinstance(values, MutableSequence) and self.action == 'append'):
|
||||
values = [values]
|
||||
|
||||
for value in values:
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
import sys
|
||||
|
||||
try:
|
||||
from collections import OrderedDict
|
||||
from collections.abc import OrderedDict
|
||||
except ImportError:
|
||||
from ordereddict import OrderedDict
|
||||
from collections import OrderedDict
|
||||
|
||||
from werkzeug.http import HTTP_STATUS_CODES
|
||||
|
||||
|
|
|
@ -68,7 +68,7 @@ See: http://python-future.org
|
|||
Credits
|
||||
-------
|
||||
|
||||
:Author: Ed Schofield
|
||||
:Author: Ed Schofield, Jordan M. Adler, et al
|
||||
:Sponsor: Python Charmers Pty Ltd, Australia, and Python Charmers Pte
|
||||
Ltd, Singapore. http://pythoncharmers.com
|
||||
:Others: See docs/credits.rst or http://python-future.org/credits.html
|
||||
|
@ -76,7 +76,7 @@ Credits
|
|||
|
||||
Licensing
|
||||
---------
|
||||
Copyright 2013-2018 Python Charmers Pty Ltd, Australia.
|
||||
Copyright 2013-2019 Python Charmers Pty Ltd, Australia.
|
||||
The software is distributed under an MIT licence. See LICENSE.txt.
|
||||
|
||||
"""
|
||||
|
@ -84,10 +84,10 @@ The software is distributed under an MIT licence. See LICENSE.txt.
|
|||
__title__ = 'future'
|
||||
__author__ = 'Ed Schofield'
|
||||
__license__ = 'MIT'
|
||||
__copyright__ = 'Copyright 2013-2018 Python Charmers Pty Ltd'
|
||||
__copyright__ = 'Copyright 2013-2019 Python Charmers Pty Ltd'
|
||||
__ver_major__ = 0
|
||||
__ver_minor__ = 17
|
||||
__ver_patch__ = 0
|
||||
__ver_minor__ = 18
|
||||
__ver_patch__ = 2
|
||||
__ver_sub__ = ''
|
||||
__version__ = "%d.%d.%d%s" % (__ver_major__, __ver_minor__,
|
||||
__ver_patch__, __ver_sub__)
|
||||
|
|
|
@ -10,7 +10,7 @@ __future_module__ = True
|
|||
from future.standard_library import import_top_level_modules
|
||||
|
||||
|
||||
if sys.version_info[0] == 3:
|
||||
if sys.version_info[0] >= 3:
|
||||
import_top_level_modules()
|
||||
|
||||
|
||||
|
|
|
@ -800,7 +800,7 @@ class Message(object):
|
|||
# There was no Content-Type header, and we don't know what type
|
||||
# to set it to, so raise an exception.
|
||||
raise errors.HeaderParseError('No Content-Type header found')
|
||||
newparams = []
|
||||
newparams = list()
|
||||
foundp = False
|
||||
for pk, pv in params:
|
||||
if pk.lower() == 'boundary':
|
||||
|
@ -814,10 +814,10 @@ class Message(object):
|
|||
# instead???
|
||||
newparams.append(('boundary', '"%s"' % boundary))
|
||||
# Replace the existing Content-Type header with the new value
|
||||
newheaders = []
|
||||
newheaders = list()
|
||||
for h, v in self._headers:
|
||||
if h.lower() == 'content-type':
|
||||
parts = []
|
||||
parts = list()
|
||||
for k, v in newparams:
|
||||
if v == '':
|
||||
parts.append(k)
|
||||
|
|
|
@ -79,11 +79,15 @@ from future.backports.misc import create_connection as socket_create_connection
|
|||
import io
|
||||
import os
|
||||
import socket
|
||||
import collections
|
||||
from future.backports.urllib.parse import urlsplit
|
||||
import warnings
|
||||
from array import array
|
||||
|
||||
if PY2:
|
||||
from collections import Iterable
|
||||
else:
|
||||
from collections.abc import Iterable
|
||||
|
||||
__all__ = ["HTTPResponse", "HTTPConnection",
|
||||
"HTTPException", "NotConnected", "UnknownProtocol",
|
||||
"UnknownTransferEncoding", "UnimplementedFileMode",
|
||||
|
@ -696,9 +700,19 @@ class HTTPResponse(io.RawIOBase):
|
|||
while total_bytes < len(b):
|
||||
if MAXAMOUNT < len(mvb):
|
||||
temp_mvb = mvb[0:MAXAMOUNT]
|
||||
n = self.fp.readinto(temp_mvb)
|
||||
if PY2:
|
||||
data = self.fp.read(len(temp_mvb))
|
||||
n = len(data)
|
||||
temp_mvb[:n] = data
|
||||
else:
|
||||
n = self.fp.readinto(temp_mvb)
|
||||
else:
|
||||
n = self.fp.readinto(mvb)
|
||||
if PY2:
|
||||
data = self.fp.read(len(mvb))
|
||||
n = len(data)
|
||||
mvb[:n] = data
|
||||
else:
|
||||
n = self.fp.readinto(mvb)
|
||||
if not n:
|
||||
raise IncompleteRead(bytes(mvb[0:total_bytes]), len(b))
|
||||
mvb = mvb[n:]
|
||||
|
@ -892,7 +906,7 @@ class HTTPConnection(object):
|
|||
try:
|
||||
self.sock.sendall(data)
|
||||
except TypeError:
|
||||
if isinstance(data, collections.Iterable):
|
||||
if isinstance(data, Iterable):
|
||||
for d in data:
|
||||
self.sock.sendall(d)
|
||||
else:
|
||||
|
|
|
@ -33,7 +33,7 @@ from __future__ import print_function
|
|||
from __future__ import division
|
||||
from __future__ import absolute_import
|
||||
from future.builtins import filter, int, map, open, str
|
||||
from future.utils import as_native_str
|
||||
from future.utils import as_native_str, PY2
|
||||
|
||||
__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
|
||||
'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar']
|
||||
|
@ -41,7 +41,8 @@ __all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
|
|||
import copy
|
||||
import datetime
|
||||
import re
|
||||
re.ASCII = 0
|
||||
if PY2:
|
||||
re.ASCII = 0
|
||||
import time
|
||||
from future.backports.urllib.parse import urlparse, urlsplit, quote
|
||||
from future.backports.http.client import HTTP_PORT
|
||||
|
|
|
@ -138,7 +138,8 @@ from future.utils import PY2, as_native_str
|
|||
# Import our required modules
|
||||
#
|
||||
import re
|
||||
re.ASCII = 0 # for py2 compatibility
|
||||
if PY2:
|
||||
re.ASCII = 0 # for py2 compatibility
|
||||
import string
|
||||
|
||||
__all__ = ["CookieError", "BaseCookie", "SimpleCookie"]
|
||||
|
|
|
@ -16,7 +16,6 @@ from __future__ import absolute_import
|
|||
|
||||
import subprocess
|
||||
from math import ceil as oldceil
|
||||
from collections import Mapping, MutableMapping
|
||||
|
||||
from operator import itemgetter as _itemgetter, eq as _eq
|
||||
import sys
|
||||
|
@ -25,7 +24,12 @@ from _weakref import proxy as _proxy
|
|||
from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
|
||||
from socket import getaddrinfo, SOCK_STREAM, error, socket
|
||||
|
||||
from future.utils import iteritems, itervalues, PY26, PY3
|
||||
from future.utils import iteritems, itervalues, PY2, PY26, PY3
|
||||
|
||||
if PY2:
|
||||
from collections import Mapping, MutableMapping
|
||||
else:
|
||||
from collections.abc import Mapping, MutableMapping
|
||||
|
||||
|
||||
def ceil(x):
|
||||
|
|
0
libs/future/backports/test/pystone.py
Normal file → Executable file
0
libs/future/backports/test/pystone.py
Normal file → Executable file
|
@ -109,11 +109,17 @@ import re
|
|||
import socket
|
||||
import sys
|
||||
import time
|
||||
import collections
|
||||
import tempfile
|
||||
import contextlib
|
||||
import warnings
|
||||
|
||||
from future.utils import PY2
|
||||
|
||||
if PY2:
|
||||
from collections import Iterable
|
||||
else:
|
||||
from collections.abc import Iterable
|
||||
|
||||
# check for SSL
|
||||
try:
|
||||
import ssl
|
||||
|
@ -1221,7 +1227,7 @@ class AbstractHTTPHandler(BaseHandler):
|
|||
mv = memoryview(data)
|
||||
size = len(mv) * mv.itemsize
|
||||
except TypeError:
|
||||
if isinstance(data, collections.Iterable):
|
||||
if isinstance(data, Iterable):
|
||||
raise ValueError("Content-Length should be specified "
|
||||
"for iterable data of type %r %r" % (type(data),
|
||||
data))
|
||||
|
|
|
@ -11,7 +11,7 @@ from future.builtins.iterators import (filter, map, zip)
|
|||
# The isinstance import is no longer needed. We provide it only for
|
||||
# backward-compatibility with future v0.8.2. It will be removed in future v1.0.
|
||||
from future.builtins.misc import (ascii, chr, hex, input, isinstance, next,
|
||||
oct, open, pow, round, super)
|
||||
oct, open, pow, round, super, max, min)
|
||||
from future.utils import PY3
|
||||
|
||||
if PY3:
|
||||
|
@ -43,7 +43,7 @@ if not utils.PY3:
|
|||
__all__ = ['filter', 'map', 'zip',
|
||||
'ascii', 'chr', 'hex', 'input', 'next', 'oct', 'open', 'pow',
|
||||
'round', 'super',
|
||||
'bytes', 'dict', 'int', 'list', 'object', 'range', 'str',
|
||||
'bytes', 'dict', 'int', 'list', 'object', 'range', 'str', 'max', 'min'
|
||||
]
|
||||
|
||||
else:
|
||||
|
|
|
@ -13,6 +13,8 @@ The builtin functions are:
|
|||
- ``open`` (equivalent to io.open on Py2)
|
||||
- ``super`` (backport of Py3's magic zero-argument super() function)
|
||||
- ``round`` (new "Banker's Rounding" behaviour from Py3)
|
||||
- ``max`` (new default option from Py3.4)
|
||||
- ``min`` (new default option from Py3.4)
|
||||
|
||||
``isinstance`` is also currently exported for backwards compatibility
|
||||
with v0.8.2, although this has been deprecated since v0.9.
|
||||
|
@ -59,6 +61,8 @@ if utils.PY2:
|
|||
from future.builtins.newnext import newnext as next
|
||||
from future.builtins.newround import newround as round
|
||||
from future.builtins.newsuper import newsuper as super
|
||||
from future.builtins.new_min_max import newmax as max
|
||||
from future.builtins.new_min_max import newmin as min
|
||||
from future.types.newint import newint
|
||||
|
||||
_SENTINEL = object()
|
||||
|
@ -89,11 +93,12 @@ if utils.PY2:
|
|||
else:
|
||||
return _builtin_pow(x+0j, y, z)
|
||||
|
||||
|
||||
# ``future`` doesn't support Py3.0/3.1. If we ever did, we'd add this:
|
||||
# callable = __builtin__.callable
|
||||
|
||||
__all__ = ['ascii', 'chr', 'hex', 'input', 'isinstance', 'next', 'oct',
|
||||
'open', 'pow', 'round', 'super']
|
||||
'open', 'pow', 'round', 'super', 'max', 'min']
|
||||
|
||||
else:
|
||||
import builtins
|
||||
|
@ -109,8 +114,14 @@ else:
|
|||
pow = builtins.pow
|
||||
round = builtins.round
|
||||
super = builtins.super
|
||||
|
||||
__all__ = []
|
||||
if utils.PY34_PLUS:
|
||||
max = builtins.max
|
||||
min = builtins.min
|
||||
__all__ = []
|
||||
else:
|
||||
from future.builtins.new_min_max import newmax as max
|
||||
from future.builtins.new_min_max import newmin as min
|
||||
__all__ = ['min', 'max']
|
||||
|
||||
# The callable() function was removed from Py3.0 and 3.1 and
|
||||
# reintroduced into Py3.2+. ``future`` doesn't support Py3.0/3.1. If we ever
|
||||
|
|
59
libs/future/builtins/new_min_max.py
Normal file
59
libs/future/builtins/new_min_max.py
Normal file
|
@ -0,0 +1,59 @@
|
|||
import itertools
|
||||
|
||||
from future import utils
|
||||
if utils.PY2:
|
||||
from __builtin__ import max as _builtin_max, min as _builtin_min
|
||||
else:
|
||||
from builtins import max as _builtin_max, min as _builtin_min
|
||||
|
||||
_SENTINEL = object()
|
||||
|
||||
|
||||
def newmin(*args, **kwargs):
    """
    ``min`` replacement that also understands the ``default`` keyword.

    All positional and keyword arguments are forwarded unchanged to
    ``new_min_max`` together with the builtin ``min``.
    """
    smallest = new_min_max(_builtin_min, *args, **kwargs)
    return smallest
|
||||
|
||||
|
||||
def newmax(*args, **kwargs):
    """
    ``max`` replacement that also understands the ``default`` keyword.

    All positional and keyword arguments are forwarded unchanged to
    ``new_min_max`` together with the builtin ``max``.
    """
    largest = new_min_max(_builtin_max, *args, **kwargs)
    return largest
|
||||
|
||||
|
||||
def new_min_max(_builtin_func, *args, **kwargs):
    """
    To support the argument "default" introduced in python 3.4 for min and max

    :param _builtin_func: builtin min or builtin max
    :param args: either one iterable, or two or more values to compare
    :param kwargs: optional ``key`` (ordering function) and ``default``
        (value returned when the single iterable is empty)
    :return: returns the min or max based on the arguments passed, or
        ``default`` when the iterable is empty and a default was given
    :raises TypeError: for an unknown keyword, for a call without positional
        arguments, or when ``default`` is combined with several positional
        arguments
    :raises ValueError: when the single iterable is empty and no ``default``
        was given
    """
    for name in kwargs:
        if name not in ('key', 'default'):
            # Interpolate explicitly: exception constructors do not apply
            # %-formatting to extra positional arguments.
            raise TypeError('Illegal argument %s' % name)

    if not args:
        raise TypeError('expected at least 1 argument, got 0')

    has_default = 'default' in kwargs
    key = kwargs.get('key')

    if len(args) > 1:
        # Several positional values: compare them directly.  ``default`` only
        # makes sense for the single-iterable form.
        if has_default:
            raise TypeError('Cannot specify a default with multiple '
                            'positional arguments')
        if key is not None:
            return _builtin_func(args, key=key)
        return _builtin_func(args)

    # Single iterable: peek at the first item to detect emptiness without
    # requiring the iterable to support len() or to be re-iterable.
    iterator = iter(args[0])
    try:
        first = next(iterator)
    except StopIteration:
        if has_default:
            return kwargs['default']
        raise ValueError('{}() arg is an empty sequence'.format(_builtin_func.__name__))

    # Put the peeked item back in front of the remaining ones.
    iterator = itertools.chain([first], iterator)
    # ``key`` is only forwarded when set, so the builtin never sees key=None.
    if key is not None:
        return _builtin_func(iterator, key=key)
    return _builtin_func(iterator)
|
|
@ -38,11 +38,14 @@ def newround(number, ndigits=None):
|
|||
if 'numpy' in repr(type(number)):
|
||||
number = float(number)
|
||||
|
||||
if not PY26:
|
||||
d = Decimal.from_float(number).quantize(exponent,
|
||||
rounding=ROUND_HALF_EVEN)
|
||||
if isinstance(number, Decimal):
|
||||
d = number
|
||||
else:
|
||||
d = from_float_26(number).quantize(exponent, rounding=ROUND_HALF_EVEN)
|
||||
if not PY26:
|
||||
d = Decimal.from_float(number).quantize(exponent,
|
||||
rounding=ROUND_HALF_EVEN)
|
||||
else:
|
||||
d = from_float_26(number).quantize(exponent, rounding=ROUND_HALF_EVEN)
|
||||
|
||||
if return_int:
|
||||
return int(d)
|
||||
|
|
|
@ -4,5 +4,5 @@ import sys
|
|||
__future_module__ = True
|
||||
from future.standard_library import import_top_level_modules
|
||||
|
||||
if sys.version_info[0] == 3:
|
||||
if sys.version_info[0] >= 3:
|
||||
import_top_level_modules()
|
||||
|
|
|
@ -2,7 +2,11 @@ from __future__ import absolute_import
|
|||
from future.utils import PY3
|
||||
|
||||
if PY3:
|
||||
from copyreg import *
|
||||
import copyreg, sys
|
||||
# A "*" import uses Python 3's copyreg.__all__ which does not include
|
||||
# all public names in the API surface for copyreg, this avoids that
|
||||
# problem by just making our module _be_ a reference to the actual module.
|
||||
sys.modules['future.moves.copyreg'] = copyreg
|
||||
else:
|
||||
__future_module__ = True
|
||||
from copy_reg import *
|
||||
|
|
|
@ -11,19 +11,8 @@ if PY3:
|
|||
proxy_bypass,
|
||||
quote,
|
||||
request_host,
|
||||
splitattr,
|
||||
splithost,
|
||||
splitpasswd,
|
||||
splitport,
|
||||
splitquery,
|
||||
splittag,
|
||||
splittype,
|
||||
splituser,
|
||||
splitvalue,
|
||||
thishost,
|
||||
to_bytes,
|
||||
unquote,
|
||||
unwrap,
|
||||
url2pathname,
|
||||
urlcleanup,
|
||||
urljoin,
|
||||
|
@ -32,6 +21,18 @@ if PY3:
|
|||
urlretrieve,
|
||||
urlsplit,
|
||||
urlunparse)
|
||||
|
||||
from urllib.parse import (splitattr,
|
||||
splithost,
|
||||
splitpasswd,
|
||||
splitport,
|
||||
splitquery,
|
||||
splittag,
|
||||
splittype,
|
||||
splituser,
|
||||
splitvalue,
|
||||
to_bytes,
|
||||
unwrap)
|
||||
else:
|
||||
__future_module__ = True
|
||||
with suspend_hooks():
|
||||
|
|
|
@ -272,7 +272,11 @@ class CodeHandler(unittest.TestCase):
|
|||
else:
|
||||
headers = ''
|
||||
|
||||
self.compare(output, headers + reformat_code(expected),
|
||||
reformatted = reformat_code(expected)
|
||||
if headers in reformatted:
|
||||
headers = ''
|
||||
|
||||
self.compare(output, headers + reformatted,
|
||||
ignore_imports=ignore_imports)
|
||||
|
||||
def unchanged(self, code, **kwargs):
|
||||
|
@ -338,6 +342,10 @@ class CodeHandler(unittest.TestCase):
|
|||
'----\n%s\n----' % f.read(),
|
||||
)
|
||||
ErrorClass = (FuturizeError if 'futurize' in script else PasteurizeError)
|
||||
|
||||
if not hasattr(e, 'output'):
|
||||
# The attribute CalledProcessError.output doesn't exist on Py2.6
|
||||
e.output = None
|
||||
raise ErrorClass(msg, e.returncode, e.cmd, output=e.output)
|
||||
return output
|
||||
|
||||
|
|
|
@ -5,15 +5,19 @@ Why do this? Without it, the Python 2 bytes object is a very, very
|
|||
different beast to the Python 3 bytes object.
|
||||
"""
|
||||
|
||||
from collections import Iterable
|
||||
from numbers import Integral
|
||||
import string
|
||||
import copy
|
||||
|
||||
from future.utils import istext, isbytes, PY3, with_metaclass
|
||||
from future.utils import istext, isbytes, PY2, PY3, with_metaclass
|
||||
from future.types import no, issubset
|
||||
from future.types.newobject import newobject
|
||||
|
||||
if PY2:
|
||||
from collections import Iterable
|
||||
else:
|
||||
from collections.abc import Iterable
|
||||
|
||||
|
||||
_builtin_bytes = bytes
|
||||
|
||||
|
|
|
@ -8,7 +8,6 @@ They are very similar. The most notable difference is:
|
|||
from __future__ import division
|
||||
|
||||
import struct
|
||||
import collections
|
||||
|
||||
from future.types.newbytes import newbytes
|
||||
from future.types.newobject import newobject
|
||||
|
@ -17,6 +16,9 @@ from future.utils import PY3, isint, istext, isbytes, with_metaclass, native
|
|||
|
||||
if PY3:
|
||||
long = int
|
||||
from collections.abc import Iterable
|
||||
else:
|
||||
from collections import Iterable
|
||||
|
||||
|
||||
class BaseNewInt(type):
|
||||
|
@ -356,7 +358,7 @@ class newint(with_metaclass(BaseNewInt, long)):
|
|||
raise TypeError("cannot convert unicode objects to bytes")
|
||||
# mybytes can also be passed as a sequence of integers on Py3.
|
||||
# Test for this:
|
||||
elif isinstance(mybytes, collections.Iterable):
|
||||
elif isinstance(mybytes, Iterable):
|
||||
mybytes = newbytes(mybytes)
|
||||
b = mybytes if byteorder == 'big' else mybytes[::-1]
|
||||
if len(b) == 0:
|
||||
|
|
|
@ -1,14 +1,16 @@
|
|||
"""
|
||||
A pretty lame implementation of a memoryview object for Python 2.6.
|
||||
"""
|
||||
|
||||
from collections import Iterable
|
||||
from numbers import Integral
|
||||
import string
|
||||
|
||||
from future.utils import istext, isbytes, PY3, with_metaclass
|
||||
from future.utils import istext, isbytes, PY2, with_metaclass
|
||||
from future.types import no, issubset
|
||||
|
||||
if PY2:
|
||||
from collections import Iterable
|
||||
else:
|
||||
from collections.abc import Iterable
|
||||
|
||||
# class BaseNewBytes(type):
|
||||
# def __instancecheck__(cls, instance):
|
||||
|
|
|
@ -112,5 +112,6 @@ class newobject(object):
|
|||
"""
|
||||
return object(self)
|
||||
|
||||
__slots__ = []
|
||||
|
||||
__all__ = ['newobject']
|
||||
|
|
|
@ -19,7 +19,12 @@ From Dan Crosta's README:
|
|||
"""
|
||||
from __future__ import absolute_import
|
||||
|
||||
from collections import Sequence, Iterator
|
||||
from future.utils import PY2
|
||||
|
||||
if PY2:
|
||||
from collections import Sequence, Iterator
|
||||
else:
|
||||
from collections.abc import Sequence, Iterator
|
||||
from itertools import islice
|
||||
|
||||
from future.backports.misc import count # with step parameter on Py2.6
|
||||
|
|
|
@ -40,7 +40,6 @@ representations of your objects portably across Py3 and Py2, use the
|
|||
|
||||
"""
|
||||
|
||||
from collections import Iterable
|
||||
from numbers import Number
|
||||
|
||||
from future.utils import PY3, istext, with_metaclass, isnewbytes
|
||||
|
@ -51,6 +50,9 @@ from future.types.newobject import newobject
|
|||
if PY3:
|
||||
# We'll probably never use newstr on Py3 anyway...
|
||||
unicode = str
|
||||
from collections.abc import Iterable
|
||||
else:
|
||||
from collections import Iterable
|
||||
|
||||
|
||||
class BaseNewStr(type):
|
||||
|
@ -105,6 +107,7 @@ class newstr(with_metaclass(BaseNewStr, unicode)):
|
|||
"""
|
||||
Without the u prefix
|
||||
"""
|
||||
|
||||
value = super(newstr, self).__repr__()
|
||||
# assert value[0] == u'u'
|
||||
return value[1:]
|
||||
|
@ -290,7 +293,14 @@ class newstr(with_metaclass(BaseNewStr, unicode)):
|
|||
isinstance(other, bytes) and not isnewbytes(other)):
|
||||
return super(newstr, self).__eq__(other)
|
||||
else:
|
||||
return False
|
||||
return NotImplemented
|
||||
|
||||
def __hash__(self):
|
||||
if (isinstance(self, unicode) or
|
||||
isinstance(self, bytes) and not isnewbytes(self)):
|
||||
return super(newstr, self).__hash__()
|
||||
else:
|
||||
raise NotImplementedError()
|
||||
|
||||
def __ne__(self, other):
|
||||
if (isinstance(other, unicode) or
|
||||
|
|
|
@ -18,8 +18,10 @@ This module exports useful functions for 2/3 compatible code:
|
|||
* types:
|
||||
|
||||
* text_type: unicode in Python 2, str in Python 3
|
||||
* binary_type: str in Python 2, bytes in Python 3
|
||||
* string_types: basestring in Python 2, str in Python 3
|
||||
* binary_type: str in Python 2, bytes in Python 3
|
||||
* integer_types: (int, long) in Python 2, int in Python 3
|
||||
* class_types: (type, types.ClassType) in Python 2, type in Python 3
|
||||
|
||||
* bchr(c):
|
||||
Take an integer and make a 1-character byte string
|
||||
|
@ -55,7 +57,8 @@ import copy
|
|||
import inspect
|
||||
|
||||
|
||||
PY3 = sys.version_info[0] == 3
|
||||
PY3 = sys.version_info[0] >= 3
|
||||
PY34_PLUS = sys.version_info[0:2] >= (3, 4)
|
||||
PY35_PLUS = sys.version_info[0:2] >= (3, 5)
|
||||
PY36_PLUS = sys.version_info[0:2] >= (3, 6)
|
||||
PY2 = sys.version_info[0] == 2
|
||||
|
@ -405,12 +408,34 @@ if PY3:
|
|||
allows re-raising exceptions with the cls value and traceback on
|
||||
Python 2 and 3.
|
||||
"""
|
||||
if value is not None and isinstance(tp, Exception):
|
||||
raise TypeError("instance exception may not have a separate value")
|
||||
if value is not None:
|
||||
exc = tp(value)
|
||||
else:
|
||||
if isinstance(tp, BaseException):
|
||||
# If the first object is an instance, the type of the exception
|
||||
# is the class of the instance, the instance itself is the value,
|
||||
# and the second object must be None.
|
||||
if value is not None:
|
||||
raise TypeError("instance exception may not have a separate value")
|
||||
exc = tp
|
||||
elif isinstance(tp, type) and not issubclass(tp, BaseException):
|
||||
# If the first object is a class, it becomes the type of the
|
||||
# exception.
|
||||
raise TypeError("class must derive from BaseException, not %s" % tp.__name__)
|
||||
else:
|
||||
# The second object is used to determine the exception value: If it
|
||||
# is an instance of the class, the instance becomes the exception
|
||||
# value. If the second object is a tuple, it is used as the argument
|
||||
# list for the class constructor; if it is None, an empty argument
|
||||
# list is used, and any other object is treated as a single argument
|
||||
# to the constructor. The instance so created by calling the
|
||||
# constructor is used as the exception value.
|
||||
if isinstance(value, tp):
|
||||
exc = value
|
||||
elif isinstance(value, tuple):
|
||||
exc = tp(*value)
|
||||
elif value is None:
|
||||
exc = tp()
|
||||
else:
|
||||
exc = tp(value)
|
||||
|
||||
if exc.__traceback__ is not tb:
|
||||
raise exc.with_traceback(tb)
|
||||
raise exc
|
||||
|
@ -443,12 +468,14 @@ else:
|
|||
e.__suppress_context__ = False
|
||||
if isinstance(cause, type) and issubclass(cause, Exception):
|
||||
e.__cause__ = cause()
|
||||
e.__cause__.__traceback__ = sys.exc_info()[2]
|
||||
e.__suppress_context__ = True
|
||||
elif cause is None:
|
||||
e.__cause__ = None
|
||||
e.__suppress_context__ = True
|
||||
elif isinstance(cause, BaseException):
|
||||
e.__cause__ = cause
|
||||
object.__setattr__(e.__cause__, '__traceback__', sys.exc_info()[2])
|
||||
e.__suppress_context__ = True
|
||||
else:
|
||||
raise TypeError("exception causes must derive from BaseException")
|
||||
|
@ -552,15 +579,14 @@ def isbytes(obj):
|
|||
|
||||
def isnewbytes(obj):
|
||||
"""
|
||||
Equivalent to the result of ``isinstance(obj, newbytes)`` were
|
||||
``__instancecheck__`` not overridden on the newbytes subclass. In
|
||||
other words, it is REALLY a newbytes instance, not a Py2 native str
|
||||
Equivalent to the result of ``type(obj) == type(newbytes)``
|
||||
in other words, it is REALLY a newbytes instance, not a Py2 native str
|
||||
object?
|
||||
|
||||
Note that this does not cover subclasses of newbytes, and it is not
|
||||
equivalent to isinstance(obj, newbytes)
|
||||
"""
|
||||
# TODO: generalize this so that it works with subclasses of newbytes
|
||||
# Import is here to avoid circular imports:
|
||||
from future.types.newbytes import newbytes
|
||||
return type(obj) == newbytes
|
||||
return type(obj).__name__ == 'newbytes'
|
||||
|
||||
|
||||
def isint(obj):
|
||||
|
@ -726,16 +752,16 @@ else:
|
|||
|
||||
|
||||
__all__ = ['PY2', 'PY26', 'PY3', 'PYPY',
|
||||
'as_native_str', 'bind_method', 'bord', 'bstr',
|
||||
'bytes_to_native_str', 'encode_filename', 'ensure_new_type',
|
||||
'exec_', 'get_next', 'getexception', 'implements_iterator',
|
||||
'is_new_style', 'isbytes', 'isidentifier', 'isint',
|
||||
'isnewbytes', 'istext', 'iteritems', 'iterkeys', 'itervalues',
|
||||
'lfilter', 'listitems', 'listvalues', 'lmap', 'lrange',
|
||||
'lzip', 'native', 'native_bytes', 'native_str',
|
||||
'as_native_str', 'binary_type', 'bind_method', 'bord', 'bstr',
|
||||
'bytes_to_native_str', 'class_types', 'encode_filename',
|
||||
'ensure_new_type', 'exec_', 'get_next', 'getexception',
|
||||
'implements_iterator', 'integer_types', 'is_new_style', 'isbytes',
|
||||
'isidentifier', 'isint', 'isnewbytes', 'istext', 'iteritems',
|
||||
'iterkeys', 'itervalues', 'lfilter', 'listitems', 'listvalues',
|
||||
'lmap', 'lrange', 'lzip', 'native', 'native_bytes', 'native_str',
|
||||
'native_str_to_bytes', 'old_div',
|
||||
'python_2_unicode_compatible', 'raise_',
|
||||
'raise_with_traceback', 'reraise', 'text_to_native_str',
|
||||
'tobytes', 'viewitems', 'viewkeys', 'viewvalues',
|
||||
'with_metaclass'
|
||||
]
|
||||
'raise_with_traceback', 'reraise', 'string_types',
|
||||
'text_to_native_str', 'text_type', 'tobytes', 'viewitems',
|
||||
'viewkeys', 'viewvalues', 'with_metaclass'
|
||||
]
|
||||
|
|
|
@ -32,4 +32,4 @@ __all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
|
|||
|
||||
# this has to be at the top level, see how setup.py parses this
|
||||
#: Distribution version number.
|
||||
__version__ = "1.0.1"
|
||||
__version__ = "1.1"
|
||||
|
|
|
@ -136,6 +136,7 @@ def normaliseCharList(charList):
|
|||
i += j
|
||||
return rv
|
||||
|
||||
|
||||
# We don't really support characters above the BMP :(
|
||||
max_unicode = int("FFFF", 16)
|
||||
|
||||
|
@ -254,7 +255,7 @@ class InfosetFilter(object):
|
|||
nameRest = name[1:]
|
||||
m = nonXmlNameFirstBMPRegexp.match(nameFirst)
|
||||
if m:
|
||||
warnings.warn("Coercing non-XML name", DataLossWarning)
|
||||
warnings.warn("Coercing non-XML name: %s" % name, DataLossWarning)
|
||||
nameFirstOutput = self.getReplacementCharacter(nameFirst)
|
||||
else:
|
||||
nameFirstOutput = nameFirst
|
||||
|
@ -262,7 +263,7 @@ class InfosetFilter(object):
|
|||
nameRestOutput = nameRest
|
||||
replaceChars = set(nonXmlNameBMPRegexp.findall(nameRest))
|
||||
for char in replaceChars:
|
||||
warnings.warn("Coercing non-XML name", DataLossWarning)
|
||||
warnings.warn("Coercing non-XML name: %s" % name, DataLossWarning)
|
||||
replacement = self.getReplacementCharacter(char)
|
||||
nameRestOutput = nameRestOutput.replace(char, replacement)
|
||||
return nameFirstOutput + nameRestOutput
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from six import text_type, binary_type
|
||||
from six import text_type
|
||||
from six.moves import http_client, urllib
|
||||
|
||||
import codecs
|
||||
import re
|
||||
from io import BytesIO, StringIO
|
||||
|
||||
import webencodings
|
||||
|
||||
|
@ -12,13 +13,6 @@ from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
|
|||
from .constants import _ReparseException
|
||||
from . import _utils
|
||||
|
||||
from io import StringIO
|
||||
|
||||
try:
|
||||
from io import BytesIO
|
||||
except ImportError:
|
||||
BytesIO = StringIO
|
||||
|
||||
# Non-unicode versions of constants for use in the pre-parser
|
||||
spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters])
|
||||
asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters])
|
||||
|
@ -40,13 +34,13 @@ if _utils.supports_lone_surrogates:
|
|||
else:
|
||||
invalid_unicode_re = re.compile(invalid_unicode_no_surrogate)
|
||||
|
||||
non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
|
||||
0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
|
||||
0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE,
|
||||
0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF,
|
||||
0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
|
||||
0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
|
||||
0x10FFFE, 0x10FFFF])
|
||||
non_bmp_invalid_codepoints = {0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
|
||||
0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
|
||||
0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE,
|
||||
0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF,
|
||||
0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
|
||||
0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
|
||||
0x10FFFE, 0x10FFFF}
|
||||
|
||||
ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005C\u005B-\u0060\u007B-\u007E]")
|
||||
|
||||
|
@ -367,7 +361,7 @@ class HTMLUnicodeInputStream(object):
|
|||
def unget(self, char):
|
||||
# Only one character is allowed to be ungotten at once - it must
|
||||
# be consumed again before any further call to unget
|
||||
if char is not None:
|
||||
if char is not EOF:
|
||||
if self.chunkOffset == 0:
|
||||
# unget is called quite rarely, so it's a good idea to do
|
||||
# more work here if it saves a bit of work in the frequently
|
||||
|
@ -449,7 +443,7 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
|
|||
|
||||
try:
|
||||
stream.seek(stream.tell())
|
||||
except: # pylint:disable=bare-except
|
||||
except Exception:
|
||||
stream = BufferedStream(stream)
|
||||
|
||||
return stream
|
||||
|
@ -461,7 +455,7 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
|
|||
if charEncoding[0] is not None:
|
||||
return charEncoding
|
||||
|
||||
# If we've been overriden, we've been overriden
|
||||
# If we've been overridden, we've been overridden
|
||||
charEncoding = lookupEncoding(self.override_encoding), "certain"
|
||||
if charEncoding[0] is not None:
|
||||
return charEncoding
|
||||
|
@ -664,9 +658,7 @@ class EncodingBytes(bytes):
|
|||
"""Look for a sequence of bytes at the start of a string. If the bytes
|
||||
are found return True and advance the position to the byte after the
|
||||
match. Otherwise return False and leave the position alone"""
|
||||
p = self.position
|
||||
data = self[p:p + len(bytes)]
|
||||
rv = data.startswith(bytes)
|
||||
rv = self.startswith(bytes, self.position)
|
||||
if rv:
|
||||
self.position += len(bytes)
|
||||
return rv
|
||||
|
@ -674,15 +666,11 @@ class EncodingBytes(bytes):
|
|||
def jumpTo(self, bytes):
|
||||
"""Look for the next sequence of bytes matching a given sequence. If
|
||||
a match is found advance the position to the last byte of the match"""
|
||||
newPosition = self[self.position:].find(bytes)
|
||||
if newPosition > -1:
|
||||
# XXX: This is ugly, but I can't see a nicer way to fix this.
|
||||
if self._position == -1:
|
||||
self._position = 0
|
||||
self._position += (newPosition + len(bytes) - 1)
|
||||
return True
|
||||
else:
|
||||
try:
|
||||
self._position = self.index(bytes, self.position) + len(bytes) - 1
|
||||
except ValueError:
|
||||
raise StopIteration
|
||||
return True
|
||||
|
||||
|
||||
class EncodingParser(object):
|
||||
|
@ -694,6 +682,9 @@ class EncodingParser(object):
|
|||
self.encoding = None
|
||||
|
||||
def getEncoding(self):
|
||||
if b"<meta" not in self.data:
|
||||
return None
|
||||
|
||||
methodDispatch = (
|
||||
(b"<!--", self.handleComment),
|
||||
(b"<meta", self.handleMeta),
|
||||
|
@ -703,6 +694,10 @@ class EncodingParser(object):
|
|||
(b"<", self.handlePossibleStartTag))
|
||||
for _ in self.data:
|
||||
keepParsing = True
|
||||
try:
|
||||
self.data.jumpTo(b"<")
|
||||
except StopIteration:
|
||||
break
|
||||
for key, method in methodDispatch:
|
||||
if self.data.matchBytes(key):
|
||||
try:
|
||||
|
@ -908,7 +903,7 @@ class ContentAttrParser(object):
|
|||
def lookupEncoding(encoding):
|
||||
"""Return the python codec name corresponding to an encoding or None if the
|
||||
string doesn't correspond to a valid encoding."""
|
||||
if isinstance(encoding, binary_type):
|
||||
if isinstance(encoding, bytes):
|
||||
try:
|
||||
encoding = encoding.decode("ascii")
|
||||
except UnicodeDecodeError:
|
||||
|
|
|
@ -2,7 +2,8 @@ from __future__ import absolute_import, division, unicode_literals
|
|||
|
||||
from six import unichr as chr
|
||||
|
||||
from collections import deque
|
||||
from collections import deque, OrderedDict
|
||||
from sys import version_info
|
||||
|
||||
from .constants import spaceCharacters
|
||||
from .constants import entities
|
||||
|
@ -17,6 +18,11 @@ from ._trie import Trie
|
|||
|
||||
entitiesTrie = Trie(entities)
|
||||
|
||||
if version_info >= (3, 7):
|
||||
attributeMap = dict
|
||||
else:
|
||||
attributeMap = OrderedDict
|
||||
|
||||
|
||||
class HTMLTokenizer(object):
|
||||
""" This class takes care of tokenizing HTML.
|
||||
|
@ -228,6 +234,14 @@ class HTMLTokenizer(object):
|
|||
# Add token to the queue to be yielded
|
||||
if (token["type"] in tagTokenTypes):
|
||||
token["name"] = token["name"].translate(asciiUpper2Lower)
|
||||
if token["type"] == tokenTypes["StartTag"]:
|
||||
raw = token["data"]
|
||||
data = attributeMap(raw)
|
||||
if len(raw) > len(data):
|
||||
# we had some duplicated attribute, fix so first wins
|
||||
data.update(raw[::-1])
|
||||
token["data"] = data
|
||||
|
||||
if token["type"] == tokenTypes["EndTag"]:
|
||||
if token["data"]:
|
||||
self.tokenQueue.append({"type": tokenTypes["ParseError"],
|
||||
|
|
|
@ -1,14 +1,5 @@
|
|||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from .py import Trie as PyTrie
|
||||
from .py import Trie
|
||||
|
||||
Trie = PyTrie
|
||||
|
||||
# pylint:disable=wrong-import-position
|
||||
try:
|
||||
from .datrie import Trie as DATrie
|
||||
except ImportError:
|
||||
pass
|
||||
else:
|
||||
Trie = DATrie
|
||||
# pylint:enable=wrong-import-position
|
||||
__all__ = ["Trie"]
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from collections import Mapping
|
||||
try:
|
||||
from collections.abc import Mapping
|
||||
except ImportError: # Python 2.7
|
||||
from collections import Mapping
|
||||
|
||||
|
||||
class Trie(Mapping):
|
||||
|
|
|
@ -1,44 +0,0 @@
|
|||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from datrie import Trie as DATrie
|
||||
from six import text_type
|
||||
|
||||
from ._base import Trie as ABCTrie
|
||||
|
||||
|
||||
class Trie(ABCTrie):
    """Trie that stores its entries in a ``datrie`` trie held in ``_data``."""

    def __init__(self, data):
        """Build the trie from the mapping ``data``; every key must be text."""
        # The underlying trie is constructed from the characters used by
        # the keys, so collect them first.
        alphabet = set()
        for key in data.keys():
            if not isinstance(key, text_type):
                raise TypeError("All keys must be strings")
            alphabet.update(key)

        self._data = DATrie("".join(alphabet))
        store = self._data
        for key, value in data.items():
            store[key] = value

    def __contains__(self, key):
        """Report whether ``key`` is stored in the underlying trie."""
        return key in self._data

    def __len__(self):
        """Return the size reported by the underlying trie."""
        return len(self._data)

    def __iter__(self):
        """Iteration is not supported by this implementation."""
        raise NotImplementedError()

    def __getitem__(self, key):
        """Return the value stored under ``key`` in the underlying trie."""
        return self._data[key]

    def keys(self, prefix=None):
        """Delegate to the underlying trie's ``keys(prefix)``."""
        return self._data.keys(prefix)

    def has_keys_with_prefix(self, prefix):
        """Delegate to the underlying trie's ``has_keys_with_prefix``."""
        return self._data.has_keys_with_prefix(prefix)

    def longest_prefix(self, prefix):
        """Delegate to the underlying trie's ``longest_prefix``."""
        return self._data.longest_prefix(prefix)

    def longest_prefix_item(self, prefix):
        """Delegate to the underlying trie's ``longest_prefix_item``."""
        return self._data.longest_prefix_item(prefix)
|
|
@ -2,12 +2,20 @@ from __future__ import absolute_import, division, unicode_literals
|
|||
|
||||
from types import ModuleType
|
||||
|
||||
from six import text_type
|
||||
|
||||
try:
|
||||
import xml.etree.cElementTree as default_etree
|
||||
from collections.abc import Mapping
|
||||
except ImportError:
|
||||
from collections import Mapping
|
||||
|
||||
from six import text_type, PY3
|
||||
|
||||
if PY3:
|
||||
import xml.etree.ElementTree as default_etree
|
||||
else:
|
||||
try:
|
||||
import xml.etree.cElementTree as default_etree
|
||||
except ImportError:
|
||||
import xml.etree.ElementTree as default_etree
|
||||
|
||||
|
||||
__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
|
||||
|
@ -27,7 +35,7 @@ try:
|
|||
# We need this with u"" because of http://bugs.jython.org/issue2039
|
||||
_x = eval('u"\\uD800"') # pylint:disable=eval-used
|
||||
assert isinstance(_x, text_type)
|
||||
except: # pylint:disable=bare-except
|
||||
except Exception:
|
||||
supports_lone_surrogates = False
|
||||
else:
|
||||
supports_lone_surrogates = True
|
||||
|
@ -47,9 +55,6 @@ class MethodDispatcher(dict):
|
|||
"""
|
||||
|
||||
def __init__(self, items=()):
|
||||
# Using _dictEntries instead of directly assigning to self is about
|
||||
# twice as fast. Please do careful performance testing before changing
|
||||
# anything here.
|
||||
_dictEntries = []
|
||||
for name, value in items:
|
||||
if isinstance(name, (list, tuple, frozenset, set)):
|
||||
|
@ -64,6 +69,36 @@ class MethodDispatcher(dict):
|
|||
def __getitem__(self, key):
    """Return the entry stored under *key*, or ``self.default`` when it is absent."""
    # dict.get is called on the base type, so a missing key yields the
    # fallback instead of raising KeyError.
    fallback = self.default
    return dict.get(self, key, fallback)
|
||||
|
||||
def __get__(self, instance, owner=None):
    """Descriptor hook: return a BoundMethodDispatcher tying this table to *instance*.

    *owner* is accepted for the descriptor protocol but is not used.
    """
    bound = BoundMethodDispatcher(instance, self)
    return bound
|
||||
|
||||
|
||||
class BoundMethodDispatcher(Mapping):
    """Wraps a MethodDispatcher, binding its return values to `instance`

    Lookups are forwarded to ``dispatcher``; the object found there is bound
    to ``instance`` through its ``__get__`` before being returned, so the
    caller receives something it can call without passing the instance.
    Iteration, length and membership are answered by ``dispatcher`` directly.
    """

    def __init__(self, instance, dispatcher):
        """Remember the *instance* to bind to and the *dispatcher* to look up in."""
        self.instance = instance
        self.dispatcher = dispatcher

    def __getitem__(self, key):
        """Return ``dispatcher[key]`` bound to the stored instance."""
        # see https://docs.python.org/3/reference/datamodel.html#object.__get__
        # on a function, __get__ is used to bind a function to an instance as a bound method
        return self.dispatcher[key].__get__(self.instance)

    def get(self, key, default=None):
        """Return the bound entry for *key* if the dispatcher contains it, else *default*.

        *default* is optional (``None``), matching the ``Mapping.get``
        signature this class inherits; callers that pass it explicitly are
        unaffected.
        """
        # Membership is tested on the dispatcher itself rather than via
        # self[key], so a dispatcher whose __getitem__ supplies a fallback
        # for unknown keys still yields *default* here.
        if key in self.dispatcher:
            return self[key]
        return default

    def __iter__(self):
        """Iterate over the dispatcher's keys."""
        return iter(self.dispatcher)

    def __len__(self):
        """Return the number of entries in the dispatcher."""
        return len(self.dispatcher)

    def __contains__(self, key):
        """Return True when *key* is present in the dispatcher."""
        return key in self.dispatcher
|
||||
|
||||
|
||||
# Some utility functions to deal with weirdness around UCS2 vs UCS4
|
||||
# python builds
|
||||
|
|
|
@ -519,8 +519,8 @@ adjustForeignAttributes = {
|
|||
"xmlns:xlink": ("xmlns", "xlink", namespaces["xmlns"])
|
||||
}
|
||||
|
||||
unadjustForeignAttributes = dict([((ns, local), qname) for qname, (prefix, local, ns) in
|
||||
adjustForeignAttributes.items()])
|
||||
unadjustForeignAttributes = {(ns, local): qname for qname, (prefix, local, ns) in
|
||||
adjustForeignAttributes.items()}
|
||||
|
||||
spaceCharacters = frozenset([
|
||||
"\t",
|
||||
|
@ -544,8 +544,7 @@ asciiLetters = frozenset(string.ascii_letters)
|
|||
digits = frozenset(string.digits)
|
||||
hexDigits = frozenset(string.hexdigits)
|
||||
|
||||
asciiUpper2Lower = dict([(ord(c), ord(c.lower()))
|
||||
for c in string.ascii_uppercase])
|
||||
asciiUpper2Lower = {ord(c): ord(c.lower()) for c in string.ascii_uppercase}
|
||||
|
||||
# Heading elements need to be ordered
|
||||
headingElements = (
|
||||
|
@ -2934,7 +2933,7 @@ tagTokenTypes = frozenset([tokenTypes["StartTag"], tokenTypes["EndTag"],
|
|||
tokenTypes["EmptyTag"]])
|
||||
|
||||
|
||||
prefixes = dict([(v, k) for k, v in namespaces.items()])
|
||||
prefixes = {v: k for k, v in namespaces.items()}
|
||||
prefixes["http://www.w3.org/1998/Math/MathML"] = "math"
|
||||
|
||||
|
||||
|
|
|
@ -1,6 +1,15 @@
|
|||
"""Deprecated from html5lib 1.1.
|
||||
|
||||
See `here <https://github.com/html5lib/html5lib-python/issues/443>`_ for
|
||||
information about its deprecation; `Bleach <https://github.com/mozilla/bleach>`_
|
||||
is recommended as a replacement. Please let us know in the aforementioned issue
|
||||
if Bleach is unsuitable for your needs.
|
||||
|
||||
"""
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
import re
|
||||
import warnings
|
||||
from xml.sax.saxutils import escape, unescape
|
||||
|
||||
from six.moves import urllib_parse as urlparse
|
||||
|
@ -11,6 +20,14 @@ from ..constants import namespaces, prefixes
|
|||
__all__ = ["Filter"]
|
||||
|
||||
|
||||
_deprecation_msg = (
|
||||
"html5lib's sanitizer is deprecated; see " +
|
||||
"https://github.com/html5lib/html5lib-python/issues/443 and please let " +
|
||||
"us know if Bleach is unsuitable for your needs"
|
||||
)
|
||||
|
||||
warnings.warn(_deprecation_msg, DeprecationWarning)
|
||||
|
||||
allowed_elements = frozenset((
|
||||
(namespaces['html'], 'a'),
|
||||
(namespaces['html'], 'abbr'),
|
||||
|
@ -750,6 +767,9 @@ class Filter(base.Filter):
|
|||
|
||||
"""
|
||||
super(Filter, self).__init__(source)
|
||||
|
||||
warnings.warn(_deprecation_msg, DeprecationWarning)
|
||||
|
||||
self.allowed_elements = allowed_elements
|
||||
self.allowed_attributes = allowed_attributes
|
||||
self.allowed_css_properties = allowed_css_properties
|
||||
|
|
|
@ -2,7 +2,6 @@ from __future__ import absolute_import, division, unicode_literals
|
|||
from six import with_metaclass, viewkeys
|
||||
|
||||
import types
|
||||
from collections import OrderedDict
|
||||
|
||||
from . import _inputstream
|
||||
from . import _tokenizer
|
||||
|
@ -119,8 +118,8 @@ class HTMLParser(object):
|
|||
self.tree = tree(namespaceHTMLElements)
|
||||
self.errors = []
|
||||
|
||||
self.phases = dict([(name, cls(self, self.tree)) for name, cls in
|
||||
getPhases(debug).items()])
|
||||
self.phases = {name: cls(self, self.tree) for name, cls in
|
||||
getPhases(debug).items()}
|
||||
|
||||
def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs):
|
||||
|
||||
|
@ -202,7 +201,7 @@ class HTMLParser(object):
|
|||
DoctypeToken = tokenTypes["Doctype"]
|
||||
ParseErrorToken = tokenTypes["ParseError"]
|
||||
|
||||
for token in self.normalizedTokens():
|
||||
for token in self.tokenizer:
|
||||
prev_token = None
|
||||
new_token = token
|
||||
while new_token is not None:
|
||||
|
@ -260,10 +259,6 @@ class HTMLParser(object):
|
|||
if reprocess:
|
||||
assert self.phase not in phases
|
||||
|
||||
def normalizedTokens(self):
    """Yield every token from ``self.tokenizer`` after ``self.normalizeToken`` has processed it."""
    token_source = self.tokenizer
    for raw_token in token_source:
        yield self.normalizeToken(raw_token)
|
||||
|
||||
def parse(self, stream, *args, **kwargs):
|
||||
"""Parse a HTML document into a well-formed tree
|
||||
|
||||
|
@ -325,17 +320,6 @@ class HTMLParser(object):
|
|||
if self.strict:
|
||||
raise ParseError(E[errorcode] % datavars)
|
||||
|
||||
def normalizeToken(self, token):
    """Convert a start tag token's attribute data into an OrderedDict, in place.

    Tokens of any other type are returned untouched. When the raw data holds
    more entries than the resulting dict (i.e. some key was repeated), the
    dict is refreshed from the reversed raw data so the first occurrence of
    each key is the one that is kept. The same token object is returned.
    """
    # HTML5 specific normalizations to the token stream
    if token["type"] != tokenTypes["StartTag"]:
        return token

    attr_pairs = token["data"]
    attrs = OrderedDict(attr_pairs)
    token["data"] = attrs
    if len(attrs) < len(attr_pairs):
        # we had some duplicated attribute, fix so first wins
        attrs.update(attr_pairs[::-1])
    return token
|
||||
|
||||
def adjustMathMLAttributes(self, token):
|
||||
adjust_attributes(token, adjustMathMLAttributes)
|
||||
|
||||
|
@ -413,16 +397,12 @@ class HTMLParser(object):
|
|||
def getPhases(debug):
|
||||
def log(function):
|
||||
"""Logger that records which phase processes each token"""
|
||||
type_names = dict((value, key) for key, value in
|
||||
tokenTypes.items())
|
||||
type_names = {value: key for key, value in tokenTypes.items()}
|
||||
|
||||
def wrapped(self, *args, **kwargs):
|
||||
if function.__name__.startswith("process") and len(args) > 0:
|
||||
token = args[0]
|
||||
try:
|
||||
info = {"type": type_names[token['type']]}
|
||||
except:
|
||||
raise
|
||||
info = {"type": type_names[token['type']]}
|
||||
if token['type'] in tagTokenTypes:
|
||||
info["name"] = token['name']
|
||||
|
||||
|
@ -446,10 +426,13 @@ def getPhases(debug):
|
|||
class Phase(with_metaclass(getMetaclass(debug, log))):
|
||||
"""Base class for helper object that implements each phase of processing
|
||||
"""
|
||||
__slots__ = ("parser", "tree", "__startTagCache", "__endTagCache")
|
||||
|
||||
def __init__(self, parser, tree):
    """Store the given *parser* and *tree* and start with empty tag-handler caches."""
    self.parser, self.tree = parser, tree
    # Two separate per-instance dicts, one for start tags and one for end tags.
    self.__endTagCache = {}
    self.__startTagCache = {}
|
||||
|
||||
def processEOF(self):
|
||||
raise NotImplementedError
|
||||
|
@ -469,7 +452,21 @@ def getPhases(debug):
|
|||
self.tree.insertText(token["data"])
|
||||
|
||||
def processStartTag(self, token):
    """Dispatch a start tag *token* to its handler and return the handler's result.

    The handler for ``token["name"]`` is taken from the per-instance start
    tag cache when present; otherwise it is looked up in
    ``self.startTagHandler`` and stored in the cache. A leftover
    unconditional ``return`` that preceded this logic made the cache
    unreachable and has been removed.
    """
    # Note the caching is done here rather than BoundMethodDispatcher as doing it there
    # requires a circular reference to the Phase, and this ends up with a significant
    # (CPython 2.7, 3.8) GC cost when parsing many short inputs
    name = token["name"]
    # In Py2, using `in` is quicker in general than try/except KeyError
    # In Py3, `in` is quicker when there are few cache hits (typically short inputs)
    if name in self.__startTagCache:
        func = self.__startTagCache[name]
    else:
        func = self.__startTagCache[name] = self.startTagHandler[name]
        # bound the cache size in case we get loads of unknown tags
        while len(self.__startTagCache) > len(self.startTagHandler) * 1.1:
            # this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7
            self.__startTagCache.pop(next(iter(self.__startTagCache)))
    return func(token)
|
||||
|
||||
def startTagHtml(self, token):
|
||||
if not self.parser.firstStartTag and token["name"] == "html":
|
||||
|
@ -482,9 +479,25 @@ def getPhases(debug):
|
|||
self.parser.firstStartTag = False
|
||||
|
||||
def processEndTag(self, token):
    """Dispatch an end tag *token* to its handler and return the handler's result.

    The handler for ``token["name"]`` is taken from the per-instance end tag
    cache when present; otherwise it is looked up in ``self.endTagHandler``
    and stored in the cache. A leftover unconditional ``return`` that
    preceded this logic made the cache unreachable and has been removed.
    """
    # Note the caching is done here rather than BoundMethodDispatcher as doing it there
    # requires a circular reference to the Phase, and this ends up with a significant
    # (CPython 2.7, 3.8) GC cost when parsing many short inputs
    name = token["name"]
    # In Py2, using `in` is quicker in general than try/except KeyError
    # In Py3, `in` is quicker when there are few cache hits (typically short inputs)
    if name in self.__endTagCache:
        func = self.__endTagCache[name]
    else:
        func = self.__endTagCache[name] = self.endTagHandler[name]
        # bound the cache size in case we get loads of unknown tags
        while len(self.__endTagCache) > len(self.endTagHandler) * 1.1:
            # this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7
            self.__endTagCache.pop(next(iter(self.__endTagCache)))
    return func(token)
|
||||
|
||||
class InitialPhase(Phase):
|
||||
__slots__ = tuple()
|
||||
|
||||
def processSpaceCharacters(self, token):
|
||||
pass
|
||||
|
||||
|
@ -613,6 +626,8 @@ def getPhases(debug):
|
|||
return True
|
||||
|
||||
class BeforeHtmlPhase(Phase):
|
||||
__slots__ = tuple()
|
||||
|
||||
# helper methods
|
||||
def insertHtmlElement(self):
|
||||
self.tree.insertRoot(impliedTagToken("html", "StartTag"))
|
||||
|
@ -648,19 +663,7 @@ def getPhases(debug):
|
|||
return token
|
||||
|
||||
class BeforeHeadPhase(Phase):
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.startTagHtml),
|
||||
("head", self.startTagHead)
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
|
||||
self.endTagHandler = _utils.MethodDispatcher([
|
||||
(("head", "body", "html", "br"), self.endTagImplyHead)
|
||||
])
|
||||
self.endTagHandler.default = self.endTagOther
|
||||
__slots__ = tuple()
|
||||
|
||||
def processEOF(self):
|
||||
self.startTagHead(impliedTagToken("head", "StartTag"))
|
||||
|
@ -693,28 +696,19 @@ def getPhases(debug):
|
|||
self.parser.parseError("end-tag-after-implied-root",
|
||||
{"name": token["name"]})
|
||||
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
("html", startTagHtml),
|
||||
("head", startTagHead)
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
|
||||
endTagHandler = _utils.MethodDispatcher([
|
||||
(("head", "body", "html", "br"), endTagImplyHead)
|
||||
])
|
||||
endTagHandler.default = endTagOther
|
||||
|
||||
class InHeadPhase(Phase):
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.startTagHtml),
|
||||
("title", self.startTagTitle),
|
||||
(("noframes", "style"), self.startTagNoFramesStyle),
|
||||
("noscript", self.startTagNoscript),
|
||||
("script", self.startTagScript),
|
||||
(("base", "basefont", "bgsound", "command", "link"),
|
||||
self.startTagBaseLinkCommand),
|
||||
("meta", self.startTagMeta),
|
||||
("head", self.startTagHead)
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
|
||||
self.endTagHandler = _utils.MethodDispatcher([
|
||||
("head", self.endTagHead),
|
||||
(("br", "html", "body"), self.endTagHtmlBodyBr)
|
||||
])
|
||||
self.endTagHandler.default = self.endTagOther
|
||||
__slots__ = tuple()
|
||||
|
||||
# the real thing
|
||||
def processEOF(self):
|
||||
|
@ -796,22 +790,27 @@ def getPhases(debug):
|
|||
def anythingElse(self):
|
||||
self.endTagHead(impliedTagToken("head"))
|
||||
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
("html", startTagHtml),
|
||||
("title", startTagTitle),
|
||||
(("noframes", "style"), startTagNoFramesStyle),
|
||||
("noscript", startTagNoscript),
|
||||
("script", startTagScript),
|
||||
(("base", "basefont", "bgsound", "command", "link"),
|
||||
startTagBaseLinkCommand),
|
||||
("meta", startTagMeta),
|
||||
("head", startTagHead)
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
|
||||
endTagHandler = _utils.MethodDispatcher([
|
||||
("head", endTagHead),
|
||||
(("br", "html", "body"), endTagHtmlBodyBr)
|
||||
])
|
||||
endTagHandler.default = endTagOther
|
||||
|
||||
class InHeadNoscriptPhase(Phase):
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.startTagHtml),
|
||||
(("basefont", "bgsound", "link", "meta", "noframes", "style"), self.startTagBaseLinkCommand),
|
||||
(("head", "noscript"), self.startTagHeadNoscript),
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
|
||||
self.endTagHandler = _utils.MethodDispatcher([
|
||||
("noscript", self.endTagNoscript),
|
||||
("br", self.endTagBr),
|
||||
])
|
||||
self.endTagHandler.default = self.endTagOther
|
||||
__slots__ = tuple()
|
||||
|
||||
def processEOF(self):
|
||||
self.parser.parseError("eof-in-head-noscript")
|
||||
|
@ -860,23 +859,21 @@ def getPhases(debug):
|
|||
# Caller must raise parse error first!
|
||||
self.endTagNoscript(impliedTagToken("noscript"))
|
||||
|
||||
class AfterHeadPhase(Phase):
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
("html", startTagHtml),
|
||||
(("basefont", "bgsound", "link", "meta", "noframes", "style"), startTagBaseLinkCommand),
|
||||
(("head", "noscript"), startTagHeadNoscript),
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.startTagHtml),
|
||||
("body", self.startTagBody),
|
||||
("frameset", self.startTagFrameset),
|
||||
(("base", "basefont", "bgsound", "link", "meta", "noframes", "script",
|
||||
"style", "title"),
|
||||
self.startTagFromHead),
|
||||
("head", self.startTagHead)
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
self.endTagHandler = _utils.MethodDispatcher([(("body", "html", "br"),
|
||||
self.endTagHtmlBodyBr)])
|
||||
self.endTagHandler.default = self.endTagOther
|
||||
endTagHandler = _utils.MethodDispatcher([
|
||||
("noscript", endTagNoscript),
|
||||
("br", endTagBr),
|
||||
])
|
||||
endTagHandler.default = endTagOther
|
||||
|
||||
class AfterHeadPhase(Phase):
|
||||
__slots__ = tuple()
|
||||
|
||||
def processEOF(self):
|
||||
self.anythingElse()
|
||||
|
@ -927,80 +924,30 @@ def getPhases(debug):
|
|||
self.parser.phase = self.parser.phases["inBody"]
|
||||
self.parser.framesetOK = True
|
||||
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
("html", startTagHtml),
|
||||
("body", startTagBody),
|
||||
("frameset", startTagFrameset),
|
||||
(("base", "basefont", "bgsound", "link", "meta", "noframes", "script",
|
||||
"style", "title"),
|
||||
startTagFromHead),
|
||||
("head", startTagHead)
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
endTagHandler = _utils.MethodDispatcher([(("body", "html", "br"),
|
||||
endTagHtmlBodyBr)])
|
||||
endTagHandler.default = endTagOther
|
||||
|
||||
class InBodyPhase(Phase):
|
||||
# http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody
|
||||
# the really-really-really-very crazy mode
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
__slots__ = ("processSpaceCharacters",)
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(InBodyPhase, self).__init__(*args, **kwargs)
|
||||
# Set this to the default handler
|
||||
self.processSpaceCharacters = self.processSpaceCharactersNonPre
|
||||
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.startTagHtml),
|
||||
(("base", "basefont", "bgsound", "command", "link", "meta",
|
||||
"script", "style", "title"),
|
||||
self.startTagProcessInHead),
|
||||
("body", self.startTagBody),
|
||||
("frameset", self.startTagFrameset),
|
||||
(("address", "article", "aside", "blockquote", "center", "details",
|
||||
"dir", "div", "dl", "fieldset", "figcaption", "figure",
|
||||
"footer", "header", "hgroup", "main", "menu", "nav", "ol", "p",
|
||||
"section", "summary", "ul"),
|
||||
self.startTagCloseP),
|
||||
(headingElements, self.startTagHeading),
|
||||
(("pre", "listing"), self.startTagPreListing),
|
||||
("form", self.startTagForm),
|
||||
(("li", "dd", "dt"), self.startTagListItem),
|
||||
("plaintext", self.startTagPlaintext),
|
||||
("a", self.startTagA),
|
||||
(("b", "big", "code", "em", "font", "i", "s", "small", "strike",
|
||||
"strong", "tt", "u"), self.startTagFormatting),
|
||||
("nobr", self.startTagNobr),
|
||||
("button", self.startTagButton),
|
||||
(("applet", "marquee", "object"), self.startTagAppletMarqueeObject),
|
||||
("xmp", self.startTagXmp),
|
||||
("table", self.startTagTable),
|
||||
(("area", "br", "embed", "img", "keygen", "wbr"),
|
||||
self.startTagVoidFormatting),
|
||||
(("param", "source", "track"), self.startTagParamSource),
|
||||
("input", self.startTagInput),
|
||||
("hr", self.startTagHr),
|
||||
("image", self.startTagImage),
|
||||
("isindex", self.startTagIsIndex),
|
||||
("textarea", self.startTagTextarea),
|
||||
("iframe", self.startTagIFrame),
|
||||
("noscript", self.startTagNoscript),
|
||||
(("noembed", "noframes"), self.startTagRawtext),
|
||||
("select", self.startTagSelect),
|
||||
(("rp", "rt"), self.startTagRpRt),
|
||||
(("option", "optgroup"), self.startTagOpt),
|
||||
(("math"), self.startTagMath),
|
||||
(("svg"), self.startTagSvg),
|
||||
(("caption", "col", "colgroup", "frame", "head",
|
||||
"tbody", "td", "tfoot", "th", "thead",
|
||||
"tr"), self.startTagMisplaced)
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
|
||||
self.endTagHandler = _utils.MethodDispatcher([
|
||||
("body", self.endTagBody),
|
||||
("html", self.endTagHtml),
|
||||
(("address", "article", "aside", "blockquote", "button", "center",
|
||||
"details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure",
|
||||
"footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre",
|
||||
"section", "summary", "ul"), self.endTagBlock),
|
||||
("form", self.endTagForm),
|
||||
("p", self.endTagP),
|
||||
(("dd", "dt", "li"), self.endTagListItem),
|
||||
(headingElements, self.endTagHeading),
|
||||
(("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
|
||||
"strike", "strong", "tt", "u"), self.endTagFormatting),
|
||||
(("applet", "marquee", "object"), self.endTagAppletMarqueeObject),
|
||||
("br", self.endTagBr),
|
||||
])
|
||||
self.endTagHandler.default = self.endTagOther
|
||||
|
||||
def isMatchingFormattingElement(self, node1, node2):
|
||||
return (node1.name == node2.name and
|
||||
node1.namespace == node2.namespace and
|
||||
|
@ -1650,14 +1597,73 @@ def getPhases(debug):
|
|||
self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
|
||||
break
|
||||
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
("html", Phase.startTagHtml),
|
||||
(("base", "basefont", "bgsound", "command", "link", "meta",
|
||||
"script", "style", "title"),
|
||||
startTagProcessInHead),
|
||||
("body", startTagBody),
|
||||
("frameset", startTagFrameset),
|
||||
(("address", "article", "aside", "blockquote", "center", "details",
|
||||
"dir", "div", "dl", "fieldset", "figcaption", "figure",
|
||||
"footer", "header", "hgroup", "main", "menu", "nav", "ol", "p",
|
||||
"section", "summary", "ul"),
|
||||
startTagCloseP),
|
||||
(headingElements, startTagHeading),
|
||||
(("pre", "listing"), startTagPreListing),
|
||||
("form", startTagForm),
|
||||
(("li", "dd", "dt"), startTagListItem),
|
||||
("plaintext", startTagPlaintext),
|
||||
("a", startTagA),
|
||||
(("b", "big", "code", "em", "font", "i", "s", "small", "strike",
|
||||
"strong", "tt", "u"), startTagFormatting),
|
||||
("nobr", startTagNobr),
|
||||
("button", startTagButton),
|
||||
(("applet", "marquee", "object"), startTagAppletMarqueeObject),
|
||||
("xmp", startTagXmp),
|
||||
("table", startTagTable),
|
||||
(("area", "br", "embed", "img", "keygen", "wbr"),
|
||||
startTagVoidFormatting),
|
||||
(("param", "source", "track"), startTagParamSource),
|
||||
("input", startTagInput),
|
||||
("hr", startTagHr),
|
||||
("image", startTagImage),
|
||||
("isindex", startTagIsIndex),
|
||||
("textarea", startTagTextarea),
|
||||
("iframe", startTagIFrame),
|
||||
("noscript", startTagNoscript),
|
||||
(("noembed", "noframes"), startTagRawtext),
|
||||
("select", startTagSelect),
|
||||
(("rp", "rt"), startTagRpRt),
|
||||
(("option", "optgroup"), startTagOpt),
|
||||
(("math"), startTagMath),
|
||||
(("svg"), startTagSvg),
|
||||
(("caption", "col", "colgroup", "frame", "head",
|
||||
"tbody", "td", "tfoot", "th", "thead",
|
||||
"tr"), startTagMisplaced)
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
|
||||
endTagHandler = _utils.MethodDispatcher([
|
||||
("body", endTagBody),
|
||||
("html", endTagHtml),
|
||||
(("address", "article", "aside", "blockquote", "button", "center",
|
||||
"details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure",
|
||||
"footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre",
|
||||
"section", "summary", "ul"), endTagBlock),
|
||||
("form", endTagForm),
|
||||
("p", endTagP),
|
||||
(("dd", "dt", "li"), endTagListItem),
|
||||
(headingElements, endTagHeading),
|
||||
(("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
|
||||
"strike", "strong", "tt", "u"), endTagFormatting),
|
||||
(("applet", "marquee", "object"), endTagAppletMarqueeObject),
|
||||
("br", endTagBr),
|
||||
])
|
||||
endTagHandler.default = endTagOther
|
||||
|
||||
class TextPhase(Phase):
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
self.startTagHandler = _utils.MethodDispatcher([])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
self.endTagHandler = _utils.MethodDispatcher([
|
||||
("script", self.endTagScript)])
|
||||
self.endTagHandler.default = self.endTagOther
|
||||
__slots__ = tuple()
|
||||
|
||||
def processCharacters(self, token):
|
||||
self.tree.insertText(token["data"])
|
||||
|
@ -1683,30 +1689,15 @@ def getPhases(debug):
|
|||
self.tree.openElements.pop()
|
||||
self.parser.phase = self.parser.originalPhase
|
||||
|
||||
startTagHandler = _utils.MethodDispatcher([])
|
||||
startTagHandler.default = startTagOther
|
||||
endTagHandler = _utils.MethodDispatcher([
|
||||
("script", endTagScript)])
|
||||
endTagHandler.default = endTagOther
|
||||
|
||||
class InTablePhase(Phase):
|
||||
# http://www.whatwg.org/specs/web-apps/current-work/#in-table
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.startTagHtml),
|
||||
("caption", self.startTagCaption),
|
||||
("colgroup", self.startTagColgroup),
|
||||
("col", self.startTagCol),
|
||||
(("tbody", "tfoot", "thead"), self.startTagRowGroup),
|
||||
(("td", "th", "tr"), self.startTagImplyTbody),
|
||||
("table", self.startTagTable),
|
||||
(("style", "script"), self.startTagStyleScript),
|
||||
("input", self.startTagInput),
|
||||
("form", self.startTagForm)
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
|
||||
self.endTagHandler = _utils.MethodDispatcher([
|
||||
("table", self.endTagTable),
|
||||
(("body", "caption", "col", "colgroup", "html", "tbody", "td",
|
||||
"tfoot", "th", "thead", "tr"), self.endTagIgnore)
|
||||
])
|
||||
self.endTagHandler.default = self.endTagOther
|
||||
__slots__ = tuple()
|
||||
|
||||
# helper methods
|
||||
def clearStackToTableContext(self):
|
||||
|
@ -1828,9 +1819,32 @@ def getPhases(debug):
|
|||
self.parser.phases["inBody"].processEndTag(token)
|
||||
self.tree.insertFromTable = False
|
||||
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
("html", Phase.startTagHtml),
|
||||
("caption", startTagCaption),
|
||||
("colgroup", startTagColgroup),
|
||||
("col", startTagCol),
|
||||
(("tbody", "tfoot", "thead"), startTagRowGroup),
|
||||
(("td", "th", "tr"), startTagImplyTbody),
|
||||
("table", startTagTable),
|
||||
(("style", "script"), startTagStyleScript),
|
||||
("input", startTagInput),
|
||||
("form", startTagForm)
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
|
||||
endTagHandler = _utils.MethodDispatcher([
|
||||
("table", endTagTable),
|
||||
(("body", "caption", "col", "colgroup", "html", "tbody", "td",
|
||||
"tfoot", "th", "thead", "tr"), endTagIgnore)
|
||||
])
|
||||
endTagHandler.default = endTagOther
|
||||
|
||||
class InTableTextPhase(Phase):
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
__slots__ = ("originalPhase", "characterTokens")
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(InTableTextPhase, self).__init__(*args, **kwargs)
|
||||
self.originalPhase = None
|
||||
self.characterTokens = []
|
||||
|
||||
|
@ -1875,23 +1889,7 @@ def getPhases(debug):
|
|||
|
||||
class InCaptionPhase(Phase):
|
||||
# http://www.whatwg.org/specs/web-apps/current-work/#in-caption
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.startTagHtml),
|
||||
(("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
|
||||
"thead", "tr"), self.startTagTableElement)
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
|
||||
self.endTagHandler = _utils.MethodDispatcher([
|
||||
("caption", self.endTagCaption),
|
||||
("table", self.endTagTable),
|
||||
(("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th",
|
||||
"thead", "tr"), self.endTagIgnore)
|
||||
])
|
||||
self.endTagHandler.default = self.endTagOther
|
||||
__slots__ = tuple()
|
||||
|
||||
def ignoreEndTagCaption(self):
|
||||
return not self.tree.elementInScope("caption", variant="table")
|
||||
|
@ -1944,23 +1942,24 @@ def getPhases(debug):
|
|||
def endTagOther(self, token):
|
||||
return self.parser.phases["inBody"].processEndTag(token)
|
||||
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
("html", Phase.startTagHtml),
|
||||
(("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
|
||||
"thead", "tr"), startTagTableElement)
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
|
||||
endTagHandler = _utils.MethodDispatcher([
|
||||
("caption", endTagCaption),
|
||||
("table", endTagTable),
|
||||
(("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th",
|
||||
"thead", "tr"), endTagIgnore)
|
||||
])
|
||||
endTagHandler.default = endTagOther
|
||||
|
||||
class InColumnGroupPhase(Phase):
|
||||
# http://www.whatwg.org/specs/web-apps/current-work/#in-column
|
||||
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.startTagHtml),
|
||||
("col", self.startTagCol)
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
|
||||
self.endTagHandler = _utils.MethodDispatcher([
|
||||
("colgroup", self.endTagColgroup),
|
||||
("col", self.endTagCol)
|
||||
])
|
||||
self.endTagHandler.default = self.endTagOther
|
||||
__slots__ = tuple()
|
||||
|
||||
def ignoreEndTagColgroup(self):
|
||||
return self.tree.openElements[-1].name == "html"
|
||||
|
@ -2010,26 +2009,21 @@ def getPhases(debug):
|
|||
if not ignoreEndTag:
|
||||
return token
|
||||
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
("html", Phase.startTagHtml),
|
||||
("col", startTagCol)
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
|
||||
endTagHandler = _utils.MethodDispatcher([
|
||||
("colgroup", endTagColgroup),
|
||||
("col", endTagCol)
|
||||
])
|
||||
endTagHandler.default = endTagOther
|
||||
|
||||
class InTableBodyPhase(Phase):
|
||||
# http://www.whatwg.org/specs/web-apps/current-work/#in-table0
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.startTagHtml),
|
||||
("tr", self.startTagTr),
|
||||
(("td", "th"), self.startTagTableCell),
|
||||
(("caption", "col", "colgroup", "tbody", "tfoot", "thead"),
|
||||
self.startTagTableOther)
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
|
||||
self.endTagHandler = _utils.MethodDispatcher([
|
||||
(("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
|
||||
("table", self.endTagTable),
|
||||
(("body", "caption", "col", "colgroup", "html", "td", "th",
|
||||
"tr"), self.endTagIgnore)
|
||||
])
|
||||
self.endTagHandler.default = self.endTagOther
|
||||
__slots__ = tuple()
|
||||
|
||||
# helper methods
|
||||
def clearStackToTableBodyContext(self):
|
||||
|
@ -2108,26 +2102,26 @@ def getPhases(debug):
|
|||
def endTagOther(self, token):
|
||||
return self.parser.phases["inTable"].processEndTag(token)
|
||||
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
("html", Phase.startTagHtml),
|
||||
("tr", startTagTr),
|
||||
(("td", "th"), startTagTableCell),
|
||||
(("caption", "col", "colgroup", "tbody", "tfoot", "thead"),
|
||||
startTagTableOther)
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
|
||||
endTagHandler = _utils.MethodDispatcher([
|
||||
(("tbody", "tfoot", "thead"), endTagTableRowGroup),
|
||||
("table", endTagTable),
|
||||
(("body", "caption", "col", "colgroup", "html", "td", "th",
|
||||
"tr"), endTagIgnore)
|
||||
])
|
||||
endTagHandler.default = endTagOther
|
||||
|
||||
class InRowPhase(Phase):
|
||||
# http://www.whatwg.org/specs/web-apps/current-work/#in-row
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.startTagHtml),
|
||||
(("td", "th"), self.startTagTableCell),
|
||||
(("caption", "col", "colgroup", "tbody", "tfoot", "thead",
|
||||
"tr"), self.startTagTableOther)
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
|
||||
self.endTagHandler = _utils.MethodDispatcher([
|
||||
("tr", self.endTagTr),
|
||||
("table", self.endTagTable),
|
||||
(("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
|
||||
(("body", "caption", "col", "colgroup", "html", "td", "th"),
|
||||
self.endTagIgnore)
|
||||
])
|
||||
self.endTagHandler.default = self.endTagOther
|
||||
__slots__ = tuple()
|
||||
|
||||
# helper methods (XXX unify this with other table helper methods)
|
||||
def clearStackToTableRowContext(self):
|
||||
|
@ -2197,23 +2191,26 @@ def getPhases(debug):
|
|||
def endTagOther(self, token):
|
||||
return self.parser.phases["inTable"].processEndTag(token)
|
||||
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
("html", Phase.startTagHtml),
|
||||
(("td", "th"), startTagTableCell),
|
||||
(("caption", "col", "colgroup", "tbody", "tfoot", "thead",
|
||||
"tr"), startTagTableOther)
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
|
||||
endTagHandler = _utils.MethodDispatcher([
|
||||
("tr", endTagTr),
|
||||
("table", endTagTable),
|
||||
(("tbody", "tfoot", "thead"), endTagTableRowGroup),
|
||||
(("body", "caption", "col", "colgroup", "html", "td", "th"),
|
||||
endTagIgnore)
|
||||
])
|
||||
endTagHandler.default = endTagOther
|
||||
|
||||
class InCellPhase(Phase):
|
||||
# http://www.whatwg.org/specs/web-apps/current-work/#in-cell
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.startTagHtml),
|
||||
(("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
|
||||
"thead", "tr"), self.startTagTableOther)
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
|
||||
self.endTagHandler = _utils.MethodDispatcher([
|
||||
(("td", "th"), self.endTagTableCell),
|
||||
(("body", "caption", "col", "colgroup", "html"), self.endTagIgnore),
|
||||
(("table", "tbody", "tfoot", "thead", "tr"), self.endTagImply)
|
||||
])
|
||||
self.endTagHandler.default = self.endTagOther
|
||||
__slots__ = tuple()
|
||||
|
||||
# helper
|
||||
def closeCell(self):
|
||||
|
@ -2273,26 +2270,22 @@ def getPhases(debug):
|
|||
def endTagOther(self, token):
|
||||
return self.parser.phases["inBody"].processEndTag(token)
|
||||
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
("html", Phase.startTagHtml),
|
||||
(("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
|
||||
"thead", "tr"), startTagTableOther)
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
|
||||
endTagHandler = _utils.MethodDispatcher([
|
||||
(("td", "th"), endTagTableCell),
|
||||
(("body", "caption", "col", "colgroup", "html"), endTagIgnore),
|
||||
(("table", "tbody", "tfoot", "thead", "tr"), endTagImply)
|
||||
])
|
||||
endTagHandler.default = endTagOther
|
||||
|
||||
class InSelectPhase(Phase):
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.startTagHtml),
|
||||
("option", self.startTagOption),
|
||||
("optgroup", self.startTagOptgroup),
|
||||
("select", self.startTagSelect),
|
||||
(("input", "keygen", "textarea"), self.startTagInput),
|
||||
("script", self.startTagScript)
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
|
||||
self.endTagHandler = _utils.MethodDispatcher([
|
||||
("option", self.endTagOption),
|
||||
("optgroup", self.endTagOptgroup),
|
||||
("select", self.endTagSelect)
|
||||
])
|
||||
self.endTagHandler.default = self.endTagOther
|
||||
__slots__ = tuple()
|
||||
|
||||
# http://www.whatwg.org/specs/web-apps/current-work/#in-select
|
||||
def processEOF(self):
|
||||
|
@ -2373,21 +2366,25 @@ def getPhases(debug):
|
|||
self.parser.parseError("unexpected-end-tag-in-select",
|
||||
{"name": token["name"]})
|
||||
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
("html", Phase.startTagHtml),
|
||||
("option", startTagOption),
|
||||
("optgroup", startTagOptgroup),
|
||||
("select", startTagSelect),
|
||||
(("input", "keygen", "textarea"), startTagInput),
|
||||
("script", startTagScript)
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
|
||||
endTagHandler = _utils.MethodDispatcher([
|
||||
("option", endTagOption),
|
||||
("optgroup", endTagOptgroup),
|
||||
("select", endTagSelect)
|
||||
])
|
||||
endTagHandler.default = endTagOther
|
||||
|
||||
class InSelectInTablePhase(Phase):
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
(("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
|
||||
self.startTagTable)
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
|
||||
self.endTagHandler = _utils.MethodDispatcher([
|
||||
(("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
|
||||
self.endTagTable)
|
||||
])
|
||||
self.endTagHandler.default = self.endTagOther
|
||||
__slots__ = tuple()
|
||||
|
||||
def processEOF(self):
|
||||
self.parser.phases["inSelect"].processEOF()
|
||||
|
@ -2412,7 +2409,21 @@ def getPhases(debug):
|
|||
def endTagOther(self, token):
|
||||
return self.parser.phases["inSelect"].processEndTag(token)
|
||||
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
(("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
|
||||
startTagTable)
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
|
||||
endTagHandler = _utils.MethodDispatcher([
|
||||
(("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
|
||||
endTagTable)
|
||||
])
|
||||
endTagHandler.default = endTagOther
|
||||
|
||||
class InForeignContentPhase(Phase):
|
||||
__slots__ = tuple()
|
||||
|
||||
breakoutElements = frozenset(["b", "big", "blockquote", "body", "br",
|
||||
"center", "code", "dd", "div", "dl", "dt",
|
||||
"em", "embed", "h1", "h2", "h3",
|
||||
|
@ -2422,9 +2433,6 @@ def getPhases(debug):
|
|||
"span", "strong", "strike", "sub", "sup",
|
||||
"table", "tt", "u", "ul", "var"])
|
||||
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
|
||||
def adjustSVGTagNames(self, token):
|
||||
replacements = {"altglyph": "altGlyph",
|
||||
"altglyphdef": "altGlyphDef",
|
||||
|
@ -2478,7 +2486,7 @@ def getPhases(debug):
|
|||
currentNode = self.tree.openElements[-1]
|
||||
if (token["name"] in self.breakoutElements or
|
||||
(token["name"] == "font" and
|
||||
set(token["data"].keys()) & set(["color", "face", "size"]))):
|
||||
set(token["data"].keys()) & {"color", "face", "size"})):
|
||||
self.parser.parseError("unexpected-html-element-in-foreign-content",
|
||||
{"name": token["name"]})
|
||||
while (self.tree.openElements[-1].namespace !=
|
||||
|
@ -2528,16 +2536,7 @@ def getPhases(debug):
|
|||
return new_token
|
||||
|
||||
class AfterBodyPhase(Phase):
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.startTagHtml)
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
|
||||
self.endTagHandler = _utils.MethodDispatcher([("html", self.endTagHtml)])
|
||||
self.endTagHandler.default = self.endTagOther
|
||||
__slots__ = tuple()
|
||||
|
||||
def processEOF(self):
|
||||
# Stop parsing
|
||||
|
@ -2574,23 +2573,17 @@ def getPhases(debug):
|
|||
self.parser.phase = self.parser.phases["inBody"]
|
||||
return token
|
||||
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
("html", startTagHtml)
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
|
||||
endTagHandler = _utils.MethodDispatcher([("html", endTagHtml)])
|
||||
endTagHandler.default = endTagOther
|
||||
|
||||
class InFramesetPhase(Phase):
|
||||
# http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.startTagHtml),
|
||||
("frameset", self.startTagFrameset),
|
||||
("frame", self.startTagFrame),
|
||||
("noframes", self.startTagNoframes)
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
|
||||
self.endTagHandler = _utils.MethodDispatcher([
|
||||
("frameset", self.endTagFrameset)
|
||||
])
|
||||
self.endTagHandler.default = self.endTagOther
|
||||
__slots__ = tuple()
|
||||
|
||||
def processEOF(self):
|
||||
if self.tree.openElements[-1].name != "html":
|
||||
|
@ -2631,21 +2624,22 @@ def getPhases(debug):
|
|||
self.parser.parseError("unexpected-end-tag-in-frameset",
|
||||
{"name": token["name"]})
|
||||
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
("html", Phase.startTagHtml),
|
||||
("frameset", startTagFrameset),
|
||||
("frame", startTagFrame),
|
||||
("noframes", startTagNoframes)
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
|
||||
endTagHandler = _utils.MethodDispatcher([
|
||||
("frameset", endTagFrameset)
|
||||
])
|
||||
endTagHandler.default = endTagOther
|
||||
|
||||
class AfterFramesetPhase(Phase):
|
||||
# http://www.whatwg.org/specs/web-apps/current-work/#after3
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.startTagHtml),
|
||||
("noframes", self.startTagNoframes)
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
|
||||
self.endTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.endTagHtml)
|
||||
])
|
||||
self.endTagHandler.default = self.endTagOther
|
||||
__slots__ = tuple()
|
||||
|
||||
def processEOF(self):
|
||||
# Stop parsing
|
||||
|
@ -2668,14 +2662,19 @@ def getPhases(debug):
|
|||
self.parser.parseError("unexpected-end-tag-after-frameset",
|
||||
{"name": token["name"]})
|
||||
|
||||
class AfterAfterBodyPhase(Phase):
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
("html", Phase.startTagHtml),
|
||||
("noframes", startTagNoframes)
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.startTagHtml)
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
endTagHandler = _utils.MethodDispatcher([
|
||||
("html", endTagHtml)
|
||||
])
|
||||
endTagHandler.default = endTagOther
|
||||
|
||||
class AfterAfterBodyPhase(Phase):
|
||||
__slots__ = tuple()
|
||||
|
||||
def processEOF(self):
|
||||
pass
|
||||
|
@ -2706,15 +2705,13 @@ def getPhases(debug):
|
|||
self.parser.phase = self.parser.phases["inBody"]
|
||||
return token
|
||||
|
||||
class AfterAfterFramesetPhase(Phase):
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
("html", startTagHtml)
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.startTagHtml),
|
||||
("noframes", self.startTagNoFrames)
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
class AfterAfterFramesetPhase(Phase):
|
||||
__slots__ = tuple()
|
||||
|
||||
def processEOF(self):
|
||||
pass
|
||||
|
@ -2741,6 +2738,13 @@ def getPhases(debug):
|
|||
def processEndTag(self, token):
|
||||
self.parser.parseError("expected-eof-but-got-end-tag",
|
||||
{"name": token["name"]})
|
||||
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
("html", startTagHtml),
|
||||
("noframes", startTagNoFrames)
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
|
||||
# pylint:enable=unused-argument
|
||||
|
||||
return {
|
||||
|
@ -2774,8 +2778,8 @@ def getPhases(debug):
|
|||
def adjust_attributes(token, replacements):
|
||||
needs_adjustment = viewkeys(token['data']) & viewkeys(replacements)
|
||||
if needs_adjustment:
|
||||
token['data'] = OrderedDict((replacements.get(k, k), v)
|
||||
for k, v in token['data'].items())
|
||||
token['data'] = type(token['data'])((replacements.get(k, k), v)
|
||||
for k, v in token['data'].items())
|
||||
|
||||
|
||||
def impliedTagToken(name, type="EndTag", attributes=None,
|
||||
|
|
|
@ -274,7 +274,7 @@ class HTMLSerializer(object):
|
|||
if token["systemId"]:
|
||||
if token["systemId"].find('"') >= 0:
|
||||
if token["systemId"].find("'") >= 0:
|
||||
self.serializeError("System identifer contains both single and double quote characters")
|
||||
self.serializeError("System identifier contains both single and double quote characters")
|
||||
quote_char = "'"
|
||||
else:
|
||||
quote_char = '"'
|
||||
|
|
433
libs/html5lib/tests/sanitizer-testdata/tests1.dat
Normal file
433
libs/html5lib/tests/sanitizer-testdata/tests1.dat
Normal file
|
@ -0,0 +1,433 @@
|
|||
[
|
||||
{
|
||||
"name": "IE_Comments",
|
||||
"input": "<!--[if gte IE 4]><script>alert('XSS');</script><![endif]-->",
|
||||
"output": ""
|
||||
},
|
||||
|
||||
{
|
||||
"name": "IE_Comments_2",
|
||||
"input": "<![if !IE 5]><script>alert('XSS');</script><![endif]>",
|
||||
"output": "<script>alert('XSS');</script>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "allow_colons_in_path_component",
|
||||
"input": "<a href=\"./this:that\">foo</a>",
|
||||
"output": "<a href='./this:that'>foo</a>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "background_attribute",
|
||||
"input": "<div background=\"javascript:alert('XSS')\"></div>",
|
||||
"output": "<div></div>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "bgsound",
|
||||
"input": "<bgsound src=\"javascript:alert('XSS');\" />",
|
||||
"output": "<bgsound src=\"javascript:alert('XSS');\"></bgsound>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "div_background_image_unicode_encoded",
|
||||
"input": "<div style=\"background-image:\u00a5\u00a2\u006C\u0028'\u006a\u0061\u00a6\u0061\u00a3\u0063\u00a2\u0069\u00a0\u00a4\u003a\u0061\u006c\u0065\u00a2\u00a4\u0028.1027\u0058.1053\u0053\u0027\u0029'\u0029\">foo</div>",
|
||||
"output": "<div style=''>foo</div>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "div_expression",
|
||||
"input": "<div style=\"width: expression(alert('XSS'));\">foo</div>",
|
||||
"output": "<div style=''>foo</div>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "double_open_angle_brackets",
|
||||
"input": "<img src=http://ha.ckers.org/scriptlet.html <",
|
||||
"output": ""
|
||||
},
|
||||
|
||||
{
|
||||
"name": "double_open_angle_brackets_2",
|
||||
"input": "<script src=http://ha.ckers.org/scriptlet.html <",
|
||||
"output": ""
|
||||
},
|
||||
|
||||
{
|
||||
"name": "grave_accents",
|
||||
"input": "<img src=`javascript:alert('XSS')` />",
|
||||
"output": "<img/>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "img_dynsrc_lowsrc",
|
||||
"input": "<img dynsrc=\"javascript:alert('XSS')\" />",
|
||||
"output": "<img/>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "img_vbscript",
|
||||
"input": "<img src='vbscript:msgbox(\"XSS\")' />",
|
||||
"output": "<img/>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "input_image",
|
||||
"input": "<input type=\"image\" src=\"javascript:alert('XSS');\" />",
|
||||
"output": "<input type='image'/>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "link_stylesheets",
|
||||
"input": "<link rel=\"stylesheet\" href=\"javascript:alert('XSS');\" />",
|
||||
"output": "<link href=\"javascript:alert('XSS');\" rel=\"stylesheet\">"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "link_stylesheets_2",
|
||||
"input": "<link rel=\"stylesheet\" href=\"http://ha.ckers.org/xss.css\" />",
|
||||
"output": "<link href=\"http://ha.ckers.org/xss.css\" rel=\"stylesheet\">"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "list_style_image",
|
||||
"input": "<li style=\"list-style-image: url(javascript:alert('XSS'))\">foo</li>",
|
||||
"output": "<li style=''>foo</li>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "no_closing_script_tags",
|
||||
"input": "<script src=http://ha.ckers.org/xss.js?<b>",
|
||||
"output": "<script src=\"http://ha.ckers.org/xss.js?&lt;b\"></script>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "non_alpha_non_digit",
|
||||
"input": "<script/XSS src=\"http://ha.ckers.org/xss.js\"></script>",
|
||||
"output": "<script src=\"http://ha.ckers.org/xss.js\" xss=\"\"></script>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "non_alpha_non_digit_2",
|
||||
"input": "<a onclick!\\#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>foo</a>",
|
||||
"output": "<a>foo</a>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "non_alpha_non_digit_3",
|
||||
"input": "<img/src=\"http://ha.ckers.org/xss.js\"/>",
|
||||
"output": "<img src='http://ha.ckers.org/xss.js'/>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "non_alpha_non_digit_II",
|
||||
"input": "<a href!\\#$%&()*~+-_.,:;?@[/|]^`=alert('XSS')>foo</a>",
|
||||
"output": "<a>foo</a>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "non_alpha_non_digit_III",
|
||||
"input": "<a/href=\"javascript:alert('XSS');\">foo</a>",
|
||||
"output": "<a>foo</a>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "platypus",
|
||||
"input": "<a href=\"http://www.ragingplatypus.com/\" style=\"display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;\">never trust your upstream platypus</a>",
|
||||
"output": "<a href='http://www.ragingplatypus.com/' style='display: block; width: 100%; height: 100%; background-color: black; background-x: center; background-y: center;'>never trust your upstream platypus</a>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "protocol_resolution_in_script_tag",
|
||||
"input": "<script src=//ha.ckers.org/.j></script>",
|
||||
"output": "<script src=\"//ha.ckers.org/.j\"></script>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_allow_anchors",
|
||||
"input": "<a href='foo' onclick='bar'><script>baz</script></a>",
|
||||
"output": "<a href='foo'><script>baz</script></a>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_allow_image_alt_attribute",
|
||||
"input": "<img alt='foo' onclick='bar' />",
|
||||
"output": "<img alt='foo'/>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_allow_image_height_attribute",
|
||||
"input": "<img height='foo' onclick='bar' />",
|
||||
"output": "<img height='foo'/>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_allow_image_src_attribute",
|
||||
"input": "<img src='foo' onclick='bar' />",
|
||||
"output": "<img src='foo'/>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_allow_image_width_attribute",
|
||||
"input": "<img width='foo' onclick='bar' />",
|
||||
"output": "<img width='foo'/>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_handle_blank_text",
|
||||
"input": "",
|
||||
"output": ""
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_handle_malformed_image_tags",
|
||||
"input": "<img \"\"\"><script>alert(\"XSS\")</script>\">",
|
||||
"output": "<img/><script>alert(\"XSS\")</script>\">"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_handle_non_html",
|
||||
"input": "abc",
|
||||
"output": "abc"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_not_fall_for_ridiculous_hack",
|
||||
"input": "<img\nsrc\n=\n\"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n\"\n />",
|
||||
"output": "<img/>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_not_fall_for_xss_image_hack_0",
|
||||
"input": "<img src=\"javascript:alert('XSS');\" />",
|
||||
"output": "<img/>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_not_fall_for_xss_image_hack_1",
|
||||
"input": "<img src=javascript:alert('XSS') />",
|
||||
"output": "<img/>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_not_fall_for_xss_image_hack_10",
|
||||
"input": "<img src=\"jav
ascript:alert('XSS');\" />",
|
||||
"output": "<img/>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_not_fall_for_xss_image_hack_11",
|
||||
"input": "<img src=\"jav
ascript:alert('XSS');\" />",
|
||||
"output": "<img/>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_not_fall_for_xss_image_hack_12",
|
||||
"input": "<img src=\"  javascript:alert('XSS');\" />",
|
||||
"output": "<img/>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_not_fall_for_xss_image_hack_13",
|
||||
"input": "<img src=\" javascript:alert('XSS');\" />",
|
||||
"output": "<img/>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_not_fall_for_xss_image_hack_14",
|
||||
"input": "<img src=\" javascript:alert('XSS');\" />",
|
||||
"output": "<img/>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_not_fall_for_xss_image_hack_2",
|
||||
"input": "<img src=\"JaVaScRiPt:alert('XSS')\" />",
|
||||
"output": "<img/>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_not_fall_for_xss_image_hack_3",
|
||||
"input": "<img src='javascript:alert("XSS")' />",
|
||||
"output": "<img/>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_not_fall_for_xss_image_hack_4",
|
||||
"input": "<img src='javascript:alert(String.fromCharCode(88,83,83))' />",
|
||||
"output": "<img/>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_not_fall_for_xss_image_hack_5",
|
||||
"input": "<img src='javascript:alert('XSS')' />",
|
||||
"output": "<img/>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_not_fall_for_xss_image_hack_6",
|
||||
"input": "<img src='javascript:alert('XSS')' />",
|
||||
"output": "<img/>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_not_fall_for_xss_image_hack_7",
|
||||
"input": "<img src='javascript:alert('XSS')' />",
|
||||
"output": "<img/>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_not_fall_for_xss_image_hack_8",
|
||||
"input": "<img src=\"jav\tascript:alert('XSS');\" />",
|
||||
"output": "<img/>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_not_fall_for_xss_image_hack_9",
|
||||
"input": "<img src=\"jav	ascript:alert('XSS');\" />",
|
||||
"output": "<img/>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_sanitize_half_open_scripts",
|
||||
"input": "<img src=\"javascript:alert('XSS')\"",
|
||||
"output": ""
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_sanitize_invalid_script_tag",
|
||||
"input": "<script/XSS SRC=\"http://ha.ckers.org/xss.js\"></script>",
|
||||
"output": "<script src=\"http://ha.ckers.org/xss.js\" xss=\"\"></script>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_sanitize_script_tag_with_multiple_open_brackets",
|
||||
"input": "<<script>alert(\"XSS\");//<</script>",
|
||||
"output": "<<script>alert(\"XSS\");//<</script>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_sanitize_script_tag_with_multiple_open_brackets_2",
|
||||
"input": "<iframe src=http://ha.ckers.org/scriptlet.html\n<",
|
||||
"output": ""
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_sanitize_tag_broken_up_by_null",
|
||||
"input": "<scr\u0000ipt>alert(\"XSS\")</scr\u0000ipt>",
|
||||
"output": "<scr\ufffdipt>alert(\"XSS\")</scr\ufffdipt>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_sanitize_unclosed_script",
|
||||
"input": "<script src=http://ha.ckers.org/xss.js?<b>",
|
||||
"output": "<script src=\"http://ha.ckers.org/xss.js?&lt;b\"></script>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_strip_href_attribute_in_a_with_bad_protocols",
|
||||
"input": "<a href=\"javascript:XSS\" title=\"1\">boo</a>",
|
||||
"output": "<a title='1'>boo</a>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_strip_href_attribute_in_a_with_bad_protocols_and_whitespace",
|
||||
"input": "<a href=\" javascript:XSS\" title=\"1\">boo</a>",
|
||||
"output": "<a title='1'>boo</a>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_strip_src_attribute_in_img_with_bad_protocols",
|
||||
"input": "<img src=\"javascript:XSS\" title=\"1\">boo</img>",
|
||||
"output": "<img title='1'/>boo"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "should_strip_src_attribute_in_img_with_bad_protocols_and_whitespace",
|
||||
"input": "<img src=\" javascript:XSS\" title=\"1\">boo</img>",
|
||||
"output": "<img title='1'/>boo"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "xml_base",
|
||||
"input": "<div xml:base=\"javascript:alert('XSS');//\">foo</div>",
|
||||
"output": "<div>foo</div>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "xul",
|
||||
"input": "<p style=\"-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss')\">fubar</p>",
|
||||
"output": "<p style=''>fubar</p>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "quotes_in_attributes",
|
||||
"input": "<img src='foo' title='\"foo\" bar' />",
|
||||
"output": "<img src='foo' title='\"foo\" bar'/>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "uri_refs_in_svg_attributes",
|
||||
"input": "<svg><rect fill='url(#foo)' />",
|
||||
"output": "<svg><rect fill='url(#foo)'></rect></svg>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "absolute_uri_refs_in_svg_attributes",
|
||||
"input": "<svg><rect fill='url(http://bad.com/) #fff' />",
|
||||
"output": "<svg><rect fill=' #fff'></rect></svg>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "uri_ref_with_space_in svg_attribute",
|
||||
"input": "<svg><rect fill='url(\n#foo)' />",
|
||||
"output": "<svg><rect fill='url(\n#foo)'></rect></svg>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "absolute_uri_ref_with_space_in svg_attribute",
|
||||
"input": "<svg><rect fill=\"url(\nhttp://bad.com/)\" />",
|
||||
"output": "<svg><rect fill=' '></rect></svg>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "allow_html5_image_tag",
|
||||
"input": "<image src='foo' />",
|
||||
"output": "<img src='foo'/>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "style_attr_end_with_nothing",
|
||||
"input": "<div style=\"color: blue\" />",
|
||||
"output": "<div style='color: blue;'></div>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "style_attr_end_with_space",
|
||||
"input": "<div style=\"color: blue \" />",
|
||||
"output": "<div style='color: blue ;'></div>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "style_attr_end_with_semicolon",
|
||||
"input": "<div style=\"color: blue;\" />",
|
||||
"output": "<div style='color: blue;'></div>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "style_attr_end_with_semicolon_space",
|
||||
"input": "<div style=\"color: blue; \" />",
|
||||
"output": "<div style='color: blue;'></div>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "attributes_with_embedded_quotes",
|
||||
"input": "<img src=doesntexist.jpg\"'onerror=\"alert(1) />",
|
||||
"output": "<img src='doesntexist.jpg\"'onerror=\"alert(1)'/>"
|
||||
},
|
||||
|
||||
{
|
||||
"name": "attributes_with_embedded_quotes_II",
|
||||
"input": "<img src=notthere.jpg\"\"onerror=\"alert(2) />",
|
||||
"output": "<img src='notthere.jpg\"\"onerror=\"alert(2)'/>"
|
||||
}
|
||||
]
|
|
@ -27,14 +27,15 @@ class SanitizerTest(pytest.Item):
|
|||
expected = self.test["output"]
|
||||
|
||||
parsed = parseFragment(input)
|
||||
serialized = serialize(parsed,
|
||||
sanitize=True,
|
||||
omit_optional_tags=False,
|
||||
use_trailing_solidus=True,
|
||||
space_before_trailing_solidus=False,
|
||||
quote_attr_values="always",
|
||||
quote_char="'",
|
||||
alphabetical_attributes=True)
|
||||
with pytest.deprecated_call():
|
||||
serialized = serialize(parsed,
|
||||
sanitize=True,
|
||||
omit_optional_tags=False,
|
||||
use_trailing_solidus=True,
|
||||
space_before_trailing_solidus=False,
|
||||
quote_attr_values="always",
|
||||
quote_char="'",
|
||||
alphabetical_attributes=True)
|
||||
errorMsg = "\n".join(["\n\nInput:", input,
|
||||
"\nExpected:", expected,
|
||||
"\nReceived:", serialized])
|
||||
|
|
395
libs/html5lib/tests/serializer-testdata/core.test
Normal file
395
libs/html5lib/tests/serializer-testdata/core.test
Normal file
|
@ -0,0 +1,395 @@
|
|||
{
|
||||
"tests": [
|
||||
{
|
||||
"expected": [
|
||||
"<span title='test \"with\" &quot;'>"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"span",
|
||||
[
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "title",
|
||||
"value": "test \"with\" ""
|
||||
}
|
||||
]
|
||||
]
|
||||
],
|
||||
"description": "proper attribute value escaping"
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<span title=foo>"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"span",
|
||||
[
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "title",
|
||||
"value": "foo"
|
||||
}
|
||||
]
|
||||
]
|
||||
],
|
||||
"description": "proper attribute value non-quoting"
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<span title=\"foo<bar\">"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"span",
|
||||
[
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "title",
|
||||
"value": "foo<bar"
|
||||
}
|
||||
]
|
||||
]
|
||||
],
|
||||
"description": "proper attribute value non-quoting (with <)"
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<span title=\"foo=bar\">"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"span",
|
||||
[
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "title",
|
||||
"value": "foo=bar"
|
||||
}
|
||||
]
|
||||
]
|
||||
],
|
||||
"description": "proper attribute value quoting (with =)"
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<span title=\"foo>bar\">"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"span",
|
||||
[
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "title",
|
||||
"value": "foo>bar"
|
||||
}
|
||||
]
|
||||
]
|
||||
],
|
||||
"description": "proper attribute value quoting (with >)"
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<span title='foo\"bar'>"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"span",
|
||||
[
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "title",
|
||||
"value": "foo\"bar"
|
||||
}
|
||||
]
|
||||
]
|
||||
],
|
||||
"description": "proper attribute value quoting (with \")"
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<span title=\"foo'bar\">"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"span",
|
||||
[
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "title",
|
||||
"value": "foo'bar"
|
||||
}
|
||||
]
|
||||
]
|
||||
],
|
||||
"description": "proper attribute value quoting (with ')"
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<span title=\"foo'bar"baz\">"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"span",
|
||||
[
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "title",
|
||||
"value": "foo'bar\"baz"
|
||||
}
|
||||
]
|
||||
]
|
||||
],
|
||||
"description": "proper attribute value quoting (with both \" and ')"
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<span title=\"foo bar\">"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"span",
|
||||
[
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "title",
|
||||
"value": "foo bar"
|
||||
}
|
||||
]
|
||||
]
|
||||
],
|
||||
"description": "proper attribute value quoting (with space)"
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<span title=\"foo\tbar\">"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"span",
|
||||
[
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "title",
|
||||
"value": "foo\tbar"
|
||||
}
|
||||
]
|
||||
]
|
||||
],
|
||||
"description": "proper attribute value quoting (with tab)"
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<span title=\"foo\nbar\">"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"span",
|
||||
[
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "title",
|
||||
"value": "foo\nbar"
|
||||
}
|
||||
]
|
||||
]
|
||||
],
|
||||
"description": "proper attribute value quoting (with LF)"
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<span title=\"foo\rbar\">"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"span",
|
||||
[
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "title",
|
||||
"value": "foo\rbar"
|
||||
}
|
||||
]
|
||||
]
|
||||
],
|
||||
"description": "proper attribute value quoting (with CR)"
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<span title=\"foo\u000bbar\">"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"span",
|
||||
[
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "title",
|
||||
"value": "foo\u000bbar"
|
||||
}
|
||||
]
|
||||
]
|
||||
],
|
||||
"description": "proper attribute value non-quoting (with linetab)"
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<span title=\"foo\fbar\">"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"span",
|
||||
[
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "title",
|
||||
"value": "foo\fbar"
|
||||
}
|
||||
]
|
||||
]
|
||||
],
|
||||
"description": "proper attribute value quoting (with form feed)"
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<img>"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"EmptyTag",
|
||||
"img",
|
||||
{}
|
||||
]
|
||||
],
|
||||
"description": "void element (as EmptyTag token)"
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<!DOCTYPE foo>"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"Doctype",
|
||||
"foo"
|
||||
]
|
||||
],
|
||||
"description": "doctype in error"
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"a<b>c&d"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"Characters",
|
||||
"a<b>c&d"
|
||||
]
|
||||
],
|
||||
"description": "character data",
|
||||
"options": {
|
||||
"encoding": "utf-8"
|
||||
}
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<script>a<b>c&d"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"script",
|
||||
{}
|
||||
],
|
||||
[
|
||||
"Characters",
|
||||
"a<b>c&d"
|
||||
]
|
||||
],
|
||||
"description": "rcdata"
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<!DOCTYPE HTML>"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"Doctype",
|
||||
"HTML"
|
||||
]
|
||||
],
|
||||
"description": "doctype"
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"Doctype",
|
||||
"HTML",
|
||||
"-//W3C//DTD HTML 4.01//EN",
|
||||
"http://www.w3.org/TR/html4/strict.dtd"
|
||||
]
|
||||
],
|
||||
"description": "HTML 4.01 DOCTYPE"
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\">"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"Doctype",
|
||||
"HTML",
|
||||
"-//W3C//DTD HTML 4.01//EN"
|
||||
]
|
||||
],
|
||||
"description": "HTML 4.01 DOCTYPE without system identifier"
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<!DOCTYPE html SYSTEM \"http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd\">"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"Doctype",
|
||||
"html",
|
||||
"",
|
||||
"http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"
|
||||
]
|
||||
],
|
||||
"description": "IBM DOCTYPE without public identifier"
|
||||
}
|
||||
]
|
||||
}
|
350
libs/html5lib/tests/serializer-testdata/injectmeta.test
Normal file
350
libs/html5lib/tests/serializer-testdata/injectmeta.test
Normal file
|
@ -0,0 +1,350 @@
|
|||
{
|
||||
"tests": [
|
||||
{
|
||||
"expected": [
|
||||
""
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"head",
|
||||
{}
|
||||
],
|
||||
[
|
||||
"EndTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"head"
|
||||
]
|
||||
],
|
||||
"description": "no encoding",
|
||||
"options": {
|
||||
"inject_meta_charset": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<meta charset=utf-8>"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"head",
|
||||
{}
|
||||
],
|
||||
[
|
||||
"EndTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"head"
|
||||
]
|
||||
],
|
||||
"description": "empytag head",
|
||||
"options": {
|
||||
"encoding": "utf-8",
|
||||
"inject_meta_charset": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<meta charset=utf-8><title>foo</title>"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"head",
|
||||
{}
|
||||
],
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"title",
|
||||
{}
|
||||
],
|
||||
[
|
||||
"Characters",
|
||||
"foo"
|
||||
],
|
||||
[
|
||||
"EndTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"title"
|
||||
],
|
||||
[
|
||||
"EndTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"head"
|
||||
]
|
||||
],
|
||||
"description": "head w/title",
|
||||
"options": {
|
||||
"encoding": "utf-8",
|
||||
"inject_meta_charset": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<meta charset=utf-8>"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"head",
|
||||
{}
|
||||
],
|
||||
[
|
||||
"EmptyTag",
|
||||
"meta",
|
||||
[
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "charset",
|
||||
"value": "ascii"
|
||||
}
|
||||
]
|
||||
],
|
||||
[
|
||||
"EndTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"head"
|
||||
]
|
||||
],
|
||||
"description": "head w/meta-charset",
|
||||
"options": {
|
||||
"encoding": "utf-8",
|
||||
"inject_meta_charset": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<meta charset=utf-8><meta charset=utf-8>",
|
||||
"<head><meta charset=utf-8><meta charset=ascii>"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"head",
|
||||
{}
|
||||
],
|
||||
[
|
||||
"EmptyTag",
|
||||
"meta",
|
||||
[
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "charset",
|
||||
"value": "ascii"
|
||||
}
|
||||
]
|
||||
],
|
||||
[
|
||||
"EmptyTag",
|
||||
"meta",
|
||||
[
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "charset",
|
||||
"value": "ascii"
|
||||
}
|
||||
]
|
||||
],
|
||||
[
|
||||
"EndTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"head"
|
||||
]
|
||||
],
|
||||
"description": "head w/ two meta-charset",
|
||||
"options": {
|
||||
"encoding": "utf-8",
|
||||
"inject_meta_charset": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<meta charset=utf-8><meta content=noindex name=robots>"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"head",
|
||||
{}
|
||||
],
|
||||
[
|
||||
"EmptyTag",
|
||||
"meta",
|
||||
[
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "name",
|
||||
"value": "robots"
|
||||
},
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "content",
|
||||
"value": "noindex"
|
||||
}
|
||||
]
|
||||
],
|
||||
[
|
||||
"EndTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"head"
|
||||
]
|
||||
],
|
||||
"description": "head w/robots",
|
||||
"options": {
|
||||
"encoding": "utf-8",
|
||||
"inject_meta_charset": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<meta content=noindex name=robots><meta charset=utf-8>"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"head",
|
||||
{}
|
||||
],
|
||||
[
|
||||
"EmptyTag",
|
||||
"meta",
|
||||
[
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "name",
|
||||
"value": "robots"
|
||||
},
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "content",
|
||||
"value": "noindex"
|
||||
}
|
||||
]
|
||||
],
|
||||
[
|
||||
"EmptyTag",
|
||||
"meta",
|
||||
[
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "charset",
|
||||
"value": "ascii"
|
||||
}
|
||||
]
|
||||
],
|
||||
[
|
||||
"EndTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"head"
|
||||
]
|
||||
],
|
||||
"description": "head w/robots & charset",
|
||||
"options": {
|
||||
"encoding": "utf-8",
|
||||
"inject_meta_charset": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<meta content=\"text/html; charset=utf-8\" http-equiv=content-type>"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"head",
|
||||
{}
|
||||
],
|
||||
[
|
||||
"EmptyTag",
|
||||
"meta",
|
||||
[
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "http-equiv",
|
||||
"value": "content-type"
|
||||
},
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "content",
|
||||
"value": "text/html; charset=ascii"
|
||||
}
|
||||
]
|
||||
],
|
||||
[
|
||||
"EndTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"head"
|
||||
]
|
||||
],
|
||||
"description": "head w/ charset in http-equiv content-type",
|
||||
"options": {
|
||||
"encoding": "utf-8",
|
||||
"inject_meta_charset": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<meta content=noindex name=robots><meta content=\"text/html; charset=utf-8\" http-equiv=content-type>"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"head",
|
||||
{}
|
||||
],
|
||||
[
|
||||
"EmptyTag",
|
||||
"meta",
|
||||
[
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "name",
|
||||
"value": "robots"
|
||||
},
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "content",
|
||||
"value": "noindex"
|
||||
}
|
||||
]
|
||||
],
|
||||
[
|
||||
"EmptyTag",
|
||||
"meta",
|
||||
[
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "http-equiv",
|
||||
"value": "content-type"
|
||||
},
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "content",
|
||||
"value": "text/html; charset=ascii"
|
||||
}
|
||||
]
|
||||
],
|
||||
[
|
||||
"EndTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"head"
|
||||
]
|
||||
],
|
||||
"description": "head w/robots & charset in http-equiv content-type",
|
||||
"options": {
|
||||
"encoding": "utf-8",
|
||||
"inject_meta_charset": true
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
3254
libs/html5lib/tests/serializer-testdata/optionaltags.test
Normal file
3254
libs/html5lib/tests/serializer-testdata/optionaltags.test
Normal file
File diff suppressed because it is too large
Load diff
334
libs/html5lib/tests/serializer-testdata/options.test
Normal file
334
libs/html5lib/tests/serializer-testdata/options.test
Normal file
|
@ -0,0 +1,334 @@
|
|||
{
|
||||
"tests": [
|
||||
{
|
||||
"expected": [
|
||||
"<span title='test 'with' quote_char'>"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"span",
|
||||
[
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "title",
|
||||
"value": "test 'with' quote_char"
|
||||
}
|
||||
]
|
||||
]
|
||||
],
|
||||
"description": "quote_char=\"'\"",
|
||||
"options": {
|
||||
"quote_char": "'"
|
||||
}
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<button disabled>"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"button",
|
||||
[
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "disabled",
|
||||
"value": "disabled"
|
||||
}
|
||||
]
|
||||
]
|
||||
],
|
||||
"description": "quote_attr_values='always'",
|
||||
"options": {
|
||||
"quote_attr_values": "always"
|
||||
}
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<div itemscope>"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"div",
|
||||
[
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "itemscope",
|
||||
"value": "itemscope"
|
||||
}
|
||||
]
|
||||
]
|
||||
],
|
||||
"description": "quote_attr_values='always' with itemscope",
|
||||
"options": {
|
||||
"quote_attr_values": "always"
|
||||
}
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<div irrelevant>"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"div",
|
||||
[
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "irrelevant",
|
||||
"value": "irrelevant"
|
||||
}
|
||||
]
|
||||
]
|
||||
],
|
||||
"description": "quote_attr_values='always' with irrelevant",
|
||||
"options": {
|
||||
"quote_attr_values": "always"
|
||||
}
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<div class=\"foo\">"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"div",
|
||||
[
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "class",
|
||||
"value": "foo"
|
||||
}
|
||||
]
|
||||
]
|
||||
],
|
||||
"description": "non-minimized quote_attr_values='always'",
|
||||
"options": {
|
||||
"quote_attr_values": "always"
|
||||
}
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<div class=foo>"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"div",
|
||||
[
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "class",
|
||||
"value": "foo"
|
||||
}
|
||||
]
|
||||
]
|
||||
],
|
||||
"description": "non-minimized quote_attr_values='legacy'",
|
||||
"options": {
|
||||
"quote_attr_values": "legacy"
|
||||
}
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<div class=foo>"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"div",
|
||||
[
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "class",
|
||||
"value": "foo"
|
||||
}
|
||||
]
|
||||
]
|
||||
],
|
||||
"description": "non-minimized quote_attr_values='spec'",
|
||||
"options": {
|
||||
"quote_attr_values": "spec"
|
||||
}
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<img />"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"EmptyTag",
|
||||
"img",
|
||||
{}
|
||||
]
|
||||
],
|
||||
"description": "use_trailing_solidus=true with void element",
|
||||
"options": {
|
||||
"use_trailing_solidus": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<div>"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"div",
|
||||
{}
|
||||
]
|
||||
],
|
||||
"description": "use_trailing_solidus=true with non-void element",
|
||||
"options": {
|
||||
"use_trailing_solidus": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<div itemscope=itemscope>"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"div",
|
||||
[
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "itemscope",
|
||||
"value": "itemscope"
|
||||
}
|
||||
]
|
||||
]
|
||||
],
|
||||
"description": "minimize_boolean_attributes=false",
|
||||
"options": {
|
||||
"minimize_boolean_attributes": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<div irrelevant=irrelevant>"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"div",
|
||||
[
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "irrelevant",
|
||||
"value": "irrelevant"
|
||||
}
|
||||
]
|
||||
]
|
||||
],
|
||||
"description": "minimize_boolean_attributes=false",
|
||||
"options": {
|
||||
"minimize_boolean_attributes": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<div itemscope=\"\">"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"div",
|
||||
[
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "itemscope",
|
||||
"value": ""
|
||||
}
|
||||
]
|
||||
]
|
||||
],
|
||||
"description": "minimize_boolean_attributes=false with empty value",
|
||||
"options": {
|
||||
"minimize_boolean_attributes": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<div irrelevant=\"\">"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"div",
|
||||
[
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "irrelevant",
|
||||
"value": ""
|
||||
}
|
||||
]
|
||||
]
|
||||
],
|
||||
"description": "minimize_boolean_attributes=false with empty value",
|
||||
"options": {
|
||||
"minimize_boolean_attributes": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<a title=\"a<b>c&d\">"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"a",
|
||||
[
|
||||
{
|
||||
"namespace": null,
|
||||
"name": "title",
|
||||
"value": "a<b>c&d"
|
||||
}
|
||||
]
|
||||
]
|
||||
],
|
||||
"description": "escape less than signs in attribute values",
|
||||
"options": {
|
||||
"escape_lt_in_attrs": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<script>a<b>c&d"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"script",
|
||||
{}
|
||||
],
|
||||
[
|
||||
"Characters",
|
||||
"a<b>c&d"
|
||||
]
|
||||
],
|
||||
"description": "rcdata",
|
||||
"options": {
|
||||
"escape_rcdata": true
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
198
libs/html5lib/tests/serializer-testdata/whitespace.test
Normal file
198
libs/html5lib/tests/serializer-testdata/whitespace.test
Normal file
|
@ -0,0 +1,198 @@
|
|||
{
|
||||
"tests": [
|
||||
{
|
||||
"expected": [
|
||||
" foo"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"Characters",
|
||||
"\t\r\n\f foo"
|
||||
]
|
||||
],
|
||||
"description": "bare text with leading spaces",
|
||||
"options": {
|
||||
"strip_whitespace": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"foo "
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"Characters",
|
||||
"foo \t\r\n\f"
|
||||
]
|
||||
],
|
||||
"description": "bare text with trailing spaces",
|
||||
"options": {
|
||||
"strip_whitespace": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"foo bar"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"Characters",
|
||||
"foo \t\r\n\f bar"
|
||||
]
|
||||
],
|
||||
"description": "bare text with inner spaces",
|
||||
"options": {
|
||||
"strip_whitespace": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<pre>\t\r\n\f foo \t\r\n\f bar \t\r\n\f</pre>"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"pre",
|
||||
{}
|
||||
],
|
||||
[
|
||||
"Characters",
|
||||
"\t\r\n\f foo \t\r\n\f bar \t\r\n\f"
|
||||
],
|
||||
[
|
||||
"EndTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"pre"
|
||||
]
|
||||
],
|
||||
"description": "text within <pre>",
|
||||
"options": {
|
||||
"strip_whitespace": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<pre>\t\r\n\f fo<span>o \t\r\n\f b</span>ar \t\r\n\f</pre>"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"pre",
|
||||
{}
|
||||
],
|
||||
[
|
||||
"Characters",
|
||||
"\t\r\n\f fo"
|
||||
],
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"span",
|
||||
{}
|
||||
],
|
||||
[
|
||||
"Characters",
|
||||
"o \t\r\n\f b"
|
||||
],
|
||||
[
|
||||
"EndTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"span"
|
||||
],
|
||||
[
|
||||
"Characters",
|
||||
"ar \t\r\n\f"
|
||||
],
|
||||
[
|
||||
"EndTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"pre"
|
||||
]
|
||||
],
|
||||
"description": "text within <pre>, with inner markup",
|
||||
"options": {
|
||||
"strip_whitespace": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<textarea>\t\r\n\f foo \t\r\n\f bar \t\r\n\f</textarea>"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"textarea",
|
||||
{}
|
||||
],
|
||||
[
|
||||
"Characters",
|
||||
"\t\r\n\f foo \t\r\n\f bar \t\r\n\f"
|
||||
],
|
||||
[
|
||||
"EndTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"textarea"
|
||||
]
|
||||
],
|
||||
"description": "text within <textarea>",
|
||||
"options": {
|
||||
"strip_whitespace": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<script>\t\r\n\f foo \t\r\n\f bar \t\r\n\f</script>"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"script",
|
||||
{}
|
||||
],
|
||||
[
|
||||
"Characters",
|
||||
"\t\r\n\f foo \t\r\n\f bar \t\r\n\f"
|
||||
],
|
||||
[
|
||||
"EndTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"script"
|
||||
]
|
||||
],
|
||||
"description": "text within <script>",
|
||||
"options": {
|
||||
"strip_whitespace": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"expected": [
|
||||
"<style>\t\r\n\f foo \t\r\n\f bar \t\r\n\f</style>"
|
||||
],
|
||||
"input": [
|
||||
[
|
||||
"StartTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"style",
|
||||
{}
|
||||
],
|
||||
[
|
||||
"Characters",
|
||||
"\t\r\n\f foo \t\r\n\f bar \t\r\n\f"
|
||||
],
|
||||
[
|
||||
"EndTag",
|
||||
"http://www.w3.org/1999/xhtml",
|
||||
"style"
|
||||
]
|
||||
],
|
||||
"description": "text within <style>",
|
||||
"options": {
|
||||
"strip_whitespace": true
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
|
@ -143,11 +143,12 @@ def convert(stripChars):
|
|||
return "\n".join(rv)
|
||||
return convertData
|
||||
|
||||
|
||||
convertExpected = convert(2)
|
||||
|
||||
|
||||
def errorMessage(input, expected, actual):
|
||||
msg = ("Input:\n%s\nExpected:\n%s\nRecieved\n%s\n" %
|
||||
msg = ("Input:\n%s\nExpected:\n%s\nReceived\n%s\n" %
|
||||
(repr(input), repr(expected), repr(actual)))
|
||||
if sys.version_info[0] == 2:
|
||||
msg = msg.encode("ascii", "backslashreplace")
|
||||
|
|
|
@ -75,7 +75,15 @@ def test_parser_args_raises(kwargs):
|
|||
assert exc_info.value.args[0].startswith("Cannot set an encoding with a unicode input")
|
||||
|
||||
|
||||
def runParserEncodingTest(data, encoding):
|
||||
def param_encoding():
|
||||
for filename in get_data_files("encoding"):
|
||||
tests = _TestData(filename, b"data", encoding=None)
|
||||
for test in tests:
|
||||
yield test[b'data'], test[b'encoding']
|
||||
|
||||
|
||||
@pytest.mark.parametrize("data, encoding", param_encoding())
|
||||
def test_parser_encoding(data, encoding):
|
||||
p = HTMLParser()
|
||||
assert p.documentEncoding is None
|
||||
p.parse(data, useChardet=False)
|
||||
|
@ -84,7 +92,8 @@ def runParserEncodingTest(data, encoding):
|
|||
assert encoding == p.documentEncoding, errorMessage(data, encoding, p.documentEncoding)
|
||||
|
||||
|
||||
def runPreScanEncodingTest(data, encoding):
|
||||
@pytest.mark.parametrize("data, encoding", param_encoding())
|
||||
def test_prescan_encoding(data, encoding):
|
||||
stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
|
||||
encoding = encoding.lower().decode("ascii")
|
||||
|
||||
|
@ -95,14 +104,6 @@ def runPreScanEncodingTest(data, encoding):
|
|||
assert encoding == stream.charEncoding[0].name, errorMessage(data, encoding, stream.charEncoding[0].name)
|
||||
|
||||
|
||||
def test_encoding():
|
||||
for filename in get_data_files("encoding"):
|
||||
tests = _TestData(filename, b"data", encoding=None)
|
||||
for test in tests:
|
||||
yield (runParserEncodingTest, test[b'data'], test[b'encoding'])
|
||||
yield (runPreScanEncodingTest, test[b'data'], test[b'encoding'])
|
||||
|
||||
|
||||
# pylint:disable=wrong-import-position
|
||||
try:
|
||||
import chardet # noqa
|
||||
|
|
|
@ -28,10 +28,10 @@ def test_errorMessage():
|
|||
|
||||
# Assertions!
|
||||
if six.PY2:
|
||||
assert b"Input:\n1\nExpected:\n2\nRecieved\n3\n" == r
|
||||
assert b"Input:\n1\nExpected:\n2\nReceived\n3\n" == r
|
||||
else:
|
||||
assert six.PY3
|
||||
assert "Input:\n1\nExpected:\n2\nRecieved\n3\n" == r
|
||||
assert "Input:\n1\nExpected:\n2\nReceived\n3\n" == r
|
||||
|
||||
assert input.__repr__.call_count == 1
|
||||
assert expected.__repr__.call_count == 1
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from six import PY2, text_type, unichr
|
||||
from six import PY2, text_type
|
||||
|
||||
import io
|
||||
|
||||
from . import support # noqa
|
||||
|
||||
from html5lib.constants import namespaces, tokenTypes
|
||||
from html5lib.constants import namespaces
|
||||
from html5lib import parse, parseFragment, HTMLParser
|
||||
|
||||
|
||||
|
@ -53,42 +53,6 @@ def test_unicode_file():
|
|||
assert parse(io.StringIO("a")) is not None
|
||||
|
||||
|
||||
def test_maintain_attribute_order():
|
||||
# This is here because we impl it in parser and not tokenizer
|
||||
p = HTMLParser()
|
||||
# generate loads to maximize the chance a hash-based mutation will occur
|
||||
attrs = [(unichr(x), i) for i, x in enumerate(range(ord('a'), ord('z')))]
|
||||
token = {'name': 'html',
|
||||
'selfClosing': False,
|
||||
'selfClosingAcknowledged': False,
|
||||
'type': tokenTypes["StartTag"],
|
||||
'data': attrs}
|
||||
out = p.normalizeToken(token)
|
||||
attr_order = list(out["data"].keys())
|
||||
assert attr_order == [x for x, i in attrs]
|
||||
|
||||
|
||||
def test_duplicate_attribute():
|
||||
# This is here because we impl it in parser and not tokenizer
|
||||
doc = parse('<p class=a class=b>')
|
||||
el = doc[1][0]
|
||||
assert el.get("class") == "a"
|
||||
|
||||
|
||||
def test_maintain_duplicate_attribute_order():
|
||||
# This is here because we impl it in parser and not tokenizer
|
||||
p = HTMLParser()
|
||||
attrs = [(unichr(x), i) for i, x in enumerate(range(ord('a'), ord('z')))]
|
||||
token = {'name': 'html',
|
||||
'selfClosing': False,
|
||||
'selfClosingAcknowledged': False,
|
||||
'type': tokenTypes["StartTag"],
|
||||
'data': attrs + [('a', len(attrs))]}
|
||||
out = p.normalizeToken(token)
|
||||
attr_order = list(out["data"].keys())
|
||||
assert attr_order == [x for x, i in attrs]
|
||||
|
||||
|
||||
def test_debug_log():
|
||||
parser = HTMLParser(debug=True)
|
||||
parser.parse("<!doctype html><title>a</title><p>b<script>c</script>d</p>e")
|
||||
|
|
|
@ -1,31 +1,22 @@
|
|||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
import pytest
|
||||
|
||||
from html5lib import constants, parseFragment, serialize
|
||||
from html5lib.filters import sanitizer
|
||||
|
||||
|
||||
def runSanitizerTest(_, expected, input):
|
||||
parsed = parseFragment(expected)
|
||||
expected = serialize(parsed,
|
||||
omit_optional_tags=False,
|
||||
use_trailing_solidus=True,
|
||||
space_before_trailing_solidus=False,
|
||||
quote_attr_values="always",
|
||||
quote_char='"',
|
||||
alphabetical_attributes=True)
|
||||
assert expected == sanitize_html(input)
|
||||
|
||||
|
||||
def sanitize_html(stream):
|
||||
parsed = parseFragment(stream)
|
||||
serialized = serialize(parsed,
|
||||
sanitize=True,
|
||||
omit_optional_tags=False,
|
||||
use_trailing_solidus=True,
|
||||
space_before_trailing_solidus=False,
|
||||
quote_attr_values="always",
|
||||
quote_char='"',
|
||||
alphabetical_attributes=True)
|
||||
with pytest.deprecated_call():
|
||||
serialized = serialize(parsed,
|
||||
sanitize=True,
|
||||
omit_optional_tags=False,
|
||||
use_trailing_solidus=True,
|
||||
space_before_trailing_solidus=False,
|
||||
quote_attr_values="always",
|
||||
quote_char='"',
|
||||
alphabetical_attributes=True)
|
||||
return serialized
|
||||
|
||||
|
||||
|
@ -59,7 +50,7 @@ def test_data_uri_disallowed_type():
|
|||
assert expected == sanitized
|
||||
|
||||
|
||||
def test_sanitizer():
|
||||
def param_sanitizer():
|
||||
for ns, tag_name in sanitizer.allowed_elements:
|
||||
if ns != constants.namespaces["html"]:
|
||||
continue
|
||||
|
@ -67,19 +58,19 @@ def test_sanitizer():
|
|||
'tfoot', 'th', 'thead', 'tr', 'select']:
|
||||
continue # TODO
|
||||
if tag_name == 'image':
|
||||
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
|
||||
yield ("test_should_allow_%s_tag" % tag_name,
|
||||
"<img title=\"1\"/>foo <bad>bar</bad> baz",
|
||||
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
|
||||
elif tag_name == 'br':
|
||||
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
|
||||
yield ("test_should_allow_%s_tag" % tag_name,
|
||||
"<br title=\"1\"/>foo <bad>bar</bad> baz<br/>",
|
||||
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
|
||||
elif tag_name in constants.voidElements:
|
||||
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
|
||||
yield ("test_should_allow_%s_tag" % tag_name,
|
||||
"<%s title=\"1\"/>foo <bad>bar</bad> baz" % tag_name,
|
||||
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
|
||||
else:
|
||||
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
|
||||
yield ("test_should_allow_%s_tag" % tag_name,
|
||||
"<%s title=\"1\">foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name),
|
||||
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
|
||||
|
||||
|
@ -93,7 +84,7 @@ def test_sanitizer():
|
|||
attribute_value = 'foo'
|
||||
if attribute_name in sanitizer.attr_val_is_uri:
|
||||
attribute_value = '%s://sub.domain.tld/path/object.ext' % sanitizer.allowed_protocols[0]
|
||||
yield (runSanitizerTest, "test_should_allow_%s_attribute" % attribute_name,
|
||||
yield ("test_should_allow_%s_attribute" % attribute_name,
|
||||
"<p %s=\"%s\">foo <bad>bar</bad> baz</p>" % (attribute_name, attribute_value),
|
||||
"<p %s='%s'>foo <bad>bar</bad> baz</p>" % (attribute_name, attribute_value))
|
||||
|
||||
|
@ -101,7 +92,7 @@ def test_sanitizer():
|
|||
rest_of_uri = '//sub.domain.tld/path/object.ext'
|
||||
if protocol == 'data':
|
||||
rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ='
|
||||
yield (runSanitizerTest, "test_should_allow_uppercase_%s_uris" % protocol,
|
||||
yield ("test_should_allow_uppercase_%s_uris" % protocol,
|
||||
"<img src=\"%s:%s\">foo</a>" % (protocol, rest_of_uri),
|
||||
"""<img src="%s:%s">foo</a>""" % (protocol, rest_of_uri))
|
||||
|
||||
|
@ -110,11 +101,26 @@ def test_sanitizer():
|
|||
if protocol == 'data':
|
||||
rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ='
|
||||
protocol = protocol.upper()
|
||||
yield (runSanitizerTest, "test_should_allow_uppercase_%s_uris" % protocol,
|
||||
yield ("test_should_allow_uppercase_%s_uris" % protocol,
|
||||
"<img src=\"%s:%s\">foo</a>" % (protocol, rest_of_uri),
|
||||
"""<img src="%s:%s">foo</a>""" % (protocol, rest_of_uri))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("expected, input",
|
||||
(pytest.param(expected, input, id=id)
|
||||
for id, expected, input in param_sanitizer()))
|
||||
def test_sanitizer(expected, input):
|
||||
parsed = parseFragment(expected)
|
||||
expected = serialize(parsed,
|
||||
omit_optional_tags=False,
|
||||
use_trailing_solidus=True,
|
||||
space_before_trailing_solidus=False,
|
||||
quote_attr_values="always",
|
||||
quote_char='"',
|
||||
alphabetical_attributes=True)
|
||||
assert expected == sanitize_html(input)
|
||||
|
||||
|
||||
def test_lowercase_color_codes_in_style():
|
||||
sanitized = sanitize_html("<p style=\"border: 1px solid #a2a2a2;\"></p>")
|
||||
expected = '<p style=\"border: 1px solid #a2a2a2;\"></p>'
|
||||
|
|
|
@ -80,7 +80,7 @@ class JsonWalker(TreeWalker):
|
|||
|
||||
|
||||
def serialize_html(input, options):
|
||||
options = dict([(str(k), v) for k, v in options.items()])
|
||||
options = {str(k): v for k, v in options.items()}
|
||||
encoding = options.get("encoding", None)
|
||||
if "encoding" in options:
|
||||
del options["encoding"]
|
||||
|
@ -89,19 +89,6 @@ def serialize_html(input, options):
|
|||
return serializer.render(stream, encoding)
|
||||
|
||||
|
||||
def runSerializerTest(input, expected, options):
|
||||
encoding = options.get("encoding", None)
|
||||
|
||||
if encoding:
|
||||
expected = list(map(lambda x: x.encode(encoding), expected))
|
||||
|
||||
result = serialize_html(input, options)
|
||||
if len(expected) == 1:
|
||||
assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s" % (expected[0], result, str(options))
|
||||
elif result not in expected:
|
||||
assert False, "Expected: %s, Received: %s" % (expected, result)
|
||||
|
||||
|
||||
def throwsWithLatin1(input):
|
||||
with pytest.raises(UnicodeEncodeError):
|
||||
serialize_html(input, {"encoding": "iso-8859-1"})
|
||||
|
@ -120,13 +107,13 @@ def testDoctypeSystemId():
|
|||
|
||||
|
||||
def testCdataCharacters():
|
||||
runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\u0101"]],
|
||||
["<style>ā"], {"encoding": "iso-8859-1"})
|
||||
test_serializer([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\u0101"]],
|
||||
["<style>ā"], {"encoding": "iso-8859-1"})
|
||||
|
||||
|
||||
def testCharacters():
|
||||
runSerializerTest([["Characters", "\u0101"]],
|
||||
["ā"], {"encoding": "iso-8859-1"})
|
||||
test_serializer([["Characters", "\u0101"]],
|
||||
["ā"], {"encoding": "iso-8859-1"})
|
||||
|
||||
|
||||
def testStartTagName():
|
||||
|
@ -138,9 +125,9 @@ def testAttributeName():
|
|||
|
||||
|
||||
def testAttributeValue():
|
||||
runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "span",
|
||||
[{"namespace": None, "name": "potato", "value": "\u0101"}]]],
|
||||
["<span potato=ā>"], {"encoding": "iso-8859-1"})
|
||||
test_serializer([["StartTag", "http://www.w3.org/1999/xhtml", "span",
|
||||
[{"namespace": None, "name": "potato", "value": "\u0101"}]]],
|
||||
["<span potato=ā>"], {"encoding": "iso-8859-1"})
|
||||
|
||||
|
||||
def testEndTagName():
|
||||
|
@ -165,7 +152,7 @@ def testSpecQuoteAttribute(c):
|
|||
else:
|
||||
output_ = ['<span foo="%s">' % c]
|
||||
options_ = {"quote_attr_values": "spec"}
|
||||
runSerializerTest(input_, output_, options_)
|
||||
test_serializer(input_, output_, options_)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("c", list("\t\n\u000C\x20\r\"'=<>`"
|
||||
|
@ -184,7 +171,7 @@ def testLegacyQuoteAttribute(c):
|
|||
else:
|
||||
output_ = ['<span foo="%s">' % c]
|
||||
options_ = {"quote_attr_values": "legacy"}
|
||||
runSerializerTest(input_, output_, options_)
|
||||
test_serializer(input_, output_, options_)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
|
@ -217,9 +204,23 @@ def testEntityNoResolve(lxml_parser):
|
|||
assert result == '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>β</html>'
|
||||
|
||||
|
||||
def test_serializer():
|
||||
def param_serializer():
|
||||
for filename in get_data_files('serializer-testdata', '*.test', os.path.dirname(__file__)):
|
||||
with open(filename) as fp:
|
||||
tests = json.load(fp)
|
||||
for test in tests['tests']:
|
||||
yield runSerializerTest, test["input"], test["expected"], test.get("options", {})
|
||||
yield test["input"], test["expected"], test.get("options", {})
|
||||
|
||||
|
||||
@pytest.mark.parametrize("input, expected, options", param_serializer())
|
||||
def test_serializer(input, expected, options):
|
||||
encoding = options.get("encoding", None)
|
||||
|
||||
if encoding:
|
||||
expected = list(map(lambda x: x.encode(encoding), expected))
|
||||
|
||||
result = serialize_html(input, options)
|
||||
if len(expected) == 1:
|
||||
assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s" % (expected[0], result, str(options))
|
||||
elif result not in expected:
|
||||
assert False, "Expected: %s, Received: %s" % (expected, result)
|
||||
|
|
|
@ -308,9 +308,11 @@ def test_invalid_codepoints(inp, num):
|
|||
("'\\uD800\\uD800\\uD800'", 3),
|
||||
("'a\\uD800a\\uD800a\\uD800a'", 3),
|
||||
("'\\uDFFF\\uDBFF'", 2),
|
||||
pytest.mark.skipif(sys.maxunicode == 0xFFFF,
|
||||
("'\\uDBFF\\uDFFF'", 2),
|
||||
reason="narrow Python")])
|
||||
pytest.param(
|
||||
"'\\uDBFF\\uDFFF'", 2,
|
||||
marks=pytest.mark.skipif(
|
||||
sys.maxunicode == 0xFFFF,
|
||||
reason="narrow Python"))])
|
||||
def test_invalid_codepoints_surrogates(inp, num):
|
||||
inp = eval(inp) # pylint:disable=eval-used
|
||||
fp = StringIO(inp)
|
||||
|
|
66
libs/html5lib/tests/test_tokenizer2.py
Normal file
66
libs/html5lib/tests/test_tokenizer2.py
Normal file
|
@ -0,0 +1,66 @@
|
|||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
import io
|
||||
|
||||
from six import unichr, text_type
|
||||
|
||||
from html5lib._tokenizer import HTMLTokenizer
|
||||
from html5lib.constants import tokenTypes
|
||||
|
||||
|
||||
def ignore_parse_errors(toks):
|
||||
for tok in toks:
|
||||
if tok['type'] != tokenTypes['ParseError']:
|
||||
yield tok
|
||||
|
||||
|
||||
def test_maintain_attribute_order():
|
||||
# generate loads to maximize the chance a hash-based mutation will occur
|
||||
attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
|
||||
stream = io.StringIO("<span " + " ".join("%s='%s'" % (x, i) for x, i in attrs) + ">")
|
||||
|
||||
toks = HTMLTokenizer(stream)
|
||||
out = list(ignore_parse_errors(toks))
|
||||
|
||||
assert len(out) == 1
|
||||
assert out[0]['type'] == tokenTypes['StartTag']
|
||||
|
||||
attrs_tok = out[0]['data']
|
||||
assert len(attrs_tok) == len(attrs)
|
||||
|
||||
for (in_name, in_value), (out_name, out_value) in zip(attrs, attrs_tok.items()):
|
||||
assert in_name == out_name
|
||||
assert in_value == out_value
|
||||
|
||||
|
||||
def test_duplicate_attribute():
|
||||
stream = io.StringIO("<span a=1 a=2 a=3>")
|
||||
|
||||
toks = HTMLTokenizer(stream)
|
||||
out = list(ignore_parse_errors(toks))
|
||||
|
||||
assert len(out) == 1
|
||||
assert out[0]['type'] == tokenTypes['StartTag']
|
||||
|
||||
attrs_tok = out[0]['data']
|
||||
assert len(attrs_tok) == 1
|
||||
assert list(attrs_tok.items()) == [('a', '1')]
|
||||
|
||||
|
||||
def test_maintain_duplicate_attribute_order():
|
||||
# generate loads to maximize the chance a hash-based mutation will occur
|
||||
attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
|
||||
stream = io.StringIO("<span " + " ".join("%s='%s'" % (x, i) for x, i in attrs) + " a=100>")
|
||||
|
||||
toks = HTMLTokenizer(stream)
|
||||
out = list(ignore_parse_errors(toks))
|
||||
|
||||
assert len(out) == 1
|
||||
assert out[0]['type'] == tokenTypes['StartTag']
|
||||
|
||||
attrs_tok = out[0]['data']
|
||||
assert len(attrs_tok) == len(attrs)
|
||||
|
||||
for (in_name, in_value), (out_name, out_value) in zip(attrs, attrs_tok.items()):
|
||||
assert in_name == out_name
|
||||
assert in_value == out_value
|
|
@ -1,7 +1,9 @@
|
|||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
import itertools
|
||||
import sys
|
||||
|
||||
from six import unichr, text_type
|
||||
import pytest
|
||||
|
||||
try:
|
||||
|
@ -61,24 +63,7 @@ def set_attribute_on_first_child(docfrag, name, value, treeName):
|
|||
setter['ElementTree'](docfrag)(name, value)
|
||||
|
||||
|
||||
def runTreewalkerEditTest(intext, expected, attrs_to_add, tree):
|
||||
"""tests what happens when we add attributes to the intext"""
|
||||
treeName, treeClass = tree
|
||||
if treeClass is None:
|
||||
pytest.skip("Treebuilder not loaded")
|
||||
parser = html5parser.HTMLParser(tree=treeClass["builder"])
|
||||
document = parser.parseFragment(intext)
|
||||
for nom, val in attrs_to_add:
|
||||
set_attribute_on_first_child(document, nom, val, treeName)
|
||||
|
||||
document = treeClass.get("adapter", lambda x: x)(document)
|
||||
output = treewalkers.pprint(treeClass["walker"](document))
|
||||
output = attrlist.sub(sortattrs, output)
|
||||
if output not in expected:
|
||||
raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output))
|
||||
|
||||
|
||||
def test_treewalker_six_mix():
|
||||
def param_treewalker_six_mix():
|
||||
"""Str/Unicode mix. If str attrs added to tree"""
|
||||
|
||||
# On Python 2.x string literals are of type str. Unless, like this
|
||||
|
@ -99,7 +84,25 @@ def test_treewalker_six_mix():
|
|||
|
||||
for tree in sorted(treeTypes.items()):
|
||||
for intext, attrs, expected in sm_tests:
|
||||
yield runTreewalkerEditTest, intext, expected, attrs, tree
|
||||
yield intext, expected, attrs, tree
|
||||
|
||||
|
||||
@pytest.mark.parametrize("intext, expected, attrs_to_add, tree", param_treewalker_six_mix())
|
||||
def test_treewalker_six_mix(intext, expected, attrs_to_add, tree):
|
||||
"""tests what happens when we add attributes to the intext"""
|
||||
treeName, treeClass = tree
|
||||
if treeClass is None:
|
||||
pytest.skip("Treebuilder not loaded")
|
||||
parser = html5parser.HTMLParser(tree=treeClass["builder"])
|
||||
document = parser.parseFragment(intext)
|
||||
for nom, val in attrs_to_add:
|
||||
set_attribute_on_first_child(document, nom, val, treeName)
|
||||
|
||||
document = treeClass.get("adapter", lambda x: x)(document)
|
||||
output = treewalkers.pprint(treeClass["walker"](document))
|
||||
output = attrlist.sub(sortattrs, output)
|
||||
if output not in expected:
|
||||
raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("tree,char", itertools.product(sorted(treeTypes.items()), ["x", "\u1234"]))
|
||||
|
@ -134,3 +137,69 @@ def test_lxml_xml():
|
|||
output = Lint(walker(lxmltree))
|
||||
|
||||
assert list(output) == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("treeName",
|
||||
[pytest.param(treeName, marks=[getattr(pytest.mark, treeName),
|
||||
pytest.mark.skipif(
|
||||
treeName != "lxml" or
|
||||
sys.version_info < (3, 7), reason="dict order undef")])
|
||||
for treeName in sorted(treeTypes.keys())])
|
||||
def test_maintain_attribute_order(treeName):
|
||||
treeAPIs = treeTypes[treeName]
|
||||
if treeAPIs is None:
|
||||
pytest.skip("Treebuilder not loaded")
|
||||
|
||||
# generate loads to maximize the chance a hash-based mutation will occur
|
||||
attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
|
||||
data = "<span " + " ".join("%s='%s'" % (x, i) for x, i in attrs) + ">"
|
||||
|
||||
parser = html5parser.HTMLParser(tree=treeAPIs["builder"])
|
||||
document = parser.parseFragment(data)
|
||||
|
||||
document = treeAPIs.get("adapter", lambda x: x)(document)
|
||||
output = list(Lint(treeAPIs["walker"](document)))
|
||||
|
||||
assert len(output) == 2
|
||||
assert output[0]['type'] == 'StartTag'
|
||||
assert output[1]['type'] == "EndTag"
|
||||
|
||||
attrs_out = output[0]['data']
|
||||
assert len(attrs) == len(attrs_out)
|
||||
|
||||
for (in_name, in_value), (out_name, out_value) in zip(attrs, attrs_out.items()):
|
||||
assert (None, in_name) == out_name
|
||||
assert in_value == out_value
|
||||
|
||||
|
||||
@pytest.mark.parametrize("treeName",
|
||||
[pytest.param(treeName, marks=[getattr(pytest.mark, treeName),
|
||||
pytest.mark.skipif(
|
||||
treeName != "lxml" or
|
||||
sys.version_info < (3, 7), reason="dict order undef")])
|
||||
for treeName in sorted(treeTypes.keys())])
|
||||
def test_maintain_attribute_order_adjusted(treeName):
|
||||
treeAPIs = treeTypes[treeName]
|
||||
if treeAPIs is None:
|
||||
pytest.skip("Treebuilder not loaded")
|
||||
|
||||
# generate loads to maximize the chance a hash-based mutation will occur
|
||||
data = "<svg a=1 refx=2 b=3 xml:lang=4 c=5>"
|
||||
|
||||
parser = html5parser.HTMLParser(tree=treeAPIs["builder"])
|
||||
document = parser.parseFragment(data)
|
||||
|
||||
document = treeAPIs.get("adapter", lambda x: x)(document)
|
||||
output = list(Lint(treeAPIs["walker"](document)))
|
||||
|
||||
assert len(output) == 2
|
||||
assert output[0]['type'] == 'StartTag'
|
||||
assert output[1]['type'] == "EndTag"
|
||||
|
||||
attrs_out = output[0]['data']
|
||||
|
||||
assert list(attrs_out.items()) == [((None, 'a'), '1'),
|
||||
((None, 'refX'), '2'),
|
||||
((None, 'b'), '3'),
|
||||
(('http://www.w3.org/XML/1998/namespace', 'lang'), '4'),
|
||||
((None, 'c'), '5')]
|
||||
|
|
34
libs/html5lib/tests/testdata/AUTHORS.rst
vendored
34
libs/html5lib/tests/testdata/AUTHORS.rst
vendored
|
@ -1,34 +0,0 @@
|
|||
Credits
|
||||
=======
|
||||
|
||||
The ``html5lib`` test data is maintained by:
|
||||
|
||||
- James Graham
|
||||
- Geoffrey Sneddon
|
||||
|
||||
|
||||
Contributors
|
||||
------------
|
||||
|
||||
- Adam Barth
|
||||
- Andi Sidwell
|
||||
- Anne van Kesteren
|
||||
- David Flanagan
|
||||
- Edward Z. Yang
|
||||
- Geoffrey Sneddon
|
||||
- Henri Sivonen
|
||||
- Ian Hickson
|
||||
- Jacques Distler
|
||||
- James Graham
|
||||
- Lachlan Hunt
|
||||
- lantis63
|
||||
- Mark Pilgrim
|
||||
- Mats Palmgren
|
||||
- Ms2ger
|
||||
- Nolan Waite
|
||||
- Philip Taylor
|
||||
- Rafael Weinstein
|
||||
- Ryan King
|
||||
- Sam Ruby
|
||||
- Simon Pieters
|
||||
- Thomas Broyer
|
21
libs/html5lib/tests/testdata/LICENSE
vendored
21
libs/html5lib/tests/testdata/LICENSE
vendored
|
@ -1,21 +0,0 @@
|
|||
Copyright (c) 2006-2013 James Graham, Geoffrey Sneddon, and
|
||||
other contributors
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
@ -1,51 +0,0 @@
|
|||
老子《道德經》 第一~四十章
|
||||
|
||||
老子道經
|
||||
|
||||
第一章
|
||||
|
||||
道可道,非常道。名可名,非常名。無,名天地之始﹔有,名萬物之母。
|
||||
故常無,欲以觀其妙;常有,欲以觀其徼。此兩者,同出而異名,同謂之
|
||||
玄。玄之又玄,眾妙之門。
|
||||
|
||||
第二章
|
||||
|
||||
天下皆知美之為美,斯惡矣﹔皆知善之為善,斯不善矣。故有無相生,難
|
||||
易相成,長短相形,高下相傾,音聲相和,前後相隨。是以聖人處「無為
|
||||
」之事,行「不言」之教。萬物作焉而不辭,生而不有,為而不恃,功成
|
||||
而弗居。夫唯弗居,是以不去。
|
||||
|
||||
第三章
|
||||
|
||||
不尚賢,使民不爭﹔不貴難得之貨,使民不為盜﹔不見可欲,使民心不亂
|
||||
。是以「聖人」之治,虛其心,實其腹,弱其志,強其骨。常使民無知無
|
||||
欲。使夫智者不敢為也。為「無為」,則無不治。
|
||||
|
||||
第四章
|
||||
|
||||
「道」沖,而用之或不盈。淵兮,似萬物之宗﹔挫其銳,解其紛,和其光
|
||||
,同其塵﹔湛兮似或存。吾不知誰之子?象帝之先。
|
||||
|
||||
第五章
|
||||
|
||||
天地不仁,以萬物為芻狗﹔聖人不仁,以百姓為芻狗。天地之間,其猶橐
|
||||
蘥乎?虛而不屈,動而愈出。多言數窮,不如守中。
|
||||
|
||||
第六章
|
||||
|
||||
谷神不死,是謂玄牝。玄牝之門,是謂天地根。綿綿若存,用之不勤。
|
||||
|
||||
第七章
|
||||
|
||||
天長地久。天地所以能長且久者,以其不自生,故能長久。是以聖人後其
|
||||
身而身先,外其身而身存。非以其無私邪?故能成其私。
|
||||
|
||||
第八章
|
||||
|
||||
上善若水。水善利萬物而不爭。處眾人之所惡,故幾於道。居善地,心善
|
||||
淵,與善仁,言善信,政善治,事善能,動善時。夫唯不爭,故無尤。
|
||||
|
||||
第九章
|
||||
|
||||
持而盈之,不如其已﹔揣而銳之,不可長保。金玉滿堂,莫之能守﹔富貴
|
||||
而驕,自遺其咎。功遂身退,天之道。
|
|
@ -1,10 +0,0 @@
|
|||
#data
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=euc-jp">
|
||||
<!--京-->
|
||||
<title>Yahoo! JAPAN</title>
|
||||
<meta name="description" content="日本最大級のポータルサイト。検索、オークション、ニュース、メール、コミュニティ、ショッピング、など80以上のサービスを展開。あなたの生活をより豊かにする「ライフ・エンジン」を目指していきます。">
|
||||
<style type="text/css" media="all">
|
||||
#encoding
|
||||
euc-jp
|
394
libs/html5lib/tests/testdata/encoding/tests1.dat
vendored
394
libs/html5lib/tests/testdata/encoding/tests1.dat
vendored
File diff suppressed because one or more lines are too long
115
libs/html5lib/tests/testdata/encoding/tests2.dat
vendored
115
libs/html5lib/tests/testdata/encoding/tests2.dat
vendored
|
@ -1,115 +0,0 @@
|
|||
#data
|
||||
<meta
|
||||
#encoding
|
||||
windows-1252
|
||||
|
||||
#data
|
||||
<
|
||||
#encoding
|
||||
windows-1252
|
||||
|
||||
#data
|
||||
<!
|
||||
#encoding
|
||||
windows-1252
|
||||
|
||||
#data
|
||||
<meta charset = "
|
||||
#encoding
|
||||
windows-1252
|
||||
|
||||
#data
|
||||
<meta charset=euc-jp
|
||||
#encoding
|
||||
windows-1252
|
||||
|
||||
#data
|
||||
<meta <meta charset='euc-jp'>
|
||||
#encoding
|
||||
euc-jp
|
||||
|
||||
#data
|
||||
<meta charset = 'euc-jp'>
|
||||
#encoding
|
||||
euc-jp
|
||||
|
||||
#data
|
||||
<!-- -->
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
|
||||
#encoding
|
||||
utf-8
|
||||
|
||||
#data
|
||||
<!-- -->
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf
|
||||
#encoding
|
||||
windows-1252
|
||||
|
||||
#data
|
||||
<meta http-equiv="Content-Type<meta charset="utf-8">
|
||||
#encoding
|
||||
windows-1252
|
||||
|
||||
#data
|
||||
<meta http-equiv="Content-Type" content="text/html; charset='utf-8'">
|
||||
#encoding
|
||||
utf-8
|
||||
|
||||
#data
|
||||
<meta http-equiv="Content-Type" content="text/html; charset='utf-8">
|
||||
#encoding
|
||||
windows-1252
|
||||
|
||||
#data
|
||||
<meta
|
||||
#encoding
|
||||
windows-1252
|
||||
|
||||
#data
|
||||
<meta charset =
|
||||
#encoding
|
||||
windows-1252
|
||||
|
||||
#data
|
||||
<meta charset= utf-8
|
||||
>
|
||||
#encoding
|
||||
utf-8
|
||||
|
||||
#data
|
||||
<meta content = "text/html;
|
||||
#encoding
|
||||
windows-1252
|
||||
|
||||
#data
|
||||
<meta charset="UTF-16">
|
||||
#encoding
|
||||
utf-8
|
||||
|
||||
#data
|
||||
<meta charset="UTF-16LE">
|
||||
#encoding
|
||||
utf-8
|
||||
|
||||
#data
|
||||
<meta charset="UTF-16BE">
|
||||
#encoding
|
||||
utf-8
|
||||
|
||||
#data
|
||||
<html a=ñ>
|
||||
<meta charset="utf-8">
|
||||
#encoding
|
||||
utf-8
|
||||
|
||||
#data
|
||||
<html ñ>
|
||||
<meta charset="utf-8">
|
||||
#encoding
|
||||
utf-8
|
||||
|
||||
#data
|
||||
<html>ñ
|
||||
<meta charset="utf-8">
|
||||
#encoding
|
||||
utf-8
|
125
libs/html5lib/tests/testdata/serializer/core.test
vendored
125
libs/html5lib/tests/testdata/serializer/core.test
vendored
|
@ -1,125 +0,0 @@
|
|||
{"tests": [
|
||||
|
||||
{"description": "proper attribute value escaping",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "test \"with\" ""}]]],
|
||||
"expected": ["<span title='test \"with\" &quot;'>"]
|
||||
},
|
||||
|
||||
{"description": "proper attribute value non-quoting",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo"}]]],
|
||||
"expected": ["<span title=foo>"],
|
||||
"xhtml": ["<span title=\"foo\">"]
|
||||
},
|
||||
|
||||
{"description": "proper attribute value non-quoting (with <)",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo<bar"}]]],
|
||||
"expected": ["<span title=foo<bar>"],
|
||||
"xhtml": ["<span title=\"foo<bar\">"]
|
||||
},
|
||||
|
||||
{"description": "proper attribute value quoting (with =)",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo=bar"}]]],
|
||||
"expected": ["<span title=\"foo=bar\">"]
|
||||
},
|
||||
|
||||
{"description": "proper attribute value quoting (with >)",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo>bar"}]]],
|
||||
"expected": ["<span title=\"foo>bar\">"]
|
||||
},
|
||||
|
||||
{"description": "proper attribute value quoting (with \")",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\"bar"}]]],
|
||||
"expected": ["<span title='foo\"bar'>"]
|
||||
},
|
||||
|
||||
{"description": "proper attribute value quoting (with ')",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo'bar"}]]],
|
||||
"expected": ["<span title=\"foo'bar\">"]
|
||||
},
|
||||
|
||||
{"description": "proper attribute value quoting (with both \" and ')",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo'bar\"baz"}]]],
|
||||
"expected": ["<span title=\"foo'bar"baz\">"]
|
||||
},
|
||||
|
||||
{"description": "proper attribute value quoting (with space)",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo bar"}]]],
|
||||
"expected": ["<span title=\"foo bar\">"]
|
||||
},
|
||||
|
||||
{"description": "proper attribute value quoting (with tab)",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\tbar"}]]],
|
||||
"expected": ["<span title=\"foo\tbar\">"]
|
||||
},
|
||||
|
||||
{"description": "proper attribute value quoting (with LF)",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\nbar"}]]],
|
||||
"expected": ["<span title=\"foo\nbar\">"]
|
||||
},
|
||||
|
||||
{"description": "proper attribute value quoting (with CR)",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\rbar"}]]],
|
||||
"expected": ["<span title=\"foo\rbar\">"]
|
||||
},
|
||||
|
||||
{"description": "proper attribute value non-quoting (with linetab)",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\u000Bbar"}]]],
|
||||
"expected": ["<span title=foo\u000Bbar>"],
|
||||
"xhtml": ["<span title=\"foo\u000Bbar\">"]
|
||||
},
|
||||
|
||||
{"description": "proper attribute value quoting (with form feed)",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\u000Cbar"}]]],
|
||||
"expected": ["<span title=\"foo\u000Cbar\">"]
|
||||
},
|
||||
|
||||
{"description": "void element (as EmptyTag token)",
|
||||
"input": [["EmptyTag", "img", {}]],
|
||||
"expected": ["<img>"],
|
||||
"xhtml": ["<img />"]
|
||||
},
|
||||
|
||||
{"description": "void element (as StartTag token)",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "img", {}]],
|
||||
"expected": ["<img>"],
|
||||
"xhtml": ["<img />"]
|
||||
},
|
||||
|
||||
{"description": "doctype in error",
|
||||
"input": [["Doctype", "foo"]],
|
||||
"expected": ["<!DOCTYPE foo>"]
|
||||
},
|
||||
|
||||
{"description": "character data",
|
||||
"options": {"encoding":"utf-8"},
|
||||
"input": [["Characters", "a<b>c&d"]],
|
||||
"expected": ["a<b>c&d"]
|
||||
},
|
||||
|
||||
{"description": "rcdata",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "script", {}], ["Characters", "a<b>c&d"]],
|
||||
"expected": ["<script>a<b>c&d"],
|
||||
"xhtml": ["<script>a<b>c&d"]
|
||||
},
|
||||
|
||||
{"description": "doctype",
|
||||
"input": [["Doctype", "HTML"]],
|
||||
"expected": ["<!DOCTYPE HTML>"]
|
||||
},
|
||||
|
||||
{"description": "HTML 4.01 DOCTYPE",
|
||||
"input": [["Doctype", "HTML", "-//W3C//DTD HTML 4.01//EN", "http://www.w3.org/TR/html4/strict.dtd"]],
|
||||
"expected": ["<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">"]
|
||||
},
|
||||
|
||||
{"description": "HTML 4.01 DOCTYPE without system identifer",
|
||||
"input": [["Doctype", "HTML", "-//W3C//DTD HTML 4.01//EN"]],
|
||||
"expected": ["<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\">"]
|
||||
},
|
||||
|
||||
{"description": "IBM DOCTYPE without public identifer",
|
||||
"input": [["Doctype", "html", "", "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"]],
|
||||
"expected": ["<!DOCTYPE html SYSTEM \"http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd\">"]
|
||||
}
|
||||
|
||||
]}
|
|
@ -1,66 +0,0 @@
|
|||
{"tests": [
|
||||
|
||||
{"description": "no encoding",
|
||||
"options": {"inject_meta_charset": true},
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
|
||||
"expected": [""],
|
||||
"xhtml": ["<head></head>"]
|
||||
},
|
||||
|
||||
{"description": "empytag head",
|
||||
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
|
||||
"expected": ["<meta charset=utf-8>"],
|
||||
"xhtml": ["<head><meta charset=\"utf-8\" /></head>"]
|
||||
},
|
||||
|
||||
{"description": "head w/title",
|
||||
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["StartTag", "http://www.w3.org/1999/xhtml","title",{}], ["Characters", "foo"],["EndTag", "http://www.w3.org/1999/xhtml", "title"], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
|
||||
"expected": ["<meta charset=utf-8><title>foo</title>"],
|
||||
"xhtml": ["<head><meta charset=\"utf-8\" /><title>foo</title></head>"]
|
||||
},
|
||||
|
||||
{"description": "head w/meta-charset",
|
||||
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
|
||||
"expected": ["<meta charset=utf-8>"],
|
||||
"xhtml": ["<head><meta charset=\"utf-8\" /></head>"]
|
||||
},
|
||||
|
||||
{"description": "head w/ two meta-charset",
|
||||
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
|
||||
"expected": ["<meta charset=utf-8><meta charset=utf-8>", "<head><meta charset=utf-8><meta charset=ascii>"],
|
||||
"xhtml": ["<head><meta charset=\"utf-8\" /><meta charset=\"utf-8\" /></head>", "<head><meta charset=\"utf-8\" /><meta charset=\"ascii\" /></head>"]
|
||||
},
|
||||
|
||||
{"description": "head w/robots",
|
||||
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "name", "value": "robots"},{"namespace": null, "name": "content", "value": "noindex"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
|
||||
"expected": ["<meta charset=utf-8><meta content=noindex name=robots>"],
|
||||
"xhtml": ["<head><meta charset=\"utf-8\" /><meta content=\"noindex\" name=\"robots\" /></head>"]
|
||||
},
|
||||
|
||||
{"description": "head w/robots & charset",
|
||||
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "name", "value": "robots"},{"namespace": null, "name": "content", "value": "noindex"}]], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
|
||||
"expected": ["<meta content=noindex name=robots><meta charset=utf-8>"],
|
||||
"xhtml": ["<head><meta content=\"noindex\" name=\"robots\" /><meta charset=\"utf-8\" /></head>"]
|
||||
},
|
||||
|
||||
{"description": "head w/ charset in http-equiv content-type",
|
||||
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "http-equiv", "value": "content-type"}, {"namespace": null, "name": "content", "value": "text/html; charset=ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
|
||||
"expected": ["<meta content=\"text/html; charset=utf-8\" http-equiv=content-type>"],
|
||||
"xhtml": ["<head><meta content=\"text/html; charset=utf-8\" http-equiv=\"content-type\" /></head>"]
|
||||
},
|
||||
|
||||
{"description": "head w/robots & charset in http-equiv content-type",
|
||||
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "name", "value": "robots"},{"namespace": null, "name": "content", "value": "noindex"}]], ["EmptyTag","meta",[{"namespace": null, "name": "http-equiv", "value": "content-type"}, {"namespace": null, "name": "content", "value": "text/html; charset=ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
|
||||
"expected": ["<meta content=noindex name=robots><meta content=\"text/html; charset=utf-8\" http-equiv=content-type>"],
|
||||
"xhtml": ["<head><meta content=\"noindex\" name=\"robots\" /><meta content=\"text/html; charset=utf-8\" http-equiv=\"content-type\" /></head>"]
|
||||
}
|
||||
|
||||
]}
|
|
@ -1,965 +0,0 @@
|
|||
{"tests": [
|
||||
|
||||
{"description": "html start-tag followed by text, with attributes",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", [{"namespace": null, "name": "lang", "value": "en"}]], ["Characters", "foo"]],
|
||||
"expected": ["<html lang=en>foo"]
|
||||
},
|
||||
|
||||
|
||||
|
||||
{"description": "html start-tag followed by comment",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["Comment", "foo"]],
|
||||
"expected": ["<html><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "html start-tag followed by space character",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["Characters", " foo"]],
|
||||
"expected": ["<html> foo"]
|
||||
},
|
||||
|
||||
{"description": "html start-tag followed by text",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["Characters", "foo"]],
|
||||
"expected": ["foo"]
|
||||
},
|
||||
|
||||
{"description": "html start-tag followed by start-tag",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
||||
"expected": ["<foo>"]
|
||||
},
|
||||
|
||||
{"description": "html start-tag followed by end-tag",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
||||
"expected": ["</foo>"]
|
||||
},
|
||||
|
||||
{"description": "html start-tag at EOF (shouldn't ever happen?!)",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}]],
|
||||
"expected": [""]
|
||||
},
|
||||
|
||||
|
||||
|
||||
{"description": "html end-tag followed by comment",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["Comment", "foo"]],
|
||||
"expected": ["</html><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "html end-tag followed by space character",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["Characters", " foo"]],
|
||||
"expected": ["</html> foo"]
|
||||
},
|
||||
|
||||
{"description": "html end-tag followed by text",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["Characters", "foo"]],
|
||||
"expected": ["foo"]
|
||||
},
|
||||
|
||||
{"description": "html end-tag followed by start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
||||
"expected": ["<foo>"]
|
||||
},
|
||||
|
||||
{"description": "html end-tag followed by end-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
||||
"expected": ["</foo>"]
|
||||
},
|
||||
|
||||
{"description": "html end-tag at EOF",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"]],
|
||||
"expected": [""]
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
{"description": "head start-tag followed by comment",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["Comment", "foo"]],
|
||||
"expected": ["<head><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "head start-tag followed by space character",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["Characters", " foo"]],
|
||||
"expected": ["<head> foo"]
|
||||
},
|
||||
|
||||
{"description": "head start-tag followed by text",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["Characters", "foo"]],
|
||||
"expected": ["<head>foo"]
|
||||
},
|
||||
|
||||
{"description": "head start-tag followed by start-tag",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
||||
"expected": ["<foo>"]
|
||||
},
|
||||
|
||||
{"description": "head start-tag followed by end-tag (shouldn't ever happen?!)",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
||||
"expected": ["<head></foo>", "</foo>"]
|
||||
},
|
||||
|
||||
{"description": "empty head element",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
|
||||
"expected": [""]
|
||||
},
|
||||
|
||||
{"description": "head start-tag followed by empty-tag",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag", "foo", {}]],
|
||||
"expected": ["<foo>"]
|
||||
},
|
||||
|
||||
{"description": "head start-tag at EOF (shouldn't ever happen?!)",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}]],
|
||||
"expected": ["<head>", ""]
|
||||
},
|
||||
|
||||
|
||||
|
||||
{"description": "head end-tag followed by comment",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["Comment", "foo"]],
|
||||
"expected": ["</head><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "head end-tag followed by space character",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["Characters", " foo"]],
|
||||
"expected": ["</head> foo"]
|
||||
},
|
||||
|
||||
{"description": "head end-tag followed by text",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["Characters", "foo"]],
|
||||
"expected": ["foo"]
|
||||
},
|
||||
|
||||
{"description": "head end-tag followed by start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
||||
"expected": ["<foo>"]
|
||||
},
|
||||
|
||||
{"description": "head end-tag followed by end-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
||||
"expected": ["</foo>"]
|
||||
},
|
||||
|
||||
{"description": "head end-tag at EOF",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
|
||||
"expected": [""]
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
{"description": "body start-tag followed by comment",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["Comment", "foo"]],
|
||||
"expected": ["<body><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "body start-tag followed by space character",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["Characters", " foo"]],
|
||||
"expected": ["<body> foo"]
|
||||
},
|
||||
|
||||
{"description": "body start-tag followed by text",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["Characters", "foo"]],
|
||||
"expected": ["foo"]
|
||||
},
|
||||
|
||||
{"description": "body start-tag followed by start-tag",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
||||
"expected": ["<foo>"]
|
||||
},
|
||||
|
||||
{"description": "body start-tag followed by end-tag",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
||||
"expected": ["</foo>"]
|
||||
},
|
||||
|
||||
{"description": "body start-tag at EOF (shouldn't ever happen?!)",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}]],
|
||||
"expected": [""]
|
||||
},
|
||||
|
||||
|
||||
|
||||
{"description": "body end-tag followed by comment",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["Comment", "foo"]],
|
||||
"expected": ["</body><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "body end-tag followed by space character",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["Characters", " foo"]],
|
||||
"expected": ["</body> foo"]
|
||||
},
|
||||
|
||||
{"description": "body end-tag followed by text",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["Characters", "foo"]],
|
||||
"expected": ["foo"]
|
||||
},
|
||||
|
||||
{"description": "body end-tag followed by start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
||||
"expected": ["<foo>"]
|
||||
},
|
||||
|
||||
{"description": "body end-tag followed by end-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
||||
"expected": ["</foo>"]
|
||||
},
|
||||
|
||||
{"description": "body end-tag at EOF",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"]],
|
||||
"expected": [""]
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
{"description": "li end-tag followed by comment",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["Comment", "foo"]],
|
||||
"expected": ["</li><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "li end-tag followed by space character",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["Characters", " foo"]],
|
||||
"expected": ["</li> foo"]
|
||||
},
|
||||
|
||||
{"description": "li end-tag followed by text",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["Characters", "foo"]],
|
||||
"expected": ["</li>foo"]
|
||||
},
|
||||
|
||||
{"description": "li end-tag followed by start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
||||
"expected": ["</li><foo>"]
|
||||
},
|
||||
|
||||
{"description": "li end-tag followed by li start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["StartTag", "http://www.w3.org/1999/xhtml", "li", {}]],
|
||||
"expected": ["<li>"]
|
||||
},
|
||||
|
||||
{"description": "li end-tag followed by end-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
||||
"expected": ["</foo>"]
|
||||
},
|
||||
|
||||
{"description": "li end-tag at EOF",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"]],
|
||||
"expected": [""]
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
{"description": "dt end-tag followed by comment",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["Comment", "foo"]],
|
||||
"expected": ["</dt><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "dt end-tag followed by space character",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["Characters", " foo"]],
|
||||
"expected": ["</dt> foo"]
|
||||
},
|
||||
|
||||
{"description": "dt end-tag followed by text",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["Characters", "foo"]],
|
||||
"expected": ["</dt>foo"]
|
||||
},
|
||||
|
||||
{"description": "dt end-tag followed by start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
||||
"expected": ["</dt><foo>"]
|
||||
},
|
||||
|
||||
{"description": "dt end-tag followed by dt start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["StartTag", "http://www.w3.org/1999/xhtml", "dt", {}]],
|
||||
"expected": ["<dt>"]
|
||||
},
|
||||
|
||||
{"description": "dt end-tag followed by dd start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["StartTag", "http://www.w3.org/1999/xhtml", "dd", {}]],
|
||||
"expected": ["<dd>"]
|
||||
},
|
||||
|
||||
{"description": "dt end-tag followed by end-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
||||
"expected": ["</dt></foo>"]
|
||||
},
|
||||
|
||||
{"description": "dt end-tag at EOF",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"]],
|
||||
"expected": ["</dt>"]
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
{"description": "dd end-tag followed by comment",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["Comment", "foo"]],
|
||||
"expected": ["</dd><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "dd end-tag followed by space character",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["Characters", " foo"]],
|
||||
"expected": ["</dd> foo"]
|
||||
},
|
||||
|
||||
{"description": "dd end-tag followed by text",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["Characters", "foo"]],
|
||||
"expected": ["</dd>foo"]
|
||||
},
|
||||
|
||||
{"description": "dd end-tag followed by start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
||||
"expected": ["</dd><foo>"]
|
||||
},
|
||||
|
||||
{"description": "dd end-tag followed by dd start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["StartTag", "http://www.w3.org/1999/xhtml", "dd", {}]],
|
||||
"expected": ["<dd>"]
|
||||
},
|
||||
|
||||
{"description": "dd end-tag followed by dt start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["StartTag", "http://www.w3.org/1999/xhtml", "dt", {}]],
|
||||
"expected": ["<dt>"]
|
||||
},
|
||||
|
||||
{"description": "dd end-tag followed by end-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
||||
"expected": ["</foo>"]
|
||||
},
|
||||
|
||||
{"description": "dd end-tag at EOF",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"]],
|
||||
"expected": [""]
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
{"description": "p end-tag followed by comment",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["Comment", "foo"]],
|
||||
"expected": ["</p><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by space character",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["Characters", " foo"]],
|
||||
"expected": ["</p> foo"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by text",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["Characters", "foo"]],
|
||||
"expected": ["</p>foo"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
||||
"expected": ["</p><foo>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by address start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "address", {}]],
|
||||
"expected": ["<address>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by article start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "article", {}]],
|
||||
"expected": ["<article>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by aside start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "aside", {}]],
|
||||
"expected": ["<aside>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by blockquote start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "blockquote", {}]],
|
||||
"expected": ["<blockquote>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by datagrid start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "datagrid", {}]],
|
||||
"expected": ["<datagrid>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by dialog start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "dialog", {}]],
|
||||
"expected": ["<dialog>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by dir start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "dir", {}]],
|
||||
"expected": ["<dir>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by div start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "div", {}]],
|
||||
"expected": ["<div>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by dl start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "dl", {}]],
|
||||
"expected": ["<dl>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by fieldset start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "fieldset", {}]],
|
||||
"expected": ["<fieldset>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by footer start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "footer", {}]],
|
||||
"expected": ["<footer>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by form start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "form", {}]],
|
||||
"expected": ["<form>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by h1 start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h1", {}]],
|
||||
"expected": ["<h1>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by h2 start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h2", {}]],
|
||||
"expected": ["<h2>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by h3 start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h3", {}]],
|
||||
"expected": ["<h3>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by h4 start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h4", {}]],
|
||||
"expected": ["<h4>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by h5 start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h5", {}]],
|
||||
"expected": ["<h5>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by h6 start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h6", {}]],
|
||||
"expected": ["<h6>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by header start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "header", {}]],
|
||||
"expected": ["<header>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by hr empty-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["EmptyTag", "hr", {}]],
|
||||
"expected": ["<hr>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by menu start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "menu", {}]],
|
||||
"expected": ["<menu>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by nav start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "nav", {}]],
|
||||
"expected": ["<nav>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by ol start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "ol", {}]],
|
||||
"expected": ["<ol>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by p start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "p", {}]],
|
||||
"expected": ["<p>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by pre start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "pre", {}]],
|
||||
"expected": ["<pre>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by section start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "section", {}]],
|
||||
"expected": ["<section>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by table start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "table", {}]],
|
||||
"expected": ["<table>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by ul start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "ul", {}]],
|
||||
"expected": ["<ul>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag followed by end-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
||||
"expected": ["</foo>"]
|
||||
},
|
||||
|
||||
{"description": "p end-tag at EOF",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"]],
|
||||
"expected": [""]
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
{"description": "optgroup end-tag followed by comment",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["Comment", "foo"]],
|
||||
"expected": ["</optgroup><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "optgroup end-tag followed by space character",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["Characters", " foo"]],
|
||||
"expected": ["</optgroup> foo"]
|
||||
},
|
||||
|
||||
{"description": "optgroup end-tag followed by text",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["Characters", "foo"]],
|
||||
"expected": ["</optgroup>foo"]
|
||||
},
|
||||
|
||||
{"description": "optgroup end-tag followed by start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
||||
"expected": ["</optgroup><foo>"]
|
||||
},
|
||||
|
||||
{"description": "optgroup end-tag followed by optgroup start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "optgroup", {}]],
|
||||
"expected": ["<optgroup>"]
|
||||
},
|
||||
|
||||
{"description": "optgroup end-tag followed by end-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
||||
"expected": ["</foo>"]
|
||||
},
|
||||
|
||||
{"description": "optgroup end-tag at EOF",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"]],
|
||||
"expected": [""]
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
{"description": "option end-tag followed by comment",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["Comment", "foo"]],
|
||||
"expected": ["</option><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "option end-tag followed by space character",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["Characters", " foo"]],
|
||||
"expected": ["</option> foo"]
|
||||
},
|
||||
|
||||
{"description": "option end-tag followed by text",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["Characters", "foo"]],
|
||||
"expected": ["</option>foo"]
|
||||
},
|
||||
|
||||
{"description": "option end-tag followed by optgroup start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["StartTag", "http://www.w3.org/1999/xhtml", "optgroup", {}]],
|
||||
"expected": ["<optgroup>"]
|
||||
},
|
||||
|
||||
{"description": "option end-tag followed by start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
||||
"expected": ["</option><foo>"]
|
||||
},
|
||||
|
||||
{"description": "option end-tag followed by option start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["StartTag", "http://www.w3.org/1999/xhtml", "option", {}]],
|
||||
"expected": ["<option>"]
|
||||
},
|
||||
|
||||
{"description": "option end-tag followed by end-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
||||
"expected": ["</foo>"]
|
||||
},
|
||||
|
||||
{"description": "option end-tag at EOF",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"]],
|
||||
"expected": [""]
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
{"description": "colgroup start-tag followed by comment",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["Comment", "foo"]],
|
||||
"expected": ["<colgroup><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "colgroup start-tag followed by space character",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["Characters", " foo"]],
|
||||
"expected": ["<colgroup> foo"]
|
||||
},
|
||||
|
||||
{"description": "colgroup start-tag followed by text",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["Characters", "foo"]],
|
||||
"expected": ["<colgroup>foo"]
|
||||
},
|
||||
|
||||
{"description": "colgroup start-tag followed by start-tag",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
||||
"expected": ["<colgroup><foo>"]
|
||||
},
|
||||
|
||||
{"description": "first colgroup in a table with a col child",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "table", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["EmptyTag", "col", {}]],
|
||||
"expected": ["<table><col>"]
|
||||
},
|
||||
|
||||
{"description": "colgroup with a col child, following another colgroup",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "col", {}]],
|
||||
"expected": ["</colgroup><col>", "<colgroup><col>"]
|
||||
},
|
||||
|
||||
{"description": "colgroup start-tag followed by end-tag",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
||||
"expected": ["<colgroup></foo>"]
|
||||
},
|
||||
|
||||
{"description": "colgroup start-tag at EOF",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}]],
|
||||
"expected": ["<colgroup>"]
|
||||
},
|
||||
|
||||
|
||||
|
||||
{"description": "colgroup end-tag followed by comment",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["Comment", "foo"]],
|
||||
"expected": ["</colgroup><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "colgroup end-tag followed by space character",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["Characters", " foo"]],
|
||||
"expected": ["</colgroup> foo"]
|
||||
},
|
||||
|
||||
{"description": "colgroup end-tag followed by text",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["Characters", "foo"]],
|
||||
"expected": ["foo"]
|
||||
},
|
||||
|
||||
{"description": "colgroup end-tag followed by start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
||||
"expected": ["<foo>"]
|
||||
},
|
||||
|
||||
{"description": "colgroup end-tag followed by end-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
||||
"expected": ["</foo>"]
|
||||
},
|
||||
|
||||
{"description": "colgroup end-tag at EOF",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"]],
|
||||
"expected": [""]
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
{"description": "thead end-tag followed by comment",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["Comment", "foo"]],
|
||||
"expected": ["</thead><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "thead end-tag followed by space character",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["Characters", " foo"]],
|
||||
"expected": ["</thead> foo"]
|
||||
},
|
||||
|
||||
{"description": "thead end-tag followed by text",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["Characters", "foo"]],
|
||||
"expected": ["</thead>foo"]
|
||||
},
|
||||
|
||||
{"description": "thead end-tag followed by start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
||||
"expected": ["</thead><foo>"]
|
||||
},
|
||||
|
||||
{"description": "thead end-tag followed by tbody start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
|
||||
"expected": ["<tbody>"]
|
||||
},
|
||||
|
||||
{"description": "thead end-tag followed by tfoot start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "tfoot", {}]],
|
||||
"expected": ["<tfoot>"]
|
||||
},
|
||||
|
||||
{"description": "thead end-tag followed by end-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
||||
"expected": ["</thead></foo>"]
|
||||
},
|
||||
|
||||
{"description": "thead end-tag at EOF",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"]],
|
||||
"expected": ["</thead>"]
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
{"description": "tbody start-tag followed by comment",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["Comment", "foo"]],
|
||||
"expected": ["<tbody><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "tbody start-tag followed by space character",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["Characters", " foo"]],
|
||||
"expected": ["<tbody> foo"]
|
||||
},
|
||||
|
||||
{"description": "tbody start-tag followed by text",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["Characters", "foo"]],
|
||||
"expected": ["<tbody>foo"]
|
||||
},
|
||||
|
||||
{"description": "tbody start-tag followed by start-tag",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
||||
"expected": ["<tbody><foo>"]
|
||||
},
|
||||
|
||||
{"description": "first tbody in a table with a tr child",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "table", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
|
||||
"expected": ["<table><tr>"]
|
||||
},
|
||||
|
||||
{"description": "tbody with a tr child, following another tbody",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
|
||||
"expected": ["<tbody><tr>", "</tbody><tr>"]
|
||||
},
|
||||
|
||||
{"description": "tbody with a tr child, following a thead",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
|
||||
"expected": ["<tbody><tr>", "</thead><tr>"]
|
||||
},
|
||||
|
||||
{"description": "tbody with a tr child, following a tfoot",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
|
||||
"expected": ["<tbody><tr>", "</tfoot><tr>"]
|
||||
},
|
||||
|
||||
{"description": "tbody start-tag followed by end-tag",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
||||
"expected": ["<tbody></foo>"]
|
||||
},
|
||||
|
||||
{"description": "tbody start-tag at EOF",
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
|
||||
"expected": ["<tbody>"]
|
||||
},
|
||||
|
||||
|
||||
|
||||
{"description": "tbody end-tag followed by comment",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["Comment", "foo"]],
|
||||
"expected": ["</tbody><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "tbody end-tag followed by space character",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["Characters", " foo"]],
|
||||
"expected": ["</tbody> foo"]
|
||||
},
|
||||
|
||||
{"description": "tbody end-tag followed by text",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["Characters", "foo"]],
|
||||
"expected": ["</tbody>foo"]
|
||||
},
|
||||
|
||||
{"description": "tbody end-tag followed by start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
||||
"expected": ["</tbody><foo>"]
|
||||
},
|
||||
|
||||
{"description": "tbody end-tag followed by tbody start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
|
||||
"expected": ["<tbody>", "</tbody>"]
|
||||
},
|
||||
|
||||
{"description": "tbody end-tag followed by tfoot start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "tfoot", {}]],
|
||||
"expected": ["<tfoot>"]
|
||||
},
|
||||
|
||||
{"description": "tbody end-tag followed by end-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
||||
"expected": ["</foo>"]
|
||||
},
|
||||
|
||||
{"description": "tbody end-tag at EOF",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"]],
|
||||
"expected": [""]
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
{"description": "tfoot end-tag followed by comment",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["Comment", "foo"]],
|
||||
"expected": ["</tfoot><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "tfoot end-tag followed by space character",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["Characters", " foo"]],
|
||||
"expected": ["</tfoot> foo"]
|
||||
},
|
||||
|
||||
{"description": "tfoot end-tag followed by text",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["Characters", "foo"]],
|
||||
"expected": ["</tfoot>foo"]
|
||||
},
|
||||
|
||||
{"description": "tfoot end-tag followed by start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
||||
"expected": ["</tfoot><foo>"]
|
||||
},
|
||||
|
||||
{"description": "tfoot end-tag followed by tbody start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
|
||||
"expected": ["<tbody>", "</tfoot>"]
|
||||
},
|
||||
|
||||
{"description": "tfoot end-tag followed by end-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
||||
"expected": ["</foo>"]
|
||||
},
|
||||
|
||||
{"description": "tfoot end-tag at EOF",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"]],
|
||||
"expected": [""]
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
{"description": "tr end-tag followed by comment",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["Comment", "foo"]],
|
||||
"expected": ["</tr><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "tr end-tag followed by space character",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["Characters", " foo"]],
|
||||
"expected": ["</tr> foo"]
|
||||
},
|
||||
|
||||
{"description": "tr end-tag followed by text",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["Characters", "foo"]],
|
||||
"expected": ["</tr>foo"]
|
||||
},
|
||||
|
||||
{"description": "tr end-tag followed by start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
||||
"expected": ["</tr><foo>"]
|
||||
},
|
||||
|
||||
{"description": "tr end-tag followed by tr start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
|
||||
"expected": ["<tr>", "</tr>"]
|
||||
},
|
||||
|
||||
{"description": "tr end-tag followed by end-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
||||
"expected": ["</foo>"]
|
||||
},
|
||||
|
||||
{"description": "tr end-tag at EOF",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"]],
|
||||
"expected": [""]
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
{"description": "td end-tag followed by comment",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["Comment", "foo"]],
|
||||
"expected": ["</td><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "td end-tag followed by space character",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["Characters", " foo"]],
|
||||
"expected": ["</td> foo"]
|
||||
},
|
||||
|
||||
{"description": "td end-tag followed by text",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["Characters", "foo"]],
|
||||
"expected": ["</td>foo"]
|
||||
},
|
||||
|
||||
{"description": "td end-tag followed by start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
||||
"expected": ["</td><foo>"]
|
||||
},
|
||||
|
||||
{"description": "td end-tag followed by td start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["StartTag", "http://www.w3.org/1999/xhtml", "td", {}]],
|
||||
"expected": ["<td>", "</td>"]
|
||||
},
|
||||
|
||||
{"description": "td end-tag followed by th start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["StartTag", "http://www.w3.org/1999/xhtml", "th", {}]],
|
||||
"expected": ["<th>", "</td>"]
|
||||
},
|
||||
|
||||
{"description": "td end-tag followed by end-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
||||
"expected": ["</foo>"]
|
||||
},
|
||||
|
||||
{"description": "td end-tag at EOF",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"]],
|
||||
"expected": [""]
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
{"description": "th end-tag followed by comment",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["Comment", "foo"]],
|
||||
"expected": ["</th><!--foo-->"]
|
||||
},
|
||||
|
||||
{"description": "th end-tag followed by space character",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["Characters", " foo"]],
|
||||
"expected": ["</th> foo"]
|
||||
},
|
||||
|
||||
{"description": "th end-tag followed by text",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["Characters", "foo"]],
|
||||
"expected": ["</th>foo"]
|
||||
},
|
||||
|
||||
{"description": "th end-tag followed by start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
|
||||
"expected": ["</th><foo>"]
|
||||
},
|
||||
|
||||
{"description": "th end-tag followed by th start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["StartTag", "http://www.w3.org/1999/xhtml", "th", {}]],
|
||||
"expected": ["<th>", "</th>"]
|
||||
},
|
||||
|
||||
{"description": "th end-tag followed by td start-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["StartTag", "http://www.w3.org/1999/xhtml", "td", {}]],
|
||||
"expected": ["<td>", "</th>"]
|
||||
},
|
||||
|
||||
{"description": "th end-tag followed by end-tag",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
|
||||
"expected": ["</foo>"]
|
||||
},
|
||||
|
||||
{"description": "th end-tag at EOF",
|
||||
"input": [["EndTag", "http://www.w3.org/1999/xhtml" , "th"]],
|
||||
"expected": [""]
|
||||
}
|
||||
|
||||
]}
|
|
@ -1,60 +0,0 @@
|
|||
{"tests":[
|
||||
|
||||
{"description": "quote_char=\"'\"",
|
||||
"options": {"quote_char": "'"},
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "test 'with' quote_char"}]]],
|
||||
"expected": ["<span title='test 'with' quote_char'>"]
|
||||
},
|
||||
|
||||
{"description": "quote_attr_values=true",
|
||||
"options": {"quote_attr_values": true},
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "button", [{"namespace": null, "name": "disabled", "value" :"disabled"}]]],
|
||||
"expected": ["<button disabled>"],
|
||||
"xhtml": ["<button disabled=\"disabled\">"]
|
||||
},
|
||||
|
||||
{"description": "quote_attr_values=true with irrelevant",
|
||||
"options": {"quote_attr_values": true},
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :"irrelevant"}]]],
|
||||
"expected": ["<div irrelevant>"],
|
||||
"xhtml": ["<div irrelevant=\"irrelevant\">"]
|
||||
},
|
||||
|
||||
{"description": "use_trailing_solidus=true with void element",
|
||||
"options": {"use_trailing_solidus": true},
|
||||
"input": [["EmptyTag", "img", {}]],
|
||||
"expected": ["<img />"]
|
||||
},
|
||||
|
||||
{"description": "use_trailing_solidus=true with non-void element",
|
||||
"options": {"use_trailing_solidus": true},
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", {}]],
|
||||
"expected": ["<div>"]
|
||||
},
|
||||
|
||||
{"description": "minimize_boolean_attributes=false",
|
||||
"options": {"minimize_boolean_attributes": false},
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :"irrelevant"}]]],
|
||||
"expected": ["<div irrelevant=irrelevant>"],
|
||||
"xhtml": ["<div irrelevant=\"irrelevant\">"]
|
||||
},
|
||||
|
||||
{"description": "minimize_boolean_attributes=false with empty value",
|
||||
"options": {"minimize_boolean_attributes": false},
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :""}]]],
|
||||
"expected": ["<div irrelevant=\"\">"]
|
||||
},
|
||||
|
||||
{"description": "escape less than signs in attribute values",
|
||||
"options": {"escape_lt_in_attrs": true},
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "a", [{"namespace": null, "name": "title", "value": "a<b>c&d"}]]],
|
||||
"expected": ["<a title=\"a<b>c&d\">"]
|
||||
},
|
||||
|
||||
{"description": "rcdata",
|
||||
"options": {"escape_rcdata": true},
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "script", {}], ["Characters", "a<b>c&d"]],
|
||||
"expected": ["<script>a<b>c&d"]
|
||||
}
|
||||
|
||||
]}
|
|
@ -1,51 +0,0 @@
|
|||
{"tests": [
|
||||
|
||||
{"description": "bare text with leading spaces",
|
||||
"options": {"strip_whitespace": true},
|
||||
"input": [["Characters", "\t\r\n\u000C foo"]],
|
||||
"expected": [" foo"]
|
||||
},
|
||||
|
||||
{"description": "bare text with trailing spaces",
|
||||
"options": {"strip_whitespace": true},
|
||||
"input": [["Characters", "foo \t\r\n\u000C"]],
|
||||
"expected": ["foo "]
|
||||
},
|
||||
|
||||
{"description": "bare text with inner spaces",
|
||||
"options": {"strip_whitespace": true},
|
||||
"input": [["Characters", "foo \t\r\n\u000C bar"]],
|
||||
"expected": ["foo bar"]
|
||||
},
|
||||
|
||||
{"description": "text within <pre>",
|
||||
"options": {"strip_whitespace": true},
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "pre", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "pre"]],
|
||||
"expected": ["<pre>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</pre>"]
|
||||
},
|
||||
|
||||
{"description": "text within <pre>, with inner markup",
|
||||
"options": {"strip_whitespace": true},
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "pre", {}], ["Characters", "\t\r\n\u000C fo"], ["StartTag", "http://www.w3.org/1999/xhtml", "span", {}], ["Characters", "o \t\r\n\u000C b"], ["EndTag", "http://www.w3.org/1999/xhtml", "span"], ["Characters", "ar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "pre"]],
|
||||
"expected": ["<pre>\t\r\n\u000C fo<span>o \t\r\n\u000C b</span>ar \t\r\n\u000C</pre>"]
|
||||
},
|
||||
|
||||
{"description": "text within <textarea>",
|
||||
"options": {"strip_whitespace": true},
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "textarea", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "textarea"]],
|
||||
"expected": ["<textarea>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</textarea>"]
|
||||
},
|
||||
|
||||
{"description": "text within <script>",
|
||||
"options": {"strip_whitespace": true},
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "script", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "script"]],
|
||||
"expected": ["<script>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</script>"]
|
||||
},
|
||||
|
||||
{"description": "text within <style>",
|
||||
"options": {"strip_whitespace": true},
|
||||
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "style"]],
|
||||
"expected": ["<style>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</style>"]
|
||||
}
|
||||
|
||||
]}
|
104
libs/html5lib/tests/testdata/tokenizer/README.md
vendored
104
libs/html5lib/tests/testdata/tokenizer/README.md
vendored
|
@ -1,104 +0,0 @@
|
|||
Tokenizer tests
|
||||
===============
|
||||
|
||||
The test format is [JSON](http://www.json.org/). This has the advantage
|
||||
that the syntax allows backward-compatible extensions to the tests and
|
||||
the disadvantage that it is relatively verbose.
|
||||
|
||||
Basic Structure
|
||||
---------------
|
||||
|
||||
{"tests": [
|
||||
{"description": "Test description",
|
||||
"input": "input_string",
|
||||
"output": [expected_output_tokens],
|
||||
"initialStates": [initial_states],
|
||||
"lastStartTag": last_start_tag,
|
||||
"ignoreErrorOrder": ignore_error_order
|
||||
}
|
||||
]}
|
||||
|
||||
Multiple tests per file are allowed simply by adding more objects to the
|
||||
"tests" list.
|
||||
|
||||
`description`, `input` and `output` are always present. The other values
|
||||
are optional.
|
||||
|
||||
### Test set-up
|
||||
|
||||
`test.input` is a string containing the characters to pass to the
|
||||
tokenizer. Specifically, it represents the characters of the **input
|
||||
stream**, and so implementations are expected to perform the processing
|
||||
described in the spec's **Preprocessing the input stream** section
|
||||
before feeding the result to the tokenizer.
|
||||
|
||||
If `test.doubleEscaped` is present and `true`, then `test.input` is not
|
||||
quite as described above. Instead, it must first be subjected to another
|
||||
round of unescaping (i.e., in addition to any unescaping involved in the
|
||||
JSON import), and the result of *that* represents the characters of the
|
||||
input stream. Currently, the only unescaping required by this option is
|
||||
to convert each sequence of the form \\uHHHH (where H is a hex digit)
|
||||
into the corresponding Unicode code point. (Note that this option also
|
||||
affects the interpretation of `test.output`.)
|
||||
|
||||
`test.initialStates` is a list of strings, each being the name of a
|
||||
tokenizer state. The test should be run once for each string, using it
|
||||
to set the tokenizer's initial state for that run. If
|
||||
`test.initialStates` is omitted, it defaults to `["data state"]`.
|
||||
|
||||
`test.lastStartTag` is a lowercase string that should be used as "the
|
||||
tag name of the last start tag to have been emitted from this
|
||||
tokenizer", referenced in the spec's definition of **appropriate end tag
|
||||
token**. If it is omitted, it is treated as if "no start tag has been
|
||||
emitted from this tokenizer".
|
||||
|
||||
### Test results
|
||||
|
||||
`test.output` is a list of tokens, ordered with the first produced by
|
||||
the tokenizer the first (leftmost) in the list. The list must match the
|
||||
**complete** list of tokens that the tokenizer should produce. Valid
|
||||
tokens are:
|
||||
|
||||
["DOCTYPE", name, public_id, system_id, correctness]
|
||||
["StartTag", name, {attributes}*, true*]
|
||||
["StartTag", name, {attributes}]
|
||||
["EndTag", name]
|
||||
["Comment", data]
|
||||
["Character", data]
|
||||
"ParseError"
|
||||
|
||||
`public_id` and `system_id` are either strings or `null`. `correctness`
|
||||
is either `true` or `false`; `true` corresponds to the force-quirks flag
|
||||
being false, and vice-versa.
|
||||
|
||||
When the self-closing flag is set, the `StartTag` array has `true` as
|
||||
its fourth entry. When the flag is not set, the array has only three
|
||||
entries for backwards compatibility.
|
||||
|
||||
All adjacent character tokens are coalesced into a single
|
||||
`["Character", data]` token.
|
||||
|
||||
If `test.doubleEscaped` is present and `true`, then every string within
|
||||
`test.output` must be further unescaped (as described above) before
|
||||
comparing with the tokenizer's output.
|
||||
|
||||
`test.ignoreErrorOrder` is a boolean value indicating that the order of
|
||||
`ParseError` tokens relative to other tokens in the output stream is
|
||||
unimportant, and implementations should ignore such differences between
|
||||
their output and `expected_output_tokens`. (This is used for errors
|
||||
emitted by the input stream preprocessing stage, since it is useful to
|
||||
test that code but it is undefined when the errors occur). If it is
|
||||
omitted, it defaults to `false`.
|
||||
|
||||
xmlViolation tests
|
||||
------------------
|
||||
|
||||
`tokenizer/xmlViolation.test` differs from the above in a couple of
|
||||
ways:
|
||||
|
||||
- The name of the single member of the top-level JSON object is
|
||||
"xmlViolationTests" instead of "tests".
|
||||
- Each test's expected output assumes that the implementation is applying
|
||||
the tweaks given in the spec's "Coercing an HTML DOM into an
|
||||
infoset" section.
|
||||
|
|
@ -1,81 +0,0 @@
|
|||
{"tests": [
|
||||
|
||||
{"description":"PLAINTEXT content model flag",
|
||||
"initialStates":["PLAINTEXT state"],
|
||||
"lastStartTag":"plaintext",
|
||||
"input":"<head>&body;",
|
||||
"output":[["Character", "<head>&body;"]]},
|
||||
|
||||
{"description":"End tag closing RCDATA or RAWTEXT",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"foo</xmp>",
|
||||
"output":[["Character", "foo"], ["EndTag", "xmp"]]},
|
||||
|
||||
{"description":"End tag closing RCDATA or RAWTEXT (case-insensitivity)",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"foo</xMp>",
|
||||
"output":[["Character", "foo"], ["EndTag", "xmp"]]},
|
||||
|
||||
{"description":"End tag closing RCDATA or RAWTEXT (ending with space)",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"foo</xmp ",
|
||||
"output":[["Character", "foo"], "ParseError"]},
|
||||
|
||||
{"description":"End tag closing RCDATA or RAWTEXT (ending with EOF)",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"foo</xmp",
|
||||
"output":[["Character", "foo</xmp"]]},
|
||||
|
||||
{"description":"End tag closing RCDATA or RAWTEXT (ending with slash)",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"foo</xmp/",
|
||||
"output":[["Character", "foo"], "ParseError"]},
|
||||
|
||||
{"description":"End tag not closing RCDATA or RAWTEXT (ending with left-angle-bracket)",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"foo</xmp<",
|
||||
"output":[["Character", "foo</xmp<"]]},
|
||||
|
||||
{"description":"End tag with incorrect name in RCDATA or RAWTEXT",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"</foo>bar</xmp>",
|
||||
"output":[["Character", "</foo>bar"], ["EndTag", "xmp"]]},
|
||||
|
||||
{"description":"Partial end tags leading straight into partial end tags",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"</xmp</xmp</xmp>",
|
||||
"output":[["Character", "</xmp</xmp"], ["EndTag", "xmp"]]},
|
||||
|
||||
{"description":"End tag with incorrect name in RCDATA or RAWTEXT (starting like correct name)",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"</foo>bar</xmpaar>",
|
||||
"output":[["Character", "</foo>bar</xmpaar>"]]},
|
||||
|
||||
{"description":"End tag closing RCDATA or RAWTEXT, switching back to PCDATA",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"foo</xmp></baz>",
|
||||
"output":[["Character", "foo"], ["EndTag", "xmp"], ["EndTag", "baz"]]},
|
||||
|
||||
{"description":"RAWTEXT w/ something looking like an entity",
|
||||
"initialStates":["RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"&foo;",
|
||||
"output":[["Character", "&foo;"]]},
|
||||
|
||||
{"description":"RCDATA w/ an entity",
|
||||
"initialStates":["RCDATA state"],
|
||||
"lastStartTag":"textarea",
|
||||
"input":"&lt;",
|
||||
"output":[["Character", "<"]]}
|
||||
|
||||
]}
|
|
@ -1,96 +0,0 @@
|
|||
{
|
||||
"tests": [
|
||||
{
|
||||
"description":"CR in bogus comment state",
|
||||
"input":"<?\u000d",
|
||||
"output":["ParseError", ["Comment", "?\u000a"]]
|
||||
},
|
||||
{
|
||||
"description":"CRLF in bogus comment state",
|
||||
"input":"<?\u000d\u000a",
|
||||
"output":["ParseError", ["Comment", "?\u000a"]]
|
||||
},
|
||||
{
|
||||
"description":"CRLFLF in bogus comment state",
|
||||
"input":"<?\u000d\u000a\u000a",
|
||||
"output":["ParseError", ["Comment", "?\u000a\u000a"]]
|
||||
},
|
||||
{
|
||||
"description":"NUL in RCDATA and RAWTEXT",
|
||||
"doubleEscaped":true,
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"input":"\\u0000",
|
||||
"output":["ParseError", ["Character", "\\uFFFD"]]
|
||||
},
|
||||
{
|
||||
"description":"leading U+FEFF must pass through",
|
||||
"doubleEscaped":true,
|
||||
"input":"\\uFEFFfoo\\uFEFFbar",
|
||||
"output":[["Character", "\\uFEFFfoo\\uFEFFbar"]]
|
||||
},
|
||||
{
|
||||
"description":"Non BMP-charref in in RCDATA",
|
||||
"initialStates":["RCDATA state"],
|
||||
"input":"≂̸",
|
||||
"output":[["Character", "\u2242\u0338"]]
|
||||
},
|
||||
{
|
||||
"description":"Bad charref in in RCDATA",
|
||||
"initialStates":["RCDATA state"],
|
||||
"input":"&NotEqualTild;",
|
||||
"output":["ParseError", ["Character", "&NotEqualTild;"]]
|
||||
},
|
||||
{
|
||||
"description":"lowercase endtags in RCDATA and RAWTEXT",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"</XMP>",
|
||||
"output":[["EndTag","xmp"]]
|
||||
},
|
||||
{
|
||||
"description":"bad endtag in RCDATA and RAWTEXT",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"</ XMP>",
|
||||
"output":[["Character","</ XMP>"]]
|
||||
},
|
||||
{
|
||||
"description":"bad endtag in RCDATA and RAWTEXT",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"</xm>",
|
||||
"output":[["Character","</xm>"]]
|
||||
},
|
||||
{
|
||||
"description":"bad endtag in RCDATA and RAWTEXT",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"</xm ",
|
||||
"output":[["Character","</xm "]]
|
||||
},
|
||||
{
|
||||
"description":"bad endtag in RCDATA and RAWTEXT",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"</xm/",
|
||||
"output":[["Character","</xm/"]]
|
||||
},
|
||||
{
|
||||
"description":"Non BMP-charref in attribute",
|
||||
"input":"<p id=\"≂̸\">",
|
||||
"output":[["StartTag", "p", {"id":"\u2242\u0338"}]]
|
||||
},
|
||||
{
|
||||
"description":"--!NUL in comment ",
|
||||
"doubleEscaped":true,
|
||||
"input":"<!----!\\u0000-->",
|
||||
"output":["ParseError", "ParseError", ["Comment", "--!\\uFFFD"]]
|
||||
},
|
||||
{
|
||||
"description":"space EOF after doctype ",
|
||||
"input":"<!DOCTYPE html ",
|
||||
"output":["ParseError", ["DOCTYPE", "html", null, null , false]]
|
||||
}
|
||||
|
||||
]
|
||||
}
|
283
libs/html5lib/tests/testdata/tokenizer/entities.test
vendored
283
libs/html5lib/tests/testdata/tokenizer/entities.test
vendored
|
@ -1,283 +0,0 @@
|
|||
{"tests": [
|
||||
|
||||
{"description": "Undefined named entity in attribute value ending in semicolon and whose name starts with a known entity name.",
|
||||
"input":"<h a='¬i;'>",
|
||||
"output": [["StartTag", "h", {"a": "¬i;"}]]},
|
||||
|
||||
{"description": "Entity name followed by the equals sign in an attribute value.",
|
||||
"input":"<h a='&lang='>",
|
||||
"output": [["StartTag", "h", {"a": "&lang="}]]},
|
||||
|
||||
{"description": "CR as numeric entity",
|
||||
"input":"
",
|
||||
"output": ["ParseError", ["Character", "\r"]]},
|
||||
|
||||
{"description": "CR as hexadecimal numeric entity",
|
||||
"input":"
",
|
||||
"output": ["ParseError", ["Character", "\r"]]},
|
||||
|
||||
{"description": "Windows-1252 EURO SIGN numeric entity.",
|
||||
"input":"€",
|
||||
"output": ["ParseError", ["Character", "\u20AC"]]},
|
||||
|
||||
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
|
||||
"input":"",
|
||||
"output": ["ParseError", ["Character", "\u0081"]]},
|
||||
|
||||
{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK numeric entity.",
|
||||
"input":"‚",
|
||||
"output": ["ParseError", ["Character", "\u201A"]]},
|
||||
|
||||
{"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK numeric entity.",
|
||||
"input":"ƒ",
|
||||
"output": ["ParseError", ["Character", "\u0192"]]},
|
||||
|
||||
{"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK numeric entity.",
|
||||
"input":"„",
|
||||
"output": ["ParseError", ["Character", "\u201E"]]},
|
||||
|
||||
{"description": "Windows-1252 HORIZONTAL ELLIPSIS numeric entity.",
|
||||
"input":"…",
|
||||
"output": ["ParseError", ["Character", "\u2026"]]},
|
||||
|
||||
{"description": "Windows-1252 DAGGER numeric entity.",
|
||||
"input":"†",
|
||||
"output": ["ParseError", ["Character", "\u2020"]]},
|
||||
|
||||
{"description": "Windows-1252 DOUBLE DAGGER numeric entity.",
|
||||
"input":"‡",
|
||||
"output": ["ParseError", ["Character", "\u2021"]]},
|
||||
|
||||
{"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT numeric entity.",
|
||||
"input":"ˆ",
|
||||
"output": ["ParseError", ["Character", "\u02C6"]]},
|
||||
|
||||
{"description": "Windows-1252 PER MILLE SIGN numeric entity.",
|
||||
"input":"‰",
|
||||
"output": ["ParseError", ["Character", "\u2030"]]},
|
||||
|
||||
{"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON numeric entity.",
|
||||
"input":"Š",
|
||||
"output": ["ParseError", ["Character", "\u0160"]]},
|
||||
|
||||
{"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK numeric entity.",
|
||||
"input":"‹",
|
||||
"output": ["ParseError", ["Character", "\u2039"]]},
|
||||
|
||||
{"description": "Windows-1252 LATIN CAPITAL LIGATURE OE numeric entity.",
|
||||
"input":"Œ",
|
||||
"output": ["ParseError", ["Character", "\u0152"]]},
|
||||
|
||||
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
|
||||
"input":"",
|
||||
"output": ["ParseError", ["Character", "\u008D"]]},
|
||||
|
||||
{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON numeric entity.",
|
||||
"input":"Ž",
|
||||
"output": ["ParseError", ["Character", "\u017D"]]},
|
||||
|
||||
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
|
||||
"input":"",
|
||||
"output": ["ParseError", ["Character", "\u008F"]]},
|
||||
|
||||
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
|
||||
"input":"",
|
||||
"output": ["ParseError", ["Character", "\u0090"]]},
|
||||
|
||||
{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK numeric entity.",
|
||||
"input":"‘",
|
||||
"output": ["ParseError", ["Character", "\u2018"]]},
|
||||
|
||||
{"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK numeric entity.",
|
||||
"input":"’",
|
||||
"output": ["ParseError", ["Character", "\u2019"]]},
|
||||
|
||||
{"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK numeric entity.",
|
||||
"input":"“",
|
||||
"output": ["ParseError", ["Character", "\u201C"]]},
|
||||
|
||||
{"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK numeric entity.",
|
||||
"input":"”",
|
||||
"output": ["ParseError", ["Character", "\u201D"]]},
|
||||
|
||||
{"description": "Windows-1252 BULLET numeric entity.",
|
||||
"input":"•",
|
||||
"output": ["ParseError", ["Character", "\u2022"]]},
|
||||
|
||||
{"description": "Windows-1252 EN DASH numeric entity.",
|
||||
"input":"–",
|
||||
"output": ["ParseError", ["Character", "\u2013"]]},
|
||||
|
||||
{"description": "Windows-1252 EM DASH numeric entity.",
|
||||
"input":"—",
|
||||
"output": ["ParseError", ["Character", "\u2014"]]},
|
||||
|
||||
{"description": "Windows-1252 SMALL TILDE numeric entity.",
|
||||
"input":"˜",
|
||||
"output": ["ParseError", ["Character", "\u02DC"]]},
|
||||
|
||||
{"description": "Windows-1252 TRADE MARK SIGN numeric entity.",
|
||||
"input":"™",
|
||||
"output": ["ParseError", ["Character", "\u2122"]]},
|
||||
|
||||
{"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON numeric entity.",
|
||||
"input":"š",
|
||||
"output": ["ParseError", ["Character", "\u0161"]]},
|
||||
|
||||
{"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK numeric entity.",
|
||||
"input":"›",
|
||||
"output": ["ParseError", ["Character", "\u203A"]]},
|
||||
|
||||
{"description": "Windows-1252 LATIN SMALL LIGATURE OE numeric entity.",
|
||||
"input":"œ",
|
||||
"output": ["ParseError", ["Character", "\u0153"]]},
|
||||
|
||||
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
|
||||
"input":"",
|
||||
"output": ["ParseError", ["Character", "\u009D"]]},
|
||||
|
||||
{"description": "Windows-1252 EURO SIGN hexadecimal numeric entity.",
|
||||
"input":"€",
|
||||
"output": ["ParseError", ["Character", "\u20AC"]]},
|
||||
|
||||
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
|
||||
"input":"",
|
||||
"output": ["ParseError", ["Character", "\u0081"]]},
|
||||
|
||||
{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK hexadecimal numeric entity.",
|
||||
"input":"‚",
|
||||
"output": ["ParseError", ["Character", "\u201A"]]},
|
||||
|
||||
{"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK hexadecimal numeric entity.",
|
||||
"input":"ƒ",
|
||||
"output": ["ParseError", ["Character", "\u0192"]]},
|
||||
|
||||
{"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK hexadecimal numeric entity.",
|
||||
"input":"„",
|
||||
"output": ["ParseError", ["Character", "\u201E"]]},
|
||||
|
||||
{"description": "Windows-1252 HORIZONTAL ELLIPSIS hexadecimal numeric entity.",
|
||||
"input":"…",
|
||||
"output": ["ParseError", ["Character", "\u2026"]]},
|
||||
|
||||
{"description": "Windows-1252 DAGGER hexadecimal numeric entity.",
|
||||
"input":"†",
|
||||
"output": ["ParseError", ["Character", "\u2020"]]},
|
||||
|
||||
{"description": "Windows-1252 DOUBLE DAGGER hexadecimal numeric entity.",
|
||||
"input":"‡",
|
||||
"output": ["ParseError", ["Character", "\u2021"]]},
|
||||
|
||||
{"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT hexadecimal numeric entity.",
|
||||
"input":"ˆ",
|
||||
"output": ["ParseError", ["Character", "\u02C6"]]},
|
||||
|
||||
{"description": "Windows-1252 PER MILLE SIGN hexadecimal numeric entity.",
|
||||
"input":"‰",
|
||||
"output": ["ParseError", ["Character", "\u2030"]]},
|
||||
|
||||
{"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON hexadecimal numeric entity.",
|
||||
"input":"Š",
|
||||
"output": ["ParseError", ["Character", "\u0160"]]},
|
||||
|
||||
{"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.",
|
||||
"input":"‹",
|
||||
"output": ["ParseError", ["Character", "\u2039"]]},
|
||||
|
||||
{"description": "Windows-1252 LATIN CAPITAL LIGATURE OE hexadecimal numeric entity.",
|
||||
"input":"Œ",
|
||||
"output": ["ParseError", ["Character", "\u0152"]]},
|
||||
|
||||
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
|
||||
"input":"",
|
||||
"output": ["ParseError", ["Character", "\u008D"]]},
|
||||
|
||||
{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON hexadecimal numeric entity.",
|
||||
"input":"Ž",
|
||||
"output": ["ParseError", ["Character", "\u017D"]]},
|
||||
|
||||
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
|
||||
"input":"",
|
||||
"output": ["ParseError", ["Character", "\u008F"]]},
|
||||
|
||||
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
|
||||
"input":"",
|
||||
"output": ["ParseError", ["Character", "\u0090"]]},
|
||||
|
||||
{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK hexadecimal numeric entity.",
|
||||
"input":"‘",
|
||||
"output": ["ParseError", ["Character", "\u2018"]]},
|
||||
|
||||
{"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK hexadecimal numeric entity.",
|
||||
"input":"’",
|
||||
"output": ["ParseError", ["Character", "\u2019"]]},
|
||||
|
||||
{"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK hexadecimal numeric entity.",
|
||||
"input":"“",
|
||||
"output": ["ParseError", ["Character", "\u201C"]]},
|
||||
|
||||
{"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK hexadecimal numeric entity.",
|
||||
"input":"”",
|
||||
"output": ["ParseError", ["Character", "\u201D"]]},
|
||||
|
||||
{"description": "Windows-1252 BULLET hexadecimal numeric entity.",
|
||||
"input":"•",
|
||||
"output": ["ParseError", ["Character", "\u2022"]]},
|
||||
|
||||
{"description": "Windows-1252 EN DASH hexadecimal numeric entity.",
|
||||
"input":"–",
|
||||
"output": ["ParseError", ["Character", "\u2013"]]},
|
||||
|
||||
{"description": "Windows-1252 EM DASH hexadecimal numeric entity.",
|
||||
"input":"—",
|
||||
"output": ["ParseError", ["Character", "\u2014"]]},
|
||||
|
||||
{"description": "Windows-1252 SMALL TILDE hexadecimal numeric entity.",
|
||||
"input":"˜",
|
||||
"output": ["ParseError", ["Character", "\u02DC"]]},
|
||||
|
||||
{"description": "Windows-1252 TRADE MARK SIGN hexadecimal numeric entity.",
|
||||
"input":"™",
|
||||
"output": ["ParseError", ["Character", "\u2122"]]},
|
||||
|
||||
{"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON hexadecimal numeric entity.",
|
||||
"input":"š",
|
||||
"output": ["ParseError", ["Character", "\u0161"]]},
|
||||
|
||||
{"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.",
|
||||
"input":"›",
|
||||
"output": ["ParseError", ["Character", "\u203A"]]},
|
||||
|
||||
{"description": "Windows-1252 LATIN SMALL LIGATURE OE hexadecimal numeric entity.",
|
||||
"input":"œ",
|
||||
"output": ["ParseError", ["Character", "\u0153"]]},
|
||||
|
||||
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
|
||||
"input":"",
|
||||
"output": ["ParseError", ["Character", "\u009D"]]},
|
||||
|
||||
{"description": "Windows-1252 LATIN SMALL LETTER Z WITH CARON hexadecimal numeric entity.",
|
||||
"input":"ž",
|
||||
"output": ["ParseError", ["Character", "\u017E"]]},
|
||||
|
||||
{"description": "Windows-1252 LATIN CAPITAL LETTER Y WITH DIAERESIS hexadecimal numeric entity.",
|
||||
"input":"Ÿ",
|
||||
"output": ["ParseError", ["Character", "\u0178"]]},
|
||||
|
||||
{"description": "Decimal numeric entity followed by hex character a.",
|
||||
"input":"aa",
|
||||
"output": ["ParseError", ["Character", "aa"]]},
|
||||
|
||||
{"description": "Decimal numeric entity followed by hex character A.",
|
||||
"input":"aA",
|
||||
"output": ["ParseError", ["Character", "aA"]]},
|
||||
|
||||
{"description": "Decimal numeric entity followed by hex character f.",
|
||||
"input":"af",
|
||||
"output": ["ParseError", ["Character", "af"]]},
|
||||
|
||||
{"description": "Decimal numeric entity followed by hex character A.",
|
||||
"input":"aF",
|
||||
"output": ["ParseError", ["Character", "aF"]]}
|
||||
|
||||
]}
|
|
@ -1,33 +0,0 @@
|
|||
{"tests": [
|
||||
|
||||
{"description":"Commented close tag in RCDATA or RAWTEXT",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"foo<!--</xmp>--></xmp>",
|
||||
"output":[["Character", "foo<!--"], ["EndTag", "xmp"], ["Character", "-->"], ["EndTag", "xmp"]]},
|
||||
|
||||
{"description":"Bogus comment in RCDATA or RAWTEXT",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"foo<!-->baz</xmp>",
|
||||
"output":[["Character", "foo<!-->baz"], ["EndTag", "xmp"]]},
|
||||
|
||||
{"description":"End tag surrounded by bogus comment in RCDATA or RAWTEXT",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"foo<!--></xmp><!-->baz</xmp>",
|
||||
"output":[["Character", "foo<!-->"], ["EndTag", "xmp"], "ParseError", ["Comment", ""], ["Character", "baz"], ["EndTag", "xmp"]]},
|
||||
|
||||
{"description":"Commented entities in RCDATA",
|
||||
"initialStates":["RCDATA state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":" & <!-- & --> & </xmp>",
|
||||
"output":[["Character", " & <!-- & --> & "], ["EndTag", "xmp"]]},
|
||||
|
||||
{"description":"Incorrect comment ending sequences in RCDATA or RAWTEXT",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"foo<!-- x --x>x-- >x--!>x--<></xmp>",
|
||||
"output":[["Character", "foo<!-- x --x>x-- >x--!>x--<>"], ["EndTag", "xmp"]]}
|
||||
|
||||
]}
|
42210
libs/html5lib/tests/testdata/tokenizer/namedEntities.test
vendored
42210
libs/html5lib/tests/testdata/tokenizer/namedEntities.test
vendored
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
@ -1,7 +0,0 @@
|
|||
{"tests": [
|
||||
|
||||
{"description":"<!---- >",
|
||||
"input":"<!---- >",
|
||||
"output":["ParseError", "ParseError", ["Comment","-- >"]]}
|
||||
|
||||
]}
|
196
libs/html5lib/tests/testdata/tokenizer/test1.test
vendored
196
libs/html5lib/tests/testdata/tokenizer/test1.test
vendored
|
@ -1,196 +0,0 @@
|
|||
{"tests": [
|
||||
|
||||
{"description":"Correct Doctype lowercase",
|
||||
"input":"<!DOCTYPE html>",
|
||||
"output":[["DOCTYPE", "html", null, null, true]]},
|
||||
|
||||
{"description":"Correct Doctype uppercase",
|
||||
"input":"<!DOCTYPE HTML>",
|
||||
"output":[["DOCTYPE", "html", null, null, true]]},
|
||||
|
||||
{"description":"Correct Doctype mixed case",
|
||||
"input":"<!DOCTYPE HtMl>",
|
||||
"output":[["DOCTYPE", "html", null, null, true]]},
|
||||
|
||||
{"description":"Correct Doctype case with EOF",
|
||||
"input":"<!DOCTYPE HtMl",
|
||||
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
||||
|
||||
{"description":"Truncated doctype start",
|
||||
"input":"<!DOC>",
|
||||
"output":["ParseError", ["Comment", "DOC"]]},
|
||||
|
||||
{"description":"Doctype in error",
|
||||
"input":"<!DOCTYPE foo>",
|
||||
"output":[["DOCTYPE", "foo", null, null, true]]},
|
||||
|
||||
{"description":"Single Start Tag",
|
||||
"input":"<h>",
|
||||
"output":[["StartTag", "h", {}]]},
|
||||
|
||||
{"description":"Empty end tag",
|
||||
"input":"</>",
|
||||
"output":["ParseError"]},
|
||||
|
||||
{"description":"Empty start tag",
|
||||
"input":"<>",
|
||||
"output":["ParseError", ["Character", "<>"]]},
|
||||
|
||||
{"description":"Start Tag w/attribute",
|
||||
"input":"<h a='b'>",
|
||||
"output":[["StartTag", "h", {"a":"b"}]]},
|
||||
|
||||
{"description":"Start Tag w/attribute no quotes",
|
||||
"input":"<h a=b>",
|
||||
"output":[["StartTag", "h", {"a":"b"}]]},
|
||||
|
||||
{"description":"Start/End Tag",
|
||||
"input":"<h></h>",
|
||||
"output":[["StartTag", "h", {}], ["EndTag", "h"]]},
|
||||
|
||||
{"description":"Two unclosed start tags",
|
||||
"input":"<p>One<p>Two",
|
||||
"output":[["StartTag", "p", {}], ["Character", "One"], ["StartTag", "p", {}], ["Character", "Two"]]},
|
||||
|
||||
{"description":"End Tag w/attribute",
|
||||
"input":"<h></h a='b'>",
|
||||
"output":[["StartTag", "h", {}], "ParseError", ["EndTag", "h"]]},
|
||||
|
||||
{"description":"Multiple atts",
|
||||
"input":"<h a='b' c='d'>",
|
||||
"output":[["StartTag", "h", {"a":"b", "c":"d"}]]},
|
||||
|
||||
{"description":"Multiple atts no space",
|
||||
"input":"<h a='b'c='d'>",
|
||||
"output":["ParseError", ["StartTag", "h", {"a":"b", "c":"d"}]]},
|
||||
|
||||
{"description":"Repeated attr",
|
||||
"input":"<h a='b' a='d'>",
|
||||
"output":["ParseError", ["StartTag", "h", {"a":"b"}]]},
|
||||
|
||||
{"description":"Simple comment",
|
||||
"input":"<!--comment-->",
|
||||
"output":[["Comment", "comment"]]},
|
||||
|
||||
{"description":"Comment, Central dash no space",
|
||||
"input":"<!----->",
|
||||
"output":["ParseError", ["Comment", "-"]]},
|
||||
|
||||
{"description":"Comment, two central dashes",
|
||||
"input":"<!-- --comment -->",
|
||||
"output":["ParseError", ["Comment", " --comment "]]},
|
||||
|
||||
{"description":"Unfinished comment",
|
||||
"input":"<!--comment",
|
||||
"output":["ParseError", ["Comment", "comment"]]},
|
||||
|
||||
{"description":"Start of a comment",
|
||||
"input":"<!-",
|
||||
"output":["ParseError", ["Comment", "-"]]},
|
||||
|
||||
{"description":"Short comment",
|
||||
"input":"<!-->",
|
||||
"output":["ParseError", ["Comment", ""]]},
|
||||
|
||||
{"description":"Short comment two",
|
||||
"input":"<!--->",
|
||||
"output":["ParseError", ["Comment", ""]]},
|
||||
|
||||
{"description":"Short comment three",
|
||||
"input":"<!---->",
|
||||
"output":[["Comment", ""]]},
|
||||
|
||||
|
||||
{"description":"Ampersand EOF",
|
||||
"input":"&",
|
||||
"output":[["Character", "&"]]},
|
||||
|
||||
{"description":"Ampersand ampersand EOF",
|
||||
"input":"&&",
|
||||
"output":[["Character", "&&"]]},
|
||||
|
||||
{"description":"Ampersand space EOF",
|
||||
"input":"& ",
|
||||
"output":[["Character", "& "]]},
|
||||
|
||||
{"description":"Unfinished entity",
|
||||
"input":"&f",
|
||||
"output":[["Character", "&f"]]},
|
||||
|
||||
{"description":"Ampersand, number sign",
|
||||
"input":"&#",
|
||||
"output":["ParseError", ["Character", "&#"]]},
|
||||
|
||||
{"description":"Unfinished numeric entity",
|
||||
"input":"&#x",
|
||||
"output":["ParseError", ["Character", "&#x"]]},
|
||||
|
||||
{"description":"Entity with trailing semicolon (1)",
|
||||
"input":"I'm ¬it",
|
||||
"output":[["Character","I'm \u00ACit"]]},
|
||||
|
||||
{"description":"Entity with trailing semicolon (2)",
|
||||
"input":"I'm ∉",
|
||||
"output":[["Character","I'm \u2209"]]},
|
||||
|
||||
{"description":"Entity without trailing semicolon (1)",
|
||||
"input":"I'm ¬it",
|
||||
"output":[["Character","I'm "], "ParseError", ["Character", "\u00ACit"]]},
|
||||
|
||||
{"description":"Entity without trailing semicolon (2)",
|
||||
"input":"I'm ¬in",
|
||||
"output":[["Character","I'm "], "ParseError", ["Character", "\u00ACin"]]},
|
||||
|
||||
{"description":"Partial entity match at end of file",
|
||||
"input":"I'm &no",
|
||||
"output":[["Character","I'm &no"]]},
|
||||
|
||||
{"description":"Non-ASCII character reference name",
|
||||
"input":"&\u00AC;",
|
||||
"output":[["Character", "&\u00AC;"]]},
|
||||
|
||||
{"description":"ASCII decimal entity",
|
||||
"input":"$",
|
||||
"output":[["Character","$"]]},
|
||||
|
||||
{"description":"ASCII hexadecimal entity",
|
||||
"input":"?",
|
||||
"output":[["Character","?"]]},
|
||||
|
||||
{"description":"Hexadecimal entity in attribute",
|
||||
"input":"<h a='?'></h>",
|
||||
"output":[["StartTag", "h", {"a":"?"}], ["EndTag", "h"]]},
|
||||
|
||||
{"description":"Entity in attribute without semicolon ending in x",
|
||||
"input":"<h a='¬x'>",
|
||||
"output":[["StartTag", "h", {"a":"¬x"}]]},
|
||||
|
||||
{"description":"Entity in attribute without semicolon ending in 1",
|
||||
"input":"<h a='¬1'>",
|
||||
"output":[["StartTag", "h", {"a":"¬1"}]]},
|
||||
|
||||
{"description":"Entity in attribute without semicolon ending in i",
|
||||
"input":"<h a='¬i'>",
|
||||
"output":[["StartTag", "h", {"a":"¬i"}]]},
|
||||
|
||||
{"description":"Entity in attribute without semicolon",
|
||||
"input":"<h a='©'>",
|
||||
"output":["ParseError", ["StartTag", "h", {"a":"\u00A9"}]]},
|
||||
|
||||
{"description":"Unquoted attribute ending in ampersand",
|
||||
"input":"<s o=& t>",
|
||||
"output":[["StartTag","s",{"o":"&","t":""}]]},
|
||||
|
||||
{"description":"Unquoted attribute at end of tag with final character of &, with tag followed by characters",
|
||||
"input":"<a a=a&>foo",
|
||||
"output":[["StartTag", "a", {"a":"a&"}], ["Character", "foo"]]},
|
||||
|
||||
{"description":"plaintext element",
|
||||
"input":"<plaintext>foobar",
|
||||
"output":[["StartTag","plaintext",{}], ["Character","foobar"]]},
|
||||
|
||||
{"description":"Open angled bracket in unquoted attribute value state",
|
||||
"input":"<a a=f<>",
|
||||
"output":["ParseError", ["StartTag", "a", {"a":"f<"}]]}
|
||||
|
||||
]}
|
179
libs/html5lib/tests/testdata/tokenizer/test2.test
vendored
179
libs/html5lib/tests/testdata/tokenizer/test2.test
vendored
|
@ -1,179 +0,0 @@
|
|||
{"tests": [
|
||||
|
||||
{"description":"DOCTYPE without name",
|
||||
"input":"<!DOCTYPE>",
|
||||
"output":["ParseError", "ParseError", ["DOCTYPE", null, null, null, false]]},
|
||||
|
||||
{"description":"DOCTYPE without space before name",
|
||||
"input":"<!DOCTYPEhtml>",
|
||||
"output":["ParseError", ["DOCTYPE", "html", null, null, true]]},
|
||||
|
||||
{"description":"Incorrect DOCTYPE without a space before name",
|
||||
"input":"<!DOCTYPEfoo>",
|
||||
"output":["ParseError", ["DOCTYPE", "foo", null, null, true]]},
|
||||
|
||||
{"description":"DOCTYPE with publicId",
|
||||
"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\">",
|
||||
"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", null, true]]},
|
||||
|
||||
{"description":"DOCTYPE with EOF after PUBLIC",
|
||||
"input":"<!DOCTYPE html PUBLIC",
|
||||
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
||||
|
||||
{"description":"DOCTYPE with EOF after PUBLIC '",
|
||||
"input":"<!DOCTYPE html PUBLIC '",
|
||||
"output":["ParseError", ["DOCTYPE", "html", "", null, false]]},
|
||||
|
||||
{"description":"DOCTYPE with EOF after PUBLIC 'x",
|
||||
"input":"<!DOCTYPE html PUBLIC 'x",
|
||||
"output":["ParseError", ["DOCTYPE", "html", "x", null, false]]},
|
||||
|
||||
{"description":"DOCTYPE with systemId",
|
||||
"input":"<!DOCTYPE html SYSTEM \"-//W3C//DTD HTML Transitional 4.01//EN\">",
|
||||
"output":[["DOCTYPE", "html", null, "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
|
||||
|
||||
{"description":"DOCTYPE with publicId and systemId",
|
||||
"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\" \"-//W3C//DTD HTML Transitional 4.01//EN\">",
|
||||
"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
|
||||
|
||||
{"description":"DOCTYPE with > in double-quoted publicId",
|
||||
"input":"<!DOCTYPE html PUBLIC \">x",
|
||||
"output":["ParseError", ["DOCTYPE", "html", "", null, false], ["Character", "x"]]},
|
||||
|
||||
{"description":"DOCTYPE with > in single-quoted publicId",
|
||||
"input":"<!DOCTYPE html PUBLIC '>x",
|
||||
"output":["ParseError", ["DOCTYPE", "html", "", null, false], ["Character", "x"]]},
|
||||
|
||||
{"description":"DOCTYPE with > in double-quoted systemId",
|
||||
"input":"<!DOCTYPE html PUBLIC \"foo\" \">x",
|
||||
"output":["ParseError", ["DOCTYPE", "html", "foo", "", false], ["Character", "x"]]},
|
||||
|
||||
{"description":"DOCTYPE with > in single-quoted systemId",
|
||||
"input":"<!DOCTYPE html PUBLIC 'foo' '>x",
|
||||
"output":["ParseError", ["DOCTYPE", "html", "foo", "", false], ["Character", "x"]]},
|
||||
|
||||
{"description":"Incomplete doctype",
|
||||
"input":"<!DOCTYPE html ",
|
||||
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
||||
|
||||
{"description":"Numeric entity representing the NUL character",
|
||||
"input":"�",
|
||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"Hexadecimal entity representing the NUL character",
|
||||
"input":"�",
|
||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"Numeric entity representing a codepoint after 1114111 (U+10FFFF)",
|
||||
"input":"�",
|
||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"Hexadecimal entity representing a codepoint after 1114111 (U+10FFFF)",
|
||||
"input":"�",
|
||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"Hexadecimal entity pair representing a surrogate pair",
|
||||
"input":"��",
|
||||
"output":["ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"Hexadecimal entity with mixed uppercase and lowercase",
|
||||
"input":"ꯍ",
|
||||
"output":[["Character", "\uABCD"]]},
|
||||
|
||||
{"description":"Entity without a name",
|
||||
"input":"&;",
|
||||
"output":[["Character", "&;"]]},
|
||||
|
||||
{"description":"Unescaped ampersand in attribute value",
|
||||
"input":"<h a='&'>",
|
||||
"output":[["StartTag", "h", { "a":"&" }]]},
|
||||
|
||||
{"description":"StartTag containing <",
|
||||
"input":"<a<b>",
|
||||
"output":[["StartTag", "a<b", { }]]},
|
||||
|
||||
{"description":"Non-void element containing trailing /",
|
||||
"input":"<h/>",
|
||||
"output":[["StartTag","h",{},true]]},
|
||||
|
||||
{"description":"Void element with permitted slash",
|
||||
"input":"<br/>",
|
||||
"output":[["StartTag","br",{},true]]},
|
||||
|
||||
{"description":"Void element with permitted slash (with attribute)",
|
||||
"input":"<br foo='bar'/>",
|
||||
"output":[["StartTag","br",{"foo":"bar"},true]]},
|
||||
|
||||
{"description":"StartTag containing /",
|
||||
"input":"<h/a='b'>",
|
||||
"output":["ParseError", ["StartTag", "h", { "a":"b" }]]},
|
||||
|
||||
{"description":"Double-quoted attribute value",
|
||||
"input":"<h a=\"b\">",
|
||||
"output":[["StartTag", "h", { "a":"b" }]]},
|
||||
|
||||
{"description":"Unescaped </",
|
||||
"input":"</",
|
||||
"output":["ParseError", ["Character", "</"]]},
|
||||
|
||||
{"description":"Illegal end tag name",
|
||||
"input":"</1>",
|
||||
"output":["ParseError", ["Comment", "1"]]},
|
||||
|
||||
{"description":"Simili processing instruction",
|
||||
"input":"<?namespace>",
|
||||
"output":["ParseError", ["Comment", "?namespace"]]},
|
||||
|
||||
{"description":"A bogus comment stops at >, even if preceeded by two dashes",
|
||||
"input":"<?foo-->",
|
||||
"output":["ParseError", ["Comment", "?foo--"]]},
|
||||
|
||||
{"description":"Unescaped <",
|
||||
"input":"foo < bar",
|
||||
"output":[["Character", "foo "], "ParseError", ["Character", "< bar"]]},
|
||||
|
||||
{"description":"Null Byte Replacement",
|
||||
"input":"\u0000",
|
||||
"output":["ParseError", ["Character", "\u0000"]]},
|
||||
|
||||
{"description":"Comment with dash",
|
||||
"input":"<!---x",
|
||||
"output":["ParseError", ["Comment", "-x"]]},
|
||||
|
||||
{"description":"Entity + newline",
|
||||
"input":"\nx\n>\n",
|
||||
"output":[["Character","\nx\n>\n"]]},
|
||||
|
||||
{"description":"Start tag with no attributes but space before the greater-than sign",
|
||||
"input":"<h >",
|
||||
"output":[["StartTag", "h", {}]]},
|
||||
|
||||
{"description":"Empty attribute followed by uppercase attribute",
|
||||
"input":"<h a B=''>",
|
||||
"output":[["StartTag", "h", {"a":"", "b":""}]]},
|
||||
|
||||
{"description":"Double-quote after attribute name",
|
||||
"input":"<h a \">",
|
||||
"output":["ParseError", ["StartTag", "h", {"a":"", "\"":""}]]},
|
||||
|
||||
{"description":"Single-quote after attribute name",
|
||||
"input":"<h a '>",
|
||||
"output":["ParseError", ["StartTag", "h", {"a":"", "'":""}]]},
|
||||
|
||||
{"description":"Empty end tag with following characters",
|
||||
"input":"a</>bc",
|
||||
"output":[["Character", "a"], "ParseError", ["Character", "bc"]]},
|
||||
|
||||
{"description":"Empty end tag with following tag",
|
||||
"input":"a</><b>c",
|
||||
"output":[["Character", "a"], "ParseError", ["StartTag", "b", {}], ["Character", "c"]]},
|
||||
|
||||
{"description":"Empty end tag with following comment",
|
||||
"input":"a</><!--b-->c",
|
||||
"output":[["Character", "a"], "ParseError", ["Comment", "b"], ["Character", "c"]]},
|
||||
|
||||
{"description":"Empty end tag with following end tag",
|
||||
"input":"a</></b>c",
|
||||
"output":[["Character", "a"], "ParseError", ["EndTag", "b"], ["Character", "c"]]}
|
||||
|
||||
]}
|
6047
libs/html5lib/tests/testdata/tokenizer/test3.test
vendored
6047
libs/html5lib/tests/testdata/tokenizer/test3.test
vendored
File diff suppressed because it is too large
Load diff
344
libs/html5lib/tests/testdata/tokenizer/test4.test
vendored
344
libs/html5lib/tests/testdata/tokenizer/test4.test
vendored
|
@ -1,344 +0,0 @@
|
|||
{"tests": [
|
||||
|
||||
{"description":"< in attribute name",
|
||||
"input":"<z/0 <>",
|
||||
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "", "<": ""}]]},
|
||||
|
||||
{"description":"< in attribute value",
|
||||
"input":"<z x=<>",
|
||||
"output":["ParseError", ["StartTag", "z", {"x": "<"}]]},
|
||||
|
||||
{"description":"= in unquoted attribute value",
|
||||
"input":"<z z=z=z>",
|
||||
"output":["ParseError", ["StartTag", "z", {"z": "z=z"}]]},
|
||||
|
||||
{"description":"= attribute",
|
||||
"input":"<z =>",
|
||||
"output":["ParseError", ["StartTag", "z", {"=": ""}]]},
|
||||
|
||||
{"description":"== attribute",
|
||||
"input":"<z ==>",
|
||||
"output":["ParseError", "ParseError", ["StartTag", "z", {"=": ""}]]},
|
||||
|
||||
{"description":"=== attribute",
|
||||
"input":"<z ===>",
|
||||
"output":["ParseError", "ParseError", ["StartTag", "z", {"=": "="}]]},
|
||||
|
||||
{"description":"==== attribute",
|
||||
"input":"<z ====>",
|
||||
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"=": "=="}]]},
|
||||
|
||||
{"description":"\" after ampersand in double-quoted attribute value",
|
||||
"input":"<z z=\"&\">",
|
||||
"output":[["StartTag", "z", {"z": "&"}]]},
|
||||
|
||||
{"description":"' after ampersand in double-quoted attribute value",
|
||||
"input":"<z z=\"&'\">",
|
||||
"output":[["StartTag", "z", {"z": "&'"}]]},
|
||||
|
||||
{"description":"' after ampersand in single-quoted attribute value",
|
||||
"input":"<z z='&'>",
|
||||
"output":[["StartTag", "z", {"z": "&"}]]},
|
||||
|
||||
{"description":"\" after ampersand in single-quoted attribute value",
|
||||
"input":"<z z='&\"'>",
|
||||
"output":[["StartTag", "z", {"z": "&\""}]]},
|
||||
|
||||
{"description":"Text after bogus character reference",
|
||||
"input":"<z z='&xlink_xmlns;'>bar<z>",
|
||||
"output":[["StartTag","z",{"z":"&xlink_xmlns;"}],["Character","bar"],["StartTag","z",{}]]},
|
||||
|
||||
{"description":"Text after hex character reference",
|
||||
"input":"<z z='  foo'>bar<z>",
|
||||
"output":[["StartTag","z",{"z":" foo"}],["Character","bar"],["StartTag","z",{}]]},
|
||||
|
||||
{"description":"Attribute name starting with \"",
|
||||
"input":"<foo \"='bar'>",
|
||||
"output":["ParseError", ["StartTag", "foo", {"\"": "bar"}]]},
|
||||
|
||||
{"description":"Attribute name starting with '",
|
||||
"input":"<foo '='bar'>",
|
||||
"output":["ParseError", ["StartTag", "foo", {"'": "bar"}]]},
|
||||
|
||||
{"description":"Attribute name containing \"",
|
||||
"input":"<foo a\"b='bar'>",
|
||||
"output":["ParseError", ["StartTag", "foo", {"a\"b": "bar"}]]},
|
||||
|
||||
{"description":"Attribute name containing '",
|
||||
"input":"<foo a'b='bar'>",
|
||||
"output":["ParseError", ["StartTag", "foo", {"a'b": "bar"}]]},
|
||||
|
||||
{"description":"Unquoted attribute value containing '",
|
||||
"input":"<foo a=b'c>",
|
||||
"output":["ParseError", ["StartTag", "foo", {"a": "b'c"}]]},
|
||||
|
||||
{"description":"Unquoted attribute value containing \"",
|
||||
"input":"<foo a=b\"c>",
|
||||
"output":["ParseError", ["StartTag", "foo", {"a": "b\"c"}]]},
|
||||
|
||||
{"description":"Double-quoted attribute value not followed by whitespace",
|
||||
"input":"<foo a=\"b\"c>",
|
||||
"output":["ParseError", ["StartTag", "foo", {"a": "b", "c": ""}]]},
|
||||
|
||||
{"description":"Single-quoted attribute value not followed by whitespace",
|
||||
"input":"<foo a='b'c>",
|
||||
"output":["ParseError", ["StartTag", "foo", {"a": "b", "c": ""}]]},
|
||||
|
||||
{"description":"Quoted attribute followed by permitted /",
|
||||
"input":"<br a='b'/>",
|
||||
"output":[["StartTag","br",{"a":"b"},true]]},
|
||||
|
||||
{"description":"Quoted attribute followed by non-permitted /",
|
||||
"input":"<bar a='b'/>",
|
||||
"output":[["StartTag","bar",{"a":"b"},true]]},
|
||||
|
||||
{"description":"CR EOF after doctype name",
|
||||
"input":"<!doctype html \r",
|
||||
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
||||
|
||||
{"description":"CR EOF in tag name",
|
||||
"input":"<z\r",
|
||||
"output":["ParseError"]},
|
||||
|
||||
{"description":"Slash EOF in tag name",
|
||||
"input":"<z/",
|
||||
"output":["ParseError"]},
|
||||
|
||||
{"description":"Zero hex numeric entity",
|
||||
"input":"�",
|
||||
"output":["ParseError", "ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"Zero decimal numeric entity",
|
||||
"input":"�",
|
||||
"output":["ParseError", "ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"Zero-prefixed hex numeric entity",
|
||||
"input":"A",
|
||||
"output":[["Character", "A"]]},
|
||||
|
||||
{"description":"Zero-prefixed decimal numeric entity",
|
||||
"input":"A",
|
||||
"output":[["Character", "A"]]},
|
||||
|
||||
{"description":"Empty hex numeric entities",
|
||||
"input":"&#x &#X ",
|
||||
"output":["ParseError", ["Character", "&#x "], "ParseError", ["Character", "&#X "]]},
|
||||
|
||||
{"description":"Empty decimal numeric entities",
|
||||
"input":"&# &#; ",
|
||||
"output":["ParseError", ["Character", "&# "], "ParseError", ["Character", "&#; "]]},
|
||||
|
||||
{"description":"Non-BMP numeric entity",
|
||||
"input":"𐀀",
|
||||
"output":[["Character", "\uD800\uDC00"]]},
|
||||
|
||||
{"description":"Maximum non-BMP numeric entity",
|
||||
"input":"",
|
||||
"output":["ParseError", ["Character", "\uDBFF\uDFFF"]]},
|
||||
|
||||
{"description":"Above maximum numeric entity",
|
||||
"input":"�",
|
||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"32-bit hex numeric entity",
|
||||
"input":"�",
|
||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"33-bit hex numeric entity",
|
||||
"input":"�",
|
||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"33-bit decimal numeric entity",
|
||||
"input":"�",
|
||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"65-bit hex numeric entity",
|
||||
"input":"�",
|
||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"65-bit decimal numeric entity",
|
||||
"input":"�",
|
||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"Surrogate code point edge cases",
|
||||
"input":"퟿����",
|
||||
"output":[["Character", "\uD7FF"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD\uE000"]]},
|
||||
|
||||
{"description":"Uppercase start tag name",
|
||||
"input":"<X>",
|
||||
"output":[["StartTag", "x", {}]]},
|
||||
|
||||
{"description":"Uppercase end tag name",
|
||||
"input":"</X>",
|
||||
"output":[["EndTag", "x"]]},
|
||||
|
||||
{"description":"Uppercase attribute name",
|
||||
"input":"<x X>",
|
||||
"output":[["StartTag", "x", { "x":"" }]]},
|
||||
|
||||
{"description":"Tag/attribute name case edge values",
|
||||
"input":"<x@AZ[`az{ @AZ[`az{>",
|
||||
"output":[["StartTag", "x@az[`az{", { "@az[`az{":"" }]]},
|
||||
|
||||
{"description":"Duplicate different-case attributes",
|
||||
"input":"<x x=1 x=2 X=3>",
|
||||
"output":["ParseError", "ParseError", ["StartTag", "x", { "x":"1" }]]},
|
||||
|
||||
{"description":"Uppercase close tag attributes",
|
||||
"input":"</x X>",
|
||||
"output":["ParseError", ["EndTag", "x"]]},
|
||||
|
||||
{"description":"Duplicate close tag attributes",
|
||||
"input":"</x x x>",
|
||||
"output":["ParseError", "ParseError", ["EndTag", "x"]]},
|
||||
|
||||
{"description":"Permitted slash",
|
||||
"input":"<br/>",
|
||||
"output":[["StartTag","br",{},true]]},
|
||||
|
||||
{"description":"Non-permitted slash",
|
||||
"input":"<xr/>",
|
||||
"output":[["StartTag","xr",{},true]]},
|
||||
|
||||
{"description":"Permitted slash but in close tag",
|
||||
"input":"</br/>",
|
||||
"output":["ParseError", ["EndTag", "br"]]},
|
||||
|
||||
{"description":"Doctype public case-sensitivity (1)",
|
||||
"input":"<!DoCtYpE HtMl PuBlIc \"AbC\" \"XyZ\">",
|
||||
"output":[["DOCTYPE", "html", "AbC", "XyZ", true]]},
|
||||
|
||||
{"description":"Doctype public case-sensitivity (2)",
|
||||
"input":"<!dOcTyPe hTmL pUbLiC \"aBc\" \"xYz\">",
|
||||
"output":[["DOCTYPE", "html", "aBc", "xYz", true]]},
|
||||
|
||||
{"description":"Doctype system case-sensitivity (1)",
|
||||
"input":"<!DoCtYpE HtMl SyStEm \"XyZ\">",
|
||||
"output":[["DOCTYPE", "html", null, "XyZ", true]]},
|
||||
|
||||
{"description":"Doctype system case-sensitivity (2)",
|
||||
"input":"<!dOcTyPe hTmL sYsTeM \"xYz\">",
|
||||
"output":[["DOCTYPE", "html", null, "xYz", true]]},
|
||||
|
||||
{"description":"U+0000 in lookahead region after non-matching character",
|
||||
"input":"<!doc>\u0000",
|
||||
"output":["ParseError", ["Comment", "doc"], "ParseError", ["Character", "\u0000"]],
|
||||
"ignoreErrorOrder":true},
|
||||
|
||||
{"description":"U+0000 in lookahead region",
|
||||
"input":"<!doc\u0000",
|
||||
"output":["ParseError", ["Comment", "doc\uFFFD"]],
|
||||
"ignoreErrorOrder":true},
|
||||
|
||||
{"description":"U+0080 in lookahead region",
|
||||
"input":"<!doc\u0080",
|
||||
"output":["ParseError", "ParseError", ["Comment", "doc\u0080"]],
|
||||
"ignoreErrorOrder":true},
|
||||
|
||||
{"description":"U+FDD1 in lookahead region",
|
||||
"input":"<!doc\uFDD1",
|
||||
"output":["ParseError", "ParseError", ["Comment", "doc\uFDD1"]],
|
||||
"ignoreErrorOrder":true},
|
||||
|
||||
{"description":"U+1FFFF in lookahead region",
|
||||
"input":"<!doc\uD83F\uDFFF",
|
||||
"output":["ParseError", "ParseError", ["Comment", "doc\uD83F\uDFFF"]],
|
||||
"ignoreErrorOrder":true},
|
||||
|
||||
{"description":"CR followed by non-LF",
|
||||
"input":"\r?",
|
||||
"output":[["Character", "\n?"]]},
|
||||
|
||||
{"description":"CR at EOF",
|
||||
"input":"\r",
|
||||
"output":[["Character", "\n"]]},
|
||||
|
||||
{"description":"LF at EOF",
|
||||
"input":"\n",
|
||||
"output":[["Character", "\n"]]},
|
||||
|
||||
{"description":"CR LF",
|
||||
"input":"\r\n",
|
||||
"output":[["Character", "\n"]]},
|
||||
|
||||
{"description":"CR CR",
|
||||
"input":"\r\r",
|
||||
"output":[["Character", "\n\n"]]},
|
||||
|
||||
{"description":"LF LF",
|
||||
"input":"\n\n",
|
||||
"output":[["Character", "\n\n"]]},
|
||||
|
||||
{"description":"LF CR",
|
||||
"input":"\n\r",
|
||||
"output":[["Character", "\n\n"]]},
|
||||
|
||||
{"description":"text CR CR CR text",
|
||||
"input":"text\r\r\rtext",
|
||||
"output":[["Character", "text\n\n\ntext"]]},
|
||||
|
||||
{"description":"Doctype publik",
|
||||
"input":"<!DOCTYPE html PUBLIK \"AbC\" \"XyZ\">",
|
||||
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
||||
|
||||
{"description":"Doctype publi",
|
||||
"input":"<!DOCTYPE html PUBLI",
|
||||
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
||||
|
||||
{"description":"Doctype sistem",
|
||||
"input":"<!DOCTYPE html SISTEM \"AbC\">",
|
||||
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
||||
|
||||
{"description":"Doctype sys",
|
||||
"input":"<!DOCTYPE html SYS",
|
||||
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
||||
|
||||
{"description":"Doctype html x>text",
|
||||
"input":"<!DOCTYPE html x>text",
|
||||
"output":["ParseError", ["DOCTYPE", "html", null, null, false], ["Character", "text"]]},
|
||||
|
||||
{"description":"Grave accent in unquoted attribute",
|
||||
"input":"<a a=aa`>",
|
||||
"output":["ParseError", ["StartTag", "a", {"a":"aa`"}]]},
|
||||
|
||||
{"description":"EOF in tag name state ",
|
||||
"input":"<a",
|
||||
"output":["ParseError"]},
|
||||
|
||||
{"description":"EOF in tag name state",
|
||||
"input":"<a",
|
||||
"output":["ParseError"]},
|
||||
|
||||
{"description":"EOF in before attribute name state",
|
||||
"input":"<a ",
|
||||
"output":["ParseError"]},
|
||||
|
||||
{"description":"EOF in attribute name state",
|
||||
"input":"<a a",
|
||||
"output":["ParseError"]},
|
||||
|
||||
{"description":"EOF in after attribute name state",
|
||||
"input":"<a a ",
|
||||
"output":["ParseError"]},
|
||||
|
||||
{"description":"EOF in before attribute value state",
|
||||
"input":"<a a =",
|
||||
"output":["ParseError"]},
|
||||
|
||||
{"description":"EOF in attribute value (double quoted) state",
|
||||
"input":"<a a =\"a",
|
||||
"output":["ParseError"]},
|
||||
|
||||
{"description":"EOF in attribute value (single quoted) state",
|
||||
"input":"<a a ='a",
|
||||
"output":["ParseError"]},
|
||||
|
||||
{"description":"EOF in attribute value (unquoted) state",
|
||||
"input":"<a a =a",
|
||||
"output":["ParseError"]},
|
||||
|
||||
{"description":"EOF in after attribute value state",
|
||||
"input":"<a a ='a'",
|
||||
"output":["ParseError"]}
|
||||
|
||||
]}
|
1295
libs/html5lib/tests/testdata/tokenizer/unicodeChars.test
vendored
1295
libs/html5lib/tests/testdata/tokenizer/unicodeChars.test
vendored
File diff suppressed because it is too large
Load diff
|
@ -1,31 +0,0 @@
|
|||
{"tests" : [
|
||||
{"description": "Invalid Unicode character U+DFFF",
|
||||
"doubleEscaped":true,
|
||||
"input": "\\uDFFF",
|
||||
"output":["ParseError", ["Character", "\\uDFFF"]],
|
||||
"ignoreErrorOrder":true},
|
||||
|
||||
{"description": "Invalid Unicode character U+D800",
|
||||
"doubleEscaped":true,
|
||||
"input": "\\uD800",
|
||||
"output":["ParseError", ["Character", "\\uD800"]],
|
||||
"ignoreErrorOrder":true},
|
||||
|
||||
{"description": "Invalid Unicode character U+DFFF with valid preceding character",
|
||||
"doubleEscaped":true,
|
||||
"input": "a\\uDFFF",
|
||||
"output":[["Character", "a"], "ParseError", ["Character", "\\uDFFF"]],
|
||||
"ignoreErrorOrder":true},
|
||||
|
||||
{"description": "Invalid Unicode character U+D800 with valid following character",
|
||||
"doubleEscaped":true,
|
||||
"input": "\\uD800a",
|
||||
"output":["ParseError", ["Character", "\\uD800a"]],
|
||||
"ignoreErrorOrder":true},
|
||||
|
||||
{"description":"CR followed by U+0000",
|
||||
"input":"\r\u0000",
|
||||
"output":[["Character", "\n"], "ParseError", ["Character", "\u0000"]],
|
||||
"ignoreErrorOrder":true}
|
||||
]
|
||||
}
|
|
@ -1,22 +0,0 @@
|
|||
{"xmlViolationTests": [
|
||||
|
||||
{"description":"Non-XML character",
|
||||
"input":"a\uFFFFb",
|
||||
"ignoreErrorOrder":true,
|
||||
"output":["ParseError",["Character","a\uFFFDb"]]},
|
||||
|
||||
{"description":"Non-XML space",
|
||||
"input":"a\u000Cb",
|
||||
"ignoreErrorOrder":true,
|
||||
"output":[["Character","a b"]]},
|
||||
|
||||
{"description":"Double hyphen in comment",
|
||||
"input":"<!-- foo -- bar -->",
|
||||
"output":["ParseError",["Comment"," foo - - bar "]]},
|
||||
|
||||
{"description":"FF between attributes",
|
||||
"input":"<a b=''\u000Cc=''>",
|
||||
"output":[["StartTag","a",{"b":"","c":""}]]}
|
||||
]}
|
||||
|
||||
|
|
@ -1,104 +0,0 @@
|
|||
Tree Construction Tests
|
||||
=======================
|
||||
|
||||
Each file containing tree construction tests consists of any number of
|
||||
tests separated by two newlines (LF) and a single newline before the end
|
||||
of the file. For instance:
|
||||
|
||||
[TEST]LF
|
||||
LF
|
||||
[TEST]LF
|
||||
LF
|
||||
[TEST]LF
|
||||
|
||||
Where [TEST] is the following format:
|
||||
|
||||
Each test must begin with a string "\#data" followed by a newline (LF).
|
||||
All subsequent lines until a line that says "\#errors" are the test data
|
||||
and must be passed to the system being tested unchanged, except with the
|
||||
final newline (on the last line) removed.
|
||||
|
||||
Then there must be a line that says "\#errors". It must be followed by
|
||||
one line per parse error that a conformant checker would return. It
|
||||
doesn't matter what those lines are, although they can't be
|
||||
"\#document-fragment", "\#document", "\#script-off", "\#script-on", or
|
||||
empty; the only thing that matters is that there be the right number
|
||||
of parse errors.
|
||||
|
||||
Then there \*may\* be a line that says "\#document-fragment", which must
|
||||
be followed by a newline (LF), followed by a string of characters that
|
||||
indicates the context element, followed by a newline (LF). If the string
|
||||
of characters starts with "svg ", the context element is in the SVG
|
||||
namespace and the substring after "svg " is the local name. If the
|
||||
string of characters starts with "math ", the context element is in the
|
||||
MathML namespace and the substring after "math " is the local name.
|
||||
Otherwise, the context element is in the HTML namespace and the string
|
||||
is the local name. If this line is present the "\#data" must be parsed
|
||||
using the HTML fragment parsing algorithm with the context element as
|
||||
context.
|
||||
|
||||
Then there \*may\* be a line that says "\#script-off" or
|
||||
"\#script-on". If a line that says "\#script-off" is present, the
|
||||
parser must set the scripting flag to disabled. If a line that says
|
||||
"\#script-on" is present, it must set it to enabled. Otherwise, the
|
||||
test should be run in both modes.
|
||||
|
||||
Then there must be a line that says "\#document", which must be followed
|
||||
by a dump of the tree of the parsed DOM. Each node must be represented
|
||||
by a single line. Each line must start with "| ", followed by two spaces
|
||||
per parent node that the node has before the root document node.
|
||||
|
||||
- Element nodes must be represented by a "`<`" then the *tag name
|
||||
string* "`>`", and all the attributes must be given, sorted
|
||||
lexicographically by UTF-16 code unit according to their *attribute
|
||||
name string*, on subsequent lines, as if they were children of the
|
||||
element node.
|
||||
- Attribute nodes must have the *attribute name string*, then an "="
|
||||
sign, then the attribute value in double quotes (").
|
||||
- Text nodes must be the string, in double quotes. Newlines aren't
|
||||
escaped.
|
||||
- Comments must be "`<`" then "`!-- `" then the data then "` -->`".
|
||||
- DOCTYPEs must be "`<!DOCTYPE `" then the name then if either of the
|
||||
system id or public id is non-empty a space, public id in
|
||||
double-quotes, another space and the system id in double-quotes, and
|
||||
then in any case "`>`".
|
||||
- Processing instructions must be "`<?`", then the target, then a
|
||||
space, then the data and then "`>`". (The HTML parser cannot emit
|
||||
processing instructions, but scripts can, and the WebVTT to DOM
|
||||
rules can emit them.)
|
||||
- Template contents are represented by the string "content" with the
|
||||
children below it.
|
||||
|
||||
The *tag name string* is the local name prefixed by a namespace
|
||||
designator. For the HTML namespace, the namespace designator is the
|
||||
empty string, i.e. there's no prefix. For the SVG namespace, the
|
||||
namespace designator is "svg ". For the MathML namespace, the namespace
|
||||
designator is "math ".
|
||||
|
||||
The *attribute name string* is the local name prefixed by a namespace
|
||||
designator. For no namespace, the namespace designator is the empty
|
||||
string, i.e. there's no prefix. For the XLink namespace, the namespace
|
||||
designator is "xlink ". For the XML namespace, the namespace designator
|
||||
is "xml ". For the XMLNS namespace, the namespace designator is "xmlns
|
||||
". Note the difference between "xlink:href" which is an attribute in no
|
||||
namespace with the local name "xlink:href" and "xlink href" which is an
|
||||
attribute in the xlink namespace with the local name "href".
|
||||
|
||||
If there is also a "\#document-fragment" the bit following "\#document"
|
||||
must be a representation of the HTML fragment serialization for the
|
||||
context element given by "\#document-fragment".
|
||||
|
||||
For example:
|
||||
|
||||
#data
|
||||
<p>One<p>Two
|
||||
#errors
|
||||
3: Missing document type declaration
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| "One"
|
||||
| <p>
|
||||
| "Two"
|
|
@ -1,354 +0,0 @@
|
|||
#data
|
||||
<a><p></a></p>
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-start-tag
|
||||
(1,10): adoption-agency-1.3
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <a>
|
||||
| <p>
|
||||
| <a>
|
||||
|
||||
#data
|
||||
<a>1<p>2</a>3</p>
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-start-tag
|
||||
(1,12): adoption-agency-1.3
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <a>
|
||||
| "1"
|
||||
| <p>
|
||||
| <a>
|
||||
| "2"
|
||||
| "3"
|
||||
|
||||
#data
|
||||
<a>1<button>2</a>3</button>
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-start-tag
|
||||
(1,17): adoption-agency-1.3
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <a>
|
||||
| "1"
|
||||
| <button>
|
||||
| <a>
|
||||
| "2"
|
||||
| "3"
|
||||
|
||||
#data
|
||||
<a>1<b>2</a>3</b>
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-start-tag
|
||||
(1,12): adoption-agency-1.3
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <a>
|
||||
| "1"
|
||||
| <b>
|
||||
| "2"
|
||||
| <b>
|
||||
| "3"
|
||||
|
||||
#data
|
||||
<a>1<div>2<div>3</a>4</div>5</div>
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-start-tag
|
||||
(1,20): adoption-agency-1.3
|
||||
(1,20): adoption-agency-1.3
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <a>
|
||||
| "1"
|
||||
| <div>
|
||||
| <a>
|
||||
| "2"
|
||||
| <div>
|
||||
| <a>
|
||||
| "3"
|
||||
| "4"
|
||||
| "5"
|
||||
|
||||
#data
|
||||
<table><a>1<p>2</a>3</p>
|
||||
#errors
|
||||
(1,7): expected-doctype-but-got-start-tag
|
||||
(1,10): unexpected-start-tag-implies-table-voodoo
|
||||
(1,11): unexpected-character-implies-table-voodoo
|
||||
(1,14): unexpected-start-tag-implies-table-voodoo
|
||||
(1,15): unexpected-character-implies-table-voodoo
|
||||
(1,19): unexpected-end-tag-implies-table-voodoo
|
||||
(1,19): adoption-agency-1.3
|
||||
(1,20): unexpected-character-implies-table-voodoo
|
||||
(1,24): unexpected-end-tag-implies-table-voodoo
|
||||
(1,24): eof-in-table
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <a>
|
||||
| "1"
|
||||
| <p>
|
||||
| <a>
|
||||
| "2"
|
||||
| "3"
|
||||
| <table>
|
||||
|
||||
#data
|
||||
<b><b><a><p></a>
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-start-tag
|
||||
(1,16): adoption-agency-1.3
|
||||
(1,16): expected-closing-tag-but-got-eof
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <b>
|
||||
| <b>
|
||||
| <a>
|
||||
| <p>
|
||||
| <a>
|
||||
|
||||
#data
|
||||
<b><a><b><p></a>
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-start-tag
|
||||
(1,16): adoption-agency-1.3
|
||||
(1,16): expected-closing-tag-but-got-eof
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <b>
|
||||
| <a>
|
||||
| <b>
|
||||
| <b>
|
||||
| <p>
|
||||
| <a>
|
||||
|
||||
#data
|
||||
<a><b><b><p></a>
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-start-tag
|
||||
(1,16): adoption-agency-1.3
|
||||
(1,16): expected-closing-tag-but-got-eof
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <a>
|
||||
| <b>
|
||||
| <b>
|
||||
| <b>
|
||||
| <b>
|
||||
| <p>
|
||||
| <a>
|
||||
|
||||
#data
|
||||
<p>1<s id="A">2<b id="B">3</p>4</s>5</b>
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-start-tag
|
||||
(1,30): unexpected-end-tag
|
||||
(1,35): adoption-agency-1.3
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| "1"
|
||||
| <s>
|
||||
| id="A"
|
||||
| "2"
|
||||
| <b>
|
||||
| id="B"
|
||||
| "3"
|
||||
| <s>
|
||||
| id="A"
|
||||
| <b>
|
||||
| id="B"
|
||||
| "4"
|
||||
| <b>
|
||||
| id="B"
|
||||
| "5"
|
||||
|
||||
#data
|
||||
<table><a>1<td>2</td>3</table>
|
||||
#errors
|
||||
(1,7): expected-doctype-but-got-start-tag
|
||||
(1,10): unexpected-start-tag-implies-table-voodoo
|
||||
(1,11): unexpected-character-implies-table-voodoo
|
||||
(1,15): unexpected-cell-in-table-body
|
||||
(1,30): unexpected-implied-end-tag-in-table-view
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <a>
|
||||
| "1"
|
||||
| <a>
|
||||
| "3"
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| "2"
|
||||
|
||||
#data
|
||||
<table>A<td>B</td>C</table>
|
||||
#errors
|
||||
(1,7): expected-doctype-but-got-start-tag
|
||||
(1,8): unexpected-character-implies-table-voodoo
|
||||
(1,12): unexpected-cell-in-table-body
|
||||
(1,22): unexpected-character-implies-table-voodoo
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "AC"
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| "B"
|
||||
|
||||
#data
|
||||
<a><svg><tr><input></a>
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-start-tag
|
||||
(1,23): unexpected-end-tag
|
||||
(1,23): adoption-agency-1.3
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <a>
|
||||
| <svg svg>
|
||||
| <svg tr>
|
||||
| <svg input>
|
||||
|
||||
#data
|
||||
<div><a><b><div><div><div><div><div><div><div><div><div><div></a>
|
||||
#errors
|
||||
(1,5): expected-doctype-but-got-start-tag
|
||||
(1,65): adoption-agency-1.3
|
||||
(1,65): adoption-agency-1.3
|
||||
(1,65): adoption-agency-1.3
|
||||
(1,65): adoption-agency-1.3
|
||||
(1,65): adoption-agency-1.3
|
||||
(1,65): adoption-agency-1.3
|
||||
(1,65): adoption-agency-1.3
|
||||
(1,65): adoption-agency-1.3
|
||||
(1,65): expected-closing-tag-but-got-eof
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| <a>
|
||||
| <b>
|
||||
| <b>
|
||||
| <div>
|
||||
| <a>
|
||||
| <div>
|
||||
| <a>
|
||||
| <div>
|
||||
| <a>
|
||||
| <div>
|
||||
| <a>
|
||||
| <div>
|
||||
| <a>
|
||||
| <div>
|
||||
| <a>
|
||||
| <div>
|
||||
| <a>
|
||||
| <div>
|
||||
| <a>
|
||||
| <div>
|
||||
| <div>
|
||||
|
||||
#data
|
||||
<div><a><b><u><i><code><div></a>
|
||||
#errors
|
||||
(1,5): expected-doctype-but-got-start-tag
|
||||
(1,32): adoption-agency-1.3
|
||||
(1,32): expected-closing-tag-but-got-eof
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| <a>
|
||||
| <b>
|
||||
| <u>
|
||||
| <i>
|
||||
| <code>
|
||||
| <u>
|
||||
| <i>
|
||||
| <code>
|
||||
| <div>
|
||||
| <a>
|
||||
|
||||
#data
|
||||
<b><b><b><b>x</b></b></b></b>y
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-start-tag
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <b>
|
||||
| <b>
|
||||
| <b>
|
||||
| <b>
|
||||
| "x"
|
||||
| "y"
|
||||
|
||||
#data
|
||||
<p><b><b><b><b><p>x
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-start-tag
|
||||
(1,18): unexpected-end-tag
|
||||
(1,19): expected-closing-tag-but-got-eof
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <b>
|
||||
| <b>
|
||||
| <b>
|
||||
| <b>
|
||||
| <p>
|
||||
| <b>
|
||||
| <b>
|
||||
| <b>
|
||||
| "x"
|
||||
|
||||
#data
|
||||
<b><em><foo><foob><fooc><aside></b></em>
|
||||
#errors
|
||||
(1,35): adoption-agency-1.3
|
||||
(1,40): adoption-agency-1.3
|
||||
(1,40): expected-closing-tag-but-got-eof
|
||||
#document-fragment
|
||||
div
|
||||
#document
|
||||
| <b>
|
||||
| <em>
|
||||
| <foo>
|
||||
| <foob>
|
||||
| <fooc>
|
||||
| <aside>
|
||||
| <b>
|
|
@ -1,39 +0,0 @@
|
|||
#data
|
||||
<b>1<i>2<p>3</b>4
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-start-tag
|
||||
(1,16): adoption-agency-1.3
|
||||
(1,17): expected-closing-tag-but-got-eof
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <b>
|
||||
| "1"
|
||||
| <i>
|
||||
| "2"
|
||||
| <i>
|
||||
| <p>
|
||||
| <b>
|
||||
| "3"
|
||||
| "4"
|
||||
|
||||
#data
|
||||
<a><div><style></style><address><a>
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-start-tag
|
||||
(1,35): unexpected-start-tag-implies-end-tag
|
||||
(1,35): adoption-agency-1.3
|
||||
(1,35): adoption-agency-1.3
|
||||
(1,35): expected-closing-tag-but-got-eof
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <a>
|
||||
| <div>
|
||||
| <a>
|
||||
| <style>
|
||||
| <address>
|
||||
| <a>
|
||||
| <a>
|
|
@ -1,178 +0,0 @@
|
|||
#data
|
||||
FOO<!-- BAR -->BAZ
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <!-- BAR -->
|
||||
| "BAZ"
|
||||
|
||||
#data
|
||||
FOO<!-- BAR --!>BAZ
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,15): unexpected-bang-after-double-dash-in-comment
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <!-- BAR -->
|
||||
| "BAZ"
|
||||
|
||||
#data
|
||||
FOO<!-- BAR -- >BAZ
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,15): unexpected-char-in-comment
|
||||
(1,21): eof-in-comment
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <!-- BAR -- >BAZ -->
|
||||
|
||||
#data
|
||||
FOO<!-- BAR -- <QUX> -- MUX -->BAZ
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,15): unexpected-char-in-comment
|
||||
(1,24): unexpected-char-in-comment
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <!-- BAR -- <QUX> -- MUX -->
|
||||
| "BAZ"
|
||||
|
||||
#data
|
||||
FOO<!-- BAR -- <QUX> -- MUX --!>BAZ
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,15): unexpected-char-in-comment
|
||||
(1,24): unexpected-char-in-comment
|
||||
(1,31): unexpected-bang-after-double-dash-in-comment
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <!-- BAR -- <QUX> -- MUX -->
|
||||
| "BAZ"
|
||||
|
||||
#data
|
||||
FOO<!-- BAR -- <QUX> -- MUX -- >BAZ
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,15): unexpected-char-in-comment
|
||||
(1,24): unexpected-char-in-comment
|
||||
(1,31): unexpected-char-in-comment
|
||||
(1,35): eof-in-comment
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <!-- BAR -- <QUX> -- MUX -- >BAZ -->
|
||||
|
||||
#data
|
||||
FOO<!---->BAZ
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <!-- -->
|
||||
| "BAZ"
|
||||
|
||||
#data
|
||||
FOO<!--->BAZ
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,9): incorrect-comment
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <!-- -->
|
||||
| "BAZ"
|
||||
|
||||
#data
|
||||
FOO<!-->BAZ
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,8): incorrect-comment
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <!-- -->
|
||||
| "BAZ"
|
||||
|
||||
#data
|
||||
<?xml version="1.0">Hi
|
||||
#errors
|
||||
(1,1): expected-tag-name-but-got-question-mark
|
||||
(1,22): expected-doctype-but-got-chars
|
||||
#document
|
||||
| <!-- ?xml version="1.0" -->
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hi"
|
||||
|
||||
#data
|
||||
<?xml version="1.0">
|
||||
#errors
|
||||
(1,1): expected-tag-name-but-got-question-mark
|
||||
(1,20): expected-doctype-but-got-eof
|
||||
#document
|
||||
| <!-- ?xml version="1.0" -->
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<?xml version
|
||||
#errors
|
||||
(1,1): expected-tag-name-but-got-question-mark
|
||||
(1,13): expected-doctype-but-got-eof
|
||||
#document
|
||||
| <!-- ?xml version -->
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
FOO<!----->BAZ
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,10): unexpected-dash-after-double-dash-in-comment
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <!-- - -->
|
||||
| "BAZ"
|
||||
|
||||
#data
|
||||
<html><!-- comment --><title>Comment before head</title>
|
||||
#errors
|
||||
(1,6): expected-doctype-but-got-start-tag
|
||||
#document
|
||||
| <html>
|
||||
| <!-- comment -->
|
||||
| <head>
|
||||
| <title>
|
||||
| "Comment before head"
|
||||
| <body>
|
|
@ -1,424 +0,0 @@
|
|||
#data
|
||||
<!DOCTYPE html>Hello
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!dOctYpE HtMl>Hello
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPEhtml>Hello
|
||||
#errors
|
||||
(1,9): need-space-after-doctype
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE>Hello
|
||||
#errors
|
||||
(1,9): need-space-after-doctype
|
||||
(1,10): expected-doctype-name-but-got-right-bracket
|
||||
(1,10): unknown-doctype
|
||||
#document
|
||||
| <!DOCTYPE >
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE >Hello
|
||||
#errors
|
||||
(1,11): expected-doctype-name-but-got-right-bracket
|
||||
(1,11): unknown-doctype
|
||||
#document
|
||||
| <!DOCTYPE >
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato>Hello
|
||||
#errors
|
||||
(1,17): unknown-doctype
|
||||
#document
|
||||
| <!DOCTYPE potato>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato >Hello
|
||||
#errors
|
||||
(1,18): unknown-doctype
|
||||
#document
|
||||
| <!DOCTYPE potato>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato taco>Hello
|
||||
#errors
|
||||
(1,17): expected-space-or-right-bracket-in-doctype
|
||||
(1,22): unknown-doctype
|
||||
#document
|
||||
| <!DOCTYPE potato>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato taco "ddd>Hello
|
||||
#errors
|
||||
(1,17): expected-space-or-right-bracket-in-doctype
|
||||
(1,27): unknown-doctype
|
||||
#document
|
||||
| <!DOCTYPE potato>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato sYstEM>Hello
|
||||
#errors
|
||||
(1,24): unexpected-char-in-doctype
|
||||
(1,24): unknown-doctype
|
||||
#document
|
||||
| <!DOCTYPE potato>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato sYstEM >Hello
|
||||
#errors
|
||||
(1,28): unexpected-char-in-doctype
|
||||
(1,28): unknown-doctype
|
||||
#document
|
||||
| <!DOCTYPE potato>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato sYstEM ggg>Hello
|
||||
#errors
|
||||
(1,34): unexpected-char-in-doctype
|
||||
(1,37): unknown-doctype
|
||||
#document
|
||||
| <!DOCTYPE potato>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato SYSTEM taco >Hello
|
||||
#errors
|
||||
(1,25): unexpected-char-in-doctype
|
||||
(1,31): unknown-doctype
|
||||
#document
|
||||
| <!DOCTYPE potato>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato SYSTEM 'taco"'>Hello
|
||||
#errors
|
||||
(1,32): unknown-doctype
|
||||
#document
|
||||
| <!DOCTYPE potato "" "taco"">
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato SYSTEM "taco">Hello
|
||||
#errors
|
||||
(1,31): unknown-doctype
|
||||
#document
|
||||
| <!DOCTYPE potato "" "taco">
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato SYSTEM "tai'co">Hello
|
||||
#errors
|
||||
(1,33): unknown-doctype
|
||||
#document
|
||||
| <!DOCTYPE potato "" "tai'co">
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato SYSTEMtaco "ddd">Hello
|
||||
#errors
|
||||
(1,24): unexpected-char-in-doctype
|
||||
(1,34): unknown-doctype
|
||||
#document
|
||||
| <!DOCTYPE potato>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato grass SYSTEM taco>Hello
|
||||
#errors
|
||||
(1,17): expected-space-or-right-bracket-in-doctype
|
||||
(1,35): unknown-doctype
|
||||
#document
|
||||
| <!DOCTYPE potato>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato pUbLIc>Hello
|
||||
#errors
|
||||
(1,24): unexpected-end-of-doctype
|
||||
(1,24): unknown-doctype
|
||||
#document
|
||||
| <!DOCTYPE potato>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato pUbLIc >Hello
|
||||
#errors
|
||||
(1,25): unexpected-end-of-doctype
|
||||
(1,25): unknown-doctype
|
||||
#document
|
||||
| <!DOCTYPE potato>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato pUbLIcgoof>Hello
|
||||
#errors
|
||||
(1,24): unexpected-char-in-doctype
|
||||
(1,28): unknown-doctype
|
||||
#document
|
||||
| <!DOCTYPE potato>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato PUBLIC goof>Hello
|
||||
#errors
|
||||
(1,25): unexpected-char-in-doctype
|
||||
(1,29): unknown-doctype
|
||||
#document
|
||||
| <!DOCTYPE potato>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato PUBLIC "go'of">Hello
|
||||
#errors
|
||||
(1,32): unknown-doctype
|
||||
#document
|
||||
| <!DOCTYPE potato "go'of" "">
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato PUBLIC 'go'of'>Hello
|
||||
#errors
|
||||
(1,29): unexpected-char-in-doctype
|
||||
(1,32): unknown-doctype
|
||||
#document
|
||||
| <!DOCTYPE potato "go" "">
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato PUBLIC 'go:hh of' >Hello
|
||||
#errors
|
||||
(1,38): unknown-doctype
|
||||
#document
|
||||
| <!DOCTYPE potato "go:hh of" "">
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato PUBLIC "W3C-//dfdf" SYSTEM ggg>Hello
|
||||
#errors
|
||||
(1,38): unexpected-char-in-doctype
|
||||
(1,48): unknown-doctype
|
||||
#document
|
||||
| <!DOCTYPE potato "W3C-//dfdf" "">
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
|
||||
"http://www.w3.org/TR/html4/strict.dtd">Hello
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE ...>Hello
|
||||
#errors
|
||||
(1,14): unknown-doctype
|
||||
#document
|
||||
| <!DOCTYPE ...>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
#errors
|
||||
(2,58): unknown-doctype
|
||||
#document
|
||||
| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
|
||||
#errors
|
||||
(2,54): unknown-doctype
|
||||
#document
|
||||
| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Frameset//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE root-element [SYSTEM OR PUBLIC FPI] "uri" [
|
||||
<!-- internal declarations -->
|
||||
]>
|
||||
#errors
|
||||
(1,23): expected-space-or-right-bracket-in-doctype
|
||||
(2,30): unknown-doctype
|
||||
#document
|
||||
| <!DOCTYPE root-element>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "]>"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html PUBLIC
|
||||
"-//WAPFORUM//DTD XHTML Mobile 1.0//EN"
|
||||
"http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
|
||||
#errors
|
||||
(3,53): unknown-doctype
|
||||
#document
|
||||
| <!DOCTYPE html "-//WAPFORUM//DTD XHTML Mobile 1.0//EN" "http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML SYSTEM "http://www.w3.org/DTD/HTML4-strict.dtd"><body><b>Mine!</b></body>
|
||||
#errors
|
||||
(1,63): unknown-doctype
|
||||
#document
|
||||
| <!DOCTYPE html "" "http://www.w3.org/DTD/HTML4-strict.dtd">
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <b>
|
||||
| "Mine!"
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN""http://www.w3.org/TR/html4/strict.dtd">
|
||||
#errors
|
||||
(1,50): unexpected-char-in-doctype
|
||||
#document
|
||||
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'>
|
||||
#errors
|
||||
(1,50): unexpected-char-in-doctype
|
||||
#document
|
||||
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML PUBLIC"-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'>
|
||||
#errors
|
||||
(1,21): unexpected-char-in-doctype
|
||||
(1,49): unexpected-char-in-doctype
|
||||
#document
|
||||
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML PUBLIC'-//W3C//DTD HTML 4.01//EN''http://www.w3.org/TR/html4/strict.dtd'>
|
||||
#errors
|
||||
(1,21): unexpected-char-in-doctype
|
||||
(1,49): unexpected-char-in-doctype
|
||||
#document
|
||||
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
Binary file not shown.
|
@ -1,795 +0,0 @@
|
|||
#data
|
||||
FOO>BAR
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO>BAR"
|
||||
|
||||
#data
|
||||
FOO>BAR
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,6): named-entity-without-semicolon
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO>BAR"
|
||||
|
||||
#data
|
||||
FOO> BAR
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,6): named-entity-without-semicolon
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO> BAR"
|
||||
|
||||
#data
|
||||
FOO>;;BAR
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO>;;BAR"
|
||||
|
||||
#data
|
||||
I'm ¬it; I tell you
|
||||
#errors
|
||||
(1,4): expected-doctype-but-got-chars
|
||||
(1,9): named-entity-without-semicolon
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "I'm ¬it; I tell you"
|
||||
|
||||
#data
|
||||
I'm ∉ I tell you
|
||||
#errors
|
||||
(1,4): expected-doctype-but-got-chars
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "I'm ∉ I tell you"
|
||||
|
||||
#data
|
||||
FOO& BAR
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO& BAR"
|
||||
|
||||
#data
|
||||
FOO&<BAR>
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,9): expected-closing-tag-but-got-eof
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO&"
|
||||
| <bar>
|
||||
|
||||
#data
|
||||
FOO&&&>BAR
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO&&&>BAR"
|
||||
|
||||
#data
|
||||
FOO)BAR
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO)BAR"
|
||||
|
||||
#data
|
||||
FOOABAR
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOABAR"
|
||||
|
||||
#data
|
||||
FOOABAR
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOABAR"
|
||||
|
||||
#data
|
||||
FOO&#BAR
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,5): expected-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO&#BAR"
|
||||
|
||||
#data
|
||||
FOO&#ZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,5): expected-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO&#ZOO"
|
||||
|
||||
#data
|
||||
FOOºR
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,7): expected-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOºR"
|
||||
|
||||
#data
|
||||
FOO&#xZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,6): expected-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO&#xZOO"
|
||||
|
||||
#data
|
||||
FOO&#XZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,6): expected-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO&#XZOO"
|
||||
|
||||
#data
|
||||
FOO)BAR
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,7): numeric-entity-without-semicolon
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO)BAR"
|
||||
|
||||
#data
|
||||
FOO䆺R
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,10): numeric-entity-without-semicolon
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO䆺R"
|
||||
|
||||
#data
|
||||
FOOAZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,8): numeric-entity-without-semicolon
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOAZOO"
|
||||
|
||||
#data
|
||||
FOO�ZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO<4F>ZOO"
|
||||
|
||||
#data
|
||||
FOOxZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOxZOO"
|
||||
|
||||
#data
|
||||
FOOyZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOyZOO"
|
||||
|
||||
#data
|
||||
FOO€ZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO€ZOO"
|
||||
|
||||
#data
|
||||
FOOZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOZOO"
|
||||
|
||||
#data
|
||||
FOO‚ZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO‚ZOO"
|
||||
|
||||
#data
|
||||
FOOƒZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOƒZOO"
|
||||
|
||||
#data
|
||||
FOO„ZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO„ZOO"
|
||||
|
||||
#data
|
||||
FOO…ZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO…ZOO"
|
||||
|
||||
#data
|
||||
FOO†ZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO†ZOO"
|
||||
|
||||
#data
|
||||
FOO‡ZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO‡ZOO"
|
||||
|
||||
#data
|
||||
FOOˆZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOˆZOO"
|
||||
|
||||
#data
|
||||
FOO‰ZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO‰ZOO"
|
||||
|
||||
#data
|
||||
FOOŠZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOŠZOO"
|
||||
|
||||
#data
|
||||
FOO‹ZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO‹ZOO"
|
||||
|
||||
#data
|
||||
FOOŒZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOŒZOO"
|
||||
|
||||
#data
|
||||
FOOZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOZOO"
|
||||
|
||||
#data
|
||||
FOOŽZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOŽZOO"
|
||||
|
||||
#data
|
||||
FOOZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOZOO"
|
||||
|
||||
#data
|
||||
FOOZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOZOO"
|
||||
|
||||
#data
|
||||
FOO‘ZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO‘ZOO"
|
||||
|
||||
#data
|
||||
FOO’ZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO’ZOO"
|
||||
|
||||
#data
|
||||
FOO“ZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO“ZOO"
|
||||
|
||||
#data
|
||||
FOO”ZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO”ZOO"
|
||||
|
||||
#data
|
||||
FOO•ZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO•ZOO"
|
||||
|
||||
#data
|
||||
FOO–ZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO–ZOO"
|
||||
|
||||
#data
|
||||
FOO—ZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO—ZOO"
|
||||
|
||||
#data
|
||||
FOO˜ZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO˜ZOO"
|
||||
|
||||
#data
|
||||
FOO™ZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO™ZOO"
|
||||
|
||||
#data
|
||||
FOOšZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOšZOO"
|
||||
|
||||
#data
|
||||
FOO›ZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO›ZOO"
|
||||
|
||||
#data
|
||||
FOOœZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOœZOO"
|
||||
|
||||
#data
|
||||
FOOZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOZOO"
|
||||
|
||||
#data
|
||||
FOOžZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOžZOO"
|
||||
|
||||
#data
|
||||
FOOŸZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOŸZOO"
|
||||
|
||||
#data
|
||||
FOO ZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO ZOO"
|
||||
|
||||
#data
|
||||
FOO퟿ZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOZOO"
|
||||
|
||||
#data
|
||||
FOO�ZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO<4F>ZOO"
|
||||
|
||||
#data
|
||||
FOO�ZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO<4F>ZOO"
|
||||
|
||||
#data
|
||||
FOO�ZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO<4F>ZOO"
|
||||
|
||||
#data
|
||||
FOO�ZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,11): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO<4F>ZOO"
|
||||
|
||||
#data
|
||||
FOOZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOZOO"
|
||||
|
||||
#data
|
||||
FOOZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,13): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOZOO"
|
||||
|
||||
#data
|
||||
FOO􈟔ZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOZOO"
|
||||
|
||||
#data
|
||||
FOOZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,13): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOZOO"
|
||||
|
||||
#data
|
||||
FOO�ZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,13): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO<4F>ZOO"
|
||||
|
||||
#data
|
||||
FOO�ZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,13): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO<4F>ZOO"
|
||||
|
||||
#data
|
||||
FOO�
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,13): illegal-codepoint-for-numeric-entity
|
||||
(1,13): eof-in-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO<4F>"
|
||||
|
||||
#data
|
||||
FOO�
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,13): illegal-codepoint-for-numeric-entity
|
||||
(1,13): eof-in-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO<4F>"
|
||||
|
||||
#data
|
||||
FOO�
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,13): illegal-codepoint-for-numeric-entity
|
||||
(1,13): eof-in-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO<4F>"
|
||||
|
||||
#data
|
||||
FOO�ZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,16): numeric-entity-without-semicolon
|
||||
(1,16): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO<4F>ZOO"
|
||||
|
||||
#data
|
||||
FOO�ZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,15): numeric-entity-without-semicolon
|
||||
(1,15): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO<4F>ZOO"
|
||||
|
||||
#data
|
||||
FOO�ZOO
|
||||
#errors
|
||||
(1,3): expected-doctype-but-got-chars
|
||||
(1,17): numeric-entity-without-semicolon
|
||||
(1,17): illegal-codepoint-for-numeric-entity
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO<4F>ZOO"
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue