import io
from html5lib._tokenizer import HTMLTokenizer
from html5lib.constants import tokenTypes
def ignore_parse_errors(toks):
    """Lazily yield every token from *toks* except ParseError tokens.

    Each token is expected to be a mapping with a ``'type'`` key; tokens
    whose type equals ``tokenTypes['ParseError']`` are skipped, all other
    tokens are passed through unchanged and in their original order.
    """
    for token in toks:
        if token['type'] == tokenTypes['ParseError']:
            # Parse errors are noise for the tests below; drop them.
            continue
        yield token
def test_maintain_attribute_order():
    """Check that a start tag's attributes come back in source order.

    A single ``<span ...>`` start tag is built from ``attrs`` and tokenized;
    the resulting token's attribute mapping must contain exactly the same
    (name, value) pairs in exactly the same order.
    """
    # generate loads to maximize the chance a hash-based mutation will occur
    attrs = [(chr(x), str(i)) for i, x in enumerate(range(ord('a'), ord('z')))]

    # The tokenizer needs real markup to emit a StartTag token; an empty
    # stream produces no tokens at all and the assertions below cannot hold.
    markup = "<span " + " ".join("%s='%s'" % (name, value) for name, value in attrs) + ">"
    stream = io.StringIO(markup)

    toks = HTMLTokenizer(stream)
    out = list(ignore_parse_errors(toks))

    assert len(out) == 1
    assert out[0]['type'] == tokenTypes['StartTag']

    attrs_tok = out[0]['data']
    assert len(attrs_tok) == len(attrs)
    # Ordered comparison: both names and values must line up pairwise.
    assert list(attrs_tok.items()) == attrs
def test_duplicate_attribute():
    """Check that a repeated attribute keeps only its first value.

    The same attribute name is given three times on one start tag; once
    parse errors are filtered out, the test expects a single StartTag token
    whose attribute mapping holds only the first (name, value) pair.
    """
    # The tokenizer needs real markup to emit a StartTag token; an empty
    # stream produces no tokens at all and the assertions below cannot hold.
    stream = io.StringIO("<span a=1 a=2 a=3>")

    toks = HTMLTokenizer(stream)
    out = list(ignore_parse_errors(toks))

    assert len(out) == 1
    assert out[0]['type'] == tokenTypes['StartTag']

    attrs_tok = out[0]['data']
    assert len(attrs_tok) == 1
    assert list(attrs_tok.items()) == [('a', '1')]
def test_maintain_duplicate_attribute_order():
    """Check that a trailing duplicate attribute does not disturb ordering.

    A start tag is built from ``attrs`` and then a repeat of the first
    attribute name (``a``) is appended with a different value. The test
    expects the duplicate to be dropped, leaving the original pairs in
    their original order.
    """
    # generate loads to maximize the chance a hash-based mutation will occur
    attrs = [(chr(x), str(i)) for i, x in enumerate(range(ord('a'), ord('z')))]

    # The tokenizer needs real markup to emit a StartTag token; an empty
    # stream produces no tokens at all. The trailing ``a=100`` repeats the
    # first attribute name so that duplicate handling is actually exercised.
    markup = "<span " + " ".join("%s='%s'" % (name, value) for name, value in attrs) + " a=100>"
    stream = io.StringIO(markup)

    toks = HTMLTokenizer(stream)
    out = list(ignore_parse_errors(toks))

    assert len(out) == 1
    assert out[0]['type'] == tokenTypes['StartTag']

    attrs_tok = out[0]['data']
    assert len(attrs_tok) == len(attrs)
    # Ordered comparison: the duplicate must neither reorder nor overwrite.
    assert list(attrs_tok.items()) == attrs