@@ -69,27 +69,32 @@ def concatenateCharacterTokens(tokens):
6969 return outputTokens
7070
def normalizeTokens(tokens):
    """Normalize a token stream in place and return it.

    Any token whose first element is ``u'ParseError'`` is collapsed to
    the bare string ``u'ParseError'`` so error tokens compare by type
    only, not by their payload.
    """
    # TODO: convert tests to reflect arrays
    index = 0
    for token in tokens:
        if token[0] == u'ParseError':
            tokens[index] = token[0]
        index += 1
    return tokens
7977
def tokensMatch(expectedTokens, receivedTokens, ignoreErrorOrder=False):
    """Test whether the test has passed or failed.

    If the ignoreErrorOrder flag is set to true we don't test the relative
    positions of parse errors and non parse errors; we only require that
    the non-error tokens match in order and that the number of
    "ParseError" tokens is the same on both sides.

    :param expectedTokens: token list the test expects
    :param receivedTokens: token list the tokenizer produced
    :param ignoreErrorOrder: when True, compare parse errors and other
        tokens as two independent groups (defaults to False for
        backward compatibility with two-argument callers)
    :returns: True if the streams match under the chosen rule
    """
    if not ignoreErrorOrder:
        return expectedTokens == receivedTokens

    # Sort the tokens into two groups; non-parse errors and parse errors.
    # BUGFIX: zip over an explicit key tuple instead of tokens.keys() --
    # dict key iteration order is not guaranteed to match the
    # (expectedTokens, receivedTokens) argument order, so the original
    # code could associate each name with the wrong token list.
    tokens = {"expected": [[], []], "received": [[], []]}
    for tokenType, tokenList in zip(("expected", "received"),
                                    (expectedTokens, receivedTokens)):
        for token in tokenList:
            if token != "ParseError":
                tokens[tokenType][0].append(token)
            else:
                tokens[tokenType][1].append(token)

    return tokens["expected"] == tokens["received"]
9398
9499
95100class TestCase (unittest .TestCase ):
@@ -107,9 +112,11 @@ def runTokenizerTest(self, test):
107112 test ['contentModelFlag' ] ,
108113 "\n Input:" , str (test ['input' ]),
109114 "\n Expected:" , str (output ),
110- "\n Recieved :" , str (tokens )])
115+ "\n received :" , str (tokens )])
111116 tokens = normalizeTokens (tokens )
112- self .assertEquals (tokensMatch (tokens , output ), True , errorMsg )
117+ ignoreErrorOrder = test .get ('ignoreErrorOrder' , False )
118+ self .assertEquals (tokensMatch (tokens , output , ignoreErrorOrder ), True ,
119+ errorMsg )
113120
114121def buildTestSuite ():
115122 for filename in html5lib_test_files ('tokenizer' , '*.test' ):
0 commit comments