@@ -23,42 +23,30 @@ def test_codec_name_c(self):
def test_codec_name_d(self):
    # A label containing a doubled hyphen is expected to map to windows-1252.
    label = "ISO_8859--1"
    resolved = inputstream.codecName(label)
    self.assertEqual(resolved, "windows-1252")
2525
26- def buildTestSuite ():
def runEncodingTest(data, encoding):
    """Parse *data* and assert the detected encoding matches *encoding*.

    Args:
        data: Document passed to ``HTMLParser.parse`` with
            ``useChardet=False``.
        encoding: Expected encoding name.  Case is ignored; a bytes value
            is decoded as ASCII before the comparison, a text value is
            used as-is.

    Raises:
        AssertionError: If the first element of the tokenizer stream's
            ``charEncoding`` differs from the expected name.
    """
    p = HTMLParser()
    # The returned tree is unused; only the stream state matters here.
    p.parse(data, useChardet=False)

    expected = encoding.lower()
    # Text strings have no .decode() on Python 3, so only decode real bytes.
    if isinstance(expected, bytes):
        expected = expected.decode("ascii")
    detected = p.tokenizer.stream.charEncoding[0]

    errorMessage = ("Input:\n %s\n Expected:\n %s\n Received:\n %s\n " %
                    (data, repr(expected), repr(detected)))
    assert expected == detected, errorMessage
35+
def test_encoding():
    """Yield one ``(runEncodingTest, data, encoding)`` case per test record.

    Walks every file returned by ``get_data_files("encoding")`` and reads
    its records through ``TestData(filename, b"data", encoding=None)``.
    Each record's ``b'data'`` and ``b'encoding'`` entries become the
    arguments of a yielded ``runEncodingTest`` call.

    NOTE(review): the yielded tuple shape looks like a generator-style test
    (e.g. nose) -- confirm against the runner actually in use.
    """
    for filename in get_data_files("encoding"):
        for test in TestData(filename, b"data", encoding=None):
            yield (runEncodingTest, test[b'data'], test[b'encoding'])
5643
57- return unittest .defaultTestLoader .loadTestsFromName (__name__ )
58-
59- def main ():
60- buildTestSuite ()
61- unittest .main ()
62-
63- if __name__ == "__main__" :
64- main ()
# Register the chardet-based test only when chardet can be imported.
try:
    import chardet
except ImportError:
    print("chardet not found, skipping chardet tests")
else:
    def test_chardet(self):
        # Feed the raw bytes of the test_big5.txt sample to the input stream
        # and check that the reported encoding is big5.
        sample = os.path.join(test_dir, "encoding", "chardet",
                              "test_big5.txt")
        # Binary mode keeps the bytes undecoded for detection; the context
        # manager closes the handle, which the bare open().read() never did.
        with open(sample, "rb") as f:
            data = f.read()
        encoding = inputstream.HTMLInputStream(data).charEncoding
        assert encoding[0].lower() == "big5"
    setattr(Html5EncodingTestCase, 'test_chardet', test_chardet)
0 commit comments