2424class UnicodeMethodsTest (unittest .TestCase ):
2525
2626 # update this, if the database changes
27- expectedchecksum = '63aa77dcb36b0e1df082ee2a6071caeda7f0955e '
27+ expectedchecksum = '9e43ee3929471739680c0e705482b4ae1c4122e4 '
2828
29- # TODO: RUSTPYTHON
30- @unittest .expectedFailure
29+ @unittest .expectedFailure # TODO: RUSTPYTHON; + 9e43ee3929471739680c0e705482b4ae1c4122e4
3130 @requires_resource ('cpu' )
3231 def test_method_checksum (self ):
3332 h = hashlib .sha1 ()
@@ -79,10 +78,9 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest):
7978
8079 # Update this if the database changes. Make sure to do a full rebuild
8180 # (e.g. 'make distclean && make') to get the correct checksum.
82- expectedchecksum = '232affd2a50ec4bd69d2482aa0291385cbdefaba '
81+ expectedchecksum = '23ab09ed4abdf93db23b97359108ed630dd8311d '
8382
84- # TODO: RUSTPYTHON
85- @unittest .expectedFailure
83+ @unittest .expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'unicodedata' has no attribute 'digit'
8684 @requires_resource ('cpu' )
8785 def test_function_checksum (self ):
8886 data = []
@@ -122,22 +120,18 @@ def test_no_names_in_pua(self):
122120 char = chr (i )
123121 self .assertRaises (ValueError , self .db .name , char )
124122
125- # TODO: RUSTPYTHON; LookupError: undefined character name 'LATIN SMLL LETR A'
126- @unittest .expectedFailure
127123 def test_lookup_nonexistant (self ):
128124 # just make sure that lookup can fail
129- for nonexistant in [
125+ for nonexistent in [
130126 "LATIN SMLL LETR A" ,
131127 "OPEN HANDS SIGHS" ,
132128 "DREGS" ,
133129 "HANDBUG" ,
134130 "MODIFIER LETTER CYRILLIC SMALL QUESTION MARK" ,
135131 "???" ,
136132 ]:
137- self .assertRaises (KeyError , self .db .lookup , nonexistant )
133+ self .assertRaises (KeyError , self .db .lookup , nonexistent )
138134
139- # TODO: RUSTPYTHON
140- @unittest .expectedFailure
141135 def test_digit (self ):
142136 self .assertEqual (self .db .digit ('A' , None ), None )
143137 self .assertEqual (self .db .digit ('9' ), 9 )
@@ -150,8 +144,6 @@ def test_digit(self):
150144 self .assertRaises (TypeError , self .db .digit , 'xx' )
151145 self .assertRaises (ValueError , self .db .digit , 'x' )
152146
153- # TODO: RUSTPYTHON
154- @unittest .expectedFailure
155147 def test_numeric (self ):
156148 self .assertEqual (self .db .numeric ('A' ,None ), None )
157149 self .assertEqual (self .db .numeric ('9' ), 9 )
@@ -165,8 +157,6 @@ def test_numeric(self):
165157 self .assertRaises (TypeError , self .db .numeric , 'xx' )
166158 self .assertRaises (ValueError , self .db .numeric , 'x' )
167159
168- # TODO: RUSTPYTHON
169- @unittest .expectedFailure
170160 def test_decimal (self ):
171161 self .assertEqual (self .db .decimal ('A' ,None ), None )
172162 self .assertEqual (self .db .decimal ('9' ), 9 )
@@ -189,8 +179,7 @@ def test_category(self):
189179 self .assertRaises (TypeError , self .db .category )
190180 self .assertRaises (TypeError , self .db .category , 'xx' )
191181
192- # TODO: RUSTPYTHON
193- @unittest .expectedFailure
182+ @unittest .expectedFailure # TODO: RUSTPYTHON; - L
194183 def test_bidirectional (self ):
195184 self .assertEqual (self .db .bidirectional ('\uFFFE ' ), '' )
196185 self .assertEqual (self .db .bidirectional (' ' ), 'WS' )
@@ -200,8 +189,6 @@ def test_bidirectional(self):
200189 self .assertRaises (TypeError , self .db .bidirectional )
201190 self .assertRaises (TypeError , self .db .bidirectional , 'xx' )
202191
203- # TODO: RUSTPYTHON
204- @unittest .expectedFailure
205192 def test_decomposition (self ):
206193 self .assertEqual (self .db .decomposition ('\uFFFE ' ),'' )
207194 self .assertEqual (self .db .decomposition ('\u00bc ' ), '<fraction> 0031 2044 0034' )
@@ -218,8 +205,6 @@ def test_mirrored(self):
218205 self .assertRaises (TypeError , self .db .mirrored )
219206 self .assertRaises (TypeError , self .db .mirrored , 'xx' )
220207
221- # TODO: RUSTPYTHON
222- @unittest .expectedFailure
223208 def test_combining (self ):
224209 self .assertEqual (self .db .combining ('\uFFFE ' ), 0 )
225210 self .assertEqual (self .db .combining ('a' ), 0 )
@@ -247,8 +232,7 @@ def test_issue10254(self):
247232 b = 'C\u0338 ' * 20 + '\xC7 '
248233 self .assertEqual (self .db .normalize ('NFC' , a ), b )
249234
250- # TODO: RUSTPYTHON
251- @unittest .expectedFailure
235+ @unittest .expectedFailure # TODO: RUSTPYTHON; ? +
252236 def test_issue29456 (self ):
253237 # Fix #29456
254238 u1176_str_a = '\u1100 \u1176 \u11a8 '
@@ -275,8 +259,7 @@ def test_east_asian_width(self):
275259 self .assertEqual (eaw ('\u2010 ' ), 'A' )
276260 self .assertEqual (eaw ('\U00020000 ' ), 'W' )
277261
278- # TODO: RUSTPYTHON
279- @unittest .expectedFailure
262+ @unittest .expectedFailure # TODO: RUSTPYTHON; + W
280263 def test_east_asian_width_unassigned (self ):
281264 eaw = self .db .east_asian_width
282265 # unassigned
@@ -294,8 +277,7 @@ def test_east_asian_width_unassigned(self):
294277 self .assertEqual (eaw (char ), 'A' )
295278 self .assertIs (self .db .name (char , None ), None )
296279
297- # TODO: RUSTPYTHON
298- @unittest .expectedFailure
280+ @unittest .expectedFailure # TODO: RUSTPYTHON; + N
299281 def test_east_asian_width_9_0_changes (self ):
300282 self .assertEqual (self .db .ucd_3_2_0 .east_asian_width ('\u231a ' ), 'N' )
301283 self .assertEqual (self .db .east_asian_width ('\u231a ' ), 'W' )
@@ -307,8 +289,7 @@ def test_disallow_instantiation(self):
307289 # Ensure that the type disallows instantiation (bpo-43916)
308290 check_disallow_instantiation (self , unicodedata .UCD )
309291
310- # TODO: RUSTPYTHON
311- @unittest .expectedFailure
292+ @unittest .expectedFailure # TODO: RUSTPYTHON; ---
312293 @force_not_colorized
313294 def test_failed_import_during_compiling (self ):
314295 # Issue 4367
@@ -326,8 +307,6 @@ def test_failed_import_during_compiling(self):
326307 "(can't load unicodedata module)"
327308 self .assertIn (error , result .err .decode ("ascii" ))
328309
329- # TODO: RUSTPYTHON
330- @unittest .expectedFailure
331310 def test_decimal_numeric_consistent (self ):
332311 # Test that decimal and numeric are consistent,
333312 # i.e. if a character has a decimal value,
@@ -341,8 +320,6 @@ def test_decimal_numeric_consistent(self):
341320 count += 1
342321 self .assertTrue (count >= 10 ) # should have tested at least the ASCII digits
343322
344- # TODO: RUSTPYTHON
345- @unittest .expectedFailure
346323 def test_digit_numeric_consistent (self ):
347324 # Test that digit and numeric are consistent,
348325 # i.e. if a character has a digit value,
@@ -359,8 +336,7 @@ def test_digit_numeric_consistent(self):
359336 def test_bug_1704793 (self ):
360337 self .assertEqual (self .db .lookup ("GOTHIC LETTER FAIHU" ), '\U00010346 ' )
361338
362- # TODO: RUSTPYTHON
363- @unittest .expectedFailure
339+ @unittest .expectedFailure # TODO: RUSTPYTHON; AssertionError: False is not true
364340 def test_ucd_510 (self ):
365341 import unicodedata
366342 # In UCD 5.1.0, a mirrored property changed wrt. UCD 3.2.0
@@ -384,8 +360,7 @@ def test_bug_5828(self):
384360 [0 ]
385361 )
386362
387- # TODO: RUSTPYTHON
388- @unittest .expectedFailure
363+ @unittest .expectedFailure # TODO: RUSTPYTHON; + Dž
389364 def test_bug_4971 (self ):
390365 # LETTER DZ WITH CARON: DZ, Dz, dz
391366 self .assertEqual ("\u01c4 " .title (), "\u01c5 " )
@@ -414,7 +389,6 @@ def unistr(data):
414389 data = [int (x , 16 ) for x in data .split (" " )]
415390 return "" .join ([chr (x ) for x in data ])
416391
417- @unittest .expectedFailure # TODO: RUSTPYTHON
418392 @requires_resource ('network' )
419393 @requires_resource ('cpu' )
420394 def test_normalization (self ):
@@ -502,6 +476,29 @@ def test_bug_834676(self):
502476 # Check for bug 834676
503477 unicodedata .normalize ('NFC' , '\ud55c \uae00 ' )
504478
479+ def test_normalize_return_type (self ):
480+ # gh-129569: normalize() return type must always be str
481+ normalize = unicodedata .normalize
482+
483+ class MyStr (str ):
484+ pass
485+
486+ normalization_forms = ("NFC" , "NFKC" , "NFD" , "NFKD" )
487+ input_strings = (
488+ # normalized strings
489+ "" ,
490+ "ascii" ,
491+ # unnormalized strings
492+ "\u1e0b \u0323 " ,
493+ "\u0071 \u0307 \u0323 " ,
494+ )
495+
496+ for form in normalization_forms :
497+ for input_str in input_strings :
498+ with self .subTest (form = form , input_str = input_str ):
499+ self .assertIs (type (normalize (form , input_str )), str )
500+ self .assertIs (type (normalize (form , MyStr (input_str ))), str )
501+
505502
506503if __name__ == "__main__" :
507504 unittest .main ()
0 commit comments