Skip to content

Commit 93d83c1

Browse files
authored
Merge pull request #7114 from youknowone/unicode
Update test_unicodedata from v3.14.2 and implement more
2 parents 516ced9 + 2e45179 commit 93d83c1

5 files changed

Lines changed: 170 additions & 45 deletions

File tree

Lib/test/test_re.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -851,7 +851,6 @@ def test_other_escapes(self):
851851
with self.subTest(c):
852852
self.assertRaises(re.PatternError, re.compile, '[\\%c]' % c)
853853

854-
@unittest.expectedFailure # TODO: RUSTPYTHON
855854
def test_named_unicode_escapes(self):
856855
# test individual Unicode named escapes
857856
self.assertTrue(re.match(r'\N{LESS-THAN SIGN}', '<'))

Lib/test/test_ucn.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -203,7 +203,6 @@ def check_version(testfile):
203203
with self.assertRaises(KeyError):
204204
unicodedata.ucd_3_2_0.lookup(seqname)
205205

206-
@unittest.expectedFailure # TODO: RUSTPYTHON
207206
def test_errors(self):
208207
self.assertRaises(TypeError, unicodedata.name)
209208
self.assertRaises(TypeError, unicodedata.name, 'xx')

Lib/test/test_unicodedata.py

Lines changed: 36 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -24,10 +24,9 @@
2424
class UnicodeMethodsTest(unittest.TestCase):
2525

2626
# update this, if the database changes
27-
expectedchecksum = '63aa77dcb36b0e1df082ee2a6071caeda7f0955e'
27+
expectedchecksum = '9e43ee3929471739680c0e705482b4ae1c4122e4'
2828

29-
# TODO: RUSTPYTHON
30-
@unittest.expectedFailure
29+
@unittest.expectedFailure # TODO: RUSTPYTHON; + 9e43ee3929471739680c0e705482b4ae1c4122e4
3130
@requires_resource('cpu')
3231
def test_method_checksum(self):
3332
h = hashlib.sha1()
@@ -79,10 +78,9 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest):
7978

8079
# Update this if the database changes. Make sure to do a full rebuild
8180
# (e.g. 'make distclean && make') to get the correct checksum.
82-
expectedchecksum = '232affd2a50ec4bd69d2482aa0291385cbdefaba'
81+
expectedchecksum = '23ab09ed4abdf93db23b97359108ed630dd8311d'
8382

84-
# TODO: RUSTPYTHON
85-
@unittest.expectedFailure
83+
@unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'unicodedata' has no attribute 'digit'
8684
@requires_resource('cpu')
8785
def test_function_checksum(self):
8886
data = []
@@ -122,22 +120,18 @@ def test_no_names_in_pua(self):
122120
char = chr(i)
123121
self.assertRaises(ValueError, self.db.name, char)
124122

125-
# TODO: RUSTPYTHON; LookupError: undefined character name 'LATIN SMLL LETR A'
126-
@unittest.expectedFailure
127123
def test_lookup_nonexistant(self):
128124
# just make sure that lookup can fail
129-
for nonexistant in [
125+
for nonexistent in [
130126
"LATIN SMLL LETR A",
131127
"OPEN HANDS SIGHS",
132128
"DREGS",
133129
"HANDBUG",
134130
"MODIFIER LETTER CYRILLIC SMALL QUESTION MARK",
135131
"???",
136132
]:
137-
self.assertRaises(KeyError, self.db.lookup, nonexistant)
133+
self.assertRaises(KeyError, self.db.lookup, nonexistent)
138134

139-
# TODO: RUSTPYTHON
140-
@unittest.expectedFailure
141135
def test_digit(self):
142136
self.assertEqual(self.db.digit('A', None), None)
143137
self.assertEqual(self.db.digit('9'), 9)
@@ -150,8 +144,6 @@ def test_digit(self):
150144
self.assertRaises(TypeError, self.db.digit, 'xx')
151145
self.assertRaises(ValueError, self.db.digit, 'x')
152146

153-
# TODO: RUSTPYTHON
154-
@unittest.expectedFailure
155147
def test_numeric(self):
156148
self.assertEqual(self.db.numeric('A',None), None)
157149
self.assertEqual(self.db.numeric('9'), 9)
@@ -165,8 +157,6 @@ def test_numeric(self):
165157
self.assertRaises(TypeError, self.db.numeric, 'xx')
166158
self.assertRaises(ValueError, self.db.numeric, 'x')
167159

168-
# TODO: RUSTPYTHON
169-
@unittest.expectedFailure
170160
def test_decimal(self):
171161
self.assertEqual(self.db.decimal('A',None), None)
172162
self.assertEqual(self.db.decimal('9'), 9)
@@ -189,8 +179,7 @@ def test_category(self):
189179
self.assertRaises(TypeError, self.db.category)
190180
self.assertRaises(TypeError, self.db.category, 'xx')
191181

192-
# TODO: RUSTPYTHON
193-
@unittest.expectedFailure
182+
@unittest.expectedFailure # TODO: RUSTPYTHON; - L
194183
def test_bidirectional(self):
195184
self.assertEqual(self.db.bidirectional('\uFFFE'), '')
196185
self.assertEqual(self.db.bidirectional(' '), 'WS')
@@ -200,8 +189,6 @@ def test_bidirectional(self):
200189
self.assertRaises(TypeError, self.db.bidirectional)
201190
self.assertRaises(TypeError, self.db.bidirectional, 'xx')
202191

203-
# TODO: RUSTPYTHON
204-
@unittest.expectedFailure
205192
def test_decomposition(self):
206193
self.assertEqual(self.db.decomposition('\uFFFE'),'')
207194
self.assertEqual(self.db.decomposition('\u00bc'), '<fraction> 0031 2044 0034')
@@ -218,8 +205,6 @@ def test_mirrored(self):
218205
self.assertRaises(TypeError, self.db.mirrored)
219206
self.assertRaises(TypeError, self.db.mirrored, 'xx')
220207

221-
# TODO: RUSTPYTHON
222-
@unittest.expectedFailure
223208
def test_combining(self):
224209
self.assertEqual(self.db.combining('\uFFFE'), 0)
225210
self.assertEqual(self.db.combining('a'), 0)
@@ -247,8 +232,7 @@ def test_issue10254(self):
247232
b = 'C\u0338' * 20 + '\xC7'
248233
self.assertEqual(self.db.normalize('NFC', a), b)
249234

250-
# TODO: RUSTPYTHON
251-
@unittest.expectedFailure
235+
@unittest.expectedFailure # TODO: RUSTPYTHON; ? +
252236
def test_issue29456(self):
253237
# Fix #29456
254238
u1176_str_a = '\u1100\u1176\u11a8'
@@ -275,8 +259,7 @@ def test_east_asian_width(self):
275259
self.assertEqual(eaw('\u2010'), 'A')
276260
self.assertEqual(eaw('\U00020000'), 'W')
277261

278-
# TODO: RUSTPYTHON
279-
@unittest.expectedFailure
262+
@unittest.expectedFailure # TODO: RUSTPYTHON; + W
280263
def test_east_asian_width_unassigned(self):
281264
eaw = self.db.east_asian_width
282265
# unassigned
@@ -294,8 +277,7 @@ def test_east_asian_width_unassigned(self):
294277
self.assertEqual(eaw(char), 'A')
295278
self.assertIs(self.db.name(char, None), None)
296279

297-
# TODO: RUSTPYTHON
298-
@unittest.expectedFailure
280+
@unittest.expectedFailure # TODO: RUSTPYTHON; + N
299281
def test_east_asian_width_9_0_changes(self):
300282
self.assertEqual(self.db.ucd_3_2_0.east_asian_width('\u231a'), 'N')
301283
self.assertEqual(self.db.east_asian_width('\u231a'), 'W')
@@ -307,8 +289,7 @@ def test_disallow_instantiation(self):
307289
# Ensure that the type disallows instantiation (bpo-43916)
308290
check_disallow_instantiation(self, unicodedata.UCD)
309291

310-
# TODO: RUSTPYTHON
311-
@unittest.expectedFailure
292+
@unittest.expectedFailure # TODO: RUSTPYTHON; ---
312293
@force_not_colorized
313294
def test_failed_import_during_compiling(self):
314295
# Issue 4367
@@ -326,8 +307,6 @@ def test_failed_import_during_compiling(self):
326307
"(can't load unicodedata module)"
327308
self.assertIn(error, result.err.decode("ascii"))
328309

329-
# TODO: RUSTPYTHON
330-
@unittest.expectedFailure
331310
def test_decimal_numeric_consistent(self):
332311
# Test that decimal and numeric are consistent,
333312
# i.e. if a character has a decimal value,
@@ -341,8 +320,6 @@ def test_decimal_numeric_consistent(self):
341320
count += 1
342321
self.assertTrue(count >= 10) # should have tested at least the ASCII digits
343322

344-
# TODO: RUSTPYTHON
345-
@unittest.expectedFailure
346323
def test_digit_numeric_consistent(self):
347324
# Test that digit and numeric are consistent,
348325
# i.e. if a character has a digit value,
@@ -359,8 +336,7 @@ def test_digit_numeric_consistent(self):
359336
def test_bug_1704793(self):
360337
self.assertEqual(self.db.lookup("GOTHIC LETTER FAIHU"), '\U00010346')
361338

362-
# TODO: RUSTPYTHON
363-
@unittest.expectedFailure
339+
@unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: False is not true
364340
def test_ucd_510(self):
365341
import unicodedata
366342
# In UCD 5.1.0, a mirrored property changed wrt. UCD 3.2.0
@@ -384,8 +360,7 @@ def test_bug_5828(self):
384360
[0]
385361
)
386362

387-
# TODO: RUSTPYTHON
388-
@unittest.expectedFailure
363+
@unittest.expectedFailure # TODO: RUSTPYTHON; + Dž
389364
def test_bug_4971(self):
390365
# LETTER DZ WITH CARON: DZ, Dz, dz
391366
self.assertEqual("\u01c4".title(), "\u01c5")
@@ -414,7 +389,6 @@ def unistr(data):
414389
data = [int(x, 16) for x in data.split(" ")]
415390
return "".join([chr(x) for x in data])
416391

417-
@unittest.expectedFailure # TODO: RUSTPYTHON
418392
@requires_resource('network')
419393
@requires_resource('cpu')
420394
def test_normalization(self):
@@ -502,6 +476,29 @@ def test_bug_834676(self):
502476
# Check for bug 834676
503477
unicodedata.normalize('NFC', '\ud55c\uae00')
504478

479+
def test_normalize_return_type(self):
480+
# gh-129569: normalize() return type must always be str
481+
normalize = unicodedata.normalize
482+
483+
class MyStr(str):
484+
pass
485+
486+
normalization_forms = ("NFC", "NFKC", "NFD", "NFKD")
487+
input_strings = (
488+
# normalized strings
489+
"",
490+
"ascii",
491+
# unnormalized strings
492+
"\u1e0b\u0323",
493+
"\u0071\u0307\u0323",
494+
)
495+
496+
for form in normalization_forms:
497+
for input_str in input_strings:
498+
with self.subTest(form=form, input_str=input_str):
499+
self.assertIs(type(normalize(form, input_str)), str)
500+
self.assertIs(type(normalize(form, MyStr(input_str))), str)
501+
505502

506503
if __name__ == "__main__":
507504
unittest.main()

Lib/test/test_urlparse.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1495,7 +1495,6 @@ def test_all(self):
14951495
expected.append(name)
14961496
self.assertCountEqual(urllib.parse.__all__, expected)
14971497

1498-
@unittest.expectedFailure # TODO: RUSTPYTHON
14991498
def test_urlsplit_normalization(self):
15001499
# Certain characters should never occur in the netloc,
15011500
# including under normalization.

0 commit comments

Comments
 (0)