Skip to content

Commit 856c6f1

Browse files
committed
Fix various utf encodings
1 parent 377dece commit 856c6f1

File tree

11 files changed

+352
-272
lines changed

11 files changed

+352
-272
lines changed

Lib/_pycodecs.py

Lines changed: 309 additions & 231 deletions
Large diffs are not rendered by default.

Lib/test/test_codecs.py

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -715,7 +715,6 @@ def test_badbom(self):
715715
f = codecs.getreader(self.encoding)(s)
716716
self.assertRaises(UnicodeDecodeError, f.read)
717717

718-
@unittest.expectedFailure # TODO: RUSTPYTHON; UnicodeDecodeError: 'utf-16' codec can't decode bytes in position 0-1: unexpected end of data
719718
def test_partial(self):
720719
self.check_partial(
721720
"\x00\xff\u0100\uffff\U00010000",
@@ -737,7 +736,6 @@ def test_partial(self):
737736
]
738737
)
739738

740-
@unittest.expectedFailure # TODO: RUSTPYTHON; IndexError: index out of range
741739
def test_handlers(self):
742740
self.assertEqual(('\ufffd', 1),
743741
codecs.utf_16_decode(b'\x01', 'replace', True))
@@ -781,7 +779,6 @@ def test_invalid_modes(self):
781779
self.assertIn("can't have text and binary mode at once",
782780
str(cm.exception))
783781

784-
@unittest.expectedFailure # TODO: RUSTPYTHON; IndexError: index out of range
785782
def test_incremental_surrogatepass(self):
786783
return super().test_incremental_surrogatepass()
787784

@@ -791,7 +788,6 @@ class UTF16LETest(ReadTest, unittest.TestCase):
791788
encoding = "utf-16-le"
792789
ill_formed_sequence = b"\x80\xdc"
793790

794-
@unittest.expectedFailure # TODO: RUSTPYTHON; UnicodeDecodeError: 'utf-16' codec can't decode bytes in position 0-1: unexpected end of data
795791
def test_partial(self):
796792
self.check_partial(
797793
"\x00\xff\u0100\uffff\U00010000",
@@ -832,7 +828,6 @@ def test_nonbmp(self):
832828
self.assertEqual(b'\x00\xd8\x03\xde'.decode(self.encoding),
833829
"\U00010203")
834830

835-
@unittest.expectedFailure # TODO: RUSTPYTHON; IndexError: index out of range
836831
def test_incremental_surrogatepass(self):
837832
return super().test_incremental_surrogatepass()
838833

@@ -841,7 +836,6 @@ class UTF16BETest(ReadTest, unittest.TestCase):
841836
encoding = "utf-16-be"
842837
ill_formed_sequence = b"\xdc\x80"
843838

844-
@unittest.expectedFailure # TODO: RUSTPYTHON; UnicodeDecodeError: 'utf-16' codec can't decode bytes in position 0-1: unexpected end of data
845839
def test_partial(self):
846840
self.check_partial(
847841
"\x00\xff\u0100\uffff\U00010000",
@@ -882,7 +876,6 @@ def test_nonbmp(self):
882876
self.assertEqual(b'\xd8\x00\xde\x03'.decode(self.encoding),
883877
"\U00010203")
884878

885-
@unittest.expectedFailure # TODO: RUSTPYTHON; UnicodeDecodeError: 'utf-16' codec can't decode bytes in position 0-1: unexpected end of data
886879
def test_incremental_surrogatepass(self):
887880
return super().test_incremental_surrogatepass()
888881

@@ -1010,7 +1003,6 @@ def test_ascii(self):
10101003
b'+AAAAAQACAAMABAAFAAYABwAIAAsADAAOAA8AEAARABIAEwAU'
10111004
b'ABUAFgAXABgAGQAaABsAHAAdAB4AHwBcAH4Afw-')
10121005

1013-
@unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: expected at least 5 arguments, got 1
10141006
def test_partial(self):
10151007
self.check_partial(
10161008
'a+-b\x00c\x80d\u0100e\U00010000f',
@@ -1115,11 +1107,9 @@ def test_lone_surrogates(self):
11151107
with self.subTest(raw=raw):
11161108
self.assertEqual(raw.decode('utf-7', 'replace'), expected)
11171109

1118-
@unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: expected at least 5 arguments, got 1
11191110
def test_readline(self):
11201111
return super().test_readline()
11211112

1122-
@unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: utf_7_decode() takes from 1 to 2 positional arguments but 3 were given
11231113
def test_incremental_surrogatepass(self):
11241114
return super().test_incremental_surrogatepass()
11251115

@@ -3409,6 +3399,7 @@ def test_invalid_code_page(self):
34093399
self.assertRaises(OSError, codecs.code_page_encode, 123, 'a')
34103400
self.assertRaises(OSError, codecs.code_page_decode, 123, b'a')
34113401

3402+
@unittest.expectedFailureIfWindows("TODO: RUSTPYTHON")
34123403
def test_code_page_name(self):
34133404
self.assertRaisesRegex(UnicodeEncodeError, 'cp932',
34143405
codecs.code_page_encode, 932, '\xff')
@@ -3475,7 +3466,7 @@ def check_encode(self, cp, tests):
34753466
self.assertRaises(UnicodeEncodeError,
34763467
text.encode, f'cp{cp}', errors)
34773468

3478-
@expectedFailure # TODO: RUSTPYTHON
3469+
@unittest.expectedFailure # TODO: RUSTPYTHON
34793470
def test_cp932(self):
34803471
self.check_encode(932, (
34813472
('abc', 'strict', b'abc'),
@@ -3510,6 +3501,7 @@ def test_cp932(self):
35103501
(b'\x81\x00abc', 'backslashreplace', '\\x81\x00abc'),
35113502
))
35123503

3504+
@unittest.expectedFailureIfWindows("TODO: RUSTPYTHON")
35133505
def test_cp1252(self):
35143506
self.check_encode(1252, (
35153507
('abc', 'strict', b'abc'),
@@ -3583,6 +3575,7 @@ def test_cp20106(self):
35833575
(b'(\xbf)', 'surrogatepass', None),
35843576
))
35853577

3578+
@unittest.expectedFailure # TODO: RUSTPYTHON
35863579
def test_cp_utf7(self):
35873580
cp = 65000
35883581
self.check_encode(cp, (
@@ -3603,6 +3596,7 @@ def test_cp_utf7(self):
36033596
(b'[\xff]', 'strict', '[\xff]'),
36043597
))
36053598

3599+
@unittest.expectedFailureIfWindows("TODO: RUSTPYTHON")
36063600
def test_multibyte_encoding(self):
36073601
self.check_decode(932, (
36083602
(b'\x84\xe9\x80', 'ignore', '\u9a3e'),
@@ -3636,6 +3630,7 @@ def test_code_page_decode_flags(self):
36363630
self.assertEqual(codecs.code_page_decode(42, b'abc'),
36373631
('\uf061\uf062\uf063', 3))
36383632

3633+
@unittest.expectedFailureIfWindows("TODO: RUSTPYTHON")
36393634
def test_incremental(self):
36403635
decoded = codecs.code_page_decode(932, b'\x82', 'strict', False)
36413636
self.assertEqual(decoded, ('', 0))

Lib/test/test_fileinput.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -980,8 +980,6 @@ def check(errors, expected_lines):
980980
check('replace', ['\ufffdabc'])
981981
check('backslashreplace', ['\\x80abc'])
982982

983-
# TODO: RUSTPYTHON
984-
@unittest.expectedFailure
985983
def test_modes(self):
986984
with open(TESTFN, 'wb') as f:
987985
# UTF-7 is a convenient, seldom used encoding

Lib/test/test_io.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3815,6 +3815,7 @@ def __del__(self):
38153815
""".format(iomod=iomod, kwargs=kwargs)
38163816
return assert_python_ok("-c", code)
38173817

3818+
@unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError during module teardown in __del__
38183819
def test_create_at_shutdown_without_encoding(self):
38193820
rc, out, err = self._check_create_at_shutdown()
38203821
if err:
@@ -3824,6 +3825,7 @@ def test_create_at_shutdown_without_encoding(self):
38243825
else:
38253826
self.assertEqual("ok", out.decode().strip())
38263827

3828+
@unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError during module teardown in __del__
38273829
def test_create_at_shutdown_with_encoding(self):
38283830
rc, out, err = self._check_create_at_shutdown(encoding='utf-8',
38293831
errors='strict')

Lib/test/test_logging.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5165,6 +5165,7 @@ def __init__(self, name='MyLogger', level=logging.NOTSET):
51655165
h.close()
51665166
logging.setLoggerClass(logging.Logger)
51675167

5168+
@unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError during module teardown in __del__
51685169
def test_logging_at_shutdown(self):
51695170
# bpo-20037: Doing text I/O late at interpreter shutdown must not crash
51705171
code = textwrap.dedent("""
@@ -5184,6 +5185,7 @@ def __del__(self):
51845185
self.assertIn("exception in __del__", err)
51855186
self.assertIn("ValueError: some error", err)
51865187

5188+
@unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError during module teardown in __del__
51875189
def test_logging_at_shutdown_open(self):
51885190
# bpo-26789: FileHandler keeps a reference to the builtin open()
51895191
# function to be able to open or reopen the file during Python

Lib/test/test_plistlib.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -752,7 +752,6 @@ def test_non_bmp_characters(self):
752752
data = plistlib.dumps(pl, fmt=fmt)
753753
self.assertEqual(plistlib.loads(data), pl)
754754

755-
@unittest.expectedFailure # TODO: RUSTPYTHON
756755
def test_lone_surrogates(self):
757756
for fmt in ALL_FORMATS:
758757
with self.subTest(fmt=fmt):

Lib/test/test_str.py

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ def test_literals(self):
112112
# raw strings should not have unicode escapes
113113
self.assertNotEqual(r"\u0020", " ")
114114

115-
@unittest.expectedFailure # TODO: RUSTPYTHON
115+
@unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: <class 'str'> is not <class 'test.test_str.StrSubclass'>
116116
def test_ascii(self):
117117
self.assertEqual(ascii('abc'), "'abc'")
118118
self.assertEqual(ascii('ab\\c'), "'ab\\\\c'")
@@ -793,7 +793,7 @@ def test_isdecimal(self):
793793
for ch in ['\U0001D7F6', '\U00011066', '\U000104A0']:
794794
self.assertTrue(ch.isdecimal(), '{!a} is decimal.'.format(ch))
795795

796-
@unittest.expectedFailure # TODO: RUSTPYTHON
796+
@unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: False != True
797797
def test_isdigit(self):
798798
super().test_isdigit()
799799
self.checkequalnofix(True, '\u2460', 'isdigit')
@@ -939,7 +939,7 @@ def test_upper(self):
939939
self.assertEqual('\U0008fffe'.upper(), '\U0008fffe')
940940
self.assertEqual('\u2177'.upper(), '\u2167')
941941

942-
@unittest.expectedFailure # TODO: RUSTPYTHON
942+
@unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: string mismatch (captured diff output was truncated to "? ^")
943943
def test_capitalize(self):
944944
string_tests.StringLikeTest.test_capitalize(self)
945945
self.assertEqual('\U0001044F'.capitalize(), '\U00010427')
@@ -957,7 +957,7 @@ def test_capitalize(self):
957957
self.assertEqual('finnish'.capitalize(), 'Finnish')
958958
self.assertEqual('A\u0345\u03a3'.capitalize(), 'A\u0345\u03c2')
959959

960-
@unittest.expectedFailure # TODO: RUSTPYTHON
960+
@unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: string mismatch (captured diff output was truncated to "? ^")
961961
def test_title(self):
962962
super().test_title()
963963
self.assertEqual('\U0001044F'.title(), '\U00010427')
@@ -975,7 +975,7 @@ def test_title(self):
975975
self.assertEqual('A\u03a3 \u1fa1xy'.title(), 'A\u03c2 \u1fa9xy')
976976
self.assertEqual('A\u03a3A'.title(), 'A\u03c3a')
977977

978-
@unittest.expectedFailure # TODO: RUSTPYTHON
978+
@unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: string mismatch (captured diff output was truncated to "+ 𐐧")
979979
def test_swapcase(self):
980980
string_tests.StringLikeTest.test_swapcase(self)
981981
self.assertEqual('\U0001044F'.swapcase(), '\U00010427')
@@ -1075,7 +1075,7 @@ def test_issue18183(self):
10751075
'\U00100000'.ljust(3, '\U00010000')
10761076
'\U00100000'.rjust(3, '\U00010000')
10771077

1078-
@unittest.expectedFailure # TODO: RUSTPYTHON
1078+
@unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: string mismatch (captured diff output was truncated to "? +")
10791079
def test_format(self):
10801080
self.assertEqual(''.format(), '')
10811081
self.assertEqual('a'.format(), 'a')
@@ -1464,13 +1464,13 @@ def test_format_huge_precision(self):
14641464
with self.assertRaises(ValueError):
14651465
result = format(2.34, format_string)
14661466

1467-
@unittest.expectedFailure # TODO: RUSTPYTHON
1467+
@unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: ValueError not raised
14681468
def test_format_huge_width(self):
14691469
format_string = "{}f".format(sys.maxsize + 1)
14701470
with self.assertRaises(ValueError):
14711471
result = format(2.34, format_string)
14721472

1473-
@unittest.expectedFailure # TODO: RUSTPYTHON
1473+
@unittest.expectedFailure # TODO: RUSTPYTHON; IndexError: tuple index out of range
14741474
def test_format_huge_item_number(self):
14751475
format_string = "{{{}:.6f}}".format(sys.maxsize + 1)
14761476
with self.assertRaises(ValueError):
@@ -1506,7 +1506,7 @@ def __format__(self, spec):
15061506
self.assertEqual('{:{f}}{g}{}'.format(1, 3, g='g', f=2), ' 1g3')
15071507
self.assertEqual('{f:{}}{}{g}'.format(2, 4, f=1, g='g'), ' 14g')
15081508

1509-
@unittest.expectedFailure # TODO: RUSTPYTHON
1509+
@unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: %x format: an integer is required, not PseudoInt
15101510
def test_formatting(self):
15111511
string_tests.StringLikeTest.test_formatting(self)
15121512
# Testing Unicode formatting strings...
@@ -1755,7 +1755,7 @@ def __str__(self):
17551755
'character buffers are decoded to unicode'
17561756
)
17571757

1758-
@unittest.expectedFailure # TODO: RUSTPYTHON
1758+
@unittest.expectedFailure # TODO: RUSTPYTHON; Pass various keyword argument combinations to the constructor.
17591759
def test_constructor_keyword_args(self):
17601760
"""Pass various keyword argument combinations to the constructor."""
17611761
# The object argument can be passed as a keyword.
@@ -1765,7 +1765,7 @@ def test_constructor_keyword_args(self):
17651765
self.assertEqual(str(b'foo', errors='strict'), 'foo') # not "b'foo'"
17661766
self.assertEqual(str(object=b'foo', errors='strict'), 'foo')
17671767

1768-
@unittest.expectedFailure # TODO: RUSTPYTHON
1768+
@unittest.expectedFailure # TODO: RUSTPYTHON; Check the constructor argument defaults.
17691769
def test_constructor_defaults(self):
17701770
"""Check the constructor argument defaults."""
17711771
# The object argument defaults to '' or b''.
@@ -1777,7 +1777,6 @@ def test_constructor_defaults(self):
17771777
# The errors argument defaults to strict.
17781778
self.assertRaises(UnicodeDecodeError, str, utf8_cent, encoding='ascii')
17791779

1780-
@unittest.expectedFailure # TODO: RUSTPYTHON
17811780
def test_codecs_utf7(self):
17821781
utfTests = [
17831782
('A\u2262\u0391.', b'A+ImIDkQ.'), # RFC2152 example
@@ -2287,7 +2286,6 @@ def test_codecs_errors(self):
22872286
self.assertRaises(ValueError, complex, "\ud800")
22882287
self.assertRaises(ValueError, complex, "\udf00")
22892288

2290-
@unittest.expectedFailure # TODO: RUSTPYTHON
22912289
def test_codecs(self):
22922290
# Encoding
22932291
self.assertEqual('hello'.encode('ascii'), b'hello')
@@ -2417,7 +2415,7 @@ def test_ucs4(self):
24172415
else:
24182416
self.fail("Should have raised UnicodeDecodeError")
24192417

2420-
@unittest.expectedFailure # TODO: RUSTPYTHON
2418+
@unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: <class 'str'> is not <class 'test.test_str.StrSubclass'>
24212419
def test_conversion(self):
24222420
# Make sure __str__() works properly
24232421
class StrWithStr(str):
@@ -2476,7 +2474,7 @@ def test_expandtabs_optimization(self):
24762474
s = 'abc'
24772475
self.assertIs(s.expandtabs(), s)
24782476

2479-
@unittest.expectedFailure # TODO: RUSTPYTHON
2477+
@unittest.expectedFailure # TODO: RUSTPYTHON
24802478
def test_raiseMemError(self):
24812479
asciifields = "nnb"
24822480
compactfields = asciifields + "nP"
@@ -2616,12 +2614,12 @@ def test_compare(self):
26162614
self.assertTrue(astral >= bmp2)
26172615
self.assertFalse(astral >= astral2)
26182616

2619-
@unittest.expectedFailure # TODO: RUSTPYTHON
2617+
@unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: False is not true
26202618
def test_free_after_iterating(self):
26212619
support.check_free_after_iterating(self, iter, str)
26222620
support.check_free_after_iterating(self, reversed, str)
26232621

2624-
@unittest.expectedFailure # TODO: RUSTPYTHON
2622+
@unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: 22 != 10 : _PythonRunResult(rc=22, out=b'', err=b'')
26252623
def test_check_encoding_errors(self):
26262624
# bpo-37388: str(bytes) and str.decode() must check encoding and errors
26272625
# arguments in dev mode
@@ -2682,7 +2680,7 @@ def test_check_encoding_errors(self):
26822680
proc = assert_python_failure('-X', 'dev', '-c', code)
26832681
self.assertEqual(proc.rc, 10, proc)
26842682

2685-
@unittest.expectedFailure # TODO: RUSTPYTHON
2683+
@unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: "str expected at most 3 arguments, got 4" does not match "expected at most 3 arguments, got 4"
26862684
def test_str_invalid_call(self):
26872685
# too many args
26882686
with self.assertRaisesRegex(TypeError, r"str expected at most 3 arguments, got 4"):

Lib/test/test_tarfile.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1988,8 +1988,6 @@ class UnicodeTest:
19881988
def test_iso8859_1_filename(self):
19891989
self._test_unicode_filename("iso8859-1")
19901990

1991-
# TODO: RUSTPYTHON
1992-
@unittest.expectedFailure
19931991
def test_utf7_filename(self):
19941992
self._test_unicode_filename("utf7")
19951993

@@ -2416,8 +2414,7 @@ def test__all__(self):
24162414
'SubsequentHeaderError', 'ExFileObject', 'main'}
24172415
support.check__all__(self, tarfile, not_exported=not_exported)
24182416

2419-
# TODO: RUSTPYTHON
2420-
@unittest.expectedFailure
2417+
@unittest.expectedFailure # TODO: RUSTPYTHON; FileNotFoundError: [Errno 2] No such file or directory: 'Lib/test/testtar.tar.xz'
24212418
def test_useful_error_message_when_modules_missing(self):
24222419
fname = os.path.join(os.path.dirname(__file__), 'testtar.tar.xz')
24232420
with self.assertRaises(tarfile.ReadError) as excinfo:

Lib/test/test_utf8_mode.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ def test_posix_locale(self):
4646
out = self.get_output('-c', code, LC_ALL=loc)
4747
self.assertEqual(out, '1')
4848

49+
@unittest.expectedFailureIf(MS_WINDOWS, "TODO: RUSTPYTHON")
4950
def test_xoption(self):
5051
code = 'import sys; print(sys.flags.utf8_mode)'
5152

Lib/test/test_weakref.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2045,6 +2045,7 @@ def pop_and_collect(lst):
20452045
if exc:
20462046
raise exc[0]
20472047

2048+
@unittest.skip("TODO: RUSTPYTHON; occasionally crash (malloc corruption)")
20482049
@threading_helper.requires_working_threading()
20492050
@support.requires_resource('cpu')
20502051
def test_threaded_weak_key_dict_copy(self):

0 commit comments

Comments
 (0)