Skip to content

Commit 0194bb5

Browse files
committed
Fix various UTF encodings
1 parent 377dece commit 0194bb5

File tree

11 files changed

+336
-255
lines changed

11 files changed

+336
-255
lines changed

Lib/_pycodecs.py

Lines changed: 309 additions & 231 deletions
Large diffs are not rendered by default.

Lib/test/test_codecs.py

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -715,7 +715,6 @@ def test_badbom(self):
715715
f = codecs.getreader(self.encoding)(s)
716716
self.assertRaises(UnicodeDecodeError, f.read)
717717

718-
@unittest.expectedFailure # TODO: RUSTPYTHON; UnicodeDecodeError: 'utf-16' codec can't decode bytes in position 0-1: unexpected end of data
719718
def test_partial(self):
720719
self.check_partial(
721720
"\x00\xff\u0100\uffff\U00010000",
@@ -737,7 +736,6 @@ def test_partial(self):
737736
]
738737
)
739738

740-
@unittest.expectedFailure # TODO: RUSTPYTHON; IndexError: index out of range
741739
def test_handlers(self):
742740
self.assertEqual(('\ufffd', 1),
743741
codecs.utf_16_decode(b'\x01', 'replace', True))
@@ -781,7 +779,6 @@ def test_invalid_modes(self):
781779
self.assertIn("can't have text and binary mode at once",
782780
str(cm.exception))
783781

784-
@unittest.expectedFailure # TODO: RUSTPYTHON; IndexError: index out of range
785782
def test_incremental_surrogatepass(self):
786783
return super().test_incremental_surrogatepass()
787784

@@ -791,7 +788,6 @@ class UTF16LETest(ReadTest, unittest.TestCase):
791788
encoding = "utf-16-le"
792789
ill_formed_sequence = b"\x80\xdc"
793790

794-
@unittest.expectedFailure # TODO: RUSTPYTHON; UnicodeDecodeError: 'utf-16' codec can't decode bytes in position 0-1: unexpected end of data
795791
def test_partial(self):
796792
self.check_partial(
797793
"\x00\xff\u0100\uffff\U00010000",
@@ -832,7 +828,6 @@ def test_nonbmp(self):
832828
self.assertEqual(b'\x00\xd8\x03\xde'.decode(self.encoding),
833829
"\U00010203")
834830

835-
@unittest.expectedFailure # TODO: RUSTPYTHON; IndexError: index out of range
836831
def test_incremental_surrogatepass(self):
837832
return super().test_incremental_surrogatepass()
838833

@@ -841,7 +836,6 @@ class UTF16BETest(ReadTest, unittest.TestCase):
841836
encoding = "utf-16-be"
842837
ill_formed_sequence = b"\xdc\x80"
843838

844-
@unittest.expectedFailure # TODO: RUSTPYTHON; UnicodeDecodeError: 'utf-16' codec can't decode bytes in position 0-1: unexpected end of data
845839
def test_partial(self):
846840
self.check_partial(
847841
"\x00\xff\u0100\uffff\U00010000",
@@ -882,7 +876,6 @@ def test_nonbmp(self):
882876
self.assertEqual(b'\xd8\x00\xde\x03'.decode(self.encoding),
883877
"\U00010203")
884878

885-
@unittest.expectedFailure # TODO: RUSTPYTHON; UnicodeDecodeError: 'utf-16' codec can't decode bytes in position 0-1: unexpected end of data
886879
def test_incremental_surrogatepass(self):
887880
return super().test_incremental_surrogatepass()
888881

@@ -1010,7 +1003,6 @@ def test_ascii(self):
10101003
b'+AAAAAQACAAMABAAFAAYABwAIAAsADAAOAA8AEAARABIAEwAU'
10111004
b'ABUAFgAXABgAGQAaABsAHAAdAB4AHwBcAH4Afw-')
10121005

1013-
@unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: expected at least 5 arguments, got 1
10141006
def test_partial(self):
10151007
self.check_partial(
10161008
'a+-b\x00c\x80d\u0100e\U00010000f',
@@ -1115,11 +1107,9 @@ def test_lone_surrogates(self):
11151107
with self.subTest(raw=raw):
11161108
self.assertEqual(raw.decode('utf-7', 'replace'), expected)
11171109

1118-
@unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: expected at least 5 arguments, got 1
11191110
def test_readline(self):
11201111
return super().test_readline()
11211112

1122-
@unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: utf_7_decode() takes from 1 to 2 positional arguments but 3 were given
11231113
def test_incremental_surrogatepass(self):
11241114
return super().test_incremental_surrogatepass()
11251115

@@ -3475,7 +3465,7 @@ def check_encode(self, cp, tests):
34753465
self.assertRaises(UnicodeEncodeError,
34763466
text.encode, f'cp{cp}', errors)
34773467

3478-
@expectedFailure # TODO: RUSTPYTHON
3468+
@unittest.expectedFailure # TODO: RUSTPYTHON
34793469
def test_cp932(self):
34803470
self.check_encode(932, (
34813471
('abc', 'strict', b'abc'),
@@ -3583,6 +3573,7 @@ def test_cp20106(self):
35833573
(b'(\xbf)', 'surrogatepass', None),
35843574
))
35853575

3576+
@unittest.expectedFailure # TODO: RUSTPYTHON
35863577
def test_cp_utf7(self):
35873578
cp = 65000
35883579
self.check_encode(cp, (

Lib/test/test_fileinput.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -980,8 +980,6 @@ def check(errors, expected_lines):
980980
check('replace', ['\ufffdabc'])
981981
check('backslashreplace', ['\\x80abc'])
982982

983-
# TODO: RUSTPYTHON
984-
@unittest.expectedFailure
985983
def test_modes(self):
986984
with open(TESTFN, 'wb') as f:
987985
# UTF-7 is a convenient, seldom used encoding

Lib/test/test_io.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3815,6 +3815,7 @@ def __del__(self):
38153815
""".format(iomod=iomod, kwargs=kwargs)
38163816
return assert_python_ok("-c", code)
38173817

3818+
@unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError during module teardown in __del__
38183819
def test_create_at_shutdown_without_encoding(self):
38193820
rc, out, err = self._check_create_at_shutdown()
38203821
if err:
@@ -3824,6 +3825,7 @@ def test_create_at_shutdown_without_encoding(self):
38243825
else:
38253826
self.assertEqual("ok", out.decode().strip())
38263827

3828+
@unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError during module teardown in __del__
38273829
def test_create_at_shutdown_with_encoding(self):
38283830
rc, out, err = self._check_create_at_shutdown(encoding='utf-8',
38293831
errors='strict')

Lib/test/test_logging.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5165,6 +5165,7 @@ def __init__(self, name='MyLogger', level=logging.NOTSET):
51655165
h.close()
51665166
logging.setLoggerClass(logging.Logger)
51675167

5168+
@unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError during module teardown in __del__
51685169
def test_logging_at_shutdown(self):
51695170
# bpo-20037: Doing text I/O late at interpreter shutdown must not crash
51705171
code = textwrap.dedent("""
@@ -5184,6 +5185,7 @@ def __del__(self):
51845185
self.assertIn("exception in __del__", err)
51855186
self.assertIn("ValueError: some error", err)
51865187

5188+
@unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError during module teardown in __del__
51875189
def test_logging_at_shutdown_open(self):
51885190
# bpo-26789: FileHandler keeps a reference to the builtin open()
51895191
# function to be able to open or reopen the file during Python

Lib/test/test_plistlib.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -752,7 +752,6 @@ def test_non_bmp_characters(self):
752752
data = plistlib.dumps(pl, fmt=fmt)
753753
self.assertEqual(plistlib.loads(data), pl)
754754

755-
@unittest.expectedFailure # TODO: RUSTPYTHON
756755
def test_lone_surrogates(self):
757756
for fmt in ALL_FORMATS:
758757
with self.subTest(fmt=fmt):

Lib/test/test_str.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2287,7 +2287,6 @@ def test_codecs_errors(self):
22872287
self.assertRaises(ValueError, complex, "\ud800")
22882288
self.assertRaises(ValueError, complex, "\udf00")
22892289

2290-
@unittest.expectedFailure # TODO: RUSTPYTHON
22912290
def test_codecs(self):
22922291
# Encoding
22932292
self.assertEqual('hello'.encode('ascii'), b'hello')

Lib/test/test_tarfile.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1988,8 +1988,6 @@ class UnicodeTest:
19881988
def test_iso8859_1_filename(self):
19891989
self._test_unicode_filename("iso8859-1")
19901990

1991-
# TODO: RUSTPYTHON
1992-
@unittest.expectedFailure
19931991
def test_utf7_filename(self):
19941992
self._test_unicode_filename("utf7")
19951993

@@ -2182,6 +2180,10 @@ def test_binary_header(self):
21822180
except KeyError:
21832181
self.fail("unable to read POSIX.1-2008 binary header")
21842182

2183+
@unittest.expectedFailure # TODO: RUSTPYTHON; UnicodeDecodeError: 'utf-7' codec can't decode byte 0x2f in position 1: unexpected special character
2184+
def test_utf7_filename(self):
2185+
return super().test_utf7_filename()
2186+
21852187

21862188
class AppendTestBase:
21872189
# Test append mode (cp. patch #1652681).
@@ -2416,8 +2418,7 @@ def test__all__(self):
24162418
'SubsequentHeaderError', 'ExFileObject', 'main'}
24172419
support.check__all__(self, tarfile, not_exported=not_exported)
24182420

2419-
# TODO: RUSTPYTHON
2420-
@unittest.expectedFailure
2421+
@unittest.expectedFailure # TODO: RUSTPYTHON; FileNotFoundError: [Errno 2] No such file or directory: '<repo>/crates/pylib/Lib/test/testtar.tar.xz'
24212422
def test_useful_error_message_when_modules_missing(self):
24222423
fname = os.path.join(os.path.dirname(__file__), 'testtar.tar.xz')
24232424
with self.assertRaises(tarfile.ReadError) as excinfo:

Lib/test/test_utf8_mode.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ def test_posix_locale(self):
4646
out = self.get_output('-c', code, LC_ALL=loc)
4747
self.assertEqual(out, '1')
4848

49+
@unittest.expectedFailureIf(MS_WINDOWS, "TODO: RUSTPYTHON")
4950
def test_xoption(self):
5051
code = 'import sys; print(sys.flags.utf8_mode)'
5152

Lib/test/test_weakref.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2045,6 +2045,7 @@ def pop_and_collect(lst):
20452045
if exc:
20462046
raise exc[0]
20472047

2048+
@unittest.skip("TODO: RUSTPYTHON; occasionally crash (malloc corruption)")
20482049
@threading_helper.requires_working_threading()
20492050
@support.requires_resource('cpu')
20502051
def test_threaded_weak_key_dict_copy(self):

0 commit comments

Comments
 (0)