From 30a9cbdbaee8d10b1eaa761ef217add32747d635 Mon Sep 17 00:00:00 2001 From: ShaharNaveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Fri, 12 Jun 2026 11:02:03 +0300 Subject: [PATCH 1/5] Update more tests --- Lib/test/support/__init__.py | 15 ++ Lib/test/test_bytes.py | 7 +- Lib/test/test_bz2.py | 15 ++ Lib/test/test_cmd_line.py | 2 + Lib/test/test_deque.py | 16 ++ Lib/test/test_genericalias.py | 13 +- Lib/test/test_grp.py | 90 ++++----- Lib/test/test_imaplib.py | 10 + Lib/test/test_import/__init__.py | 9 + .../data/circular_imports/subpkg2/__init__.py | 1 + .../test_import/data/package/submodule.py | 1 + .../test_import/data/package2/submodule2.py | 1 + Lib/test/test_import/data/unwritable/x.py | 1 + Lib/test/test_listcomps.py | 31 +++ Lib/test/test_memoryio.py | 19 ++ Lib/test/test_mimetypes.py | 57 +++++- Lib/test/test_pwd.py | 94 ++++----- Lib/test/test_pyexpat.py | 188 +++++++++++++++++- 18 files changed, 463 insertions(+), 107 deletions(-) diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 6635ec3474e..701d34bba2d 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -3227,3 +3227,18 @@ def control_characters_c0() -> list[str]: C0 control characters defined as the byte range 0x00-0x1F, and 0x7F. """ return [chr(c) for c in range(0x00, 0x20)] + ["\x7F"] + + +STATUS_DLL_INIT_FAILED = 0xC0000142 +def skip_on_low_desktop_heap_memory_subprocess(returncode): + if sys.platform not in ('win32', 'cygwin'): + return + # On Windows, STATUS_DLL_INIT_FAILED is a generic error code that could + # come from any of the DLLs being loaded when a new Python process is + # created. In practice, it's likely a memory allocation failure in the + # desktop heap memory which caused the DLL init failure, especially on + # process created with CREATE_NEW_CONSOLE creation flag. See the article: + # https://learn.microsoft.com/en-us/troubleshoot/windows-server/performance/desktop-heap-limitation-out-of-memory + if returncode == STATUS_DLL_INIT_FAILED: + raise unittest.SkipTest('gh-150436: DLL init failed, likely because ' + 'of low desktop heap memory') diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index a72bc03c329..70af9af466d 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -2622,10 +2622,6 @@ def iconcat(b, a): # MODIFIES! b.wait() a += c - def irepeat(b, a): # MODIFIES! - b.wait() - a *= 2 - def subscript(b, a): b.wait() try: assert a[0] != 0xdd @@ -2747,9 +2743,10 @@ def check(funcs, a=None, *args): check([clear] + [repeat] * 10) check([clear] + [iconcat] * 10) - check([clear] + [irepeat] * 10) check([clear] + [ass_subscript] * 10) check([clear] + [repr_] * 10) + # gh-148605: Do not test "a *= 2" since it allocates up to 4 GiB using + # 10 threads # value errors diff --git a/Lib/test/test_bz2.py b/Lib/test/test_bz2.py index 07db98ed51f..610cc45b968 100644 --- a/Lib/test/test_bz2.py +++ b/Lib/test/test_bz2.py @@ -1033,6 +1033,21 @@ def test_failure(self): # Previously, a second call could crash due to internal inconsistency self.assertRaises(Exception, bzd.decompress, self.BAD_DATA * 30) + def test_decompress_after_data_error(self): + data = bytes.fromhex( + "425a6839314159265359000000000000007fffff000000000000000000000000" + "00000000000000000000000000000000000000e0370000000000000000000000" + "000000000000000000000000000000000000000000000000000083f3" + ) + bzd = BZ2Decompressor() + with self.assertRaisesRegex(OSError, "Invalid data stream"): + bzd.decompress(data) + # Previously, a second call could crash due to internal inconsistency + self.assertFalse(bzd.needs_input) + self.assertFalse(bzd.eof) + with self.assertRaisesRegex(ValueError, "previous error"): + bzd.decompress(b'\x00' * 18) + @support.refcount_test def test_refleaks_in___init__(self): gettotalrefcount = support.get_attribute(sys, 'gettotalrefcount') diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py index 8a0831ee45d..50f6c7572c1 100644 --- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py @@ -1014,6 +1014,7 @@ def test_python_legacy_windows_stdio(self): p = subprocess.run([sys.executable, "-c", code], creationflags=subprocess.CREATE_NEW_CONSOLE, env=env) + support.skip_on_low_desktop_heap_memory_subprocess(p.returncode) self.assertEqual(p.returncode, 0) # Then test that FIleIO is used when PYTHONLEGACYWINDOWSSTDIO is set. @@ -1022,6 +1023,7 @@ def test_python_legacy_windows_stdio(self): p = subprocess.run([sys.executable, "-c", code], creationflags=subprocess.CREATE_NEW_CONSOLE, env=env) + support.skip_on_low_desktop_heap_memory_subprocess(p.returncode) self.assertEqual(p.returncode, 0) @unittest.expectedFailure # TODO: RUSTPYTHON diff --git a/Lib/test/test_deque.py b/Lib/test/test_deque.py index d4b42c0bd55..908713f2806 100644 --- a/Lib/test/test_deque.py +++ b/Lib/test/test_deque.py @@ -287,6 +287,22 @@ def test_index(self): else: self.assertEqual(d.index(element, start, stop), target) + # Test stop argument + for elem in d: + index = d.index(elem) + self.assertEqual( + index, + d.index(elem, 0), + ) + self.assertEqual( + index, + d.index(elem, 0, len(d)), + ) + self.assertEqual( + index, + d.index(elem, 0, len(d) + 100), + ) + # Test large start argument d = deque(range(0, 10000, 10)) for step in range(100): diff --git a/Lib/test/test_genericalias.py b/Lib/test/test_genericalias.py index 4f5b10650ac..121d169dd07 100644 --- a/Lib/test/test_genericalias.py +++ b/Lib/test/test_genericalias.py @@ -55,15 +55,14 @@ from unittest.case import _AssertRaisesContext from queue import Queue, SimpleQueue from weakref import WeakSet, ReferenceType, ref -import typing -from typing import Unpack try: from tkinter import Event except ImportError: Event = None from string.templatelib import Template, Interpolation -from typing import TypeVar +import typing +from typing import TypeVar, Unpack T = TypeVar('T') K = TypeVar('K') V = TypeVar('V') @@ -619,6 +618,14 @@ def test_nested_paramspec_specialization(self): self.assertEqual(deeply_nested_specialized.__args__, ([str, [float], int], float)) self.assertEqual(deeply_nested_specialized.__parameters__, ()) + def test_gh150146(self): + # It used to crash: + for container in [memoryview, list, tuple]: + with self.subTest(container=container): + x = container[TypeVar("")] + with self.assertRaises(TypeError): + x[*typing.Mapping[..., ...]] + class TypeIterationTests(unittest.TestCase): _UNITERABLE_TYPES = (list, tuple) diff --git a/Lib/test/test_grp.py b/Lib/test/test_grp.py index e52e17b8dc7..ed86802f069 100644 --- a/Lib/test/test_grp.py +++ b/Lib/test/test_grp.py @@ -1,5 +1,7 @@ """Test script for the grp module.""" +import random +import string import unittest from test.support import import_helper @@ -50,61 +52,51 @@ def test_values_extended(self): def test_errors(self): self.assertRaises(TypeError, grp.getgrgid) self.assertRaises(TypeError, grp.getgrgid, 3.14) + self.assertRaises(TypeError, grp.getgrgid, 0.0) + self.assertRaises(TypeError, grp.getgrgid, 0, 0) + # should be out of gid_t range + self.assertRaises(OverflowError, grp.getgrgid, 2**128) + self.assertRaises(OverflowError, grp.getgrgid, -2**128) self.assertRaises(TypeError, grp.getgrnam) self.assertRaises(TypeError, grp.getgrnam, 42) - self.assertRaises(TypeError, grp.getgrall, 42) + self.assertRaises(TypeError, grp.getgrnam, b'root') + self.assertRaises(TypeError, grp.getgrnam, 'root', 0) # embedded null character self.assertRaisesRegex(ValueError, 'null', grp.getgrnam, 'a\x00b') + self.assertRaisesRegex(ValueError, 'null', grp.getgrnam, 'root\x00') + self.assertRaises(UnicodeEncodeError, grp.getgrnam, 'roo\udc74') + self.assertRaises(KeyError, grp.getgrnam, '') + self.assertRaises(TypeError, grp.getgrall, 42) - # try to get some errors - bynames = {} - bygids = {} - for (n, p, g, mem) in grp.getgrall(): - if not n or n == '+': - continue # skip NIS entries etc. - bynames[n] = g - bygids[g] = n - - allnames = list(bynames.keys()) - namei = 0 - fakename = allnames[namei] - while fakename in bynames: - chars = list(fakename) - for i in range(len(chars)): - if chars[i] == 'z': - chars[i] = 'A' - break - elif chars[i] == 'Z': - continue + # Find a non-existent group name. + # getgrall() will not necessarily report all existing groups + # (typical for LDAP based directories in big organizations). + for _ in range(30): + fakename = ''.join(random.choices(string.ascii_lowercase, k=6)) + try: + grp.getgrnam(fakename) + except KeyError: + break + else: + self.fail('Cannot find non-existent group name') + + # Find a non-existent gid. + maxgid = 2**31 + for _ in range(30): + fakegid = random.randrange(maxgid) + try: + grp.getgrgid(fakegid) + except KeyError: + break + except OverflowError: + if maxgid == 2**31: + maxgid = 2**16-1 + elif maxgid == 2**16-1: + maxgid = 2**15 else: - chars[i] = chr(ord(chars[i]) + 1) - break - else: - namei = namei + 1 - try: - fakename = allnames[namei] - except IndexError: - # should never happen... if so, just forget it - break - fakename = ''.join(chars) - - self.assertRaises(KeyError, grp.getgrnam, fakename) - - # Choose a non-existent gid. - fakegid = 4127 - while fakegid in bygids: - fakegid = (fakegid * 3) % 0x10000 - - self.assertRaises(KeyError, grp.getgrgid, fakegid) - - def test_noninteger_gid(self): - entries = grp.getgrall() - if not entries: - self.skipTest('no groups') - # Choose an existent gid. - gid = entries[0][2] - self.assertRaises(TypeError, grp.getgrgid, float(gid)) - self.assertRaises(TypeError, grp.getgrgid, str(gid)) + raise + else: + self.fail('Cannot find non-existent gid') if __name__ == "__main__": diff --git a/Lib/test/test_imaplib.py b/Lib/test/test_imaplib.py index 9155a43a06e..f20120618cf 100644 --- a/Lib/test/test_imaplib.py +++ b/Lib/test/test_imaplib.py @@ -435,6 +435,16 @@ def cmd_AUTHENTICATE(self, tag, args): r'\[AUTHENTICATIONFAILED\] invalid'): client.authenticate('MYAUTH', lambda x: b'fake') + def test_invalid_login(self): + class MyServer(SimpleIMAPHandler): + def cmd_LOGIN(self, tag, args): + self.server.logged = args[0] + self._send_tagged(tag, 'NO', '[LOGIN] failed') + client, _ = self._setup(MyServer) + with self.assertRaisesRegex(imaplib.IMAP4.error, + r'\[LOGIN\] failed'): + client.login('user', 'wrongpass') + def test_valid_authentication_bytes(self): class MyServer(SimpleIMAPHandler): def cmd_AUTHENTICATE(self, tag, args): diff --git a/Lib/test/test_import/__init__.py b/Lib/test/test_import/__init__.py index 60413aa7629..6791675800d 100644 --- a/Lib/test/test_import/__init__.py +++ b/Lib/test/test_import/__init__.py @@ -360,6 +360,15 @@ def test_import_raises_ModuleNotFoundError(self): with self.assertRaises(ModuleNotFoundError): import something_that_should_not_exist_anywhere + def test_import_null_byte_in_name_raises_ModuleNotFoundError(self): + # gh-150633: module names containing null bytes should not + # lead to duplicates in sys.modules + before = set(sys.modules.keys()) + with self.assertRaises(ModuleNotFoundError): + __import__('zipimport\x00junk') + + self.assertEqual(set(sys.modules.keys()), before) + def test_from_import_missing_module_raises_ModuleNotFoundError(self): with self.assertRaises(ModuleNotFoundError): from something_that_should_not_exist_anywhere import blah diff --git a/Lib/test/test_import/data/circular_imports/subpkg2/__init__.py b/Lib/test/test_import/data/circular_imports/subpkg2/__init__.py index e69de29bb2d..8b137891791 100644 --- a/Lib/test/test_import/data/circular_imports/subpkg2/__init__.py +++ b/Lib/test/test_import/data/circular_imports/subpkg2/__init__.py @@ -0,0 +1 @@ + diff --git a/Lib/test/test_import/data/package/submodule.py b/Lib/test/test_import/data/package/submodule.py index e69de29bb2d..8b137891791 100644 --- a/Lib/test/test_import/data/package/submodule.py +++ b/Lib/test/test_import/data/package/submodule.py @@ -0,0 +1 @@ + diff --git a/Lib/test/test_import/data/package2/submodule2.py b/Lib/test/test_import/data/package2/submodule2.py index e69de29bb2d..8b137891791 100644 --- a/Lib/test/test_import/data/package2/submodule2.py +++ b/Lib/test/test_import/data/package2/submodule2.py @@ -0,0 +1 @@ + diff --git a/Lib/test/test_import/data/unwritable/x.py b/Lib/test/test_import/data/unwritable/x.py index e69de29bb2d..8b137891791 100644 --- a/Lib/test/test_import/data/unwritable/x.py +++ b/Lib/test/test_import/data/unwritable/x.py @@ -0,0 +1 @@ + diff --git a/Lib/test/test_listcomps.py b/Lib/test/test_listcomps.py index 47c27bc3faa..e76169c69df 100644 --- a/Lib/test/test_listcomps.py +++ b/Lib/test/test_listcomps.py @@ -171,6 +171,17 @@ def test_references___class__(self): """ self._check_in_scopes(code, raises=NameError) + def test_references___class___nested(self): + code = """ + res = [(lambda: __class__)() for _ in [1]] + """ + self._check_in_scopes(code, raises=NameError) + + def test_references___class___nested_used(self): + class _C: + res = [lambda: __class__ for _ in [1]] + self.assertIs(_C.res[0](), _C) + def test_references___class___defined(self): code = """ __class__ = 2 @@ -180,18 +191,38 @@ def test_references___class___defined(self): code, outputs={"res": [2]}, scopes=["module", "function"]) self._check_in_scopes(code, raises=NameError, scopes=["class"]) + def test_references___class___defined_nested(self): + code = """ + __class__ = 2 + res = [(lambda: __class__)() for x in [1]] + """ + self._check_in_scopes( + code, outputs={"res": [2]}, scopes=["module", "function"]) + self._check_in_scopes(code, raises=NameError, scopes=["class"]) + def test_references___classdict__(self): code = """ class i: [__classdict__ for x in y] """ self._check_in_scopes(code, raises=NameError) + def test_references___classdict___nested(self): + class _C: + res = [(lambda: __classdict__)() for _ in [1]] + self.assertIn("res", _C.res[0]) + def test_references___conditional_annotations__(self): code = """ class i: [__conditional_annotations__ for x in y] """ self._check_in_scopes(code, raises=NameError) + def test_references___conditional_annotations___nested(self): + code = """ + class i: [lambda: __conditional_annotations__ for x in y] + """ + self._check_in_scopes(code, raises=NameError) + def test_references___class___enclosing(self): code = """ __class__ = 2 diff --git a/Lib/test/test_memoryio.py b/Lib/test/test_memoryio.py index 7b321600e88..7214a377067 100644 --- a/Lib/test/test_memoryio.py +++ b/Lib/test/test_memoryio.py @@ -987,6 +987,25 @@ def test_setstate(self): memio.close() self.assertRaises(ValueError, memio.__setstate__, ("closed", "", 0, None)) + def test_write_str_subclass(self): + # Writing a str subclass should use the subclass's unicode data + # directly, not call __str__ on it (which may return a different + # value). gh-149047 + class MyStr(str): + def __str__(self): + return "WRONG" + + s = MyStr("correct") + memio = self.ioclass() + memio.write(s) + self.assertEqual(memio.getvalue(), "correct") + + # Also test the fast path where pos == string_size (STATE_ACCUMULATING) + memio2 = self.ioclass() + memio2.write(MyStr("hello ")) + memio2.write(MyStr("world")) + self.assertEqual(memio2.getvalue(), "hello world") + @unittest.expectedFailure # TODO: RUSTPYTHON; + def test_issue5265(self): return super().test_issue5265() diff --git a/Lib/test/test_mimetypes.py b/Lib/test/test_mimetypes.py index c1806b1c133..e263487e0be 100644 --- a/Lib/test/test_mimetypes.py +++ b/Lib/test/test_mimetypes.py @@ -6,8 +6,9 @@ import unittest.mock from platform import win32_edition from test import support -from test.support import cpython_only, force_not_colorized, os_helper +from test.support import cpython_only, force_not_colorized, os_helper, requires_subprocess from test.support.import_helper import ensure_lazy_imports +from test.support.script_helper import assert_python_ok, assert_python_failure try: import _winapi @@ -506,5 +507,59 @@ def test_invocation_error(self): self.assertEqual(result, expected) +@requires_subprocess() +class CommandLineSubprocessTest(unittest.TestCase): + def test_help(self): + rc, stdout, stderr = assert_python_ok('-m', 'mimetypes', '--help') + self.assertIn(b'mimetypes', stdout) + self.assertIn(b'--extension', stdout) + self.assertIn(b'--lenient', stdout) + + def test_type_lookup(self): + rc, stdout, stderr = assert_python_ok('-m', 'mimetypes', 'foo.pdf') + self.assertEqual(stdout.strip(), b'type: application/pdf encoding: None') + self.assertEqual(stderr, b'') + + def test_type_lookup_unknown(self): + rc, stdout, stderr = assert_python_failure('-m', 'mimetypes', 'foo.unknownext12345') + self.assertEqual(stdout.strip(), b'error: media type unknown for foo.unknownext12345') + self.assertEqual(stderr, b'') + + def test_extension_flag(self): + rc, stdout, stderr = assert_python_ok('-m', 'mimetypes', '-e', 'image/jpeg') + self.assertEqual(stdout.strip(), b'.jpg') + self.assertEqual(stderr, b'') + + def test_extension_flag_unknown(self): + rc, stdout, stderr = assert_python_failure('-m', 'mimetypes', '-e', 'image/unknowntype12345') + self.assertEqual(stdout.strip(), b'error: unknown type image/unknowntype12345') + self.assertEqual(stderr, b'') + + def test_lenient_flag(self): + rc, stdout, stderr = assert_python_ok('-m', 'mimetypes', '-e', '--lenient', 'text/xul') + self.assertIn(b'.xul', stdout) + self.assertEqual(stderr, b'') + + def test_multiple_inputs(self): + rc, stdout, stderr = assert_python_ok('-m', 'mimetypes', 'foo.pdf', 'foo.png') + self.assertIn(b'type: application/pdf encoding: None', stdout) + self.assertIn(b'type: image/png encoding: None', stdout) + self.assertEqual(stderr, b'') + + def test_multiple_inputs_with_error(self): + rc, stdout, stderr = assert_python_failure( + '-m', 'mimetypes', 'foo.pdf', 'foo.unknownext12345' + ) + self.assertIn(b'type: application/pdf encoding: None', stdout) + self.assertIn(b'error: media type unknown for foo.unknownext12345', stdout) + self.assertEqual(stderr, b'') + + @force_not_colorized + def test_unknown_flag(self): + rc, stdout, stderr = assert_python_failure('-m', 'mimetypes', '--unknown-flag', 'foo.pdf') + self.assertEqual(stdout, b'') + self.assertIn(b'error: unrecognized arguments: --unknown-flag', stderr) + + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_pwd.py b/Lib/test/test_pwd.py index aa090b464a7..bdf57776c82 100644 --- a/Lib/test/test_pwd.py +++ b/Lib/test/test_pwd.py @@ -1,3 +1,5 @@ +import random +import string import sys import unittest from test.support import import_helper @@ -56,59 +58,57 @@ def test_values_extended(self): def test_errors(self): self.assertRaises(TypeError, pwd.getpwuid) self.assertRaises(TypeError, pwd.getpwuid, 3.14) + self.assertRaises(TypeError, pwd.getpwuid, 0.0) + self.assertRaises(TypeError, pwd.getpwuid, 0, 0) + # should be out of uid_t range + self.assertRaises(KeyError, pwd.getpwuid, 2**128) + self.assertRaises(KeyError, pwd.getpwuid, -2**128) self.assertRaises(TypeError, pwd.getpwnam) self.assertRaises(TypeError, pwd.getpwnam, 42) - self.assertRaises(TypeError, pwd.getpwall, 42) + self.assertRaises(TypeError, pwd.getpwnam, b'root') + self.assertRaises(TypeError, pwd.getpwnam, 'root', 0) # embedded null character self.assertRaisesRegex(ValueError, 'null', pwd.getpwnam, 'a\x00b') + self.assertRaisesRegex(ValueError, 'null', pwd.getpwnam, 'root\x00') + self.assertRaises(UnicodeEncodeError, pwd.getpwnam, 'roo\udc74') + self.assertRaises(KeyError, pwd.getpwnam, '') + self.assertRaises(TypeError, pwd.getpwall, 42) - # try to get some errors - bynames = {} - byuids = {} - for (n, p, u, g, gecos, d, s) in pwd.getpwall(): - bynames[n] = u - byuids[u] = n - - allnames = list(bynames.keys()) - namei = 0 - fakename = allnames[namei] if allnames else "invaliduser" - while fakename in bynames: - chars = list(fakename) - for i in range(len(chars)): - if chars[i] == 'z': - chars[i] = 'A' - break - elif chars[i] == 'Z': - continue - else: - chars[i] = chr(ord(chars[i]) + 1) - break - else: - namei = namei + 1 - try: - fakename = allnames[namei] - except IndexError: - # should never happen... if so, just forget it - break - fakename = ''.join(chars) - - self.assertRaises(KeyError, pwd.getpwnam, fakename) - - # In some cases, byuids isn't a complete list of all users in the - # system, so if we try to pick a value not in byuids (via a perturbing - # loop, say), pwd.getpwuid() might still be able to find data for that - # uid. Using sys.maxint may provoke the same problems, but hopefully - # it will be a more repeatable failure. - fakeuid = sys.maxsize - self.assertNotIn(fakeuid, byuids) - self.assertRaises(KeyError, pwd.getpwuid, fakeuid) + # Find a non-existent user name. + # getpwall() will not necessarily report all existing users + # (typical for LDAP based directories in big organizations). + for _ in range(30): + fakename = ''.join(random.choices(string.ascii_lowercase, k=6)) + try: + pwd.getpwnam(fakename) + except KeyError: + break + else: + self.fail('Cannot find non-existent user name') + + # Find a non-existent uid. + maxuid = max(e.pw_uid for e in pwd.getpwall()) + if maxuid < 2**15: + maxuid = 2**15 + elif maxuid < 2**16: + maxuid = 2**16-1 + else: + maxuid = 2**31 + for _ in range(30): + fakeuid = random.randrange(maxuid) + try: + pwd.getpwuid(fakeuid) + except KeyError: + break + else: + self.fail('Cannot find non-existent uid') + + # On Cygwin, getpwuid(-1) returns 'Unknown+User' user + if sys.platform != 'cygwin': + # -1 shouldn't be a valid uid because it has a special meaning in many + # uid-related functions + self.assertRaises(KeyError, pwd.getpwuid, -1) - # -1 shouldn't be a valid uid because it has a special meaning in many - # uid-related functions - self.assertRaises(KeyError, pwd.getpwuid, -1) - # should be out of uid_t range - self.assertRaises(KeyError, pwd.getpwuid, 2**128) - self.assertRaises(KeyError, pwd.getpwuid, -2**128) if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index afb83d2d9b3..5a4f0a21d1e 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -228,8 +228,7 @@ def _verify_parse_output(self, operations): "Character data: '\xb5'", "End element: 'root'", ] - for operation, expected_operation in zip(operations, expected_operations): - self.assertEqual(operation, expected_operation) + self.assertEqual(operations, expected_operations) def test_parse_bytes(self): out = self.Outputter() @@ -278,6 +277,119 @@ def test_parse_again(self): self.assertEqual(expat.ErrorString(cm.exception.code), expat.errors.XML_ERROR_FINISHED) + @support.subTests('encoding', [ + 'utf-8', 'utf-16', 'utf-16be', 'utf-16le', + 'iso8859-1', 'iso8859-2', 'iso8859-3', 'iso8859-4', 'iso8859-5', + 'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10', + 'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16', + 'cp437', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852', + 'cp855', 'cp856', 'cp857', 'cp858', 'cp860', 'cp861', 'cp862', + 'cp863', 'cp865', 'cp866', 'cp869', 'cp874', 'cp1006', 'cp1125', + 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255', + 'cp1256', 'cp1257', 'cp1258', + 'mac-cyrillic', 'mac-greek', 'mac-iceland', 'mac-latin2', + 'mac-roman', 'mac-turkish', + 'koi8-r', 'koi8-t', 'koi8-u', 'kz1048', 'ptcp154', + ]) + def test_supported_encodings(self, encoding): + out = self.Outputter() + parser = expat.ParserCreate() + self._hookup_callbacks(parser, out) + c = 'éπя\u05d0\u060c€'.encode(encoding, 'ignore').decode(encoding)[0] + data = (f'\n' + f'{c}').encode(encoding) + parser.Parse(data, True) + self.assertEqual(out.out, [ + ('XML declaration', ('1.0', encoding, -1)), + "Start element: 'root' {}", + f'Character data: {c!r}', + "End element: 'root'", + ]) + + @support.subTests('encoding', [ + 'UTF-8', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be', + 'koi8-u', 'cp1125', 'cp1251', 'iso8859-5', 'mac-cyrillic', + ]) + def test_supported_encodings2(self, encoding): + out = self.Outputter() + parser = expat.ParserCreate() + self._hookup_callbacks(parser, out) + data = (f'\n' + '' + '<корінь атрибут="значення">зміст').encode(encoding) + parser.Parse(data, True) + self.assertEqual(out.out, [ + ('XML declaration', ('1.0', encoding, -1)), + "Comment: ' коментар '", + "Start element: 'корінь' {'атрибут': 'значення'}", + "Character data: 'зміст'", + "End element: 'корінь'", + ]) + + @support.subTests('encoding', [ + 'UTF-7', + "Big5-HKSCS", "Big5", + "cp932", "cp949", "cp950", + "EUC_JIS-2004", "EUC_JISX0213", "EUC-JP", "EUC-KR", + "GB18030", "GB2312", "GBK", + "ISO-2022-KR", + "johab", + "Shift_JIS", "Shift_JIS-2004", "Shift_JISX0213", + ]) + def test_unsupported_encodings(self, encoding): + parser = expat.ParserCreate() + data = (f'\n' + '').encode(encoding) + with self.assertRaises(ValueError): + parser.Parse(data, True) + + parser = expat.ParserCreate() + data = (f'\n' + '').encode() + with self.assertRaises(ValueError): + parser.Parse(data, True) + + @support.subTests('encoding', [ + 'cp037', 'cp273', 'cp424', 'cp500', 'cp864', 'cp875', + 'cp1026', 'cp1140', + 'mac_arabic', 'mac_farsi', + ]) + def test_incompatible_encodings(self, encoding): + parser = expat.ParserCreate() + data = (f'\n' + '').encode(encoding) + with self.assertRaises(expat.ExpatError): + parser.Parse(data, True) + + parser = expat.ParserCreate() + data = (f'\n' + '').encode() + with self.assertRaisesRegex(expat.ExpatError, 'unknown encoding'): + parser.Parse(data, True) + + @support.subTests('encoding', [ + 'hex_codec', 'rot_13', + ]) + def test_non_text_encodings(self, encoding): + parser = expat.ParserCreate() + data = (f'\n' + '').encode() + with self.assertRaises(LookupError): + parser.Parse(data, True) + + def test_undefined_encoding(self): + parser = expat.ParserCreate() + data = b'\n' + with self.assertRaises(UnicodeError): + parser.Parse(data, True) + + def test_unknown_encoding(self): + parser = expat.ParserCreate() + data = b'\n' + with self.assertRaises(LookupError): + parser.Parse(data, True) + + class NamespaceSeparatorTest(unittest.TestCase): def test_legal(self): # Tests that make sure we get errors when the namespace_separator value @@ -688,6 +800,20 @@ def test_change_size_2(self): parser.Parse(xml2, True) self.assertEqual(self.n, 4) + @support.requires_resource('cpu') + @support.requires_resource('walltime') + @support.bigmemtest(size=2**31, memuse=4, dry_run=False) + def test_large_character_data_does_not_crash(self): + # See https://github.com/python/cpython/issues/148441 + parser = expat.ParserCreate() + parser.buffer_text = True + parser.buffer_size = 2**31 - 1 # INT_MAX + N = 2049 * (1 << 20) - 3 # Character data greater than INT_MAX + self.assertGreater(N, parser.buffer_size) + parser.CharacterDataHandler = lambda text: None + xml_data = b"" + b"A" * N + b"" + self.assertEqual(parser.Parse(xml_data, True), 1) + class ElementDeclHandlerTest(unittest.TestCase): @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: TypeError not raised by Parse def test_trigger_leak(self): @@ -1087,6 +1213,64 @@ def test_set_maximum_amplification__fail_for_subparser(self): self.assert_root_parser_failure(setter, 123.45) +@unittest.skipIf(expat.version_info < (2, 4, 0), "requires Expat >= 2.4.0") +class ExpansionProtectionTest(AttackProtectionTestBase, unittest.TestCase): + + def assert_rejected(self, func, /, *args, **kwargs): + """Check that func(*args, **kwargs) hits the allocation limit.""" + msg = ( + r"limit on input amplification factor \(from DTD and entities\) " + r"breached: line \d+, column \d+" + ) + self.assertRaisesRegex(expat.ExpatError, msg, func, *args, **kwargs) + + def set_activation_threshold(self, parser, threshold): + return parser.SetBillionLaughsAttackProtectionActivationThreshold(threshold) + + def set_maximum_amplification(self, parser, max_factor): + return parser.SetBillionLaughsAttackProtectionMaximumAmplification(max_factor) + + def test_set_activation_threshold__threshold_reached(self): + parser = expat.ParserCreate() + # Choose a threshold expected to be always reached. + self.set_activation_threshold(parser, 3) + # Check that the threshold is reached by choosing a small factor + # and a payload whose peak amplification factor exceeds it. + self.assertIsNone(self.set_maximum_amplification(parser, 1.0)) + payload = self.exponential_expansion_payload(ncols=10, nrows=4) + self.assert_rejected(parser.Parse, payload, True) + + def test_set_activation_threshold__threshold_not_reached(self): + parser = expat.ParserCreate() + # Choose a threshold expected to be never reached. + self.set_activation_threshold(parser, pow(10, 5)) + # Check that the threshold is reached by choosing a small factor + # and a payload whose peak amplification factor exceeds it. + self.assertIsNone(self.set_maximum_amplification(parser, 1.0)) + payload = self.exponential_expansion_payload(ncols=10, nrows=4) + self.assertIsNotNone(parser.Parse(payload, True)) + + def test_set_maximum_amplification__amplification_exceeded(self): + parser = expat.ParserCreate() + # Unconditionally enable maximum activation factor. + self.set_activation_threshold(parser, 0) + # Choose a max amplification factor expected to always be exceeded. + self.assertIsNone(self.set_maximum_amplification(parser, 1.0)) + # Craft a payload for which the peak amplification factor is > 1.0. + payload = self.exponential_expansion_payload(ncols=1, nrows=2) + self.assert_rejected(parser.Parse, payload, True) + + def test_set_maximum_amplification__amplification_not_exceeded(self): + parser = expat.ParserCreate() + # Unconditionally enable maximum activation factor. + self.set_activation_threshold(parser, 0) + # Choose a max amplification factor expected to never be exceeded. + self.assertIsNone(self.set_maximum_amplification(parser, 1e4)) + # Craft a payload for which the peak amplification factor is < 1e4. + payload = self.exponential_expansion_payload(ncols=1, nrows=2) + self.assertIsNotNone(parser.Parse(payload, True)) + + @unittest.skipIf(not hasattr(expat.XMLParserType, "SetAllocTrackerMaximumAmplification"), "requires Python compiled with Expat >= 2.7.2") From 7b3aa9e79ac197dbc2660e495da8d6848fe08703 Mon Sep 17 00:00:00 2001 From: ShaharNaveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Fri, 12 Jun 2026 11:02:59 +0300 Subject: [PATCH 2/5] Fix newlines --- Lib/test/test_import/data/circular_imports/subpkg2/__init__.py | 1 - Lib/test/test_import/data/package/submodule.py | 1 - Lib/test/test_import/data/package2/submodule2.py | 1 - Lib/test/test_import/data/unwritable/x.py | 1 - 4 files changed, 4 deletions(-) diff --git a/Lib/test/test_import/data/circular_imports/subpkg2/__init__.py b/Lib/test/test_import/data/circular_imports/subpkg2/__init__.py index 8b137891791..e69de29bb2d 100644 --- a/Lib/test/test_import/data/circular_imports/subpkg2/__init__.py +++ b/Lib/test/test_import/data/circular_imports/subpkg2/__init__.py @@ -1 +0,0 @@ - diff --git a/Lib/test/test_import/data/package/submodule.py b/Lib/test/test_import/data/package/submodule.py index 8b137891791..e69de29bb2d 100644 --- a/Lib/test/test_import/data/package/submodule.py +++ b/Lib/test/test_import/data/package/submodule.py @@ -1 +0,0 @@ - diff --git a/Lib/test/test_import/data/package2/submodule2.py b/Lib/test/test_import/data/package2/submodule2.py index 8b137891791..e69de29bb2d 100644 --- a/Lib/test/test_import/data/package2/submodule2.py +++ b/Lib/test/test_import/data/package2/submodule2.py @@ -1 +0,0 @@ - diff --git a/Lib/test/test_import/data/unwritable/x.py b/Lib/test/test_import/data/unwritable/x.py index 8b137891791..e69de29bb2d 100644 --- a/Lib/test/test_import/data/unwritable/x.py +++ b/Lib/test/test_import/data/unwritable/x.py @@ -1 +0,0 @@ - From c3924ef8800738d68cc9f9a3710d2beb75d1ed0a Mon Sep 17 00:00:00 2001 From: ShaharNaveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Fri, 12 Jun 2026 11:16:47 +0300 Subject: [PATCH 3/5] Mark failing tests --- Lib/test/test_listcomps.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_listcomps.py b/Lib/test/test_listcomps.py index e76169c69df..5dbc130b4c5 100644 --- a/Lib/test/test_listcomps.py +++ b/Lib/test/test_listcomps.py @@ -206,9 +206,11 @@ class i: [__classdict__ for x in y] """ self._check_in_scopes(code, raises=NameError) + @unittest.expectedFailure # TODO: RUSTPYTHON; SyntaxError: compiler_make_closure: cannot find '__classdict__' in parent vars def test_references___classdict___nested(self): class _C: - res = [(lambda: __classdict__)() for _ in [1]] + # res = [(lambda: __classdict__)() for _ in [1]] # TODO: RUSTPYTHON + pass # TODO: RUSTPYTHON self.assertIn("res", _C.res[0]) def test_references___conditional_annotations__(self): @@ -217,6 +219,7 @@ class i: [__conditional_annotations__ for x in y] """ self._check_in_scopes(code, raises=NameError) + @unittest.expectedFailure # TODO: RUSTPYTHON; SyntaxError: compiler_make_closure: cannot find '__conditional_annotations__' in parent vars def test_references___conditional_annotations___nested(self): code = """ class i: [lambda: __conditional_annotations__ for x in y] From c10ace74fbf435d497a44d2bbeee980f060d9aca Mon Sep 17 00:00:00 2001 From: ShaharNaveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Fri, 12 Jun 2026 14:06:30 +0300 Subject: [PATCH 4/5] Mark failing tests --- Lib/test/test_bz2.py | 1 + Lib/test/test_grp.py | 1 + 2 files changed, 2 insertions(+) diff --git a/Lib/test/test_bz2.py b/Lib/test/test_bz2.py index 610cc45b968..a7e152fb7e7 100644 --- a/Lib/test/test_bz2.py +++ b/Lib/test/test_bz2.py @@ -1033,6 +1033,7 @@ def test_failure(self): # Previously, a second call could crash due to internal inconsistency self.assertRaises(Exception, bzd.decompress, self.BAD_DATA * 30) + @unittest.expectedFailure # TODO: RUSTPYTHON; Wrong error message def test_decompress_after_data_error(self): data = bytes.fromhex( "425a6839314159265359000000000000007fffff000000000000000000000000" diff --git a/Lib/test/test_grp.py b/Lib/test/test_grp.py index ed86802f069..289228924c7 100644 --- a/Lib/test/test_grp.py +++ b/Lib/test/test_grp.py @@ -49,6 +49,7 @@ def test_values_extended(self): # Discovered on Ubuntu 5.04 (custom). self.assertEqual(e2.gr_name.lower(), name.lower()) + @unittest.expectedFailure # TODO: RUSTPYTHON; KeyError: 'getgrgid: group id 340282366920938463463374607431768211456 not found' def test_errors(self): self.assertRaises(TypeError, grp.getgrgid) self.assertRaises(TypeError, grp.getgrgid, 3.14) From 02bbc53ff9a3f1cc13993848baeffe54241f54e6 Mon Sep 17 00:00:00 2001 From: ShaharNaveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Fri, 12 Jun 2026 14:06:50 +0300 Subject: [PATCH 5/5] Restore `test_pyexpat.py` --- Lib/test/test_pyexpat.py | 188 +-------------------------------------- 1 file changed, 2 insertions(+), 186 deletions(-) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index 5a4f0a21d1e..afb83d2d9b3 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -228,7 +228,8 @@ def _verify_parse_output(self, operations): "Character data: '\xb5'", "End element: 'root'", ] - self.assertEqual(operations, expected_operations) + for operation, expected_operation in zip(operations, expected_operations): + self.assertEqual(operation, expected_operation) def test_parse_bytes(self): out = self.Outputter() @@ -277,119 +278,6 @@ def test_parse_again(self): self.assertEqual(expat.ErrorString(cm.exception.code), expat.errors.XML_ERROR_FINISHED) - @support.subTests('encoding', [ - 'utf-8', 'utf-16', 'utf-16be', 'utf-16le', - 'iso8859-1', 'iso8859-2', 'iso8859-3', 'iso8859-4', 'iso8859-5', - 'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10', - 'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16', - 'cp437', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852', - 'cp855', 'cp856', 'cp857', 'cp858', 'cp860', 'cp861', 'cp862', - 'cp863', 'cp865', 'cp866', 'cp869', 'cp874', 'cp1006', 'cp1125', - 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255', - 'cp1256', 'cp1257', 'cp1258', - 'mac-cyrillic', 'mac-greek', 'mac-iceland', 'mac-latin2', - 'mac-roman', 'mac-turkish', - 'koi8-r', 'koi8-t', 'koi8-u', 'kz1048', 'ptcp154', - ]) - def test_supported_encodings(self, encoding): - out = self.Outputter() - parser = expat.ParserCreate() - self._hookup_callbacks(parser, out) - c = 'éπя\u05d0\u060c€'.encode(encoding, 'ignore').decode(encoding)[0] - data = (f'\n' - f'{c}').encode(encoding) - parser.Parse(data, True) - self.assertEqual(out.out, [ - ('XML declaration', ('1.0', encoding, -1)), - "Start element: 'root' {}", - f'Character data: {c!r}', - "End element: 'root'", - ]) - - @support.subTests('encoding', [ - 'UTF-8', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be', - 'koi8-u', 'cp1125', 'cp1251', 'iso8859-5', 'mac-cyrillic', - ]) - def test_supported_encodings2(self, encoding): - out = self.Outputter() - parser = expat.ParserCreate() - self._hookup_callbacks(parser, out) - data = (f'\n' - '' - '<корінь атрибут="значення">зміст').encode(encoding) - parser.Parse(data, True) - self.assertEqual(out.out, [ - ('XML declaration', ('1.0', encoding, -1)), - "Comment: ' коментар '", - "Start element: 'корінь' {'атрибут': 'значення'}", - "Character data: 'зміст'", - "End element: 'корінь'", - ]) - - @support.subTests('encoding', [ - 'UTF-7', - "Big5-HKSCS", "Big5", - "cp932", "cp949", "cp950", - "EUC_JIS-2004", "EUC_JISX0213", "EUC-JP", "EUC-KR", - "GB18030", "GB2312", "GBK", - "ISO-2022-KR", - "johab", - "Shift_JIS", "Shift_JIS-2004", "Shift_JISX0213", - ]) - def test_unsupported_encodings(self, encoding): - parser = expat.ParserCreate() - data = (f'\n' - '').encode(encoding) - with self.assertRaises(ValueError): - parser.Parse(data, True) - - parser = expat.ParserCreate() - data = (f'\n' - '').encode() - with self.assertRaises(ValueError): - parser.Parse(data, True) - - @support.subTests('encoding', [ - 'cp037', 'cp273', 'cp424', 'cp500', 'cp864', 'cp875', - 'cp1026', 'cp1140', - 'mac_arabic', 'mac_farsi', - ]) - def test_incompatible_encodings(self, encoding): - parser = expat.ParserCreate() - data = (f'\n' - '').encode(encoding) - with self.assertRaises(expat.ExpatError): - parser.Parse(data, True) - - parser = expat.ParserCreate() - data = (f'\n' - '').encode() - with self.assertRaisesRegex(expat.ExpatError, 'unknown encoding'): - parser.Parse(data, True) - - @support.subTests('encoding', [ - 'hex_codec', 'rot_13', - ]) - def test_non_text_encodings(self, encoding): - parser = expat.ParserCreate() - data = (f'\n' - '').encode() - with self.assertRaises(LookupError): - parser.Parse(data, True) - - def test_undefined_encoding(self): - parser = expat.ParserCreate() - data = b'\n' - with self.assertRaises(UnicodeError): - parser.Parse(data, True) - - def test_unknown_encoding(self): - parser = expat.ParserCreate() - data = b'\n' - with self.assertRaises(LookupError): - parser.Parse(data, True) - - class NamespaceSeparatorTest(unittest.TestCase): def test_legal(self): # Tests that make sure we get errors when the namespace_separator value @@ -800,20 +688,6 @@ def test_change_size_2(self): parser.Parse(xml2, True) self.assertEqual(self.n, 4) - @support.requires_resource('cpu') - @support.requires_resource('walltime') - @support.bigmemtest(size=2**31, memuse=4, dry_run=False) - def test_large_character_data_does_not_crash(self): - # See https://github.com/python/cpython/issues/148441 - parser = expat.ParserCreate() - parser.buffer_text = True - parser.buffer_size = 2**31 - 1 # INT_MAX - N = 2049 * (1 << 20) - 3 # Character data greater than INT_MAX - self.assertGreater(N, parser.buffer_size) - parser.CharacterDataHandler = lambda text: None - xml_data = b"" + b"A" * N + b"" - self.assertEqual(parser.Parse(xml_data, True), 1) - class ElementDeclHandlerTest(unittest.TestCase): @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: TypeError not raised by Parse def test_trigger_leak(self): @@ -1213,64 +1087,6 @@ def test_set_maximum_amplification__fail_for_subparser(self): self.assert_root_parser_failure(setter, 123.45) -@unittest.skipIf(expat.version_info < (2, 4, 0), "requires Expat >= 2.4.0") -class ExpansionProtectionTest(AttackProtectionTestBase, unittest.TestCase): - - def assert_rejected(self, func, /, *args, **kwargs): - """Check that func(*args, **kwargs) hits the allocation limit.""" - msg = ( - r"limit on input amplification factor \(from DTD and entities\) " - r"breached: line \d+, column \d+" - ) - self.assertRaisesRegex(expat.ExpatError, msg, func, *args, **kwargs) - - def set_activation_threshold(self, parser, threshold): - return parser.SetBillionLaughsAttackProtectionActivationThreshold(threshold) - - def set_maximum_amplification(self, parser, max_factor): - return parser.SetBillionLaughsAttackProtectionMaximumAmplification(max_factor) - - def test_set_activation_threshold__threshold_reached(self): - parser = expat.ParserCreate() - # Choose a threshold expected to be always reached. - self.set_activation_threshold(parser, 3) - # Check that the threshold is reached by choosing a small factor - # and a payload whose peak amplification factor exceeds it. - self.assertIsNone(self.set_maximum_amplification(parser, 1.0)) - payload = self.exponential_expansion_payload(ncols=10, nrows=4) - self.assert_rejected(parser.Parse, payload, True) - - def test_set_activation_threshold__threshold_not_reached(self): - parser = expat.ParserCreate() - # Choose a threshold expected to be never reached. - self.set_activation_threshold(parser, pow(10, 5)) - # Check that the threshold is reached by choosing a small factor - # and a payload whose peak amplification factor exceeds it. - self.assertIsNone(self.set_maximum_amplification(parser, 1.0)) - payload = self.exponential_expansion_payload(ncols=10, nrows=4) - self.assertIsNotNone(parser.Parse(payload, True)) - - def test_set_maximum_amplification__amplification_exceeded(self): - parser = expat.ParserCreate() - # Unconditionally enable maximum activation factor. - self.set_activation_threshold(parser, 0) - # Choose a max amplification factor expected to always be exceeded. - self.assertIsNone(self.set_maximum_amplification(parser, 1.0)) - # Craft a payload for which the peak amplification factor is > 1.0. - payload = self.exponential_expansion_payload(ncols=1, nrows=2) - self.assert_rejected(parser.Parse, payload, True) - - def test_set_maximum_amplification__amplification_not_exceeded(self): - parser = expat.ParserCreate() - # Unconditionally enable maximum activation factor. - self.set_activation_threshold(parser, 0) - # Choose a max amplification factor expected to never be exceeded. - self.assertIsNone(self.set_maximum_amplification(parser, 1e4)) - # Craft a payload for which the peak amplification factor is < 1e4. - payload = self.exponential_expansion_payload(ncols=1, nrows=2) - self.assertIsNotNone(parser.Parse(payload, True)) - - @unittest.skipIf(not hasattr(expat.XMLParserType, "SetAllocTrackerMaximumAmplification"), "requires Python compiled with Expat >= 2.7.2")