From 19931e1839d78000657bfcc96b17887acb734984 Mon Sep 17 00:00:00 2001 From: changjoon-park Date: Sun, 3 May 2026 19:31:36 +0900 Subject: [PATCH] Validate compile() filename type and dont_inherit __bool__ protocol MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two CPython parity gaps in `compile()` argument handling: 1. **filename rejects only non-buffer-protocol types.** CPython's `PyUnicode_FSDecoder` accepts `str`, `bytes`, and objects with `__fspath__` only — `bytearray` / `memoryview` / `array.array` raise TypeError. RustPython's `FsPath::TryFromObject` impl falls back to any buffer-protocol object and silently converts to bytes, accepting them. ```python >>> compile('pass', bytearray(b'file.py'), 'exec') # CPython 3.14.4: TypeError # RustPython main: accepted, returns a code object ❌ ``` Change `CompileArgs::filename` from `FsPath` (which uses the permissive `TryFromObject` impl) to `PyObjectRef`, then call the strict `FsPath::try_from_path_like` at the top of `compile()`. Other `FsPath` consumers are unchanged. 2. **dont_inherit strict-checks the `bool` type.** CPython routes `dont_inherit` through `PyObject_IsTrue`, calling `__bool__` on arbitrary objects and propagating exceptions raised there. RustPython typed it `OptionalArg<bool>`, which rejects subclass-less objects at binding time before `__bool__` runs. ```python >>> class EvilBool: ... def __bool__(self): raise ValueError('hi') >>> compile('pass', 'f', 'exec', dont_inherit=EvilBool()) # CPython 3.14.4: ValueError('hi') # RustPython main: TypeError('Expected type bool, not EvilBool') ❌ ``` Switch to `OptionalArg<ArgIntoBool>` — already used elsewhere in `builtins` (`all`, `any`, `print(..., flush=...)`). 
Verified byte-identical with CPython 3.14.4 across: - 6 filename types (str, bytes, bytearray, memoryview, list, int) - 7 dont_inherit values (True/False/1/0/""/"yes"/None) plus the `__bool__`-raises-ValueError case Unmasks `Lib/test/test_compile.py`: - `test_compile_filename` - `test_compile_filename_refleak` `cargo run --release -- -m test test_compile test_builtin test_compileall test_codeop test_ast` — 703 tests pass, 0 regressions. All 188 `extra_tests/snippets/*.py` pass under the CI feature set. --- Lib/test/test_compile.py | 2 -- crates/vm/src/stdlib/builtins.rs | 22 +++++++++++++++++----- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index bdce48e3447..fd1743e6701 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -647,7 +647,6 @@ def f(): d = {f(): f(), f(): f()} self.assertEqual(d, {1: 2, 3: 4}) - @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: TypeError not raised def test_compile_filename(self): for filename in 'file.py', b'file.py': code = compile('pass', filename, 'exec') @@ -657,7 +656,6 @@ def test_compile_filename(self): compile('pass', filename, 'exec') self.assertRaises(TypeError, compile, 'pass', list(b'file.py'), 'exec') - @unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: Expected type bool, not EvilBool def test_compile_filename_refleak(self): # Regression tests for reference leak in PyUnicode_FSDecoder. # See https://github.com/python/cpython/issues/139748. 
diff --git a/crates/vm/src/stdlib/builtins.rs b/crates/vm/src/stdlib/builtins.rs index fd35b287211..2e749563691 100644 --- a/crates/vm/src/stdlib/builtins.rs +++ b/crates/vm/src/stdlib/builtins.rs @@ -97,15 +97,21 @@ mod builtins { #[allow(dead_code)] struct CompileArgs { source: PyObjectRef, - filename: FsPath, + // Resolved to FsPath at the start of compile() so that bytearray / + // memoryview / other buffer-protocol objects raise TypeError, matching + // CPython's PyUnicode_FSDecoder (str / bytes / __fspath__ only). + filename: PyObjectRef, mode: PyUtf8StrRef, // CPython parity: flags / optimize accept any object with __index__, // not just exact int. Matches the behavior of `int(x)` arg conversion // used by Python/Python-ast.c::compile. #[pyarg(any, optional)] flags: OptionalArg>, + // CPython parity: dont_inherit goes through PyObject_IsTrue, so + // arbitrary objects with `__bool__` are accepted (and any exception + // raised inside `__bool__` propagates) — not the strict bool type. #[pyarg(any, optional)] - dont_inherit: OptionalArg, + dont_inherit: OptionalArg, #[pyarg(any, optional)] optimize: OptionalArg>, #[pyarg(any, optional)] @@ -261,6 +267,12 @@ mod builtins { } #[cfg(feature = "ast")] { + // CPython parity: PyUnicode_FSDecoder accepts only str / bytes / + // __fspath__-bearing objects. Reject buffer-protocol types like + // bytearray and memoryview that would otherwise pass through + // `FsPath::TryFromObject`'s permissive fallback. 
+ let filename = FsPath::try_from_path_like(args.filename, true, vm)?; + use crate::{class::PyClassImpl, stdlib::_ast}; let feature_version = feature_version_from_arg(args._feature_version, vm)?; @@ -321,7 +333,7 @@ mod builtins { return _ast::compile( vm, args.source, - &args.filename.to_string_lossy(), + &filename.to_string_lossy(), mode, Some(optimize), ); @@ -342,7 +354,7 @@ mod builtins { let source = ArgStrOrBytesLike::try_from_object(vm, args.source)?; let source = source.borrow_bytes(); - let source = decode_source_bytes(&source, &args.filename.to_string_lossy(), vm)?; + let source = decode_source_bytes(&source, &filename.to_string_lossy(), vm)?; let source = source.as_str(); let flags: i32 = args.flags.map_or(0, |v| v.value); @@ -389,7 +401,7 @@ mod builtins { .compile_with_opts( source, mode, - args.filename.to_string_lossy().into_owned(), + filename.to_string_lossy().into_owned(), opts, ) .map_err(|err| {