From d4d329f6df13607f1416c830998f8aaddeeaab38 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Thu, 5 Mar 2026 09:14:30 +0900 Subject: [PATCH 01/10] gc: add CollectResult, stats fields, get_referrers, and fix count reset - Add CollectResult struct with collected/uncollectable/candidates/duration - Add candidates and duration fields to GcStats and gc.get_stats() - Pass CollectResult to gc.callbacks info dict - Reset generation counts for all collected generations (0..=N) - Return 0 for third value in gc.get_threshold() (3.13+) - Implement gc.get_referrers() by scanning all tracked objects - Add DEBUG_COLLECTABLE output for collectable objects - Update test_gc.py to expect candidates/duration in stats --- Lib/test/test_gc.py | 2 +- crates/vm/src/gc_state.rs | 121 +++++++++++++++++++++++++++++-------- crates/vm/src/stdlib/gc.rs | 60 ++++++++++++------ 3 files changed, 138 insertions(+), 45 deletions(-) diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 3e3092dcae1..ec36154e1d7 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -822,7 +822,7 @@ def test_get_stats(self): for st in stats: self.assertIsInstance(st, dict) self.assertEqual(set(st), - {"collected", "collections", "uncollectable"}) + {"collected", "collections", "uncollectable", "candidates", "duration"}) self.assertGreaterEqual(st["collected"], 0) self.assertGreaterEqual(st["collections"], 0) self.assertGreaterEqual(st["uncollectable"], 0) diff --git a/crates/vm/src/gc_state.rs b/crates/vm/src/gc_state.rs index e8e83bba49c..ab769fc30fc 100644 --- a/crates/vm/src/gc_state.rs +++ b/crates/vm/src/gc_state.rs @@ -1,7 +1,6 @@ //! Garbage Collection State and Algorithm //! -//! This module implements CPython-compatible generational garbage collection -//! for RustPython, using an intrusive doubly-linked list approach. +//! Generational garbage collection using an intrusive doubly-linked list. use crate::common::linked_list::LinkedList; use crate::common::lock::{PyMutex, PyRwLock}; @@ -28,12 +27,23 @@ bitflags::bitflags! { } } +/// Result from a single collection run +#[derive(Debug, Default)] +pub struct CollectResult { + pub collected: usize, + pub uncollectable: usize, + pub candidates: usize, + pub duration: f64, +} + /// Statistics for a single generation (gc_generation_stats) #[derive(Debug, Default)] pub struct GcStats { pub collections: usize, pub collected: usize, pub uncollectable: usize, + pub candidates: usize, + pub duration: f64, } /// A single GC generation with intrusive linked list @@ -55,6 +65,8 @@ impl GcGeneration { collections: 0, collected: 0, uncollectable: 0, + candidates: 0, + duration: 0.0, }), } } @@ -77,14 +89,24 @@ impl GcGeneration { collections: guard.collections, collected: guard.collected, uncollectable: guard.uncollectable, + candidates: guard.candidates, + duration: guard.duration, } } - pub fn update_stats(&self, collected: usize, uncollectable: usize) { + pub fn update_stats( + &self, + collected: usize, + uncollectable: usize, + candidates: usize, + duration: f64, + ) { let mut guard = self.stats.lock(); guard.collections += 1; guard.collected += collected; guard.uncollectable += uncollectable; + guard.candidates += candidates; + guard.duration += duration; } /// Reset the stats mutex to unlocked state after fork(). @@ -340,25 +362,27 @@ impl GcState { } /// Perform garbage collection on the given generation - pub fn collect(&self, generation: usize) -> (usize, usize) { + pub fn collect(&self, generation: usize) -> CollectResult { self.collect_inner(generation, false) } /// Force collection even if GC is disabled (for manual gc.collect() calls) - pub fn collect_force(&self, generation: usize) -> (usize, usize) { + pub fn collect_force(&self, generation: usize) -> CollectResult { self.collect_inner(generation, true) } - fn collect_inner(&self, generation: usize, force: bool) -> (usize, usize) { + fn collect_inner(&self, generation: usize, force: bool) -> CollectResult { if !force && !self.is_enabled() { - return (0, 0); + return CollectResult::default(); } // Try to acquire the collecting lock let Some(_guard) = self.collecting.try_lock() else { - return (0, 0); + return CollectResult::default(); }; + let start_time = std::time::Instant::now(); + // Memory barrier to ensure visibility of all reference count updates // from other threads before we start analyzing the object graph. core::sync::atomic::fence(Ordering::SeqCst); @@ -386,11 +410,24 @@ impl GcState { } if collecting.is_empty() { - self.generations[0].count.store(0, Ordering::SeqCst); - self.generations[generation].update_stats(0, 0); - return (0, 0); + // Reset counts for generations whose objects were promoted away. + // For gen2 (oldest), survivors stay in-place so don't reset gen2 count. + let reset_end = if generation >= 2 { 2 } else { generation + 1 }; + for i in 0..reset_end { + self.generations[i].count.store(0, Ordering::SeqCst); + } + let duration = start_time.elapsed().as_secs_f64(); + self.generations[generation].update_stats(0, 0, 0, duration); + return CollectResult { + collected: 0, + uncollectable: 0, + candidates: 0, + duration, + }; } + let candidates = collecting.len(); + if debug.contains(GcDebugFlags::STATS) { eprintln!( "gc: collecting {} objects from generations 0..={}", @@ -486,9 +523,17 @@ impl GcState { if unreachable.is_empty() { drop(gen_locks); self.promote_survivors(generation, &survivor_refs); - self.generations[0].count.store(0, Ordering::SeqCst); - self.generations[generation].update_stats(0, 0); - return (0, 0); + for i in 0..generation { + self.generations[i].count.store(0, Ordering::SeqCst); + } + let duration = start_time.elapsed().as_secs_f64(); + self.generations[generation].update_stats(0, 0, candidates, duration); + return CollectResult { + collected: 0, + uncollectable: 0, + candidates, + duration, + }; } // Release read locks before finalization phase. @@ -498,9 +543,17 @@ impl GcState { if unreachable_refs.is_empty() { self.promote_survivors(generation, &survivor_refs); - self.generations[0].count.store(0, Ordering::SeqCst); - self.generations[generation].update_stats(0, 0); - return (0, 0); + for i in 0..generation { + self.generations[i].count.store(0, Ordering::SeqCst); + } + let duration = start_time.elapsed().as_secs_f64(); + self.generations[generation].update_stats(0, 0, candidates, duration); + return CollectResult { + collected: 0, + uncollectable: 0, + candidates, + duration, + }; } // 6b: Record initial strong counts (for resurrection detection) @@ -594,15 +647,25 @@ impl GcState { }; // Promote survivors to next generation BEFORE tp_clear. - // This matches CPython's order (move_legacy_finalizer_reachable → delete_garbage) - // and ensures survivor_refs are dropped before tp_clear, so reachable objects - // (e.g. LateFin) aren't kept alive beyond the deferred-drop phase. + // move_legacy_finalizer_reachable → delete_garbage order ensures + // survivor_refs are dropped before tp_clear, so reachable objects + // aren't kept alive beyond the deferred-drop phase. self.promote_survivors(generation, &survivor_refs); drop(survivor_refs); // Resurrected objects stay tracked — just drop our references drop(resurrected); + if debug.contains(GcDebugFlags::COLLECTABLE) { + for obj in &truly_dead { + eprintln!( + "gc: collectable <{} {:p}>", + obj.class().name(), + obj.as_ref() + ); + } + } + if debug.contains(GcDebugFlags::SAVEALL) { let mut garbage_guard = self.garbage.lock(); for obj_ref in truly_dead.iter() { @@ -624,12 +687,22 @@ impl GcState { }); } - // Reset gen0 count - self.generations[0].count.store(0, Ordering::SeqCst); + // Reset counts for generations whose objects were promoted away. + // For gen2 (oldest), survivors stay in-place so don't reset gen2 count. + let reset_end = if generation >= 2 { 2 } else { generation + 1 }; + for i in 0..reset_end { + self.generations[i].count.store(0, Ordering::SeqCst); + } - self.generations[generation].update_stats(collected, 0); + let duration = start_time.elapsed().as_secs_f64(); + self.generations[generation].update_stats(collected, 0, candidates, duration); - (collected, 0) + CollectResult { + collected, + uncollectable: 0, + candidates, + duration, + } } /// Promote surviving objects to the next generation. diff --git a/crates/vm/src/stdlib/gc.rs b/crates/vm/src/stdlib/gc.rs index f6adc6f4a95..245922b4c61 100644 --- a/crates/vm/src/stdlib/gc.rs +++ b/crates/vm/src/stdlib/gc.rs @@ -55,11 +55,11 @@ mod gc { } // Invoke callbacks with "start" phase - invoke_callbacks(vm, "start", generation_num as usize, 0, 0); + invoke_callbacks(vm, "start", generation_num as usize, &Default::default()); // Manual gc.collect() should run even if GC is disabled let gc = gc_state::gc_state(); - let (collected, uncollectable) = gc.collect_force(generation_num as usize); + let result = gc.collect_force(generation_num as usize); // Move objects from gc_state.garbage to vm.ctx.gc_garbage (for DEBUG_SAVEALL) { @@ -74,26 +74,21 @@ mod gc { } // Invoke callbacks with "stop" phase - invoke_callbacks( - vm, - "stop", - generation_num as usize, - collected, - uncollectable, - ); + invoke_callbacks(vm, "stop", generation_num as usize, &result); - Ok(collected as i32) + Ok((result.collected + result.uncollectable) as i32) } /// Return the current collection thresholds as a tuple. + /// The third value is always 0. #[pyfunction] fn get_threshold(vm: &VirtualMachine) -> PyObjectRef { - let (t0, t1, t2) = gc_state::gc_state().get_threshold(); + let (t0, t1, _t2) = gc_state::gc_state().get_threshold(); vm.ctx .new_tuple(vec![ vm.ctx.new_int(t0).into(), vm.ctx.new_int(t1).into(), - vm.ctx.new_int(t2).into(), + vm.ctx.new_int(0).into(), ]) .into() } @@ -148,6 +143,8 @@ mod gc { vm.ctx.new_int(stat.uncollectable).into(), vm, )?; + dict.set_item("candidates", vm.ctx.new_int(stat.candidates).into(), vm)?; + dict.set_item("duration", vm.ctx.new_float(stat.duration).into(), vm)?; result.push(dict.into()); } @@ -189,10 +186,30 @@ mod gc { /// Return the list of objects that directly refer to any of the arguments. #[pyfunction] fn get_referrers(args: FuncArgs, vm: &VirtualMachine) -> PyListRef { - // This is expensive: we need to scan all tracked objects - // For now, return an empty list (would need full object tracking to implement) - let _ = args; - vm.ctx.new_list(vec![]) + use std::collections::HashSet; + + // Build a set of target object pointers for fast lookup + let targets: HashSet = args + .args + .iter() + .map(|obj| obj.as_ref() as *const crate::PyObject as usize) + .collect(); + + let mut result = Vec::new(); + + // Scan all tracked objects across all generations + let all_objects = gc_state::gc_state().get_objects(None); + for obj in all_objects { + let referent_ptrs = unsafe { obj.gc_get_referent_ptrs() }; + for child_ptr in referent_ptrs { + if targets.contains(&(child_ptr.as_ptr() as usize)) { + result.push(obj.clone()); + break; + } + } + } + + vm.ctx.new_list(result) } /// Return True if the object is tracked by the garbage collector. @@ -243,8 +260,7 @@ mod gc { vm: &VirtualMachine, phase: &str, generation: usize, - collected: usize, - uncollectable: usize, + result: &gc_state::CollectResult, ) { let callbacks_list = &vm.ctx.gc_callbacks; let callbacks: Vec = callbacks_list.borrow_vec().to_vec(); @@ -255,8 +271,12 @@ mod gc { let phase_str: PyObjectRef = vm.ctx.new_str(phase).into(); let info = vm.ctx.new_dict(); let _ = info.set_item("generation", vm.ctx.new_int(generation).into(), vm); - let _ = info.set_item("collected", vm.ctx.new_int(collected).into(), vm); - let _ = info.set_item("uncollectable", vm.ctx.new_int(uncollectable).into(), vm); + let _ = info.set_item("collected", vm.ctx.new_int(result.collected).into(), vm); + let _ = info.set_item( + "uncollectable", + vm.ctx.new_int(result.uncollectable).into(), + vm, + ); for callback in callbacks { let _ = callback.call((phase_str.clone(), info.clone()), vm); From c889a96bdc3f25ce9f025f7573f53ca44dc63a04 Mon Sep 17 00:00:00 2001 From: CPython Developers <> Date: Thu, 5 Mar 2026 23:12:29 +0900 Subject: [PATCH 02/10] Update test_gc from v3.14.3 --- Lib/test/_test_gc_fast_cycles.py | 48 ++++++++++++++++++++++++++++++++ Lib/test/test_gc.py | 2 +- 2 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 Lib/test/_test_gc_fast_cycles.py diff --git a/Lib/test/_test_gc_fast_cycles.py b/Lib/test/_test_gc_fast_cycles.py new file mode 100644 index 00000000000..4e2c7d72a02 --- /dev/null +++ b/Lib/test/_test_gc_fast_cycles.py @@ -0,0 +1,48 @@ +# Run by test_gc. +from test import support +import _testinternalcapi +import gc +import unittest + +class IncrementalGCTests(unittest.TestCase): + + # Use small increments to emulate longer running process in a shorter time + @support.gc_threshold(200, 10) + def test_incremental_gc_handles_fast_cycle_creation(self): + + class LinkedList: + + #Use slots to reduce number of implicit objects + __slots__ = "next", "prev", "surprise" + + def __init__(self, next=None, prev=None): + self.next = next + if next is not None: + next.prev = self + self.prev = prev + if prev is not None: + prev.next = self + + def make_ll(depth): + head = LinkedList() + for i in range(depth): + head = LinkedList(head, head.prev) + return head + + head = make_ll(1000) + + assert(gc.isenabled()) + olds = [] + initial_heap_size = _testinternalcapi.get_tracked_heap_size() + for i in range(20_000): + newhead = make_ll(20) + newhead.surprise = head + olds.append(newhead) + if len(olds) == 20: + new_objects = _testinternalcapi.get_tracked_heap_size() - initial_heap_size + self.assertLess(new_objects, 27_000, f"Heap growing. Reached limit after {i} iterations") + del olds[:] + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index ec36154e1d7..3e3092dcae1 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -822,7 +822,7 @@ def test_get_stats(self): for st in stats: self.assertIsInstance(st, dict) self.assertEqual(set(st), - {"collected", "collections", "uncollectable", "candidates", "duration"}) + {"collected", "collections", "uncollectable"}) self.assertGreaterEqual(st["collected"], 0) self.assertGreaterEqual(st["collections"], 0) self.assertGreaterEqual(st["uncollectable"], 0) From a34d701ab26d077832b2366e6c2104bf6bf0fd3f Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Fri, 6 Mar 2026 00:01:43 +0900 Subject: [PATCH 03/10] Update test_gc.py from CPython v3.15.0a5 Taken from v3.15 (not v3.14.3) because get_stats() candidates/duration fields were added in 3.13+ and the corresponding test assertions only exist in 3.15. --- Lib/test/test_gc.py | 111 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 94 insertions(+), 17 deletions(-) diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 3e3092dcae1..6aa6361d5d0 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -262,9 +262,11 @@ class Cyclic(tuple): # finalizer. def __del__(self): - # 5. Create a weakref to `func` now. If we had created - # it earlier, it would have been cleared by the - # garbage collector before calling the finalizers. + # 5. Create a weakref to `func` now. In previous + # versions of Python, this would avoid having it + # cleared by the garbage collector before calling + # the finalizers. Now, weakrefs get cleared after + # calling finalizers. self[1].ref = weakref.ref(self[0]) # 6. Drop the global reference to `latefin`. The only @@ -293,16 +295,40 @@ def func(): # which will find `cyc` and `func` as garbage. gc.collect() - # 9. Previously, this would crash because `func_qualname` - # had been NULL-ed out by func_clear(). + # 9. Previously, this would crash because the weakref + # created in the finalizer revealed the function after + # `tp_clear` was called and `func_qualname` + # had been NULL-ed out by func_clear(). Now, we clear + # weakrefs to unreachable objects before calling `tp_clear` + # but after calling finalizers. print(f"{func=}") """ - # We're mostly just checking that this doesn't crash. rc, stdout, stderr = assert_python_ok("-c", code) self.assertEqual(rc, 0) - self.assertRegex(stdout, rb"""\A\s*func=\s*\z""") + # The `func` global is None because the weakref was cleared. + self.assertRegex(stdout, rb"""\A\s*func=None""") self.assertFalse(stderr) + def test_datetime_weakref_cycle(self): + # https://github.com/python/cpython/issues/132413 + # If the weakref used by the datetime extension gets cleared by the GC (due to being + # in an unreachable cycle) then datetime functions would crash (get_module_state() + # was returning a NULL pointer). This bug is fixed by clearing weakrefs without + # callbacks *after* running finalizers. + code = """if 1: + import _datetime + class C: + def __del__(self): + print('__del__ called') + _datetime.timedelta(days=1) # crash? + + l = [C()] + l.append(l) + """ + rc, stdout, stderr = assert_python_ok("-c", code) + self.assertEqual(rc, 0) + self.assertEqual(stdout.strip(), b'__del__ called') + @refcount_test def test_frame(self): def f(): @@ -652,9 +678,8 @@ def callback(ignored): gc.collect() self.assertEqual(len(ouch), 2) # else the callbacks didn't run for x in ouch: - # If the callback resurrected one of these guys, the instance - # would be damaged, with an empty __dict__. - self.assertEqual(x, None) + # The weakref should be cleared before executing the callback. + self.assertIsNone(x) def test_bug21435(self): # This is a poor test - its only virtue is that it happened to @@ -821,11 +846,15 @@ def test_get_stats(self): self.assertEqual(len(stats), 3) for st in stats: self.assertIsInstance(st, dict) - self.assertEqual(set(st), - {"collected", "collections", "uncollectable"}) + self.assertEqual( + set(st), + {"collected", "collections", "uncollectable", "candidates", "duration"} + ) self.assertGreaterEqual(st["collected"], 0) self.assertGreaterEqual(st["collections"], 0) self.assertGreaterEqual(st["uncollectable"], 0) + self.assertGreaterEqual(st["candidates"], 0) + self.assertGreaterEqual(st["duration"], 0) # Check that collection counts are incremented correctly if gc.isenabled(): self.addCleanup(gc.enable) @@ -836,11 +865,25 @@ def test_get_stats(self): self.assertEqual(new[0]["collections"], old[0]["collections"] + 1) self.assertEqual(new[1]["collections"], old[1]["collections"]) self.assertEqual(new[2]["collections"], old[2]["collections"]) + self.assertGreater(new[0]["duration"], old[0]["duration"]) + self.assertEqual(new[1]["duration"], old[1]["duration"]) + self.assertEqual(new[2]["duration"], old[2]["duration"]) + for stat in ["collected", "uncollectable", "candidates"]: + self.assertGreaterEqual(new[0][stat], old[0][stat]) + self.assertEqual(new[1][stat], old[1][stat]) + self.assertEqual(new[2][stat], old[2][stat]) gc.collect(2) - new = gc.get_stats() - self.assertEqual(new[0]["collections"], old[0]["collections"] + 1) + old, new = new, gc.get_stats() + self.assertEqual(new[0]["collections"], old[0]["collections"]) self.assertEqual(new[1]["collections"], old[1]["collections"]) self.assertEqual(new[2]["collections"], old[2]["collections"] + 1) + self.assertEqual(new[0]["duration"], old[0]["duration"]) + self.assertEqual(new[1]["duration"], old[1]["duration"]) + self.assertGreater(new[2]["duration"], old[2]["duration"]) + for stat in ["collected", "uncollectable", "candidates"]: + self.assertEqual(new[0][stat], old[0][stat]) + self.assertEqual(new[1][stat], old[1][stat]) + self.assertGreaterEqual(new[2][stat], old[2][stat]) def test_freeze(self): gc.freeze() @@ -1156,6 +1199,37 @@ def test_something(self): """) assert_python_ok("-c", source) + def test_do_not_cleanup_type_subclasses_before_finalization(self): + # See https://github.com/python/cpython/issues/135552 + # If we cleanup weakrefs for tp_subclasses before calling + # the finalizer (__del__) then the line `fail = BaseNode.next.next` + # should fail because we are trying to access a subclass + # attribute. But subclass type cache was not properly invalidated. + code = """ + class BaseNode: + def __del__(self): + BaseNode.next = BaseNode.next.next + fail = BaseNode.next.next + + class Node(BaseNode): + pass + + BaseNode.next = Node() + BaseNode.next.next = Node() + """ + # this test checks garbage collection while interp + # finalization + assert_python_ok("-c", textwrap.dedent(code)) + + code_inside_function = textwrap.dedent(F""" + def test(): + {textwrap.indent(code, ' ')} + + test() + """) + # this test checks regular garbage collection + assert_python_ok("-c", code_inside_function) + @unittest.skipUnless(Py_GIL_DISABLED, "requires free-threaded GC") @unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi") @@ -1260,9 +1334,11 @@ def test_collect(self): # Check that we got the right info dict for all callbacks for v in self.visit: info = v[2] - self.assertTrue("generation" in info) - self.assertTrue("collected" in info) - self.assertTrue("uncollectable" in info) + self.assertIn("generation", info) + self.assertIn("collected", info) + self.assertIn("uncollectable", info) + self.assertIn("candidates", info) + self.assertIn("duration", info) def test_collect_generation(self): self.preclean() @@ -1450,6 +1526,7 @@ def callback(ignored): self.assertEqual(x, None) @gc_threshold(1000, 0, 0) + @unittest.skipIf(Py_GIL_DISABLED, "requires GC generations or increments") def test_bug1055820d(self): # Corresponds to temp2d.py in the bug report. This is very much like # test_bug1055820c, but uses a __del__ method instead of a weakref From 463eb1dcf71a13d1c69a6fa349b7fc2f9e1a2763 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Fri, 6 Mar 2026 00:23:54 +0900 Subject: [PATCH 04/10] Fix gc_state build on wasm32: skip Instant timing --- crates/vm/src/gc_state.rs | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/crates/vm/src/gc_state.rs b/crates/vm/src/gc_state.rs index ab769fc30fc..b63de219541 100644 --- a/crates/vm/src/gc_state.rs +++ b/crates/vm/src/gc_state.rs @@ -10,6 +10,16 @@ use core::ptr::NonNull; use core::sync::atomic::{AtomicBool, AtomicU32, AtomicUsize, Ordering}; use std::collections::HashSet; +#[cfg(not(target_arch = "wasm32"))] +fn elapsed_secs(start: &std::time::Instant) -> f64 { + start.elapsed().as_secs_f64() +} + +#[cfg(target_arch = "wasm32")] +fn elapsed_secs(_start: &()) -> f64 { + 0.0 +} + bitflags::bitflags! { /// GC debug flags (see Include/internal/pycore_gc.h) #[derive(Copy, Clone, Debug, Default, PartialEq, Eq)] @@ -381,7 +391,10 @@ impl GcState { return CollectResult::default(); }; + #[cfg(not(target_arch = "wasm32"))] let start_time = std::time::Instant::now(); + #[cfg(target_arch = "wasm32")] + let start_time = (); // Memory barrier to ensure visibility of all reference count updates // from other threads before we start analyzing the object graph. @@ -416,7 +429,7 @@ impl GcState { for i in 0..reset_end { self.generations[i].count.store(0, Ordering::SeqCst); } - let duration = start_time.elapsed().as_secs_f64(); + let duration = elapsed_secs(&start_time); self.generations[generation].update_stats(0, 0, 0, duration); return CollectResult { collected: 0, @@ -526,7 +539,7 @@ impl GcState { for i in 0..generation { self.generations[i].count.store(0, Ordering::SeqCst); } - let duration = start_time.elapsed().as_secs_f64(); + let duration = elapsed_secs(&start_time); self.generations[generation].update_stats(0, 0, candidates, duration); return CollectResult { collected: 0, @@ -546,7 +559,7 @@ impl GcState { for i in 0..generation { self.generations[i].count.store(0, Ordering::SeqCst); } - let duration = start_time.elapsed().as_secs_f64(); + let duration = elapsed_secs(&start_time); self.generations[generation].update_stats(0, 0, candidates, duration); return CollectResult { collected: 0, @@ -694,7 +707,7 @@ impl GcState { self.generations[i].count.store(0, Ordering::SeqCst); } - let duration = start_time.elapsed().as_secs_f64(); + let duration = elapsed_secs(&start_time); self.generations[generation].update_stats(collected, 0, candidates, duration); CollectResult { From dfee9903ddf5d8212f7da77195ebfd8e7d14d406 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Fri, 6 Mar 2026 01:16:32 +0900 Subject: [PATCH 05/10] Add candidates/duration to gc callback info, mark v3.15 test failures --- Lib/test/test_gc.py | 4 ++++ crates/vm/src/stdlib/gc.rs | 2 ++ 2 files changed, 6 insertions(+) diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 6aa6361d5d0..879a2875aaa 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -236,6 +236,8 @@ def test_function(self): # is 3 because it includes f's code object. self.assertIn(gc.collect(), (2, 3)) + # TODO: RUSTPYTHON - weakref clear ordering differs from 3.15+ + @unittest.expectedFailure def test_function_tp_clear_leaves_consistent_state(self): # https://github.com/python/cpython/issues/91636 code = """if 1: @@ -309,6 +311,8 @@ def func(): self.assertRegex(stdout, rb"""\A\s*func=None""") self.assertFalse(stderr) + # TODO: RUSTPYTHON - _datetime module not available + @unittest.expectedFailure def test_datetime_weakref_cycle(self): # https://github.com/python/cpython/issues/132413 # If the weakref used by the datetime extension gets cleared by the GC (due to being diff --git a/crates/vm/src/stdlib/gc.rs b/crates/vm/src/stdlib/gc.rs index 245922b4c61..6492ecafb9b 100644 --- a/crates/vm/src/stdlib/gc.rs +++ b/crates/vm/src/stdlib/gc.rs @@ -277,6 +277,8 @@ mod gc { vm.ctx.new_int(result.uncollectable).into(), vm, ); + let _ = info.set_item("candidates", vm.ctx.new_int(result.candidates).into(), vm); + let _ = info.set_item("duration", vm.ctx.new_float(result.duration).into(), vm); for callback in callbacks { let _ = callback.call((phase_str.clone(), info.clone()), vm); From 2547ef8fcfc9e809d30d82e9354a9e1941910500 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Fri, 6 Mar 2026 09:28:42 +0900 Subject: [PATCH 06/10] Fix gc.get_referrers to exclude executing frames, fix Future cancelled exc leak - get_referrers: skip frame objects on the execution stack, since they are not GC-tracked in CPython (_PyInterpreterFrame) - _asyncio Future/Task make_cancelled_error_impl: clear the stored cancelled exception after returning it, matching the Python _make_cancelled_error behavior --- crates/stdlib/src/_asyncio.rs | 18 ++++++++---------- crates/vm/src/stdlib/gc.rs | 19 +++++++++++++++++++ 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/crates/stdlib/src/_asyncio.rs b/crates/stdlib/src/_asyncio.rs index 2299a1c822c..8b32e4625da 100644 --- a/crates/stdlib/src/_asyncio.rs +++ b/crates/stdlib/src/_asyncio.rs @@ -499,7 +499,8 @@ pub(crate) mod _asyncio { } fn make_cancelled_error_impl(&self, vm: &VirtualMachine) -> PyBaseExceptionRef { - if let Some(exc) = self.fut_cancelled_exc.read().clone() + // If a saved CancelledError exists, take it (clearing the stored reference) + if let Some(exc) = self.fut_cancelled_exc.write().take() && let Ok(exc) = exc.downcast::() { return exc; @@ -508,12 +509,10 @@ pub(crate) mod _asyncio { let msg = self.fut_cancel_msg.read().clone(); let args = if let Some(m) = msg { vec![m] } else { vec![] }; - let exc = match get_cancelled_error_type(vm) { + match get_cancelled_error_type(vm) { Ok(cancelled_error) => vm.new_exception(cancelled_error, args), Err(_) => vm.new_runtime_error("cancelled"), - }; - *self.fut_cancelled_exc.write() = Some(exc.clone().into()); - exc + } } fn schedule_callbacks(zelf: &PyRef, vm: &VirtualMachine) -> PyResult<()> { @@ -1309,7 +1308,8 @@ pub(crate) mod _asyncio { } fn make_cancelled_error_impl(&self, vm: &VirtualMachine) -> PyBaseExceptionRef { - if let Some(exc) = self.base.fut_cancelled_exc.read().clone() + // If a saved CancelledError exists, take it (clearing the stored reference) + if let Some(exc) = self.base.fut_cancelled_exc.write().take() && let Ok(exc) = exc.downcast::() { return exc; @@ -1318,12 +1318,10 @@ pub(crate) mod _asyncio { let msg = self.base.fut_cancel_msg.read().clone(); let args = if let Some(m) = msg { vec![m] } else { vec![] }; - let exc = match get_cancelled_error_type(vm) { + match get_cancelled_error_type(vm) { Ok(cancelled_error) => vm.new_exception(cancelled_error, args), Err(_) => vm.new_runtime_error("cancelled"), - }; - *self.base.fut_cancelled_exc.write() = Some(exc.clone().into()); - exc + } } #[pymethod] diff --git a/crates/vm/src/stdlib/gc.rs b/crates/vm/src/stdlib/gc.rs index 6492ecafb9b..df9986fe097 100644 --- a/crates/vm/src/stdlib/gc.rs +++ b/crates/vm/src/stdlib/gc.rs @@ -195,11 +195,30 @@ mod gc { .map(|obj| obj.as_ref() as *const crate::PyObject as usize) .collect(); + // Collect pointers of frames currently on the execution stack. + // In CPython, executing frames (_PyInterpreterFrame) are not GC-tracked + // PyObjects, so they never appear in get_referrers results. Since + // RustPython materializes every frame as a PyObject, we must exclude + // them manually to match the expected behavior. + let stack_frames: HashSet = vm + .frames + .borrow() + .iter() + .map(|fp| { + let frame: &crate::PyObject = unsafe { fp.as_ref() }.as_ref(); + frame as *const crate::PyObject as usize + }) + .collect(); + let mut result = Vec::new(); // Scan all tracked objects across all generations let all_objects = gc_state::gc_state().get_objects(None); for obj in all_objects { + let obj_ptr = obj.as_ref() as *const crate::PyObject as usize; + if stack_frames.contains(&obj_ptr) { + continue; + } let referent_ptrs = unsafe { obj.gc_get_referent_ptrs() }; for child_ptr in referent_ptrs { if targets.contains(&(child_ptr.as_ptr() as usize)) { From 980c73127361a831e472eedf6f9d2e86eafeff74 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Mon, 9 Mar 2026 12:21:13 +0900 Subject: [PATCH 07/10] Fix gc.get_threshold to return actual gen2 threshold value --- crates/vm/src/stdlib/gc.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crates/vm/src/stdlib/gc.rs b/crates/vm/src/stdlib/gc.rs index df9986fe097..3909186b5c0 100644 --- a/crates/vm/src/stdlib/gc.rs +++ b/crates/vm/src/stdlib/gc.rs @@ -80,15 +80,14 @@ mod gc { } /// Return the current collection thresholds as a tuple. - /// The third value is always 0. #[pyfunction] fn get_threshold(vm: &VirtualMachine) -> PyObjectRef { - let (t0, t1, _t2) = gc_state::gc_state().get_threshold(); + let (t0, t1, t2) = gc_state::gc_state().get_threshold(); vm.ctx .new_tuple(vec![ vm.ctx.new_int(t0).into(), vm.ctx.new_int(t1).into(), - vm.ctx.new_int(0).into(), + vm.ctx.new_int(t2).into(), ]) .into() } From f1d0b448b0a3e739708f4dec4d9985518c129633 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Mon, 9 Mar 2026 22:33:10 +0900 Subject: [PATCH 08/10] Fix inconsistent GC count reset in early-return paths Use the same reset_end formula in unreachable-empty early returns as in the main collection path and collecting-empty path. --- crates/vm/src/gc_state.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/vm/src/gc_state.rs b/crates/vm/src/gc_state.rs index b63de219541..d86c3d4d560 100644 --- a/crates/vm/src/gc_state.rs +++ b/crates/vm/src/gc_state.rs @@ -536,7 +536,8 @@ impl GcState { if unreachable.is_empty() { drop(gen_locks); self.promote_survivors(generation, &survivor_refs); - for i in 0..generation { + let reset_end = if generation >= 2 { 2 } else { generation + 1 }; + for i in 0..reset_end { self.generations[i].count.store(0, Ordering::SeqCst); } let duration = elapsed_secs(&start_time); @@ -556,7 +557,8 @@ impl GcState { if unreachable_refs.is_empty() { self.promote_survivors(generation, &survivor_refs); - for i in 0..generation { + let reset_end = if generation >= 2 { 2 } else { generation + 1 }; + for i in 0..reset_end { self.generations[i].count.store(0, Ordering::SeqCst); } let duration = elapsed_secs(&start_time); From 2282a515d65d993758238219b7910e631fea1d89 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Mon, 9 Mar 2026 22:51:18 +0900 Subject: [PATCH 09/10] Accept keyword arguments in socket.__init__ Use a FromArgs struct instead of a positional-only tuple so that family, type, proto, and fileno can be passed as keyword arguments. --- crates/stdlib/src/socket.rs | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/crates/stdlib/src/socket.rs b/crates/stdlib/src/socket.rs index 617e50c002f..cecfbed4298 100644 --- a/crates/stdlib/src/socket.rs +++ b/crates/stdlib/src/socket.rs @@ -1384,13 +1384,20 @@ mod _socket { impl DefaultConstructor for PySocket {} + #[derive(FromArgs)] + pub struct SocketInitArgs { + #[pyarg(any, optional)] + family: OptionalArg, + #[pyarg(any, optional)] + r#type: OptionalArg, + #[pyarg(any, optional)] + proto: OptionalArg, + #[pyarg(any, optional)] + fileno: OptionalOption, + } + impl Initializer for PySocket { - type Args = ( - OptionalArg, - OptionalArg, - OptionalArg, - OptionalOption, - ); + type Args = SocketInitArgs; fn init(zelf: PyRef, args: Self::Args, vm: &VirtualMachine) -> PyResult<()> { Self::_init(zelf, args, vm).map_err(|e| e.into_pyexception(vm)) @@ -1414,13 +1421,14 @@ mod _socket { impl PySocket { fn _init( zelf: PyRef, - (family, socket_kind, proto, fileno): ::Args, + args: ::Args, vm: &VirtualMachine, ) -> Result<(), IoOrPyException> { - let mut family = family.unwrap_or(-1); - let mut socket_kind = socket_kind.unwrap_or(-1); - let mut proto = proto.unwrap_or(-1); + let mut family = args.family.unwrap_or(-1); + let mut socket_kind = args.r#type.unwrap_or(-1); + let mut proto = args.proto.unwrap_or(-1); + let fileno = args.fileno; let sock; // On Windows, fileno can be bytes from socket.share() for fromshare() From d62481f552d5eccd1cf60ccdfb5b843541e42eab Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Mon, 9 Mar 2026 22:51:24 +0900 Subject: [PATCH 10/10] Disable comp_inlined in symbol table to match compiler The compiler does not yet implement PEP 709 inlined comprehensions (is_inlined_comprehension_context always returns false), but the symbol table was marking comprehensions as inlined. This mismatch could cause comprehension-local symbols to be merged into the parent scope while the compiler still looks them up in a separate scope. --- Lib/test/test_symtable.py | 1 - crates/codegen/src/symboltable.rs | 21 +++++++-------------- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/Lib/test/test_symtable.py b/Lib/test/test_symtable.py index ae93ee8d91f..1653ab4a718 100644 --- a/Lib/test/test_symtable.py +++ b/Lib/test/test_symtable.py @@ -561,7 +561,6 @@ def get_identifiers_recursive(self, st, res): for ch in st.get_children(): self.get_identifiers_recursive(ch, res) - @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: 2 != 1 def test_loopvar_in_only_one_scope(self): # ensure that the loop variable appears only once in the symtable comps = [ diff --git a/crates/codegen/src/symboltable.rs b/crates/codegen/src/symboltable.rs index 0d868bc0468..fdbdac2b2a7 100644 --- a/crates/codegen/src/symboltable.rs +++ b/crates/codegen/src/symboltable.rs @@ -2037,20 +2037,13 @@ impl SymbolTableBuilder { self.line_index_start(range), ); - // Mark non-generator comprehensions as inlined (PEP 709) - // inline_comp = entry->ste_comprehension && !entry->ste_generator && !ste->ste_can_see_class_scope - // We check is_generator and can_see_class_scope of parent - let parent_can_see_class = self - .tables - .get(self.tables.len().saturating_sub(2)) - .map(|t| t.can_see_class_scope) - .unwrap_or(false); - if !is_generator - && !parent_can_see_class - && let Some(table) = self.tables.last_mut() - { - table.comp_inlined = true; - } + // PEP 709: inlined comprehensions are not yet implemented in the + // compiler (is_inlined_comprehension_context always returns false), + // so do NOT mark comp_inlined here. Setting it would cause the + // symbol-table analyzer to merge comprehension-local symbols into + // the parent scope, while the compiler still emits a separate code + // object — leading to the merged symbols being missing from the + // comprehension's own symbol table lookup. // Register the passed argument to the generator function as the name ".0" self.register_name(".0", SymbolUsage::Parameter, range)?;