From 7f951b4a78afd3dcdc33960d339f0f499e52282d Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Sat, 31 Jan 2026 17:08:11 +0900 Subject: [PATCH 1/4] gc module internal structure and API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add gc_state module with GcState, GcGeneration, GcDebugFlags, GcStats. Replace gc module stubs with working API backed by gc_state. Add gc_callbacks and gc_garbage to Context. Add is_gc_tracked, gc_finalized, gc_get_referents to PyObject. Collection is stubbed (returns 0) — actual algorithm to follow. --- crates/stdlib/src/gc.rs | 248 +++++++++++++++--- crates/vm/Cargo.toml | 3 +- crates/vm/src/gc_state.rs | 473 +++++++++++++++++++++++++++++++++++ crates/vm/src/lib.rs | 1 + crates/vm/src/object/core.rs | 21 +- crates/vm/src/vm/context.rs | 12 + 6 files changed, 727 insertions(+), 31 deletions(-) create mode 100644 crates/vm/src/gc_state.rs diff --git a/crates/stdlib/src/gc.rs b/crates/stdlib/src/gc.rs index 9359119a603..4887e35f584 100644 --- a/crates/stdlib/src/gc.rs +++ b/crates/stdlib/src/gc.rs @@ -2,75 +2,265 @@ pub(crate) use gc::module_def; #[pymodule] mod gc { - use crate::vm::{PyResult, VirtualMachine, function::FuncArgs}; + use crate::vm::{ + PyObjectRef, PyResult, VirtualMachine, + builtins::PyListRef, + function::{FuncArgs, OptionalArg}, + gc_state, + }; + // Debug flag constants + #[pyattr] + const DEBUG_STATS: u32 = gc_state::GcDebugFlags::STATS.bits(); + #[pyattr] + const DEBUG_COLLECTABLE: u32 = gc_state::GcDebugFlags::COLLECTABLE.bits(); + #[pyattr] + const DEBUG_UNCOLLECTABLE: u32 = gc_state::GcDebugFlags::UNCOLLECTABLE.bits(); + #[pyattr] + const DEBUG_SAVEALL: u32 = gc_state::GcDebugFlags::SAVEALL.bits(); + #[pyattr] + const DEBUG_LEAK: u32 = gc_state::GcDebugFlags::LEAK.bits(); + + /// Enable automatic garbage collection. + #[pyfunction] + fn enable() { + gc_state::gc_state().enable(); + } + + /// Disable automatic garbage collection. + #[pyfunction] + fn disable() { + gc_state::gc_state().disable(); + } + + /// Return true if automatic gc is enabled. + #[pyfunction] + fn isenabled() -> bool { + gc_state::gc_state().is_enabled() + } + + /// Run a garbage collection. Returns the number of unreachable objects found. + #[derive(FromArgs)] + struct CollectArgs { + #[pyarg(any, optional)] + generation: OptionalArg, + } + + #[pyfunction] + fn collect(args: CollectArgs, vm: &VirtualMachine) -> PyResult { + let generation = args.generation; + let generation_num = generation.unwrap_or(2); + if !(0..=2).contains(&generation_num) { + return Err(vm.new_value_error("invalid generation".to_owned())); + } + + // Invoke callbacks with "start" phase + invoke_callbacks(vm, "start", generation_num as usize, 0, 0); + + // Manual gc.collect() should run even if GC is disabled + let gc = gc_state::gc_state(); + let (collected, uncollectable) = gc.collect_force(generation_num as usize); + + // Move objects from gc_state.garbage to vm.ctx.gc_garbage (for DEBUG_SAVEALL) + { + let mut state_garbage = gc.garbage.lock(); + if !state_garbage.is_empty() { + let py_garbage = &vm.ctx.gc_garbage; + let mut garbage_vec = py_garbage.borrow_vec_mut(); + for obj in state_garbage.drain(..) { + garbage_vec.push(obj); + } + } + } + + // Invoke callbacks with "stop" phase + invoke_callbacks( + vm, + "stop", + generation_num as usize, + collected, + uncollectable, + ); + + Ok(collected as i32) + } + + /// Return the current collection thresholds as a tuple. #[pyfunction] - fn collect(_args: FuncArgs, _vm: &VirtualMachine) -> i32 { - 0 + fn get_threshold(vm: &VirtualMachine) -> PyObjectRef { + let (t0, t1, t2) = gc_state::gc_state().get_threshold(); + vm.ctx + .new_tuple(vec![ + vm.ctx.new_int(t0).into(), + vm.ctx.new_int(t1).into(), + vm.ctx.new_int(t2).into(), + ]) + .into() } + /// Set the collection thresholds. #[pyfunction] - fn isenabled(_args: FuncArgs, _vm: &VirtualMachine) -> bool { - false + fn set_threshold(threshold0: u32, threshold1: OptionalArg, threshold2: OptionalArg) { + gc_state::gc_state().set_threshold( + threshold0, + threshold1.into_option(), + threshold2.into_option(), + ); } + /// Return the current collection counts as a tuple. #[pyfunction] - fn enable(_args: FuncArgs, vm: &VirtualMachine) -> PyResult { - Err(vm.new_not_implemented_error("")) + fn get_count(vm: &VirtualMachine) -> PyObjectRef { + let (c0, c1, c2) = gc_state::gc_state().get_count(); + vm.ctx + .new_tuple(vec![ + vm.ctx.new_int(c0).into(), + vm.ctx.new_int(c1).into(), + vm.ctx.new_int(c2).into(), + ]) + .into() } + /// Return the current debugging flags. #[pyfunction] - fn disable(_args: FuncArgs, vm: &VirtualMachine) -> PyResult { - Err(vm.new_not_implemented_error("")) + fn get_debug() -> u32 { + gc_state::gc_state().get_debug().bits() } + /// Set the debugging flags. #[pyfunction] - fn get_count(_args: FuncArgs, vm: &VirtualMachine) -> PyResult { - Err(vm.new_not_implemented_error("")) + fn set_debug(flags: u32) { + gc_state::gc_state().set_debug(gc_state::GcDebugFlags::from_bits_truncate(flags)); } + /// Return a list of per-generation gc stats. #[pyfunction] - fn get_debug(_args: FuncArgs, vm: &VirtualMachine) -> PyResult { - Err(vm.new_not_implemented_error("")) + fn get_stats(vm: &VirtualMachine) -> PyResult { + let stats = gc_state::gc_state().get_stats(); + let mut result = Vec::with_capacity(3); + + for stat in stats.iter() { + let dict = vm.ctx.new_dict(); + dict.set_item("collections", vm.ctx.new_int(stat.collections).into(), vm)?; + dict.set_item("collected", vm.ctx.new_int(stat.collected).into(), vm)?; + dict.set_item( + "uncollectable", + vm.ctx.new_int(stat.uncollectable).into(), + vm, + )?; + result.push(dict.into()); + } + + Ok(vm.ctx.new_list(result)) + } + + /// Return the list of objects tracked by the collector. + #[derive(FromArgs)] + struct GetObjectsArgs { + #[pyarg(any, optional)] + generation: OptionalArg>, } #[pyfunction] - fn get_objects(_args: FuncArgs, vm: &VirtualMachine) -> PyResult { - Err(vm.new_not_implemented_error("")) + fn get_objects(args: GetObjectsArgs, vm: &VirtualMachine) -> PyResult { + let generation_opt = args.generation.flatten(); + if let Some(g) = generation_opt + && !(0..=2).contains(&g) + { + return Err(vm.new_value_error(format!("generation must be in range(0, 3), not {}", g))); + } + let objects = gc_state::gc_state().get_objects(generation_opt); + Ok(vm.ctx.new_list(objects)) } + /// Return the list of objects directly referred to by any of the arguments. #[pyfunction] - fn get_referents(_args: FuncArgs, vm: &VirtualMachine) -> PyResult { - Err(vm.new_not_implemented_error("")) + fn get_referents(args: FuncArgs, vm: &VirtualMachine) -> PyListRef { + let mut result = Vec::new(); + + for obj in args.args { + // Use the gc_get_referents method to get references + result.extend(obj.gc_get_referents()); + } + + vm.ctx.new_list(result) } + /// Return the list of objects that directly refer to any of the arguments. #[pyfunction] - fn get_referrers(_args: FuncArgs, vm: &VirtualMachine) -> PyResult { - Err(vm.new_not_implemented_error("")) + fn get_referrers(args: FuncArgs, vm: &VirtualMachine) -> PyListRef { + // This is expensive: we need to scan all tracked objects + // For now, return an empty list (would need full object tracking to implement) + let _ = args; + vm.ctx.new_list(vec![]) } + /// Return True if the object is tracked by the garbage collector. #[pyfunction] - fn get_stats(_args: FuncArgs, vm: &VirtualMachine) -> PyResult { - Err(vm.new_not_implemented_error("")) + fn is_tracked(obj: PyObjectRef) -> bool { + // An object is tracked if it has IS_TRACE = true (has a trace function) + obj.is_gc_tracked() } + /// Return True if the object has been finalized by the garbage collector. #[pyfunction] - fn get_threshold(_args: FuncArgs, vm: &VirtualMachine) -> PyResult { - Err(vm.new_not_implemented_error("")) + fn is_finalized(obj: PyObjectRef) -> bool { + // Check the per-object finalized flag directly + obj.gc_finalized() } + /// Freeze all objects tracked by gc. #[pyfunction] - fn is_tracked(_args: FuncArgs, vm: &VirtualMachine) -> PyResult { - Err(vm.new_not_implemented_error("")) + fn freeze() { + gc_state::gc_state().freeze(); } + /// Unfreeze all objects in the permanent generation. #[pyfunction] - fn set_debug(_args: FuncArgs, vm: &VirtualMachine) -> PyResult { - Err(vm.new_not_implemented_error("")) + fn unfreeze() { + gc_state::gc_state().unfreeze(); } + /// Return the number of objects in the permanent generation. #[pyfunction] - fn set_threshold(_args: FuncArgs, vm: &VirtualMachine) -> PyResult { - Err(vm.new_not_implemented_error("")) + fn get_freeze_count() -> usize { + gc_state::gc_state().get_freeze_count() + } + + /// gc.garbage - list of uncollectable objects + #[pyattr] + fn garbage(vm: &VirtualMachine) -> PyListRef { + vm.ctx.gc_garbage.clone() + } + + /// gc.callbacks - list of callbacks to be invoked + #[pyattr] + fn callbacks(vm: &VirtualMachine) -> PyListRef { + vm.ctx.gc_callbacks.clone() + } + + /// Helper function to invoke GC callbacks + fn invoke_callbacks( + vm: &VirtualMachine, + phase: &str, + generation: usize, + collected: usize, + uncollectable: usize, + ) { + let callbacks_list = &vm.ctx.gc_callbacks; + let callbacks: Vec = callbacks_list.borrow_vec().to_vec(); + if callbacks.is_empty() { + return; + } + + let phase_str: PyObjectRef = vm.ctx.new_str(phase).into(); + let info = vm.ctx.new_dict(); + let _ = info.set_item("generation", vm.ctx.new_int(generation).into(), vm); + let _ = info.set_item("collected", vm.ctx.new_int(collected).into(), vm); + let _ = info.set_item("uncollectable", vm.ctx.new_int(uncollectable).into(), vm); + + for callback in callbacks { + let _ = callback.call((phase_str.clone(), info.clone()), vm); + } } } diff --git a/crates/vm/Cargo.toml b/crates/vm/Cargo.toml index da01eff65b9..6e05c7cde48 100644 --- a/crates/vm/Cargo.toml +++ b/crates/vm/Cargo.toml @@ -10,7 +10,7 @@ repository.workspace = true license.workspace = true [features] -default = ["compiler", "wasmbind", "stdio"] +default = ["compiler", "wasmbind", "stdio", "gc"] stdio = [] importlib = [] encodings = ["importlib"] @@ -19,6 +19,7 @@ flame-it = ["flame", "flamer"] freeze-stdlib = ["encodings"] jit = ["rustpython-jit"] threading = ["rustpython-common/threading"] +gc = [] compiler = ["parser", "codegen", "rustpython-compiler"] ast = ["ruff_python_ast", "ruff_text_size"] codegen = ["rustpython-codegen", "ast"] diff --git a/crates/vm/src/gc_state.rs b/crates/vm/src/gc_state.rs new file mode 100644 index 00000000000..54b362b5934 --- /dev/null +++ b/crates/vm/src/gc_state.rs @@ -0,0 +1,473 @@ +//! Garbage Collection State and Algorithm +//! +//! This module implements CPython-compatible generational garbage collection +//! for RustPython, using an intrusive doubly-linked list approach. + +use crate::common::lock::PyMutex; +use crate::{PyObject, PyObjectRef}; +use core::ptr::NonNull; +use core::sync::atomic::{AtomicBool, AtomicU32, AtomicUsize, Ordering}; +use std::collections::HashSet; +use std::sync::{Mutex, RwLock}; + +bitflags::bitflags! { + /// GC debug flags (see Include/internal/pycore_gc.h) + #[derive(Copy, Clone, Debug, Default, PartialEq, Eq)] + pub struct GcDebugFlags: u32 { + /// Print collection statistics + const STATS = 1 << 0; + /// Print collectable objects + const COLLECTABLE = 1 << 1; + /// Print uncollectable objects + const UNCOLLECTABLE = 1 << 2; + /// Save all garbage in gc.garbage + const SAVEALL = 1 << 5; + /// DEBUG_COLLECTABLE | DEBUG_UNCOLLECTABLE | DEBUG_SAVEALL + const LEAK = Self::COLLECTABLE.bits() | Self::UNCOLLECTABLE.bits() | Self::SAVEALL.bits(); + } +} + + +/// Statistics for a single generation (gc_generation_stats) +#[derive(Debug, Default, Clone, Copy)] +pub struct GcStats { + pub collections: usize, + pub collected: usize, + pub uncollectable: usize, +} + +/// A single GC generation with intrusive linked list +pub struct GcGeneration { + /// Number of objects in this generation + count: AtomicUsize, + /// Threshold for triggering collection + threshold: AtomicU32, + /// Collection statistics + stats: PyMutex, +} + +impl GcGeneration { + pub const fn new(threshold: u32) -> Self { + Self { + count: AtomicUsize::new(0), + threshold: AtomicU32::new(threshold), + stats: PyMutex::new(GcStats { + collections: 0, + collected: 0, + uncollectable: 0, + }), + } + } + + pub fn count(&self) -> usize { + self.count.load(Ordering::SeqCst) + } + + pub fn threshold(&self) -> u32 { + self.threshold.load(Ordering::SeqCst) + } + + pub fn set_threshold(&self, value: u32) { + self.threshold.store(value, Ordering::SeqCst); + } + + pub fn stats(&self) -> GcStats { + let guard = self.stats.lock(); + GcStats { + collections: guard.collections, + collected: guard.collected, + uncollectable: guard.uncollectable, + } + } + + pub fn update_stats(&self, collected: usize, uncollectable: usize) { + let mut guard = self.stats.lock(); + guard.collections += 1; + guard.collected += collected; + guard.uncollectable += uncollectable; + } +} + +/// Wrapper for raw pointer to make it Send + Sync +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +struct GcObjectPtr(NonNull); + +// SAFETY: We only use this for tracking objects, and proper synchronization is used +unsafe impl Send for GcObjectPtr {} +unsafe impl Sync for GcObjectPtr {} + +/// Global GC state +pub struct GcState { + /// 3 generations (0 = youngest, 2 = oldest) + pub generations: [GcGeneration; 3], + /// Permanent generation (frozen objects) + pub permanent: GcGeneration, + /// GC enabled flag + pub enabled: AtomicBool, + /// Per-generation object tracking (for correct gc_refs algorithm) + /// Objects start in gen0, survivors move to gen1, then gen2 + generation_objects: [RwLock>; 3], + /// Frozen/permanent objects (excluded from normal GC) + permanent_objects: RwLock>, + /// Debug flags + pub debug: AtomicU32, + /// gc.garbage list (uncollectable objects with __del__) + pub garbage: PyMutex>, + /// gc.callbacks list + pub callbacks: PyMutex>, + /// Mutex for collection (prevents concurrent collections). + /// Used by collect_inner when the actual collection algorithm is enabled. + #[allow(dead_code)] + collecting: Mutex<()>, + /// Allocation counter for gen0 + alloc_count: AtomicUsize, + /// Registry of all tracked objects (for cycle detection) + tracked_objects: RwLock>, + /// Objects that have been finalized (__del__ already called) + /// Prevents calling __del__ multiple times on resurrected objects + finalized_objects: RwLock>, +} + +// SAFETY: All fields are either inherently Send/Sync (atomics, RwLock, Mutex) or protected by PyMutex. +// PyMutex> is safe to share/send across threads because access is synchronized. +// PyObjectRef itself is Send, and interior mutability is guarded by the mutex. +unsafe impl Send for GcState {} +unsafe impl Sync for GcState {} + +impl Default for GcState { + fn default() -> Self { + Self::new() + } +} + +impl GcState { + pub fn new() -> Self { + Self { + generations: [ + GcGeneration::new(2000), // young + GcGeneration::new(10), // old[0] + GcGeneration::new(0), // old[1] + ], + permanent: GcGeneration::new(0), + enabled: AtomicBool::new(true), + generation_objects: [ + RwLock::new(HashSet::new()), + RwLock::new(HashSet::new()), + RwLock::new(HashSet::new()), + ], + permanent_objects: RwLock::new(HashSet::new()), + debug: AtomicU32::new(0), + garbage: PyMutex::new(Vec::new()), + callbacks: PyMutex::new(Vec::new()), + collecting: Mutex::new(()), + alloc_count: AtomicUsize::new(0), + tracked_objects: RwLock::new(HashSet::new()), + finalized_objects: RwLock::new(HashSet::new()), + } + } + + /// Check if GC is enabled + pub fn is_enabled(&self) -> bool { + self.enabled.load(Ordering::SeqCst) + } + + /// Enable GC + pub fn enable(&self) { + self.enabled.store(true, Ordering::SeqCst); + } + + /// Disable GC + pub fn disable(&self) { + self.enabled.store(false, Ordering::SeqCst); + } + + /// Get debug flags + pub fn get_debug(&self) -> GcDebugFlags { + GcDebugFlags::from_bits_truncate(self.debug.load(Ordering::SeqCst)) + } + + /// Set debug flags + pub fn set_debug(&self, flags: GcDebugFlags) { + self.debug.store(flags.bits(), Ordering::SeqCst); + } + + /// Get thresholds for all generations + pub fn get_threshold(&self) -> (u32, u32, u32) { + ( + self.generations[0].threshold(), + self.generations[1].threshold(), + self.generations[2].threshold(), + ) + } + + /// Set thresholds + pub fn set_threshold(&self, t0: u32, t1: Option, t2: Option) { + self.generations[0].set_threshold(t0); + if let Some(t1) = t1 { + self.generations[1].set_threshold(t1); + } + if let Some(t2) = t2 { + self.generations[2].set_threshold(t2); + } + } + + /// Get counts for all generations + pub fn get_count(&self) -> (usize, usize, usize) { + ( + self.generations[0].count(), + self.generations[1].count(), + self.generations[2].count(), + ) + } + + /// Get statistics for all generations + pub fn get_stats(&self) -> [GcStats; 3] { + [ + self.generations[0].stats(), + self.generations[1].stats(), + self.generations[2].stats(), + ] + } + + /// Track a new object (add to gen0) + /// Called when IS_TRACE objects are created + /// + /// # Safety + /// obj must be a valid pointer to a PyObject + pub unsafe fn track_object(&self, obj: NonNull) { + let gc_ptr = GcObjectPtr(obj); + + // Add to generation 0 tracking first (for correct gc_refs algorithm) + // Only increment count if we successfully add to the set + if let Ok(mut gen0) = self.generation_objects[0].write() + && gen0.insert(gc_ptr) + { + self.generations[0].count.fetch_add(1, Ordering::SeqCst); + self.alloc_count.fetch_add(1, Ordering::SeqCst); + } + + // Also add to global tracking (for get_objects, etc.) + if let Ok(mut tracked) = self.tracked_objects.write() { + tracked.insert(gc_ptr); + } + } + + /// Untrack an object (remove from GC lists) + /// Called when objects are deallocated + /// + /// # Safety + /// obj must be a valid pointer to a PyObject + pub unsafe fn untrack_object(&self, obj: NonNull) { + let gc_ptr = GcObjectPtr(obj); + + // Remove from generation tracking lists and decrement the correct generation's count + for (gen_idx, generation) in self.generation_objects.iter().enumerate() { + if let Ok(mut gen_set) = generation.write() + && gen_set.remove(&gc_ptr) + { + // Decrement count for the generation we removed from + let count = self.generations[gen_idx].count.load(Ordering::SeqCst); + if count > 0 { + self.generations[gen_idx] + .count + .fetch_sub(1, Ordering::SeqCst); + } + break; // Object can only be in one generation + } + } + + // Remove from global tracking + if let Ok(mut tracked) = self.tracked_objects.write() { + tracked.remove(&gc_ptr); + } + + // Remove from finalized set + if let Ok(mut finalized) = self.finalized_objects.write() { + finalized.remove(&gc_ptr); + } + } + + /// Check if an object has been finalized + pub fn is_finalized(&self, obj: NonNull) -> bool { + let gc_ptr = GcObjectPtr(obj); + if let Ok(finalized) = self.finalized_objects.read() { + finalized.contains(&gc_ptr) + } else { + false + } + } + + /// Mark an object as finalized + pub fn mark_finalized(&self, obj: NonNull) { + let gc_ptr = GcObjectPtr(obj); + if let Ok(mut finalized) = self.finalized_objects.write() { + finalized.insert(gc_ptr); + } + } + + /// Get tracked objects (for gc.get_objects) + /// If generation is None, returns all tracked objects. + /// If generation is Some(n), returns objects in generation n only. + pub fn get_objects(&self, generation: Option) -> Vec { + match generation { + None => { + // Return all tracked objects + if let Ok(tracked) = self.tracked_objects.read() { + tracked + .iter() + .filter_map(|ptr| { + let obj = unsafe { ptr.0.as_ref() }; + if obj.strong_count() > 0 { + Some(obj.to_owned()) + } else { + None + } + }) + .collect() + } else { + Vec::new() + } + } + Some(g) if (0..=2).contains(&g) => { + // Return objects in specific generation + let gen_idx = g as usize; + if let Ok(gen_set) = self.generation_objects[gen_idx].read() { + gen_set + .iter() + .filter_map(|ptr| { + let obj = unsafe { ptr.0.as_ref() }; + if obj.strong_count() > 0 { + Some(obj.to_owned()) + } else { + None + } + }) + .collect() + } else { + Vec::new() + } + } + _ => Vec::new(), + } + } + + /// Check if automatic GC should run and run it if needed. + /// Called after object allocation. + /// Currently a stub — returns false. + pub fn maybe_collect(&self) -> bool { + false + } + + /// Perform garbage collection on the given generation. + /// Returns (collected_count, uncollectable_count). + /// + /// Currently a stub — the actual collection algorithm requires EBR + /// and will be added in a follow-up. + pub fn collect(&self, _generation: usize) -> (usize, usize) { + (0, 0) + } + + /// Force collection even if GC is disabled (for manual gc.collect() calls). + /// Currently a stub. + pub fn collect_force(&self, _generation: usize) -> (usize, usize) { + (0, 0) + } + + /// Get count of frozen objects + pub fn get_freeze_count(&self) -> usize { + self.permanent.count() + } + + /// Freeze all tracked objects (move to permanent generation) + pub fn freeze(&self) { + // Move all objects from gen0-2 to permanent + let mut objects_to_freeze: Vec = Vec::new(); + + for (gen_idx, generation) in self.generation_objects.iter().enumerate() { + if let Ok(mut gen_set) = generation.write() { + objects_to_freeze.extend(gen_set.drain()); + self.generations[gen_idx].count.store(0, Ordering::SeqCst); + } + } + + // Add to permanent set + if let Ok(mut permanent) = self.permanent_objects.write() { + let count = objects_to_freeze.len(); + for ptr in objects_to_freeze { + permanent.insert(ptr); + } + self.permanent.count.fetch_add(count, Ordering::SeqCst); + } + } + + /// Unfreeze all objects (move from permanent to gen2) + pub fn unfreeze(&self) { + let mut objects_to_unfreeze: Vec = Vec::new(); + + if let Ok(mut permanent) = self.permanent_objects.write() { + objects_to_unfreeze.extend(permanent.drain()); + self.permanent.count.store(0, Ordering::SeqCst); + } + + // Add to generation 2 + if let Ok(mut gen2) = self.generation_objects[2].write() { + let count = objects_to_unfreeze.len(); + for ptr in objects_to_unfreeze { + gen2.insert(ptr); + } + self.generations[2].count.fetch_add(count, Ordering::SeqCst); + } + } +} + +use std::sync::OnceLock; + +/// Global GC state instance +/// Using a static because GC needs to be accessible from object allocation/deallocation +static GC_STATE: OnceLock = OnceLock::new(); + +/// Get a reference to the global GC state +pub fn gc_state() -> &'static GcState { + GC_STATE.get_or_init(GcState::new) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_gc_state_default() { + let state = GcState::new(); + assert!(state.is_enabled()); + assert_eq!(state.get_debug(), GcDebugFlags::empty()); + assert_eq!(state.get_threshold(), (2000, 10, 0)); + assert_eq!(state.get_count(), (0, 0, 0)); + } + + #[test] + fn test_gc_enable_disable() { + let state = GcState::new(); + assert!(state.is_enabled()); + state.disable(); + assert!(!state.is_enabled()); + state.enable(); + assert!(state.is_enabled()); + } + + #[test] + fn test_gc_threshold() { + let state = GcState::new(); + state.set_threshold(100, Some(20), Some(30)); + assert_eq!(state.get_threshold(), (100, 20, 30)); + } + + #[test] + fn test_gc_debug_flags() { + let state = GcState::new(); + state.set_debug(GcDebugFlags::STATS | GcDebugFlags::COLLECTABLE); + assert_eq!( + state.get_debug(), + GcDebugFlags::STATS | GcDebugFlags::COLLECTABLE + ); + } +} diff --git a/crates/vm/src/lib.rs b/crates/vm/src/lib.rs index aaa94ff52c1..a458bc0cbc0 100644 --- a/crates/vm/src/lib.rs +++ b/crates/vm/src/lib.rs @@ -77,6 +77,7 @@ pub mod py_io; #[cfg(feature = "serde")] pub mod py_serde; +pub mod gc_state; pub mod readline; pub mod recursion; pub mod scope; diff --git a/crates/vm/src/object/core.rs b/crates/vm/src/object/core.rs index 43b2f7dc61a..c3493271ead 100644 --- a/crates/vm/src/object/core.rs +++ b/crates/vm/src/object/core.rs @@ -811,7 +811,7 @@ impl PyObject { /// Check if the object has been finalized (__del__ already called). /// _PyGC_FINALIZED in Py_GIL_DISABLED mode. #[inline] - fn gc_finalized(&self) -> bool { + pub fn gc_finalized(&self) -> bool { use core::sync::atomic::Ordering::Relaxed; GcBits::from_bits_retain(self.0.gc_bits.load(Relaxed)).contains(GcBits::FINALIZED) } @@ -898,6 +898,25 @@ impl PyObject { pub(crate) fn set_slot(&self, offset: usize, value: Option) { *self.0.slots[offset].write() = value; } + + /// Check if this object is tracked by the garbage collector. + /// Returns true if the object has a trace function or has an instance dict. + pub fn is_gc_tracked(&self) -> bool { + if self.0.vtable.trace.is_some() { + return true; + } + self.0.dict.is_some() + } + + /// Get the referents (objects directly referenced) of this object. + /// Uses the full traverse including dict and slots. + pub fn gc_get_referents(&self) -> Vec { + let mut result = Vec::new(); + self.0.traverse(&mut |child: &PyObject| { + result.push(child.to_owned()); + }); + result + } } impl Borrow for PyObjectRef { diff --git a/crates/vm/src/vm/context.rs b/crates/vm/src/vm/context.rs index f4368e91a1d..34ad66c53c0 100644 --- a/crates/vm/src/vm/context.rs +++ b/crates/vm/src/vm/context.rs @@ -51,6 +51,10 @@ pub struct Context { pub(crate) string_pool: StringPool, pub(crate) slot_new_wrapper: PyMethodDef, pub names: ConstName, + + // GC module state (callbacks and garbage lists) + pub gc_callbacks: PyListRef, + pub gc_garbage: PyListRef, } macro_rules! declare_const_name { @@ -333,6 +337,11 @@ impl Context { let empty_str = unsafe { string_pool.intern("", types.str_type.to_owned()) }; let empty_bytes = create_object(PyBytes::from(Vec::new()), types.bytes_type); + + // GC callbacks and garbage lists + let gc_callbacks = PyRef::new_ref(PyList::default(), types.list_type.to_owned(), None); + let gc_garbage = PyRef::new_ref(PyList::default(), types.list_type.to_owned(), None); + Self { true_value, false_value, @@ -352,6 +361,9 @@ impl Context { string_pool, slot_new_wrapper, names, + + gc_callbacks, + gc_garbage, } } From 2d5be2f522121002033edc714c47c55a25762996 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Sat, 31 Jan 2026 17:37:49 +0900 Subject: [PATCH 2/4] fix dict/weakref/generators --- .cspell.dict/cpython.txt | 2 ++ Lib/test/test_dict.py | 2 -- Lib/test/test_generators.py | 1 - Lib/test/test_weakref.py | 4 ---- 4 files changed, 2 insertions(+), 7 deletions(-) diff --git a/.cspell.dict/cpython.txt b/.cspell.dict/cpython.txt index 7574d45c5f1..94c760a7998 100644 --- a/.cspell.dict/cpython.txt +++ b/.cspell.dict/cpython.txt @@ -147,6 +147,7 @@ repr resinfo Rshift SA_ONSTACK +saveall scls setdict setfunc @@ -178,6 +179,7 @@ Typeparam typeparams typeslots unaryop +uncollectable Unhandle unparse unparser diff --git a/Lib/test/test_dict.py b/Lib/test/test_dict.py index 9598a7ab962..ce0f09dd763 100644 --- a/Lib/test/test_dict.py +++ b/Lib/test/test_dict.py @@ -369,8 +369,6 @@ def test_copy_fuzz(self): self.assertNotEqual(d, d2) self.assertEqual(len(d2), len(d) + 1) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_copy_maintains_tracking(self): class A: pass diff --git a/Lib/test/test_generators.py b/Lib/test/test_generators.py index 853767135aa..5559d58cad4 100644 --- a/Lib/test/test_generators.py +++ b/Lib/test/test_generators.py @@ -176,7 +176,6 @@ def f(): g.send(0) self.assertEqual(next(g), 1) - @unittest.expectedFailure # TODO: RUSTPYTHON; NotImplementedError def test_handle_frame_object_in_creation(self): #Attempt to expose partially constructed frames diff --git a/Lib/test/test_weakref.py b/Lib/test/test_weakref.py index bfecca43909..1eb01d16226 100644 --- a/Lib/test/test_weakref.py +++ b/Lib/test/test_weakref.py @@ -846,11 +846,9 @@ def cb(self, ignore): gc.collect() self.assertEqual(alist, []) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_gc_during_ref_creation(self): self.check_gc_during_creation(weakref.ref) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_gc_during_proxy_creation(self): self.check_gc_during_creation(weakref.proxy) @@ -1365,11 +1363,9 @@ def check_len_race(self, dict_type, cons): self.assertGreaterEqual(n2, 0) self.assertLessEqual(n2, n1) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_weak_keyed_len_race(self): self.check_len_race(weakref.WeakKeyDictionary, lambda k: (k, 1)) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_weak_valued_len_race(self): self.check_len_race(weakref.WeakValueDictionary, lambda k: (1, k)) From 4ff9fb86ca5e3765a72b7a05830173768bff9cb5 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Sat, 31 Jan 2026 17:57:32 +0900 Subject: [PATCH 3/4] unmark test_asyncio --- Lib/test/_test_multiprocessing.py | 2 -- Lib/test/test_asyncio/test_futures.py | 6 ------ Lib/test/test_subprocess.py | 1 - Lib/test/test_weakset.py | 2 -- Lib/test/test_zoneinfo/test_zoneinfo.py | 2 -- 5 files changed, 13 deletions(-) diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py index 0855e384f24..d1234448da5 100644 --- a/Lib/test/_test_multiprocessing.py +++ b/Lib/test/_test_multiprocessing.py @@ -4057,8 +4057,6 @@ def test_heap(self): self.assertEqual(len(heap._allocated_blocks), 0, heap._allocated_blocks) self.assertEqual(len(heap._len_to_seq), 0) - # TODO: RUSTPYTHON - gc.enable() not implemented - @unittest.expectedFailure def test_free_from_gc(self): # Check that freeing of blocks by the garbage collector doesn't deadlock # (issue #12352). diff --git a/Lib/test/test_asyncio/test_futures.py b/Lib/test/test_asyncio/test_futures.py index 571fbace020..ec3029983b7 100644 --- a/Lib/test/test_asyncio/test_futures.py +++ b/Lib/test/test_asyncio/test_futures.py @@ -678,8 +678,6 @@ def __del__(self): fut = self._new_future(loop=self.loop) fut.set_result(Evil()) - # TODO: RUSTPYTHON - gc.get_referrers not implemented - @unittest.expectedFailure def test_future_cancelled_result_refcycles(self): f = self._new_future(loop=self.loop) f.cancel() @@ -691,8 +689,6 @@ def test_future_cancelled_result_refcycles(self): self.assertIsNotNone(exc) self.assertListEqual(gc.get_referrers(exc), []) - # TODO: RUSTPYTHON - gc.get_referrers not implemented - @unittest.expectedFailure def test_future_cancelled_exception_refcycles(self): f = self._new_future(loop=self.loop) f.cancel() @@ -720,8 +716,6 @@ def test_future_del_segfault(self): with self.assertRaises(AttributeError): del fut._log_traceback - # TODO: RUSTPYTHON - gc.get_referents not implemented - @unittest.expectedFailure def test_future_iter_get_referents_segfault(self): # See https://github.com/python/cpython/issues/122695 import _asyncio diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py index 4d058652723..e58ea9c20ea 100644 --- a/Lib/test/test_subprocess.py +++ b/Lib/test/test_subprocess.py @@ -2445,7 +2445,6 @@ def raise_it(): stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, preexec_fn=raise_it) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_preexec_gc_module_failure(self): # This tests the code that disables garbage collection if the child # process will execute any Python. diff --git a/Lib/test/test_weakset.py b/Lib/test/test_weakset.py index 5e8cacc09dc..af9bbe7cd41 100644 --- a/Lib/test/test_weakset.py +++ b/Lib/test/test_weakset.py @@ -425,8 +425,6 @@ def test_len_cycles(self): self.assertIn(n1, (0, 1)) self.assertEqual(n2, 0) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_len_race(self): # Extended sanity checks for len() in the face of cyclic collection self.addCleanup(gc.set_threshold, *gc.get_threshold()) diff --git a/Lib/test/test_zoneinfo/test_zoneinfo.py b/Lib/test/test_zoneinfo/test_zoneinfo.py index e05bd046e83..46aa42063a4 100644 --- a/Lib/test/test_zoneinfo/test_zoneinfo.py +++ b/Lib/test/test_zoneinfo/test_zoneinfo.py @@ -1937,8 +1937,6 @@ def test_cache_location(self): self.assertFalse(hasattr(c_zoneinfo.ZoneInfo, "_weak_cache")) self.assertTrue(hasattr(py_zoneinfo.ZoneInfo, "_weak_cache")) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_gc_tracked(self): import gc From 06c8270640467bac67552a8f02c4e4454aa37562 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Sat, 31 Jan 2026 23:03:33 +0900 Subject: [PATCH 4/4] apply review --- crates/vm/src/gc_state.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/crates/vm/src/gc_state.rs b/crates/vm/src/gc_state.rs index 54b362b5934..b4f9165ea17 100644 --- a/crates/vm/src/gc_state.rs +++ b/crates/vm/src/gc_state.rs @@ -27,7 +27,6 @@ bitflags::bitflags! { } } - /// Statistics for a single generation (gc_generation_stats) #[derive(Debug, Default, Clone, Copy)] pub struct GcStats { @@ -281,6 +280,16 @@ impl GcState { tracked.remove(&gc_ptr); } + // Remove from permanent tracking + if let Ok(mut permanent) = self.permanent_objects.write() + && permanent.remove(&gc_ptr) + { + let count = self.permanent.count.load(Ordering::SeqCst); + if count > 0 { + self.permanent.count.fetch_sub(1, Ordering::SeqCst); + } + } + // Remove from finalized set if let Ok(mut finalized) = self.finalized_objects.write() { finalized.remove(&gc_ptr);