Skip to content

Commit c578ac0

Browse files
authored
gc: add CollectResult, stats fields, get_referrers, and fix count reset (#7354)
* gc: add CollectResult, stats fields, get_referrers, and fix count reset
  - Add CollectResult struct with collected/uncollectable/candidates/duration
  - Add candidates and duration fields to GcStats and gc.get_stats()
  - Pass CollectResult to gc.callbacks info dict
  - Reset generation counts for all collected generations (0..=N)
  - Return 0 for third value in gc.get_threshold() (3.13+)
  - Implement gc.get_referrers() by scanning all tracked objects
  - Add DEBUG_COLLECTABLE output for collectable objects
  - Update test_gc.py to expect candidates/duration in stats
* Update test_gc from v3.14.3
* Update test_gc.py from CPython v3.15.0a5
  Taken from v3.15 (not v3.14.3) because get_stats() candidates/duration fields were added in 3.13+ and the corresponding test assertions only exist in 3.15.
* Fix gc_state build on wasm32: skip Instant timing
* Add candidates/duration to gc callback info, mark v3.15 test failures
* Fix gc.get_referrers to exclude executing frames, fix Future cancelled exc leak
  - get_referrers: skip frame objects on the execution stack, since they are not GC-tracked in CPython (_PyInterpreterFrame)
  - _asyncio Future/Task make_cancelled_error_impl: clear the stored cancelled exception after returning it, matching the Python _make_cancelled_error behavior
* Fix gc.get_threshold to return actual gen2 threshold value
* Fix inconsistent GC count reset in early-return paths
  Use the same reset_end formula in unreachable-empty early returns as in the main collection path and collecting-empty path.
* Accept keyword arguments in socket.__init__
  Use a FromArgs struct instead of a positional-only tuple so that family, type, proto, and fileno can be passed as keyword arguments.
* Disable comp_inlined in symbol table to match compiler
  The compiler does not yet implement PEP 709 inlined comprehensions (is_inlined_comprehension_context always returns false), but the symbol table was marking comprehensions as inlined. This mismatch could cause comprehension-local symbols to be merged into the parent scope while the compiler still looks them up in a separate scope.

---------

Co-authored-by: CPython Developers <>
1 parent 331029e commit c578ac0

File tree

8 files changed

+349
-94
lines changed

8 files changed

+349
-94
lines changed

Lib/test/_test_gc_fast_cycles.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# Run by test_gc.
2+
from test import support
3+
import _testinternalcapi
4+
import gc
5+
import unittest
6+
7+
class IncrementalGCTests(unittest.TestCase):
8+
9+
# Use small increments to emulate longer running process in a shorter time
10+
@support.gc_threshold(200, 10)
11+
def test_incremental_gc_handles_fast_cycle_creation(self):
12+
13+
class LinkedList:
14+
15+
#Use slots to reduce number of implicit objects
16+
__slots__ = "next", "prev", "surprise"
17+
18+
def __init__(self, next=None, prev=None):
19+
self.next = next
20+
if next is not None:
21+
next.prev = self
22+
self.prev = prev
23+
if prev is not None:
24+
prev.next = self
25+
26+
def make_ll(depth):
27+
head = LinkedList()
28+
for i in range(depth):
29+
head = LinkedList(head, head.prev)
30+
return head
31+
32+
head = make_ll(1000)
33+
34+
assert(gc.isenabled())
35+
olds = []
36+
initial_heap_size = _testinternalcapi.get_tracked_heap_size()
37+
for i in range(20_000):
38+
newhead = make_ll(20)
39+
newhead.surprise = head
40+
olds.append(newhead)
41+
if len(olds) == 20:
42+
new_objects = _testinternalcapi.get_tracked_heap_size() - initial_heap_size
43+
self.assertLess(new_objects, 27_000, f"Heap growing. Reached limit after {i} iterations")
44+
del olds[:]
45+
46+
47+
if __name__ == "__main__":
48+
unittest.main()

Lib/test/test_gc.py

Lines changed: 98 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,8 @@ def test_function(self):
236236
# is 3 because it includes f's code object.
237237
self.assertIn(gc.collect(), (2, 3))
238238

239+
# TODO: RUSTPYTHON - weakref clear ordering differs from 3.15+
240+
@unittest.expectedFailure
239241
def test_function_tp_clear_leaves_consistent_state(self):
240242
# https://github.com/python/cpython/issues/91636
241243
code = """if 1:
@@ -262,9 +264,11 @@ class Cyclic(tuple):
262264
# finalizer.
263265
def __del__(self):
264266
265-
# 5. Create a weakref to `func` now. If we had created
266-
# it earlier, it would have been cleared by the
267-
# garbage collector before calling the finalizers.
267+
# 5. Create a weakref to `func` now. In previous
268+
# versions of Python, this would avoid having it
269+
# cleared by the garbage collector before calling
270+
# the finalizers. Now, weakrefs get cleared after
271+
# calling finalizers.
268272
self[1].ref = weakref.ref(self[0])
269273
270274
# 6. Drop the global reference to `latefin`. The only
@@ -293,16 +297,42 @@ def func():
293297
# which will find `cyc` and `func` as garbage.
294298
gc.collect()
295299
296-
# 9. Previously, this would crash because `func_qualname`
297-
# had been NULL-ed out by func_clear().
300+
# 9. Previously, this would crash because the weakref
301+
# created in the finalizer revealed the function after
302+
# `tp_clear` was called and `func_qualname`
303+
# had been NULL-ed out by func_clear(). Now, we clear
304+
# weakrefs to unreachable objects before calling `tp_clear`
305+
# but after calling finalizers.
298306
print(f"{func=}")
299307
"""
300-
# We're mostly just checking that this doesn't crash.
301308
rc, stdout, stderr = assert_python_ok("-c", code)
302309
self.assertEqual(rc, 0)
303-
self.assertRegex(stdout, rb"""\A\s*func=<function at \S+>\s*\z""")
310+
# The `func` global is None because the weakref was cleared.
311+
self.assertRegex(stdout, rb"""\A\s*func=None""")
304312
self.assertFalse(stderr)
305313

314+
# TODO: RUSTPYTHON - _datetime module not available
315+
@unittest.expectedFailure
316+
def test_datetime_weakref_cycle(self):
317+
# https://github.com/python/cpython/issues/132413
318+
# If the weakref used by the datetime extension gets cleared by the GC (due to being
319+
# in an unreachable cycle) then datetime functions would crash (get_module_state()
320+
# was returning a NULL pointer). This bug is fixed by clearing weakrefs without
321+
# callbacks *after* running finalizers.
322+
code = """if 1:
323+
import _datetime
324+
class C:
325+
def __del__(self):
326+
print('__del__ called')
327+
_datetime.timedelta(days=1) # crash?
328+
329+
l = [C()]
330+
l.append(l)
331+
"""
332+
rc, stdout, stderr = assert_python_ok("-c", code)
333+
self.assertEqual(rc, 0)
334+
self.assertEqual(stdout.strip(), b'__del__ called')
335+
306336
@refcount_test
307337
def test_frame(self):
308338
def f():
@@ -652,9 +682,8 @@ def callback(ignored):
652682
gc.collect()
653683
self.assertEqual(len(ouch), 2) # else the callbacks didn't run
654684
for x in ouch:
655-
# If the callback resurrected one of these guys, the instance
656-
# would be damaged, with an empty __dict__.
657-
self.assertEqual(x, None)
685+
# The weakref should be cleared before executing the callback.
686+
self.assertIsNone(x)
658687

659688
def test_bug21435(self):
660689
# This is a poor test - its only virtue is that it happened to
@@ -821,11 +850,15 @@ def test_get_stats(self):
821850
self.assertEqual(len(stats), 3)
822851
for st in stats:
823852
self.assertIsInstance(st, dict)
824-
self.assertEqual(set(st),
825-
{"collected", "collections", "uncollectable"})
853+
self.assertEqual(
854+
set(st),
855+
{"collected", "collections", "uncollectable", "candidates", "duration"}
856+
)
826857
self.assertGreaterEqual(st["collected"], 0)
827858
self.assertGreaterEqual(st["collections"], 0)
828859
self.assertGreaterEqual(st["uncollectable"], 0)
860+
self.assertGreaterEqual(st["candidates"], 0)
861+
self.assertGreaterEqual(st["duration"], 0)
829862
# Check that collection counts are incremented correctly
830863
if gc.isenabled():
831864
self.addCleanup(gc.enable)
@@ -836,11 +869,25 @@ def test_get_stats(self):
836869
self.assertEqual(new[0]["collections"], old[0]["collections"] + 1)
837870
self.assertEqual(new[1]["collections"], old[1]["collections"])
838871
self.assertEqual(new[2]["collections"], old[2]["collections"])
872+
self.assertGreater(new[0]["duration"], old[0]["duration"])
873+
self.assertEqual(new[1]["duration"], old[1]["duration"])
874+
self.assertEqual(new[2]["duration"], old[2]["duration"])
875+
for stat in ["collected", "uncollectable", "candidates"]:
876+
self.assertGreaterEqual(new[0][stat], old[0][stat])
877+
self.assertEqual(new[1][stat], old[1][stat])
878+
self.assertEqual(new[2][stat], old[2][stat])
839879
gc.collect(2)
840-
new = gc.get_stats()
841-
self.assertEqual(new[0]["collections"], old[0]["collections"] + 1)
880+
old, new = new, gc.get_stats()
881+
self.assertEqual(new[0]["collections"], old[0]["collections"])
842882
self.assertEqual(new[1]["collections"], old[1]["collections"])
843883
self.assertEqual(new[2]["collections"], old[2]["collections"] + 1)
884+
self.assertEqual(new[0]["duration"], old[0]["duration"])
885+
self.assertEqual(new[1]["duration"], old[1]["duration"])
886+
self.assertGreater(new[2]["duration"], old[2]["duration"])
887+
for stat in ["collected", "uncollectable", "candidates"]:
888+
self.assertEqual(new[0][stat], old[0][stat])
889+
self.assertEqual(new[1][stat], old[1][stat])
890+
self.assertGreaterEqual(new[2][stat], old[2][stat])
844891

845892
def test_freeze(self):
846893
gc.freeze()
@@ -1156,6 +1203,37 @@ def test_something(self):
11561203
""")
11571204
assert_python_ok("-c", source)
11581205

1206+
def test_do_not_cleanup_type_subclasses_before_finalization(self):
1207+
# See https://github.com/python/cpython/issues/135552
1208+
# If we cleanup weakrefs for tp_subclasses before calling
1209+
# the finalizer (__del__) then the line `fail = BaseNode.next.next`
1210+
# should fail because we are trying to access a subclass
1211+
# attribute. But subclass type cache was not properly invalidated.
1212+
code = """
1213+
class BaseNode:
1214+
def __del__(self):
1215+
BaseNode.next = BaseNode.next.next
1216+
fail = BaseNode.next.next
1217+
1218+
class Node(BaseNode):
1219+
pass
1220+
1221+
BaseNode.next = Node()
1222+
BaseNode.next.next = Node()
1223+
"""
1224+
# this test checks garbage collection while interp
1225+
# finalization
1226+
assert_python_ok("-c", textwrap.dedent(code))
1227+
1228+
code_inside_function = textwrap.dedent(F"""
1229+
def test():
1230+
{textwrap.indent(code, ' ')}
1231+
1232+
test()
1233+
""")
1234+
# this test checks regular garbage collection
1235+
assert_python_ok("-c", code_inside_function)
1236+
11591237

11601238
@unittest.skipUnless(Py_GIL_DISABLED, "requires free-threaded GC")
11611239
@unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi")
@@ -1260,9 +1338,11 @@ def test_collect(self):
12601338
# Check that we got the right info dict for all callbacks
12611339
for v in self.visit:
12621340
info = v[2]
1263-
self.assertTrue("generation" in info)
1264-
self.assertTrue("collected" in info)
1265-
self.assertTrue("uncollectable" in info)
1341+
self.assertIn("generation", info)
1342+
self.assertIn("collected", info)
1343+
self.assertIn("uncollectable", info)
1344+
self.assertIn("candidates", info)
1345+
self.assertIn("duration", info)
12661346

12671347
def test_collect_generation(self):
12681348
self.preclean()
@@ -1450,6 +1530,7 @@ def callback(ignored):
14501530
self.assertEqual(x, None)
14511531

14521532
@gc_threshold(1000, 0, 0)
1533+
@unittest.skipIf(Py_GIL_DISABLED, "requires GC generations or increments")
14531534
def test_bug1055820d(self):
14541535
# Corresponds to temp2d.py in the bug report. This is very much like
14551536
# test_bug1055820c, but uses a __del__ method instead of a weakref

Lib/test/test_symtable.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -561,7 +561,6 @@ def get_identifiers_recursive(self, st, res):
561561
for ch in st.get_children():
562562
self.get_identifiers_recursive(ch, res)
563563

564-
@unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: 2 != 1
565564
def test_loopvar_in_only_one_scope(self):
566565
# ensure that the loop variable appears only once in the symtable
567566
comps = [

crates/codegen/src/symboltable.rs

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2037,20 +2037,13 @@ impl SymbolTableBuilder {
20372037
self.line_index_start(range),
20382038
);
20392039

2040-
// Mark non-generator comprehensions as inlined (PEP 709)
2041-
// inline_comp = entry->ste_comprehension && !entry->ste_generator && !ste->ste_can_see_class_scope
2042-
// We check is_generator and can_see_class_scope of parent
2043-
let parent_can_see_class = self
2044-
.tables
2045-
.get(self.tables.len().saturating_sub(2))
2046-
.map(|t| t.can_see_class_scope)
2047-
.unwrap_or(false);
2048-
if !is_generator
2049-
&& !parent_can_see_class
2050-
&& let Some(table) = self.tables.last_mut()
2051-
{
2052-
table.comp_inlined = true;
2053-
}
2040+
// PEP 709: inlined comprehensions are not yet implemented in the
2041+
// compiler (is_inlined_comprehension_context always returns false),
2042+
// so do NOT mark comp_inlined here. Setting it would cause the
2043+
// symbol-table analyzer to merge comprehension-local symbols into
2044+
// the parent scope, while the compiler still emits a separate code
2045+
// object — leading to the merged symbols being missing from the
2046+
// comprehension's own symbol table lookup.
20542047

20552048
// Register the passed argument to the generator function as the name ".0"
20562049
self.register_name(".0", SymbolUsage::Parameter, range)?;

crates/stdlib/src/_asyncio.rs

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -499,7 +499,8 @@ pub(crate) mod _asyncio {
499499
}
500500

501501
fn make_cancelled_error_impl(&self, vm: &VirtualMachine) -> PyBaseExceptionRef {
502-
if let Some(exc) = self.fut_cancelled_exc.read().clone()
502+
// If a saved CancelledError exists, take it (clearing the stored reference)
503+
if let Some(exc) = self.fut_cancelled_exc.write().take()
503504
&& let Ok(exc) = exc.downcast::<PyBaseException>()
504505
{
505506
return exc;
@@ -508,12 +509,10 @@ pub(crate) mod _asyncio {
508509
let msg = self.fut_cancel_msg.read().clone();
509510
let args = if let Some(m) = msg { vec![m] } else { vec![] };
510511

511-
let exc = match get_cancelled_error_type(vm) {
512+
match get_cancelled_error_type(vm) {
512513
Ok(cancelled_error) => vm.new_exception(cancelled_error, args),
513514
Err(_) => vm.new_runtime_error("cancelled"),
514-
};
515-
*self.fut_cancelled_exc.write() = Some(exc.clone().into());
516-
exc
515+
}
517516
}
518517

519518
fn schedule_callbacks(zelf: &PyRef<Self>, vm: &VirtualMachine) -> PyResult<()> {
@@ -1309,7 +1308,8 @@ pub(crate) mod _asyncio {
13091308
}
13101309

13111310
fn make_cancelled_error_impl(&self, vm: &VirtualMachine) -> PyBaseExceptionRef {
1312-
if let Some(exc) = self.base.fut_cancelled_exc.read().clone()
1311+
// If a saved CancelledError exists, take it (clearing the stored reference)
1312+
if let Some(exc) = self.base.fut_cancelled_exc.write().take()
13131313
&& let Ok(exc) = exc.downcast::<PyBaseException>()
13141314
{
13151315
return exc;
@@ -1318,12 +1318,10 @@ pub(crate) mod _asyncio {
13181318
let msg = self.base.fut_cancel_msg.read().clone();
13191319
let args = if let Some(m) = msg { vec![m] } else { vec![] };
13201320

1321-
let exc = match get_cancelled_error_type(vm) {
1321+
match get_cancelled_error_type(vm) {
13221322
Ok(cancelled_error) => vm.new_exception(cancelled_error, args),
13231323
Err(_) => vm.new_runtime_error("cancelled"),
1324-
};
1325-
*self.base.fut_cancelled_exc.write() = Some(exc.clone().into());
1326-
exc
1324+
}
13271325
}
13281326

13291327
#[pymethod]

crates/stdlib/src/socket.rs

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1384,13 +1384,20 @@ mod _socket {
13841384

13851385
impl DefaultConstructor for PySocket {}
13861386

1387+
#[derive(FromArgs)]
1388+
pub struct SocketInitArgs {
1389+
#[pyarg(any, optional)]
1390+
family: OptionalArg<i32>,
1391+
#[pyarg(any, optional)]
1392+
r#type: OptionalArg<i32>,
1393+
#[pyarg(any, optional)]
1394+
proto: OptionalArg<i32>,
1395+
#[pyarg(any, optional)]
1396+
fileno: OptionalOption<PyObjectRef>,
1397+
}
1398+
13871399
impl Initializer for PySocket {
1388-
type Args = (
1389-
OptionalArg<i32>,
1390-
OptionalArg<i32>,
1391-
OptionalArg<i32>,
1392-
OptionalOption<PyObjectRef>,
1393-
);
1400+
type Args = SocketInitArgs;
13941401

13951402
fn init(zelf: PyRef<Self>, args: Self::Args, vm: &VirtualMachine) -> PyResult<()> {
13961403
Self::_init(zelf, args, vm).map_err(|e| e.into_pyexception(vm))
@@ -1414,13 +1421,14 @@ mod _socket {
14141421
impl PySocket {
14151422
fn _init(
14161423
zelf: PyRef<Self>,
1417-
(family, socket_kind, proto, fileno): <Self as Initializer>::Args,
1424+
args: <Self as Initializer>::Args,
14181425
vm: &VirtualMachine,
14191426
) -> Result<(), IoOrPyException> {
1420-
let mut family = family.unwrap_or(-1);
1421-
let mut socket_kind = socket_kind.unwrap_or(-1);
1422-
let mut proto = proto.unwrap_or(-1);
1427+
let mut family = args.family.unwrap_or(-1);
1428+
let mut socket_kind = args.r#type.unwrap_or(-1);
1429+
let mut proto = args.proto.unwrap_or(-1);
14231430

1431+
let fileno = args.fileno;
14241432
let sock;
14251433

14261434
// On Windows, fileno can be bytes from socket.share() for fromshare()

0 commit comments

Comments
 (0)