Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions Include/internal/pycore_interp_structs.h
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,12 @@ struct _gc_runtime_state {
/* a permanent generation which won't be collected */
struct gc_generation permanent_generation;
struct gc_stats *generation_stats;
#ifdef Py_GIL_DISABLED
/* Serializes access to generation_stats between gc_get_stats_impl()
(reader) and gc_collect_main() (writer) so they can run concurrently
under free-threading without a data race (gh-151646). */
PyMutex stats_mutex;
#endif
/* true if we are currently running the collector */
int collecting;
// The frame that started the current collection. It might be NULL even when
Expand Down
107 changes: 107 additions & 0 deletions Lib/test/test_free_threading/test_gc_get_stats_race.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# Reproduction for the gc.get_stats() data race under free-threading.
#
# CPython issue gh-151646: in free-threading builds (``--disable-gil``) the
# concurrent collector and ``gc.get_stats()`` touch the same per-generation
# statistics struct (``gcstate->generation_stats``) without any synchronisation:
#
# * READER -- ``gc_get_stats_impl()`` in ``Modules/gcmodule.c`` copies the
# ``struct gc_generation_stats`` for each generation (``collections``,
# ``collected``, ``uncollectable``, ``candidates``, ``duration``) with a
# plain struct assignment and no lock.
#
# * WRITER -- at the end of every collection cycle ``gc_collect_main()`` in
# ``Python/gc_free_threading.c`` mutates that very struct in place
# (``stats->collections++``, ``stats->collected += m``, ...), again with no
# lock.
#
# When one thread is collecting while several others read the stats, the
# unsynchronised read/write to the same memory is a data race. The values are
# only statistics, so the race is benign at the Python level (no crash), which
# is exactly what lets this script double as a regression test: once the fix
# adds proper synchronisation, ThreadSanitizer should stay quiet while the
# script keeps exiting cleanly.
#
# Running this script under a free-threading CPython build compiled with
# ThreadSanitizer (``./configure --disable-gil --with-thread-sanitizer``) is
# expected to print a ``WARNING: ThreadSanitizer: data race`` report whose stack
# traces point at ``gc_get_stats_impl`` (gcmodule.c) and ``gc_collect_main``
# (gc_free_threading.c).
#
# Standalone usage:
#
# ./python Lib/test/test_free_threading/test_gc_get_stats_race.py
#
# or as part of the test suite (only meaningful under a TSAN build):
#
# ./python -m test test_free_threading.test_gc_get_stats_race

import gc
import threading
import unittest

from test.support import threading_helper


# One thread hammers gc.collect() (the writer side); enough reader threads run
# gc.get_stats() concurrently to make the race easy to observe. The iteration
# counts are deliberately fixed and finite so the script always terminates.
NUM_COLLECTORS = 1
NUM_READERS = 8
ITERATIONS = 50_000


def _stress_get_stats_race(num_collectors=NUM_COLLECTORS,
num_readers=NUM_READERS,
iterations=ITERATIONS):
"""Race gc.collect() against gc.get_stats() and return collected stats."""

# Synchronise the start so collectors and readers overlap for as long as
# possible, maximising the chance of the read and write landing on the
# statistics struct at the same time.
barrier = threading.Barrier(num_collectors + num_readers)

def collector():
barrier.wait()
for _ in range(iterations):
# Writer: each full collection updates gcstate->generation_stats.
gc.collect()

def reader():
barrier.wait()
for _ in range(iterations):
# Reader: copies the per-generation stats structs with no lock.
gc.get_stats()

threads = [threading.Thread(target=collector) for _ in range(num_collectors)]
threads += [threading.Thread(target=reader) for _ in range(num_readers)]

with threading_helper.start_threads(threads):
pass


@threading_helper.requires_working_threading()
class TestGCGetStatsRace(unittest.TestCase):
def test_get_stats_collect_race(self):
# Use a reduced iteration count under the regular test suite so the test
# stays reasonably quick while still exercising the race; the standalone
# __main__ path below uses the full ITERATIONS for a reliable repro.
_stress_get_stats_race(iterations=2_000)

# The race is benign at the Python level: gc.get_stats() must still
# return well-formed data and the interpreter must not crash.
stats = gc.get_stats()
self.assertEqual(len(stats), 3)
for generation in stats:
self.assertIn("collections", generation)
self.assertIn("collected", generation)
self.assertIn("uncollectable", generation)


if __name__ == "__main__":
# Standalone reproduction: run the full-size race and exit cleanly so the
# script can be reused as a regression check once the fix lands.
print(f"Racing {NUM_COLLECTORS} gc.collect() thread(s) against "
f"{NUM_READERS} gc.get_stats() thread(s), {ITERATIONS} iterations each...")
_stress_get_stats_race()
print("Done (no Python-level crash). "
"Run under a free-threading + TSAN build to observe the data race.")
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix a data race in free-threading builds between :func:`gc.get_stats` and a
concurrent garbage collection cycle. Access to the per-generation statistics
is now serialized with a mutex so the reader observes a consistent snapshot.
8 changes: 8 additions & 0 deletions Modules/gcmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -374,9 +374,17 @@ gc_get_stats_impl(PyObject *module)
/* To get consistent values despite allocations while constructing
the result list, we use a snapshot of the running stats. */
GCState *gcstate = get_gc_state();
#ifdef Py_GIL_DISABLED
/* Hold stats_mutex so the snapshot is consistent with a concurrent
collector updating the same struct (gh-151646). */
PyMutex_Lock(&gcstate->stats_mutex);
#endif
stats[0] = gcstate->generation_stats->young.items[gcstate->generation_stats->young.index];
stats[1] = gcstate->generation_stats->old[0].items[gcstate->generation_stats->old[0].index];
stats[2] = gcstate->generation_stats->old[1].items[gcstate->generation_stats->old[1].index];
#ifdef Py_GIL_DISABLED
PyMutex_Unlock(&gcstate->stats_mutex);
#endif

PyObject *result = PyList_New(0);
if (result == NULL)
Expand Down
7 changes: 6 additions & 1 deletion Python/gc_free_threading.c
Original file line number Diff line number Diff line change
Expand Up @@ -2281,7 +2281,11 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
}
}

/* Update stats */
/* Update stats. Hold stats_mutex so a concurrent gc.get_stats() reader
sees a consistent snapshot rather than racing on these fields
(gh-151646). get_stats() reads buffer->index inside the lock so that
both writer and reader resolve the same slot under the same mutex. */
PyMutex_Lock(&gcstate->stats_mutex);
struct gc_generation_stats *stats = get_stats(gcstate, generation);
stats->ts_start = start;
stats->ts_stop = stop;
Expand All @@ -2290,6 +2294,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
stats->uncollectable += n;
stats->duration += duration;
stats->candidates += state.candidates;
PyMutex_Unlock(&gcstate->stats_mutex);

GC_STAT_ADD(generation, objects_collected, m);
#ifdef Py_STATS
Expand Down
Loading