Skip to content

Commit eef25ad

Browse files
committed
FTP: Add exhaustive phase timing instrumentation
Add timing for all intermediate GC phases to diagnose the 50-80ms gap between timed phases and total gc.collect() time. New timing fields: - scan_heap_ns: parallel scan_heap phase - disable_deferred_ns: disable deferred refcounting loop - find_weakrefs_ns: find_weakref_callbacks - stw1_ns: StartTheWorld python#1 - objs_decref_ns: cleanup_worklist(objs_to_decref) - weakref_callbacks_ns: call_weakref_callbacks - finalize_ns: finalize_garbage - stw2_ns: StopTheWorld python#2 - resurrection_ns: handle_resurrected_objects - freelists_ns: _PyGC_ClearAllFreeLists - clear_weakrefs_ns: clear_weakrefs - stw3_ns: StartTheWorld python#3 All values exposed via gc.get_parallel_stats()['phase_timing'].
1 parent 548215d commit eef25ad

3 files changed

Lines changed: 158 additions & 39 deletions

File tree

Include/internal/pycore_interp_structs.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,18 @@ struct _gc_runtime_state {
277277
int64_t phase_start_ns; /* Start of parallel GC */
278278
int64_t update_refs_end_ns; /* End of update_refs phase */
279279
int64_t mark_heap_end_ns; /* End of mark_heap phase */
280+
int64_t scan_heap_end_ns; /* End of scan_heap phase */
281+
int64_t disable_deferred_end_ns; /* End of disable_deferred_refcounting loop */
282+
int64_t find_weakrefs_end_ns; /* End of find_weakref_callbacks */
283+
int64_t stw1_end_ns; /* After StartTheWorld #1 */
284+
int64_t objs_decref_end_ns; /* After cleanup_worklist(objs_to_decref) */
285+
int64_t weakref_callbacks_end_ns; /* After call_weakref_callbacks */
286+
int64_t finalize_end_ns; /* After finalize_garbage */
287+
int64_t stw2_end_ns; /* After StopTheWorld #2 */
288+
int64_t resurrection_end_ns; /* After handle_resurrected_objects */
289+
int64_t freelists_end_ns; /* After _PyGC_ClearAllFreeLists */
290+
int64_t clear_weakrefs_end_ns; /* After clear_weakrefs */
291+
int64_t stw3_end_ns; /* After StartTheWorld #3 */
280292
int64_t cleanup_start_ns; /* Start of cleanup phase (actual tp_clear work) */
281293
int64_t cleanup_end_ns; /* End of cleanup phase */
282294
#endif

Python/gc_free_threading.c

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1714,6 +1714,11 @@ deduce_unreachable_heap(PyInterpreterState *interp,
17141714
return -1;
17151715
}
17161716

1717+
// Record scan_heap end time
1718+
PyTime_t scan_end;
1719+
(void)PyTime_PerfCounterRaw(&scan_end);
1720+
interp->gc.scan_heap_end_ns = scan_end;
1721+
17171722
// Disable deferred refcounting serially for unreachable objects
17181723
// (this uses internal locks that aren't safe for parallel access).
17191724
// Use the worklist-safe version that preserves ob_tid (worklist link).
@@ -1731,6 +1736,11 @@ deduce_unreachable_heap(PyInterpreterState *interp,
17311736
op = next;
17321737
}
17331738

1739+
// Record disable_deferred end time
1740+
PyTime_t deferred_end;
1741+
(void)PyTime_PerfCounterRaw(&deferred_end);
1742+
interp->gc.disable_deferred_end_ns = deferred_end;
1743+
17341744
// Also scan the abandoned pool (serial, since it's typically small)
17351745
Py_ssize_t offset_base = 0;
17361746
if (_PyMem_DebugEnabled()) {
@@ -2596,26 +2606,82 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
25962606

25972607
// Find weakref callbacks we will honor (but do not call them).
25982608
find_weakref_callbacks(state);
2609+
2610+
// Record find_weakrefs end time
2611+
PyTime_t find_weakrefs_end;
2612+
(void)PyTime_PerfCounterRaw(&find_weakrefs_end);
2613+
interp->gc.find_weakrefs_end_ns = find_weakrefs_end;
2614+
25992615
_PyEval_StartTheWorld(interp);
26002616

2617+
// Record STW #1 end time
2618+
PyTime_t stw1_end;
2619+
(void)PyTime_PerfCounterRaw(&stw1_end);
2620+
interp->gc.stw1_end_ns = stw1_end;
2621+
26012622
// Deallocate any object from the refcount merge step
26022623
cleanup_worklist(&state->objs_to_decref);
26032624

2625+
// Record objs_decref end time
2626+
PyTime_t objs_decref_end;
2627+
(void)PyTime_PerfCounterRaw(&objs_decref_end);
2628+
interp->gc.objs_decref_end_ns = objs_decref_end;
2629+
26042630
// Call weakref callbacks and finalizers after unpausing other threads to
26052631
// avoid potential deadlocks.
26062632
call_weakref_callbacks(state);
2633+
2634+
// Record weakref_callbacks end time
2635+
PyTime_t weakref_callbacks_end;
2636+
(void)PyTime_PerfCounterRaw(&weakref_callbacks_end);
2637+
interp->gc.weakref_callbacks_end_ns = weakref_callbacks_end;
2638+
26072639
finalize_garbage(state);
26082640

2641+
// Record finalize end time
2642+
PyTime_t finalize_end;
2643+
(void)PyTime_PerfCounterRaw(&finalize_end);
2644+
interp->gc.finalize_end_ns = finalize_end;
2645+
26092646
_PyEval_StopTheWorld(interp);
2647+
2648+
// Record STW #2 end time
2649+
PyTime_t stw2_end;
2650+
(void)PyTime_PerfCounterRaw(&stw2_end);
2651+
interp->gc.stw2_end_ns = stw2_end;
2652+
26102653
// Handle any objects that may have resurrected after the finalization.
26112654
err = handle_resurrected_objects(state);
2655+
2656+
// Record resurrection end time
2657+
PyTime_t resurrection_end;
2658+
(void)PyTime_PerfCounterRaw(&resurrection_end);
2659+
interp->gc.resurrection_end_ns = resurrection_end;
2660+
26122661
// Clear free lists in all threads
26132662
_PyGC_ClearAllFreeLists(interp);
2663+
2664+
// Record freelists end time
2665+
PyTime_t freelists_end;
2666+
(void)PyTime_PerfCounterRaw(&freelists_end);
2667+
interp->gc.freelists_end_ns = freelists_end;
2668+
26142669
if (err == 0) {
26152670
clear_weakrefs(state);
26162671
}
2672+
2673+
// Record clear_weakrefs end time
2674+
PyTime_t clear_weakrefs_end;
2675+
(void)PyTime_PerfCounterRaw(&clear_weakrefs_end);
2676+
interp->gc.clear_weakrefs_end_ns = clear_weakrefs_end;
2677+
26172678
_PyEval_StartTheWorld(interp);
26182679

2680+
// Record STW #3 end time
2681+
PyTime_t stw3_end;
2682+
(void)PyTime_PerfCounterRaw(&stw3_end);
2683+
interp->gc.stw3_end_ns = stw3_end;
2684+
26192685
if (err < 0) {
26202686
cleanup_worklist(&state->unreachable);
26212687
cleanup_worklist(&state->legacy_finalizers);

Python/gc_free_threading_parallel.c

Lines changed: 80 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -3631,18 +3631,79 @@ _PyGC_FTParallelGetStats(PyInterpreterState *interp)
36313631
return NULL;
36323632
}
36333633

3634+
// Helper macro to add a timing value to the dict
3635+
#define ADD_TIMING(name, value) do { \
3636+
PyObject *obj = PyLong_FromLongLong(value); \
3637+
if (obj == NULL || PyDict_SetItemString(phase_timing, name, obj) < 0) { \
3638+
Py_XDECREF(obj); \
3639+
Py_DECREF(phase_timing); \
3640+
Py_DECREF(result); \
3641+
return NULL; \
3642+
} \
3643+
Py_DECREF(obj); \
3644+
} while(0)
3645+
36343646
// Calculate phase durations from recorded timestamps
36353647
int64_t update_refs_ns = 0;
36363648
int64_t mark_heap_ns = 0;
3649+
int64_t scan_heap_ns = 0;
3650+
int64_t disable_deferred_ns = 0;
3651+
int64_t find_weakrefs_ns = 0;
3652+
int64_t stw1_ns = 0;
3653+
int64_t objs_decref_ns = 0;
3654+
int64_t weakref_callbacks_ns = 0;
3655+
int64_t finalize_ns = 0;
3656+
int64_t stw2_ns = 0;
3657+
int64_t resurrection_ns = 0;
3658+
int64_t freelists_ns = 0;
3659+
int64_t clear_weakrefs_ns = 0;
3660+
int64_t stw3_ns = 0;
36373661
int64_t cleanup_ns = 0;
36383662
int64_t total_ns = 0;
36393663

3664+
// Calculate durations between timestamps
36403665
if (interp->gc.phase_start_ns > 0 && interp->gc.update_refs_end_ns > 0) {
36413666
update_refs_ns = interp->gc.update_refs_end_ns - interp->gc.phase_start_ns;
36423667
}
36433668
if (interp->gc.update_refs_end_ns > 0 && interp->gc.mark_heap_end_ns > 0) {
36443669
mark_heap_ns = interp->gc.mark_heap_end_ns - interp->gc.update_refs_end_ns;
36453670
}
3671+
if (interp->gc.mark_heap_end_ns > 0 && interp->gc.scan_heap_end_ns > 0) {
3672+
scan_heap_ns = interp->gc.scan_heap_end_ns - interp->gc.mark_heap_end_ns;
3673+
}
3674+
if (interp->gc.scan_heap_end_ns > 0 && interp->gc.disable_deferred_end_ns > 0) {
3675+
disable_deferred_ns = interp->gc.disable_deferred_end_ns - interp->gc.scan_heap_end_ns;
3676+
}
3677+
if (interp->gc.disable_deferred_end_ns > 0 && interp->gc.find_weakrefs_end_ns > 0) {
3678+
find_weakrefs_ns = interp->gc.find_weakrefs_end_ns - interp->gc.disable_deferred_end_ns;
3679+
}
3680+
if (interp->gc.find_weakrefs_end_ns > 0 && interp->gc.stw1_end_ns > 0) {
3681+
stw1_ns = interp->gc.stw1_end_ns - interp->gc.find_weakrefs_end_ns;
3682+
}
3683+
if (interp->gc.stw1_end_ns > 0 && interp->gc.objs_decref_end_ns > 0) {
3684+
objs_decref_ns = interp->gc.objs_decref_end_ns - interp->gc.stw1_end_ns;
3685+
}
3686+
if (interp->gc.objs_decref_end_ns > 0 && interp->gc.weakref_callbacks_end_ns > 0) {
3687+
weakref_callbacks_ns = interp->gc.weakref_callbacks_end_ns - interp->gc.objs_decref_end_ns;
3688+
}
3689+
if (interp->gc.weakref_callbacks_end_ns > 0 && interp->gc.finalize_end_ns > 0) {
3690+
finalize_ns = interp->gc.finalize_end_ns - interp->gc.weakref_callbacks_end_ns;
3691+
}
3692+
if (interp->gc.finalize_end_ns > 0 && interp->gc.stw2_end_ns > 0) {
3693+
stw2_ns = interp->gc.stw2_end_ns - interp->gc.finalize_end_ns;
3694+
}
3695+
if (interp->gc.stw2_end_ns > 0 && interp->gc.resurrection_end_ns > 0) {
3696+
resurrection_ns = interp->gc.resurrection_end_ns - interp->gc.stw2_end_ns;
3697+
}
3698+
if (interp->gc.resurrection_end_ns > 0 && interp->gc.freelists_end_ns > 0) {
3699+
freelists_ns = interp->gc.freelists_end_ns - interp->gc.resurrection_end_ns;
3700+
}
3701+
if (interp->gc.freelists_end_ns > 0 && interp->gc.clear_weakrefs_end_ns > 0) {
3702+
clear_weakrefs_ns = interp->gc.clear_weakrefs_end_ns - interp->gc.freelists_end_ns;
3703+
}
3704+
if (interp->gc.clear_weakrefs_end_ns > 0 && interp->gc.stw3_end_ns > 0) {
3705+
stw3_ns = interp->gc.stw3_end_ns - interp->gc.clear_weakrefs_end_ns;
3706+
}
36463707
if (interp->gc.cleanup_start_ns > 0 && interp->gc.cleanup_end_ns > 0) {
36473708
cleanup_ns = interp->gc.cleanup_end_ns - interp->gc.cleanup_start_ns;
36483709
}
@@ -3654,45 +3715,25 @@ _PyGC_FTParallelGetStats(PyInterpreterState *interp)
36543715
total_ns = interp->gc.mark_heap_end_ns - interp->gc.phase_start_ns;
36553716
}
36563717

3657-
PyObject *update_refs_obj = PyLong_FromLongLong(update_refs_ns);
3658-
if (update_refs_obj == NULL ||
3659-
PyDict_SetItemString(phase_timing, "update_refs_ns", update_refs_obj) < 0) {
3660-
Py_XDECREF(update_refs_obj);
3661-
Py_DECREF(phase_timing);
3662-
Py_DECREF(result);
3663-
return NULL;
3664-
}
3665-
Py_DECREF(update_refs_obj);
3666-
3667-
PyObject *mark_heap_obj = PyLong_FromLongLong(mark_heap_ns);
3668-
if (mark_heap_obj == NULL ||
3669-
PyDict_SetItemString(phase_timing, "mark_heap_ns", mark_heap_obj) < 0) {
3670-
Py_XDECREF(mark_heap_obj);
3671-
Py_DECREF(phase_timing);
3672-
Py_DECREF(result);
3673-
return NULL;
3674-
}
3675-
Py_DECREF(mark_heap_obj);
3676-
3677-
PyObject *cleanup_obj = PyLong_FromLongLong(cleanup_ns);
3678-
if (cleanup_obj == NULL ||
3679-
PyDict_SetItemString(phase_timing, "cleanup_ns", cleanup_obj) < 0) {
3680-
Py_XDECREF(cleanup_obj);
3681-
Py_DECREF(phase_timing);
3682-
Py_DECREF(result);
3683-
return NULL;
3684-
}
3685-
Py_DECREF(cleanup_obj);
3686-
3687-
PyObject *total_obj = PyLong_FromLongLong(total_ns);
3688-
if (total_obj == NULL ||
3689-
PyDict_SetItemString(phase_timing, "total_ns", total_obj) < 0) {
3690-
Py_XDECREF(total_obj);
3691-
Py_DECREF(phase_timing);
3692-
Py_DECREF(result);
3693-
return NULL;
3694-
}
3695-
Py_DECREF(total_obj);
3718+
// Add all timing values
3719+
ADD_TIMING("update_refs_ns", update_refs_ns);
3720+
ADD_TIMING("mark_heap_ns", mark_heap_ns);
3721+
ADD_TIMING("scan_heap_ns", scan_heap_ns);
3722+
ADD_TIMING("disable_deferred_ns", disable_deferred_ns);
3723+
ADD_TIMING("find_weakrefs_ns", find_weakrefs_ns);
3724+
ADD_TIMING("stw1_ns", stw1_ns);
3725+
ADD_TIMING("objs_decref_ns", objs_decref_ns);
3726+
ADD_TIMING("weakref_callbacks_ns", weakref_callbacks_ns);
3727+
ADD_TIMING("finalize_ns", finalize_ns);
3728+
ADD_TIMING("stw2_ns", stw2_ns);
3729+
ADD_TIMING("resurrection_ns", resurrection_ns);
3730+
ADD_TIMING("freelists_ns", freelists_ns);
3731+
ADD_TIMING("clear_weakrefs_ns", clear_weakrefs_ns);
3732+
ADD_TIMING("stw3_ns", stw3_ns);
3733+
ADD_TIMING("cleanup_ns", cleanup_ns);
3734+
ADD_TIMING("total_ns", total_ns);
3735+
3736+
#undef ADD_TIMING
36963737

36973738
if (PyDict_SetItemString(result, "phase_timing", phase_timing) < 0) {
36983739
Py_DECREF(phase_timing);

0 commit comments

Comments
 (0)