Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
gh-138122: Validate base frame before caching in remote debugging fra…
…me cache

The frame cache in the remote debugging module was storing frame chains
without validating that they reached the base frame. This could happen
when a frame chain was interrupted or when the process state changed
during reading, resulting in incomplete stacks being cached. Subsequent
samples that hit the cache would then produce flamegraphs that didn't
reach the bottom of the call stack.

The fix passes base_frame_addr through to process_frame_chain() which
already has validation logic to ensure the frame walk terminates at the
expected sentinel frame. By enabling this validation in the caching code
path and tracking whether we've confirmed reaching the base frame, we
now only store complete frame chains in the cache. When extending from
cached data, we trust that the cached frames were already validated at
storage time, maintaining the invariant that cached stacks are always
complete.

An integration test using deeply nested generators that oscillate the
stack depth is added to verify that all sampled stacks contain the entry
point function. This catches regressions where incomplete stacks might
be cached and returned.
  • Loading branch information
pablogsal committed Dec 17, 2025
commit 2cd7686564014dde232851e88c3f9370cf5d4e7a
Original file line number Diff line number Diff line change
Expand Up @@ -863,3 +863,98 @@ def test_async_aware_running_sees_only_cpu_task(self):
self.assertGreater(cpu_percentage, 90.0,
f"cpu_leaf should dominate samples in 'running' mode, "
f"got {cpu_percentage:.1f}% ({cpu_leaf_samples}/{total})")


def _generate_deep_generators_script(chain_depth=20, recurse_depth=150):
"""Generate a script with deep nested generators for stress testing."""
lines = [
'import sys',
'sys.setrecursionlimit(5000)',
'',
]
# Generate chain of yield-from functions
for i in range(chain_depth - 1):
lines.extend([
f'def deep_yield_chain_{i}(n):',
f' yield ("L{i}", n)',
f' yield from deep_yield_chain_{i + 1}(n)',
'',
])
# Last chain function calls recursive_diver
lines.extend([
f'def deep_yield_chain_{chain_depth - 1}(n):',
f' yield ("L{chain_depth - 1}", n)',
f' yield from recursive_diver(n, {chain_depth})',
'',
'def recursive_diver(n, depth):',
' yield (f"DIVE_{depth}", n)',
f' if depth < {recurse_depth}:',
' yield from recursive_diver(n, depth + 1)',
' else:',
' for i in range(5):',
' yield (f"BOTTOM_{depth}", i)',
'',
'def oscillating_generator(iterations=1000):',
' for i in range(iterations):',
' yield ("OSCILLATE", i)',
' yield from deep_yield_chain_0(i)',
'',
'def run_forever():',
' while True:',
' for _ in oscillating_generator(10):',
' pass',
'',
'_test_sock.sendall(b"working")',
'run_forever()',
])
return '\n'.join(lines)


@requires_remote_subprocess_debugging()
class TestDeepGeneratorFrameCache(unittest.TestCase):
"""Test frame cache consistency with deep oscillating generator stacks."""

def test_all_stacks_share_same_base_frame(self):
"""Verify all sampled stacks reach the entry point function.

When profiling deep generators that oscillate up and down the call
stack, every sample should include the entry point function
(run_forever) in its call chain. If the frame cache stores
incomplete stacks, some samples will be missing this base function,
causing broken flamegraphs.
"""
script = _generate_deep_generators_script()
with test_subprocess(script, wait_for_working=True) as subproc:
collector = CollapsedStackCollector(sample_interval_usec=1, skip_idle=False)

with (
io.StringIO() as captured_output,
mock.patch("sys.stdout", captured_output),
):
profiling.sampling.sample.sample(
subproc.process.pid,
collector,
duration_sec=2,
)

samples_with_entry_point = 0
samples_without_entry_point = 0
total_samples = 0

for (call_tree, _thread_id), count in collector.stack_counter.items():
total_samples += count
if call_tree:
has_entry_point = call_tree and call_tree[0][2] == "<module>"
if has_entry_point:
samples_with_entry_point += count
else:
samples_without_entry_point += count
Comment thread
pablogsal marked this conversation as resolved.

self.assertGreater(total_samples, 100,
f"Expected at least 100 samples, got {total_samples}")

self.assertEqual(samples_without_entry_point, 0,
f"Found {samples_without_entry_point}/{total_samples} samples "
f"missing the entry point function 'run_forever'. This indicates "
f"incomplete stacks are being returned, likely due to frame cache "
f"storing partial stack traces.")
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Fix incomplete stack traces in the Tachyon profiler's frame cache when
profiling code with deeply nested generators. The frame cache now validates
that stack traces reach the base frame before caching, preventing broken
flamegraphs. Patch by Pablo Galindo.
9 changes: 7 additions & 2 deletions Modules/_remote_debugging/_remote_debugging.h
Original file line number Diff line number Diff line change
Expand Up @@ -429,7 +429,8 @@ extern int process_frame_chain(
int *stopped_at_cached_frame,
uintptr_t *frame_addrs,
Py_ssize_t *num_addrs,
Py_ssize_t max_addrs
Py_ssize_t max_addrs,
uintptr_t *out_last_frame_addr
);

/* Frame cache functions */
Expand All @@ -447,18 +448,22 @@ extern int frame_cache_lookup_and_extend(
Py_ssize_t *num_addrs,
Py_ssize_t max_addrs);
// Returns: 1 = stored, 0 = not stored (graceful), -1 = error
// Only stores complete stacks that reach base_frame_addr
extern int frame_cache_store(
RemoteUnwinderObject *unwinder,
uint64_t thread_id,
PyObject *frame_list,
const uintptr_t *addrs,
Py_ssize_t num_addrs);
Py_ssize_t num_addrs,
uintptr_t base_frame_addr,
uintptr_t last_frame_visited);

extern int collect_frames_with_cache(
RemoteUnwinderObject *unwinder,
uintptr_t frame_addr,
StackChunkList *chunks,
PyObject *frame_info,
uintptr_t base_frame_addr,
uintptr_t gc_frame,
uintptr_t last_profiled_frame,
uint64_t thread_id);
Expand Down
16 changes: 15 additions & 1 deletion Modules/_remote_debugging/frame_cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -194,19 +194,33 @@ frame_cache_lookup_and_extend(
}

// Store frame list with addresses in cache
// Only stores complete stacks that reach base_frame_addr (validation done internally)
// Returns: 1 = stored successfully, 0 = not stored (graceful degradation), -1 = error
int
frame_cache_store(
RemoteUnwinderObject *unwinder,
uint64_t thread_id,
PyObject *frame_list,
const uintptr_t *addrs,
Py_ssize_t num_addrs)
Py_ssize_t num_addrs,
uintptr_t base_frame_addr,
uintptr_t last_frame_visited)
{
if (!unwinder->frame_cache || thread_id == 0) {
return 0;
}

// Validate we have a complete stack before caching.
// Only cache if last_frame_visited matches base_frame_addr (the sentinel
// at the bottom of the stack). Note: we use last_frame_visited rather than
// addrs[num_addrs-1] because the base frame is visited but not added to the
// addrs array (it returns frame==NULL from is_frame_valid due to
// owner==FRAME_OWNED_BY_INTERPRETER).
if (base_frame_addr != 0 && last_frame_visited != base_frame_addr) {
// Incomplete stack - don't cache (graceful degradation)
return 0;
}

// Clamp to max frames
if (num_addrs > FRAME_CACHE_MAX_FRAMES) {
num_addrs = FRAME_CACHE_MAX_FRAMES;
Expand Down
41 changes: 29 additions & 12 deletions Modules/_remote_debugging/frames.c
Original file line number Diff line number Diff line change
Expand Up @@ -265,18 +265,22 @@ process_frame_chain(
int *stopped_at_cached_frame,
uintptr_t *frame_addrs, // optional: C array to receive frame addresses
Py_ssize_t *num_addrs, // in/out: current count / updated count
Py_ssize_t max_addrs) // max capacity of frame_addrs array
Py_ssize_t max_addrs, // max capacity of frame_addrs array
uintptr_t *out_last_frame_addr) // optional: receives last frame address visited
{
uintptr_t frame_addr = initial_frame_addr;
uintptr_t prev_frame_addr = 0;
uintptr_t last_frame_addr = 0; // Track last frame visited for validation
const size_t MAX_FRAMES = 1024 + 512;
size_t frame_count = 0;

// Initialize output flag
// Initialize output parameters
if (stopped_at_cached_frame) {
*stopped_at_cached_frame = 0;
}
if (out_last_frame_addr) {
*out_last_frame_addr = 0;
}

// Quick check: if current_frame == last_profiled_frame, entire stack is unchanged
if (last_profiled_frame != 0 && initial_frame_addr == last_profiled_frame) {
Expand Down Expand Up @@ -390,6 +394,11 @@ process_frame_chain(
return -1;
}

// Set output parameter for caller (needed for cache validation)
if (out_last_frame_addr) {
*out_last_frame_addr = last_frame_addr;
}

return 0;
}

Expand Down Expand Up @@ -537,6 +546,7 @@ collect_frames_with_cache(
uintptr_t frame_addr,
StackChunkList *chunks,
PyObject *frame_info,
uintptr_t base_frame_addr,
uintptr_t gc_frame,
uintptr_t last_profiled_frame,
uint64_t thread_id)
Expand All @@ -551,11 +561,13 @@ collect_frames_with_cache(
uintptr_t addrs[FRAME_CACHE_MAX_FRAMES];
Py_ssize_t num_addrs = 0;
Py_ssize_t frames_before = PyList_GET_SIZE(frame_info);
uintptr_t last_frame_visited = 0;

int stopped_at_cached = 0;
if (process_frame_chain(unwinder, frame_addr, chunks, frame_info, 0, gc_frame,
if (process_frame_chain(unwinder, frame_addr, chunks, frame_info, base_frame_addr, gc_frame,
last_profiled_frame, &stopped_at_cached,
addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES) < 0) {
addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES,
&last_frame_visited) < 0) {
return -1;
}

Expand All @@ -575,23 +587,28 @@ collect_frames_with_cache(
// Cache miss - continue walking from last_profiled_frame to get the rest
STATS_INC(unwinder, frame_cache_misses);
Py_ssize_t frames_before_walk = PyList_GET_SIZE(frame_info);
if (process_frame_chain(unwinder, last_profiled_frame, chunks, frame_info, 0, gc_frame,
0, NULL, addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES) < 0) {
if (process_frame_chain(unwinder, last_profiled_frame, chunks, frame_info, base_frame_addr, gc_frame,
0, NULL, addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES,
&last_frame_visited) < 0) {
return -1;
}
STATS_ADD(unwinder, frames_read_from_memory, PyList_GET_SIZE(frame_info) - frames_before_walk);
} else {
// Partial cache hit
// Partial cache hit - cache was validated when stored, so we trust it
STATS_INC(unwinder, frame_cache_partial_hits);
STATS_ADD(unwinder, frames_read_from_cache, PyList_GET_SIZE(frame_info) - frames_before_cache);
}
} else if (last_profiled_frame == 0) {
// No cache involvement (no last_profiled_frame or cache disabled)
STATS_INC(unwinder, frame_cache_misses);
} else {
if (last_profiled_frame == 0) {
// No cache involvement (no last_profiled_frame or cache disabled)
STATS_INC(unwinder, frame_cache_misses);
}
}

// Store in cache (frame_cache_store handles truncation if num_addrs > FRAME_CACHE_MAX_FRAMES)
if (frame_cache_store(unwinder, thread_id, frame_info, addrs, num_addrs) < 0) {
// Store in cache - frame_cache_store validates internally that we have a
// complete stack (reached base_frame_addr) before actually storing
if (frame_cache_store(unwinder, thread_id, frame_info, addrs, num_addrs,
base_frame_addr, last_frame_visited) < 0) {
return -1;
}

Expand Down
4 changes: 2 additions & 2 deletions Modules/_remote_debugging/threads.c
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,7 @@ unwind_stack_for_thread(
uintptr_t last_profiled_frame = GET_MEMBER(uintptr_t, ts,
unwinder->debug_offsets.thread_state.last_profiled_frame);
if (collect_frames_with_cache(unwinder, frame_addr, &chunks, frame_info,
gc_frame, last_profiled_frame, tid) < 0) {
base_frame_addr, gc_frame, last_profiled_frame, tid) < 0) {
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to collect frames");
goto error;
}
Expand All @@ -444,7 +444,7 @@ unwind_stack_for_thread(
} else {
// No caching - process entire frame chain with base_frame validation
if (process_frame_chain(unwinder, frame_addr, &chunks, frame_info,
base_frame_addr, gc_frame, 0, NULL, NULL, NULL, 0) < 0) {
base_frame_addr, gc_frame, 0, NULL, NULL, NULL, 0, NULL) < 0) {
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process frame chain");
goto error;
}
Expand Down
Loading