Skip to content

Commit a548b24

Browse files
committed
gh-151613: Fix remote debugging frame cache ABA
Fixes #151613. The remote debugging frame cache previously used only the last_profiled_frame address as its cache anchor. If a frame returned and a later frame reused the same _PyInterpreterFrame address, the profiler could accept a stale cache entry and splice parent frames from a different call chain into the current stack. This adds a last_profiled_frame_seq counter next to last_profiled_frame, increments it when the anchor advances, stores it in frame cache entries, and validates cache hits against both the frame address and the sequence. Cache miss walks now copy stack chunks before storing new cache entries so stored continuations come from a stable snapshot. The new regression test exercises alternating call chains and checks that cached stacks never contain frames from both branches.
1 parent a8d74c0 commit a548b24

12 files changed

Lines changed: 218 additions & 14 deletions

Include/cpython/pystate.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@ struct _ts {
143143
struct _PyInterpreterFrame *base_frame;
144144

145145
struct _PyInterpreterFrame *last_profiled_frame;
146+
uintptr_t last_profiled_frame_seq;
146147

147148
Py_tracefunc c_profilefunc;
148149
Py_tracefunc c_tracefunc;

Include/internal/pycore_debug_offsets.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ typedef struct _Py_DebugOffsets {
104104
uint64_t current_frame;
105105
uint64_t base_frame;
106106
uint64_t last_profiled_frame;
107+
uint64_t last_profiled_frame_seq;
107108
uint64_t thread_id;
108109
uint64_t native_thread_id;
109110
uint64_t datastack_chunk;
@@ -294,6 +295,7 @@ typedef struct _Py_DebugOffsets {
294295
.current_frame = offsetof(PyThreadState, current_frame), \
295296
.base_frame = offsetof(PyThreadState, base_frame), \
296297
.last_profiled_frame = offsetof(PyThreadState, last_profiled_frame), \
298+
.last_profiled_frame_seq = offsetof(PyThreadState, last_profiled_frame_seq), \
297299
.thread_id = offsetof(PyThreadState, thread_id), \
298300
.native_thread_id = offsetof(PyThreadState, native_thread_id), \
299301
.datastack_chunk = offsetof(PyThreadState, datastack_chunk), \

Include/internal/pycore_interpframe.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,12 +292,15 @@ _PyThreadState_GetFrame(PyThreadState *tstate)
292292
// This avoids corrupting the cache when transient frames (called and returned
293293
// between profiler samples) update last_profiled_frame to addresses the
294294
// profiler never saw.
295+
// The sequence distinguishes this anchor from a later frame that reuses the
296+
// same _PyInterpreterFrame address.
295297
#define _PyThreadState_UpdateLastProfiledFrame(tstate, frame, previous) \
296298
do { \
297299
PyThreadState *tstate_ = (tstate); \
298300
_PyInterpreterFrame *frame_ = (frame); \
299301
if (tstate_->last_profiled_frame == frame_) { \
300302
tstate_->last_profiled_frame = (previous); \
303+
tstate_->last_profiled_frame_seq++; \
301304
} \
302305
} while (0)
303306

Lib/test/test_external_inspection.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3451,6 +3451,88 @@ def level1():
34513451
# Parent frames: must match exactly
34523452
self.assertEqual(loc_cached, loc_no_cache)
34533453

3454+
@skip_if_not_supported
3455+
@unittest.skipIf(
3456+
sys.platform != "linux" or not PROCESS_VM_READV_SUPPORTED,
3457+
"Test only runs on Linux with process_vm_readv support",
3458+
)
3459+
def test_cache_rejects_reused_frame_address_aba(self):
3460+
"""Test that frame address reuse cannot splice cached parent frames."""
3461+
script_body = """\
3462+
sock.sendall(b"ready")
3463+
3464+
def burn_a():
3465+
total = 0
3466+
for i in range(20000):
3467+
total += i
3468+
return total
3469+
3470+
def burn_b():
3471+
total = 0
3472+
for i in range(20000):
3473+
total += i
3474+
return total
3475+
3476+
def a_leaf():
3477+
return burn_a()
3478+
3479+
def b_leaf():
3480+
return burn_b()
3481+
3482+
def a_parent():
3483+
return a_leaf()
3484+
3485+
def b_parent():
3486+
return b_leaf()
3487+
3488+
while True:
3489+
a_parent()
3490+
b_parent()
3491+
"""
3492+
3493+
with self._target_process(script_body) as (p, client_socket, _):
3494+
_wait_for_signal(client_socket, b"ready")
3495+
unwinder = RemoteUnwinder(
3496+
p.pid, all_threads=True, cache_frames=True, stats=True
3497+
)
3498+
bad_stacks = []
3499+
seen = 0
3500+
branch_a = {"a_parent", "a_leaf", "burn_a"}
3501+
branch_b = {"b_parent", "b_leaf", "burn_b"}
3502+
3503+
for _ in range(8000):
3504+
with contextlib.suppress(*TRANSIENT_ERRORS):
3505+
traces = unwinder.get_stack_trace()
3506+
for interp in traces:
3507+
for thread in interp.threads:
3508+
in_a = False
3509+
in_b = False
3510+
for frame in thread.frame_info:
3511+
name = frame.funcname
3512+
if name in branch_a:
3513+
in_a = True
3514+
elif name in branch_b:
3515+
in_b = True
3516+
if in_a and in_b:
3517+
break
3518+
if not (in_a or in_b):
3519+
continue
3520+
seen += 1
3521+
if in_a and in_b:
3522+
funcs = [f.funcname for f in thread.frame_info]
3523+
bad_stacks.append(funcs)
3524+
if len(bad_stacks) >= 3:
3525+
break
3526+
3527+
stats = unwinder.get_stats()
3528+
3529+
self.assertGreater(seen, 0)
3530+
self.assertGreater(
3531+
stats["frame_cache_hits"] + stats["frame_cache_partial_hits"], 0
3532+
)
3533+
self.assertGreater(stats["frames_read_from_cache"], 0)
3534+
self.assertEqual(bad_stacks, [])
3535+
34543536
@skip_if_not_supported
34553537
@unittest.skipIf(
34563538
sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED,
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
Fix skewed stack trackes in the Tachyon profiler when caching is enabled and
1+
Fix skewed stack traces in the Tachyon profiler when caching is enabled and
22
when generators and coroutines are profiled, by updating
33
``tstate->last_profiled_frame`` at every frame-removal site. The issue resulted
44
in total erasure of some callers. Patch by Maurycy Pawłowski-Wieroński.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix another way the Tachyon profiler frame cache could produce impossible
2+
mixed stack traces when ``_PyInterpreterFrame`` addresses are reused, by
3+
validating cached frame anchors with a sequence counter.

Modules/_remote_debugging/_remote_debugging.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,7 @@ typedef struct {
228228
typedef struct {
229229
uint64_t thread_id; // 0 = empty slot
230230
uintptr_t thread_state_addr;
231+
uintptr_t last_profiled_frame_seq;
231232
uintptr_t addrs[FRAME_CACHE_MAX_FRAMES];
232233
Py_ssize_t num_addrs;
233234
PyObject *thread_id_obj; // owned reference, NULL if empty
@@ -435,6 +436,7 @@ typedef struct {
435436
uintptr_t base_frame_addr; // Sentinel at bottom (for validation)
436437
uintptr_t gc_frame; // GC frame address (0 if not tracking)
437438
uintptr_t last_profiled_frame; // Last cached frame (0 if no cache)
439+
uintptr_t last_profiled_frame_seq;
438440
StackChunkList *chunks; // Pre-copied stack chunks
439441
int skip_first_frame; // Skip frame_addr itself (continue from its caller)
440442
RemoteReadPrefetch prefetch; // Optional already-read thread/frame buffers
@@ -626,11 +628,18 @@ extern void frame_cache_invalidate_stale(RemoteUnwinderObject *unwinder, PyObjec
626628
extern int frame_cache_lookup_and_extend(
627629
RemoteUnwinderObject *unwinder,
628630
uint64_t thread_id,
631+
uintptr_t thread_state_addr,
629632
uintptr_t last_profiled_frame,
633+
uintptr_t last_profiled_frame_seq,
630634
PyObject *frame_info,
631635
uintptr_t *frame_addrs,
632636
Py_ssize_t *num_addrs,
633637
Py_ssize_t max_addrs);
638+
extern int frame_cache_anchor_matches(
639+
RemoteUnwinderObject *unwinder,
640+
uintptr_t thread_state_addr,
641+
uintptr_t last_profiled_frame,
642+
uintptr_t last_profiled_frame_seq);
634643
// Returns: 1 = stored, 0 = not stored (graceful), -1 = error
635644
// Only stores complete stacks that reach base_frame_addr
636645
extern int frame_cache_store(
@@ -640,6 +649,7 @@ extern int frame_cache_store(
640649
const uintptr_t *addrs,
641650
Py_ssize_t num_addrs,
642651
uintptr_t thread_state_addr,
652+
uintptr_t last_profiled_frame_seq,
643653
uintptr_t base_frame_addr,
644654
uintptr_t last_frame_visited);
645655

Modules/_remote_debugging/debug_offsets_validation.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
#define FIELD_SIZE(type, member) sizeof(((type *)0)->member)
3232

3333
enum {
34-
PY_REMOTE_DEBUG_OFFSETS_TOTAL_SIZE = 880,
34+
PY_REMOTE_DEBUG_OFFSETS_TOTAL_SIZE = 888,
3535
PY_REMOTE_ASYNC_DEBUG_OFFSETS_TOTAL_SIZE = 104,
3636
};
3737

@@ -261,7 +261,8 @@ validate_fixed_field(
261261
APPLY(thread_state, next, sizeof(uintptr_t), _Alignof(uintptr_t), buffer_size); \
262262
APPLY(thread_state, current_frame, sizeof(uintptr_t), _Alignof(uintptr_t), buffer_size); \
263263
APPLY(thread_state, base_frame, sizeof(uintptr_t), _Alignof(uintptr_t), buffer_size); \
264-
APPLY(thread_state, last_profiled_frame, sizeof(uintptr_t), _Alignof(uintptr_t), buffer_size)
264+
APPLY(thread_state, last_profiled_frame, sizeof(uintptr_t), _Alignof(uintptr_t), buffer_size); \
265+
APPLY(thread_state, last_profiled_frame_seq, sizeof(uintptr_t), _Alignof(uintptr_t), buffer_size)
265266

266267
#define PY_REMOTE_DEBUG_INTERPRETER_STATE_FIELDS(APPLY, buffer_size) \
267268
APPLY(interpreter_state, id, sizeof(int64_t), _Alignof(int64_t), buffer_size); \

Modules/_remote_debugging/frame_cache.c

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,19 +147,59 @@ frame_cache_invalidate_stale(RemoteUnwinderObject *unwinder, PyObject *result)
147147
Py_CLEAR(unwinder->frame_cache[i].frame_list);
148148
unwinder->frame_cache[i].thread_id = 0;
149149
unwinder->frame_cache[i].thread_state_addr = 0;
150+
unwinder->frame_cache[i].last_profiled_frame_seq = 0;
150151
unwinder->frame_cache[i].num_addrs = 0;
151152
STATS_INC(unwinder, stale_cache_invalidations);
152153
}
153154
}
154155
}
155156

157+
int
158+
frame_cache_anchor_matches(
159+
RemoteUnwinderObject *unwinder,
160+
uintptr_t thread_state_addr,
161+
uintptr_t last_profiled_frame,
162+
uintptr_t last_profiled_frame_seq)
163+
{
164+
uintptr_t live_frame = 0;
165+
uintptr_t live_seq = 0;
166+
uintptr_t frame_offset = (uintptr_t)unwinder->debug_offsets.thread_state.last_profiled_frame;
167+
uintptr_t seq_offset = (uintptr_t)unwinder->debug_offsets.thread_state.last_profiled_frame_seq;
168+
169+
if (seq_offset == frame_offset + sizeof(uintptr_t)) {
170+
uintptr_t live_anchor[2];
171+
if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle,
172+
thread_state_addr + frame_offset,
173+
sizeof(live_anchor),
174+
live_anchor) < 0) {
175+
PyErr_Clear();
176+
return 0;
177+
}
178+
live_frame = live_anchor[0];
179+
live_seq = live_anchor[1];
180+
}
181+
else {
182+
if (read_ptr(unwinder, thread_state_addr + frame_offset, &live_frame) < 0) {
183+
PyErr_Clear();
184+
return 0;
185+
}
186+
if (read_ptr(unwinder, thread_state_addr + seq_offset, &live_seq) < 0) {
187+
PyErr_Clear();
188+
return 0;
189+
}
190+
}
191+
return live_frame == last_profiled_frame && live_seq == last_profiled_frame_seq;
192+
}
193+
156194
// Find last_profiled_frame in cache and extend frame_info with cached continuation
157195
// If frame_addrs is provided (not NULL), also extends it with cached addresses
158196
int
159197
frame_cache_lookup_and_extend(
160198
RemoteUnwinderObject *unwinder,
161199
uint64_t thread_id,
200+
uintptr_t thread_state_addr,
162201
uintptr_t last_profiled_frame,
202+
uintptr_t last_profiled_frame_seq,
163203
PyObject *frame_info,
164204
uintptr_t *frame_addrs,
165205
Py_ssize_t *num_addrs,
@@ -173,6 +213,9 @@ frame_cache_lookup_and_extend(
173213
if (!entry || !entry->frame_list) {
174214
return 0;
175215
}
216+
if (entry->thread_state_addr != thread_state_addr) {
217+
return 0;
218+
}
176219

177220
assert(entry->num_addrs >= 0 && entry->num_addrs <= FRAME_CACHE_MAX_FRAMES);
178221

@@ -189,8 +232,24 @@ frame_cache_lookup_and_extend(
189232
return 0; // Not found
190233
}
191234
assert(start_idx < entry->num_addrs);
235+
// Synthetic marker frames (<native>/<GC>) are stored as addr-0 entries but
236+
// never increment last_profiled_frame_seq in the target (only real frame
237+
// pops do). Count the real frames before start_idx so the sequence check is
238+
// not thrown off by markers sitting between the leaf and the anchor.
239+
uintptr_t real_pops = 0;
240+
for (Py_ssize_t i = 0; i < start_idx; i++) {
241+
if (entry->addrs[i] != 0) {
242+
real_pops++;
243+
}
244+
}
245+
if (entry->last_profiled_frame_seq + real_pops != last_profiled_frame_seq) {
246+
return 0;
247+
}
192248

193249
Py_ssize_t num_frames = PyList_GET_SIZE(entry->frame_list);
250+
if (start_idx >= num_frames) {
251+
return 0;
252+
}
194253

195254
// Extend frame_info with frames ABOVE start_idx (not including it).
196255
// The frame at start_idx (last_profiled_frame) was the executing frame
@@ -200,6 +259,11 @@ frame_cache_lookup_and_extend(
200259
if (cache_start >= num_frames) {
201260
return 0; // Nothing above last_profiled_frame to extend with
202261
}
262+
if (!frame_cache_anchor_matches(unwinder, thread_state_addr,
263+
last_profiled_frame,
264+
last_profiled_frame_seq)) {
265+
return 0;
266+
}
203267

204268
PyObject *slice = PyList_GetSlice(entry->frame_list, cache_start, num_frames);
205269
if (!slice) {
@@ -235,6 +299,7 @@ frame_cache_store(
235299
const uintptr_t *addrs,
236300
Py_ssize_t num_addrs,
237301
uintptr_t thread_state_addr,
302+
uintptr_t last_profiled_frame_seq,
238303
uintptr_t base_frame_addr,
239304
uintptr_t last_frame_visited)
240305
{
@@ -277,6 +342,7 @@ frame_cache_store(
277342
}
278343
entry->thread_id = thread_id;
279344
entry->thread_state_addr = thread_state_addr;
345+
entry->last_profiled_frame_seq = last_profiled_frame_seq;
280346
if (entry->thread_id_obj == NULL) {
281347
entry->thread_id_obj = PyLong_FromUnsignedLongLong(thread_id);
282348
if (entry->thread_id_obj == NULL) {

0 commit comments

Comments
 (0)