From 67e7b05b9503cac5d32351c80047b5743dcd322c Mon Sep 17 00:00:00 2001 From: tonghuaroot Date: Tue, 16 Jun 2026 16:48:39 +0800 Subject: [PATCH 1/2] gh-151535: Bound _remote_debugging asyncio awaited_by graph recursion --- Lib/test/test_external_inspection.py | 79 +++++++++++++++++++ ...-06-15-10-08-29.gh-issue-151535.CIumHj.rst | 3 + Modules/_remote_debugging/_remote_debugging.h | 1 + Modules/_remote_debugging/asyncio.c | 46 +++++++++-- 4 files changed, 122 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2026-06-15-10-08-29.gh-issue-151535.CIumHj.rst diff --git a/Lib/test/test_external_inspection.py b/Lib/test/test_external_inspection.py index 6b1529aa173f01c..67ad58d7f595cd6 100644 --- a/Lib/test/test_external_inspection.py +++ b/Lib/test/test_external_inspection.py @@ -1164,6 +1164,85 @@ async def main(): finally: _cleanup_sockets(client_socket, server_socket) + @skip_if_not_supported + @unittest.skipIf( + sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED, + "Test only runs on Linux with process_vm_readv support", + ) + def test_async_deep_awaited_by_chain_is_bounded(self): + # A very deep awaited_by chain in the target (which a corrupted or + # concurrently-mutated remote process can also present as a cycle) must + # not drive unbounded C recursion in the debugger. get_async_stack_trace + # should raise instead of overflowing the stack. + depth = 2000 + port = find_unused_port() + script = textwrap.dedent( + f"""\ + import asyncio + import socket + import time + + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.connect(('localhost', {port})) + + async def main(): + started = asyncio.Event() + + async def leaf(): + while not started.is_set(): + await asyncio.sleep(0) + end = time.time() + 10_000 + while time.time() < end: + pass + + leaf_t = asyncio.ensure_future(leaf()) + + async def waiter(child): + await child + + cur = leaf_t + tasks = [cur] + for _ in range({depth}): + cur = asyncio.ensure_future(waiter(cur)) + tasks.append(cur) + + for _ in range(5): + await asyncio.sleep(0) + + sock.sendall(b"ready") + started.set() + try: + await leaf_t + finally: + for t in tasks: + t.cancel() + + asyncio.run(main()) + """ + ) + + with os_helper.temp_dir() as work_dir: + script_dir = os.path.join(work_dir, "script_pkg") + os.mkdir(script_dir) + + server_socket = _create_server_socket(port) + script_name = _make_test_script(script_dir, "script", script) + client_socket = None + try: + with _managed_subprocess([sys.executable, script_name]) as p: + client_socket, _ = server_socket.accept() + server_socket.close() + server_socket = None + + _wait_for_signal(client_socket, b"ready") + + unwinder = RemoteUnwinder(p.pid) + with self.assertRaises(RuntimeError) as cm: + unwinder.get_async_stack_trace() + self.assertIn("too deep or cyclic", str(cm.exception)) + finally: + _cleanup_sockets(client_socket, server_socket) + @skip_if_not_supported @unittest.skipIf( sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED, diff --git a/Misc/NEWS.d/next/Library/2026-06-15-10-08-29.gh-issue-151535.CIumHj.rst b/Misc/NEWS.d/next/Library/2026-06-15-10-08-29.gh-issue-151535.CIumHj.rst new file mode 100644 index 000000000000000..2b5af804f8229d3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-06-15-10-08-29.gh-issue-151535.CIumHj.rst @@ -0,0 +1,3 @@ +:meth:`!RemoteUnwinder.get_async_stack_trace` no longer crashes when the +target process presents a deeply nested or concurrently-mutated ``awaited_by`` +graph; a :exc:`RuntimeError` is now raised instead. diff --git a/Modules/_remote_debugging/_remote_debugging.h b/Modules/_remote_debugging/_remote_debugging.h index 635e6e208902af5..3ed809769af931d 100644 --- a/Modules/_remote_debugging/_remote_debugging.h +++ b/Modules/_remote_debugging/_remote_debugging.h @@ -147,6 +147,7 @@ typedef enum _WIN32_THREADSTATE { #define MAX_STACK_CHUNK_SIZE (16 * 1024 * 1024) /* 16 MB max for stack chunks */ #define MAX_LONG_DIGITS 64 /* Allows values up to ~2^1920 */ #define MAX_SET_TABLE_SIZE (1 << 20) /* 1 million entries max for set iteration */ +#define MAX_TASK_AWAITED_BY_DEPTH 1000 /* Bound recursion over the awaited_by graph */ #ifndef MAX #define MAX(a, b) ((a) > (b) ? (a) : (b)) diff --git a/Modules/_remote_debugging/asyncio.c b/Modules/_remote_debugging/asyncio.c index 44a9a3cbce0061a..93b4000b9824477 100644 --- a/Modules/_remote_debugging/asyncio.c +++ b/Modules/_remote_debugging/asyncio.c @@ -516,6 +516,17 @@ parse_task( // Forward declaration for mutual recursion static int process_waiter_task(RemoteUnwinderObject *unwinder, uintptr_t key_addr, void *context); +// Carries the recursion depth so a cyclic or corrupted remote awaited_by graph +// cannot drive unbounded C recursion and overflow the debugger's stack. +typedef struct { + PyObject *result; + int depth; +} waiter_context_t; + +static int process_task_and_waiters_impl( + RemoteUnwinderObject *unwinder, uintptr_t task_addr, PyObject *result, + int depth); + // Processor function for parsing tasks in sets static int process_task_parser( @@ -658,11 +669,12 @@ process_single_task_node( return -1; } -int -process_task_and_waiters( +static int +process_task_and_waiters_impl( RemoteUnwinderObject *unwinder, uintptr_t task_addr, - PyObject *result + PyObject *result, + int depth ) { // First, add this task to the result if (process_single_task_node(unwinder, task_addr, NULL, result) < 0) { @@ -670,7 +682,17 @@ process_task_and_waiters( } // Now find all tasks that are waiting for this task and process them - return process_task_awaited_by(unwinder, task_addr, process_waiter_task, result); + waiter_context_t ctx = {result, depth}; + return process_task_awaited_by(unwinder, task_addr, process_waiter_task, &ctx); +} + +int +process_task_and_waiters( + RemoteUnwinderObject *unwinder, + uintptr_t task_addr, + PyObject *result +) { + return process_task_and_waiters_impl(unwinder, task_addr, result, 0); } // Processor function for task waiters @@ -680,8 +702,17 @@ process_waiter_task( uintptr_t key_addr, void *context ) { - PyObject *result = (PyObject *)context; - return process_task_and_waiters(unwinder, key_addr, result); + waiter_context_t *ctx = (waiter_context_t *)context; + if (ctx->depth >= MAX_TASK_AWAITED_BY_DEPTH) { + PyErr_SetString(PyExc_RuntimeError, + "Task awaited_by chain is too deep or cyclic " + "(corrupted remote memory)"); + set_exception_cause(unwinder, PyExc_RuntimeError, + "Task awaited_by recursion limit exceeded"); + return -1; + } + return process_task_and_waiters_impl(unwinder, key_addr, ctx->result, + ctx->depth + 1); } /* ============================================================================ @@ -978,7 +1009,8 @@ process_running_task_chain( } // Now find all tasks that are waiting for this task and process them - if (process_task_awaited_by(unwinder, running_task_addr, process_waiter_task, result) < 0) { + waiter_context_t ctx = {result, 0}; + if (process_task_awaited_by(unwinder, running_task_addr, process_waiter_task, &ctx) < 0) { return -1; } From dcd1db0c5d6e68257328faf04dca19d49913c454 Mon Sep 17 00:00:00 2001 From: tonghuaroot Date: Tue, 16 Jun 2026 17:58:51 +0800 Subject: [PATCH 2/2] gh-151535: Walk awaited_by graph iteratively to avoid C stack overflow The depth-bounded recursion still overflowed the debugger's C stack on platforms with a small default thread stack (Windows uses 1 MiB): every level keeps a SIZEOF_TASK_OBJ buffer alive in process_task_awaited_by, so the MAX_TASK_AWAITED_BY_DEPTH limit of 1000 was only reached after several MiB of stack had already been consumed, and the process aborted with a stack overflow before the limit could fire. Walk the awaited_by graph with an explicit, heap-allocated work-stack instead of mutual recursion, so the C stack depth stays constant no matter how deep the graph is. The depth limit is retained as a cycle guard for corrupted or concurrently-mutated remote memory. Also make the regression test deterministic under load: signal readiness from the leaf task itself, immediately before it busy-spins, so the observer always inspects while the full chain is built and rooted at a running task. The previous handshake was sent before the leaf started running and could race, letting the observer see a shallow graph. --- Lib/test/test_external_inspection.py | 6 +- Modules/_remote_debugging/_remote_debugging.h | 2 +- Modules/_remote_debugging/asyncio.c | 130 ++++++++++++------ 3 files changed, 97 insertions(+), 41 deletions(-) diff --git a/Lib/test/test_external_inspection.py b/Lib/test/test_external_inspection.py index 67ad58d7f595cd6..c19bd745872a6fe 100644 --- a/Lib/test/test_external_inspection.py +++ b/Lib/test/test_external_inspection.py @@ -1191,6 +1191,11 @@ async def main(): async def leaf(): while not started.is_set(): await asyncio.sleep(0) + # The whole awaited_by chain is built and this is now the + # running task at the bottom of it. Signal here, then + # busy-spin, so the observer inspects while the chain is + # fully present and rooted at a running task. + sock.sendall(b"ready") end = time.time() + 10_000 while time.time() < end: pass @@ -1209,7 +1214,6 @@ async def waiter(child): for _ in range(5): await asyncio.sleep(0) - sock.sendall(b"ready") started.set() try: await leaf_t diff --git a/Modules/_remote_debugging/_remote_debugging.h b/Modules/_remote_debugging/_remote_debugging.h index 3ed809769af931d..2a91772d9a31bf6 100644 --- a/Modules/_remote_debugging/_remote_debugging.h +++ b/Modules/_remote_debugging/_remote_debugging.h @@ -147,7 +147,7 @@ typedef enum _WIN32_THREADSTATE { #define MAX_STACK_CHUNK_SIZE (16 * 1024 * 1024) /* 16 MB max for stack chunks */ #define MAX_LONG_DIGITS 64 /* Allows values up to ~2^1920 */ #define MAX_SET_TABLE_SIZE (1 << 20) /* 1 million entries max for set iteration */ -#define MAX_TASK_AWAITED_BY_DEPTH 1000 /* Bound recursion over the awaited_by graph */ +#define MAX_TASK_AWAITED_BY_DEPTH 1000 /* Bound the awaited_by graph walk so a cycle terminates */ #ifndef MAX #define MAX(a, b) ((a) > (b) ? (a) : (b)) diff --git a/Modules/_remote_debugging/asyncio.c b/Modules/_remote_debugging/asyncio.c index 93b4000b9824477..aad844d06c08e50 100644 --- a/Modules/_remote_debugging/asyncio.c +++ b/Modules/_remote_debugging/asyncio.c @@ -513,19 +513,24 @@ parse_task( * TASK AWAITED_BY PROCESSING * ============================================================================ */ -// Forward declaration for mutual recursion -static int process_waiter_task(RemoteUnwinderObject *unwinder, uintptr_t key_addr, void *context); - -// Carries the recursion depth so a cyclic or corrupted remote awaited_by graph -// cannot drive unbounded C recursion and overflow the debugger's stack. +// The awaited_by graph is walked with an explicit, heap-allocated work-stack +// rather than C recursion. A deeply nested -- or, in a corrupted or +// concurrently-mutated remote process, cyclic -- chain would otherwise drive +// unbounded recursion and overflow the debugger's own C stack: each level holds +// a SIZEOF_TASK_OBJ buffer (see process_task_awaited_by), so even a few hundred +// levels can exhaust a 1 MB stack. MAX_TASK_AWAITED_BY_DEPTH bounds the walk so +// a cycle terminates. typedef struct { - PyObject *result; + uintptr_t addr; int depth; -} waiter_context_t; +} awaited_by_entry_t; -static int process_task_and_waiters_impl( - RemoteUnwinderObject *unwinder, uintptr_t task_addr, PyObject *result, - int depth); +typedef struct { + awaited_by_entry_t *items; + Py_ssize_t size; + Py_ssize_t capacity; + int current_depth; +} awaited_by_stack_t; // Processor function for parsing tasks in sets static int @@ -670,49 +675,88 @@ process_single_task_node( } static int -process_task_and_waiters_impl( +awaited_by_stack_push( RemoteUnwinderObject *unwinder, - uintptr_t task_addr, - PyObject *result, + awaited_by_stack_t *stack, + uintptr_t addr, int depth ) { - // First, add this task to the result - if (process_single_task_node(unwinder, task_addr, NULL, result) < 0) { - return -1; + if (stack->size >= stack->capacity) { + Py_ssize_t new_capacity = stack->capacity ? stack->capacity * 2 : 16; + awaited_by_entry_t *new_items = PyMem_Realloc( + stack->items, (size_t)new_capacity * sizeof(awaited_by_entry_t)); + if (new_items == NULL) { + PyErr_NoMemory(); + set_exception_cause(unwinder, PyExc_MemoryError, + "Failed to grow awaited_by work-stack"); + return -1; + } + stack->items = new_items; + stack->capacity = new_capacity; } - - // Now find all tasks that are waiting for this task and process them - waiter_context_t ctx = {result, depth}; - return process_task_awaited_by(unwinder, task_addr, process_waiter_task, &ctx); -} - -int -process_task_and_waiters( - RemoteUnwinderObject *unwinder, - uintptr_t task_addr, - PyObject *result -) { - return process_task_and_waiters_impl(unwinder, task_addr, result, 0); + stack->items[stack->size].addr = addr; + stack->items[stack->size].depth = depth; + stack->size++; + return 0; } -// Processor function for task waiters +// set_entry_processor_func: enqueue a task waiting on the one currently being +// expanded, one level deeper. The depth bound makes a cyclic or corrupted +// awaited_by graph terminate instead of looping forever. static int -process_waiter_task( +push_awaited_by_waiter( RemoteUnwinderObject *unwinder, uintptr_t key_addr, void *context ) { - waiter_context_t *ctx = (waiter_context_t *)context; - if (ctx->depth >= MAX_TASK_AWAITED_BY_DEPTH) { + awaited_by_stack_t *stack = (awaited_by_stack_t *)context; + if (stack->current_depth >= MAX_TASK_AWAITED_BY_DEPTH) { PyErr_SetString(PyExc_RuntimeError, "Task awaited_by chain is too deep or cyclic " "(corrupted remote memory)"); set_exception_cause(unwinder, PyExc_RuntimeError, - "Task awaited_by recursion limit exceeded"); + "Task awaited_by depth limit exceeded"); return -1; } - return process_task_and_waiters_impl(unwinder, key_addr, ctx->result, - ctx->depth + 1); + return awaited_by_stack_push(unwinder, stack, key_addr, + stack->current_depth + 1); +} + +// Drain the work-stack: append each task node to result, then enqueue the +// tasks waiting on it. Depth-first, with no C recursion over the graph. +static int +drain_awaited_by_stack( + RemoteUnwinderObject *unwinder, + PyObject *result, + awaited_by_stack_t *stack +) { + while (stack->size > 0) { + awaited_by_entry_t entry = stack->items[--stack->size]; + if (process_single_task_node(unwinder, entry.addr, NULL, result) < 0) { + return -1; + } + stack->current_depth = entry.depth; + if (process_task_awaited_by(unwinder, entry.addr, + push_awaited_by_waiter, stack) < 0) { + return -1; + } + } + return 0; +} + +int +process_task_and_waiters( + RemoteUnwinderObject *unwinder, + uintptr_t task_addr, + PyObject *result +) { + awaited_by_stack_t stack = {0}; + int result_code = -1; + if (awaited_by_stack_push(unwinder, &stack, task_addr, 0) == 0) { + result_code = drain_awaited_by_stack(unwinder, result, &stack); + } + PyMem_Free(stack.items); + return result_code; } /* ============================================================================ @@ -1008,9 +1052,17 @@ process_running_task_chain( return -1; } - // Now find all tasks that are waiting for this task and process them - waiter_context_t ctx = {result, 0}; - if (process_task_awaited_by(unwinder, running_task_addr, process_waiter_task, &ctx) < 0) { + // Now find all tasks that are waiting for this task and process them with + // the same iterative, heap-stacked walk as process_task_and_waiters (the + // running task itself is already recorded via the frame chain above). + awaited_by_stack_t stack = {0}; + int waiters_code = process_task_awaited_by(unwinder, running_task_addr, + push_awaited_by_waiter, &stack); + if (waiters_code == 0) { + waiters_code = drain_awaited_by_stack(unwinder, result, &stack); + } + PyMem_Free(stack.items); + if (waiters_code < 0) { return -1; }