From 5014d9fbca77451d4047f4fec487c1b074683903 Mon Sep 17 00:00:00 2001 From: Timur Mamedov Date: Wed, 24 Jun 2026 15:55:51 -0400 Subject: [PATCH] gh-151475: Fix data race in faulthandler watchdog on free-threaded builds Add a PyMutex to serialize dump_traceback_later() and cancel_dump_traceback_later() calls. Without this, concurrent arm/cancel from multiple threads corrupts the cancel_event/running lock handshake, causing an abort from unlocking an unheld lock. --- Include/internal/pycore_faulthandler.h | 2 ++ ...-06-24-14-00-00.gh-issue-151475.FhTrSf.rst | 4 +++ Modules/faulthandler.c | 27 +++++++++++++------ 3 files changed, 25 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2026-06-24-14-00-00.gh-issue-151475.FhTrSf.rst diff --git a/Include/internal/pycore_faulthandler.h b/Include/internal/pycore_faulthandler.h index 9ddd70d39ed0d51..2f3b524d69b50bb 100644 --- a/Include/internal/pycore_faulthandler.h +++ b/Include/internal/pycore_faulthandler.h @@ -48,6 +48,8 @@ struct faulthandler_user_signal { struct _faulthandler_runtime_state { + PyMutex mutex; + struct { int enabled; PyObject *file; diff --git a/Misc/NEWS.d/next/Library/2026-06-24-14-00-00.gh-issue-151475.FhTrSf.rst b/Misc/NEWS.d/next/Library/2026-06-24-14-00-00.gh-issue-151475.FhTrSf.rst new file mode 100644 index 000000000000000..05a3a61abec8705 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-06-24-14-00-00.gh-issue-151475.FhTrSf.rst @@ -0,0 +1,4 @@ +Fix data race in :mod:`faulthandler` where concurrent calls to +:func:`~faulthandler.dump_traceback_later` and +:func:`~faulthandler.cancel_dump_traceback_later` could corrupt the +watchdog lock handshake on free-threaded builds. diff --git a/Modules/faulthandler.c b/Modules/faulthandler.c index 3b0647152ceffed..df6778e31d7fe2e 100644 --- a/Modules/faulthandler.c +++ b/Modules/faulthandler.c @@ -56,6 +56,7 @@ typedef struct { int all_threads; } fault_handler_t; +#define faulthandler_mutex _PyRuntime.faulthandler.mutex #define fatal_error _PyRuntime.faulthandler.fatal_error #define thread _PyRuntime.faulthandler.thread @@ -836,17 +837,31 @@ faulthandler_dump_traceback_later_impl(PyObject *module, return NULL; } + /* format the timeout before acquiring the lock (no shared state) */ + header = format_timeout(timeout_us); + if (header == NULL) { + Py_XDECREF(file); + return PyErr_NoMemory(); + } + header_len = strlen(header); + + PyMutex_Lock(&faulthandler_mutex); + if (!thread.running) { thread.running = PyThread_allocate_lock(); if (!thread.running) { + PyMutex_Unlock(&faulthandler_mutex); Py_XDECREF(file); + PyMem_Free(header); return PyErr_NoMemory(); } } if (!thread.cancel_event) { thread.cancel_event = PyThread_allocate_lock(); if (!thread.cancel_event || !thread.running) { + PyMutex_Unlock(&faulthandler_mutex); Py_XDECREF(file); + PyMem_Free(header); return PyErr_NoMemory(); } @@ -855,14 +870,6 @@ faulthandler_dump_traceback_later_impl(PyObject *module, PyThread_acquire_lock(thread.cancel_event, 1); } - /* format the timeout */ - header = format_timeout(timeout_us); - if (header == NULL) { - Py_XDECREF(file); - return PyErr_NoMemory(); - } - header_len = strlen(header); - /* Cancel previous thread, if running */ cancel_dump_traceback_later(); @@ -885,11 +892,13 @@ faulthandler_dump_traceback_later_impl(PyObject *module, Py_CLEAR(thread.file); PyMem_Free(header); thread.header = NULL; + PyMutex_Unlock(&faulthandler_mutex); PyErr_SetString(PyExc_RuntimeError, "unable to start watchdog thread"); return NULL; } + PyMutex_Unlock(&faulthandler_mutex); Py_RETURN_NONE; } @@ -904,7 +913,9 @@ static PyObject * faulthandler_cancel_dump_traceback_later_py_impl(PyObject *module) /*[clinic end generated code: output=2cf303015d39c926 input=51ad64b6ca8412a4]*/ { + PyMutex_Lock(&faulthandler_mutex); cancel_dump_traceback_later(); + PyMutex_Unlock(&faulthandler_mutex); Py_RETURN_NONE; }