Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Include/internal/pycore_interp_structs.h
Original file line number Diff line number Diff line change
Expand Up @@ -708,7 +708,11 @@ struct _Py_unicode_ids {
struct _Py_unicode_state {
struct _Py_unicode_fs_codec fs_codec;

// Cached pointer to the unicodedata module's _ucnhash_CAPI struct;
// valid as long as ucnhash_capsule holds a strong reference to the
// owning capsule. See _PyUnicode_GetNameCAPI().
_PyUnicode_Name_CAPI *ucnhash_capi;
PyObject *ucnhash_capsule;

// Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId()
struct _Py_unicode_ids ids;
Expand Down
16 changes: 16 additions & 0 deletions Lib/test/test_unicodedata.py
Original file line number Diff line number Diff line change
Expand Up @@ -1106,6 +1106,22 @@ def test_failed_import_during_compiling(self):
"(can't load unicodedata module)"
self.assertIn(error, result.err.decode("ascii"))

def test_unicodedata_unload_reload(self):
# gh-149449: dropping unicodedata and running gc must not leave the
# cached _ucnhash_CAPI pointer dangling.
code = (
"import gc, sys\n"
"assert '\\N{GRINNING FACE}'.encode("
" 'ascii', errors='namereplace') == b'\\\\N{GRINNING FACE}'\n"
"compile(r\"x = '\\\\N{LATIN CAPITAL LETTER A}'\", '<x>', 'exec')\n"
"del sys.modules['unicodedata']\n"
"gc.collect()\n"
"assert '\\N{WINKING FACE}'.encode("
" 'ascii', errors='namereplace') == b'\\\\N{WINKING FACE}'\n"
"compile(r\"x = '\\\\N{LATIN CAPITAL LETTER B}'\", '<x>', 'exec')\n"
)
script_helper.assert_python_ok("-c", code)

def test_decimal_numeric_consistent(self):
# Test that decimal and numeric are consistent,
# i.e. if a character has a decimal value,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix a use-after-free crash when the :mod:`unicodedata` module was removed
from :data:`sys.modules` and garbage-collected between calls that decode
``\N{...}`` escapes or use the ``namereplace`` codec error handler.
43 changes: 39 additions & 4 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -6445,13 +6445,44 @@ _PyUnicode_GetNameCAPI(void)
_PyUnicode_Name_CAPI *ucnhash_capi;

ucnhash_capi = _Py_atomic_load_ptr(&interp->unicode.ucnhash_capi);
if (ucnhash_capi != NULL) {
return ucnhash_capi;
}

// The pointer we cache lives inside a PyCapsule owned by the
// unicodedata module. PyCapsule_Import() only returns the raw C
// pointer, so if unicodedata is later removed from sys.modules and
// garbage-collected, the capsule's destructor frees the underlying
// struct and any cached raw pointer is left dangling (gh-149449).
// Keep a strong reference to the capsule object on the interpreter
// state so the struct stays alive for the lifetime of the interpreter.
PyObject *module = PyImport_ImportModule("unicodedata");
if (module == NULL) {
return NULL;
}
PyObject *capsule = PyObject_GetAttrString(module, "_ucnhash_CAPI");
Py_DECREF(module);
if (capsule == NULL) {
return NULL;
}
ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_GetPointer(
capsule, PyUnicodeData_CAPSULE_NAME);
if (ucnhash_capi == NULL) {
ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
PyUnicodeData_CAPSULE_NAME, 1);
Py_DECREF(capsule);
return NULL;
}

// Install the capsule into the interpreter state, but only if no other
// thread has installed one already. If another thread raced us and got
// there first, keep its capsule and drop our reference; both capsules
// expose the same struct, so the pointer values are functionally
// equivalent.
PyObject *expected = NULL;
if (_Py_atomic_compare_exchange_ptr(
&interp->unicode.ucnhash_capsule, &expected, capsule)) {
_Py_atomic_store_ptr(&interp->unicode.ucnhash_capi, ucnhash_capi);
}
else {
Py_DECREF(capsule);
}
return ucnhash_capi;
}

Expand Down Expand Up @@ -14959,8 +14990,12 @@ _PyUnicode_Fini(PyInterpreterState *interp)
_PyUnicode_FiniEncodings(&state->fs_codec);

// bpo-47182: force a unicodedata CAPI capsule re-import on
// subsequent initialization of interpreter.
// gh-149449: dropping the capsule reference below may run the capsule's
// destructor (if this was the last reference), which frees the malloc'd
// _PyUnicode_Name_CAPI struct. The raw pointer is therefore cleared
// first so callers never observe a dangling cache.
interp->unicode.ucnhash_capi = NULL;
Py_CLEAR(interp->unicode.ucnhash_capsule);

unicode_clear_identifiers(state);
}
Expand Down
Loading