Skip to content

Commit 71db6f0

Browse files
eendebakpt, kumaraditya303
authored and committed
gh-149449: Fix use-after-free in _PyUnicode_GetNameCAPI (GH-150323)
(cherry picked from commit 43c60ec) Co-authored-by: Pieter Eendebak <pieter.eendebak@gmail.com> Co-authored-by: Kumar Aditya <kumaraditya@python.org>
1 parent 80c422d commit 71db6f0

4 files changed

Lines changed: 28 additions & 23 deletions

File tree

Lib/test/test_unicodedata.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -664,6 +664,22 @@ def test_failed_import_during_compiling(self):
664664
"(can't load unicodedata module)"
665665
self.assertIn(error, result.err.decode("ascii"))
666666

667+
def test_unicodedata_unload_reload(self):
668+
# gh-149449: dropping unicodedata and running gc must not leave the
669+
# cached _ucnhash_CAPI pointer dangling.
670+
code = (
671+
"import gc, sys\n"
672+
"assert '\\N{GRINNING FACE}'.encode("
673+
" 'ascii', errors='namereplace') == b'\\\\N{GRINNING FACE}'\n"
674+
"compile(r\"x = '\\\\N{LATIN CAPITAL LETTER A}'\", '<x>', 'exec')\n"
675+
"del sys.modules['unicodedata']\n"
676+
"gc.collect()\n"
677+
"assert '\\N{WINKING FACE}'.encode("
678+
" 'ascii', errors='namereplace') == b'\\\\N{WINKING FACE}'\n"
679+
"compile(r\"x = '\\\\N{LATIN CAPITAL LETTER B}'\", '<x>', 'exec')\n"
680+
)
681+
script_helper.assert_python_ok("-c", code)
682+
667683
def test_decimal_numeric_consistent(self):
668684
# Test that decimal and numeric are consistent,
669685
# i.e. if a character has a decimal value,
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix a use-after-free crash when the :mod:`unicodedata` module was removed
2+
from :data:`sys.modules` and garbage-collected between calls that decode
3+
``\N{...}`` escapes or use the ``namereplace`` codec error handler.

Modules/unicodedata.c

Lines changed: 8 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1501,32 +1501,17 @@ capi_getcode(const char* name, int namelen, Py_UCS4* code,
15011501
return _check_alias_and_seq(code, with_named_seq);
15021502
}
15031503

1504-
static void
1505-
unicodedata_destroy_capi(PyObject *capsule)
1506-
{
1507-
void *capi = PyCapsule_GetPointer(capsule, PyUnicodeData_CAPSULE_NAME);
1508-
PyMem_Free(capi);
1509-
}
1510-
15111504
static PyObject *
15121505
unicodedata_create_capi(void)
15131506
{
1514-
_PyUnicode_Name_CAPI *capi = PyMem_Malloc(sizeof(_PyUnicode_Name_CAPI));
1515-
if (capi == NULL) {
1516-
PyErr_NoMemory();
1517-
return NULL;
1518-
}
1519-
capi->getname = capi_getucname;
1520-
capi->getcode = capi_getcode;
1521-
1522-
PyObject *capsule = PyCapsule_New(capi,
1523-
PyUnicodeData_CAPSULE_NAME,
1524-
unicodedata_destroy_capi);
1525-
if (capsule == NULL) {
1526-
PyMem_Free(capi);
1527-
}
1528-
return capsule;
1529-
};
1507+
// Statically allocated so that any cached pointers stay valid after unicodedata
1508+
// is removed from sys.modules and the capsule is gc'd (gh-149449).
1509+
static _PyUnicode_Name_CAPI capi = {
1510+
.getname = capi_getucname,
1511+
.getcode = capi_getcode,
1512+
};
1513+
return PyCapsule_New(&capi, PyUnicodeData_CAPSULE_NAME, NULL);
1514+
}
15301515

15311516

15321517
/* -------------------------------------------------------------------- */

Tools/c-analyzer/cpython/ignored.tsv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,7 @@ Modules/pyexpat.c - error_info_of -
312312
Modules/pyexpat.c - handler_info -
313313
Modules/termios.c - termios_constants -
314314
Modules/timemodule.c init_timezone YEAR -
315+
Modules/unicodedata.c unicodedata_create_capi capi -
315316
Objects/bytearrayobject.c - _PyByteArray_empty_string -
316317
Objects/complexobject.c - c_1 -
317318
Objects/exceptions.c - static_exceptions -

0 commit comments

Comments
 (0)