From d5b9f8d7f39ae0d65b9460c05c5d21a6c63254ef Mon Sep 17 00:00:00 2001 From: Shamil Date: Sun, 12 Apr 2026 03:14:50 +0300 Subject: [PATCH] gh-142831: Fix use-after-free in json encoder during re-entrant mutation (gh-142851) User callbacks invoked during JSON encoding (e.g. the `default` callback or a custom string encoder) can mutate or clear the dict or sequence being encoded, invalidating borrowed references to items, keys, and values. Hold strong references unconditionally while iterating. Co-authored-by: Kumar Aditya Co-authored-by: Gregory P. Smith (cherry picked from commit 235fa7244a0474c492ae98ee444529c7ba2a9047) --- Lib/test/test_json/test_speedups.py | 61 +++++++++++++++++++ ...-12-17-04-10-35.gh-issue-142831.ee3t4L.rst | 2 + Modules/_json.c | 29 ++++++++- 3 files changed, 89 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-12-17-04-10-35.gh-issue-142831.ee3t4L.rst diff --git a/Lib/test/test_json/test_speedups.py b/Lib/test/test_json/test_speedups.py index 4c0aa5f993b30fd..0b22a0bf4b95387 100644 --- a/Lib/test/test_json/test_speedups.py +++ b/Lib/test/test_json/test_speedups.py @@ -1,4 +1,5 @@ from test.test_json import CTest +from test.support import gc_collect class BadBool: @@ -111,3 +112,63 @@ def test_current_indent_level(self): self.assertEqual(enc(['spam', {'ham': 'eggs'}], 3)[0], expected2) self.assertRaises(TypeError, enc, ['spam', {'ham': 'eggs'}], 3.0) self.assertRaises(TypeError, enc, ['spam', {'ham': 'eggs'}]) + + def test_mutate_dict_items_during_encode(self): + # gh-142831: Clearing the items list via a re-entrant key encoder + # must not cause a use-after-free. BadDict.items() returns a + # mutable list; encode_str clears it while iterating. + items = None + + class BadDict(dict): + def items(self): + nonlocal items + items = [("boom", object())] + return items + + cleared = False + def encode_str(obj): + nonlocal items, cleared + if items is not None: + items.clear() + items = None + cleared = True + gc_collect() + return '"x"' + + encoder = self.json.encoder.c_make_encoder( + None, lambda o: "null", + encode_str, None, + ": ", ", ", False, + False, True + ) + + # Must not crash (use-after-free under ASan before fix) + encoder(BadDict(real=1), 0) + self.assertTrue(cleared) + + def test_mutate_list_during_encode(self): + # gh-142831: Clearing a list mid-iteration via the default + # callback must not cause a use-after-free. + call_count = 0 + lst = [object() for _ in range(10)] + + def default(obj): + nonlocal call_count + call_count += 1 + if call_count == 3: + lst.clear() + gc_collect() + return None + + encoder = self.json.encoder.c_make_encoder( + None, default, + self.json.encoder.c_encode_basestring, None, + ": ", ", ", False, + False, True + ) + + # Must not crash (use-after-free under ASan before fix) + encoder(lst, 0) + # Verify the mutation path was actually hit and the loop + # stopped iterating after the list was cleared. + self.assertEqual(call_count, 3) diff --git a/Misc/NEWS.d/next/Library/2025-12-17-04-10-35.gh-issue-142831.ee3t4L.rst b/Misc/NEWS.d/next/Library/2025-12-17-04-10-35.gh-issue-142831.ee3t4L.rst new file mode 100644 index 000000000000000..5fa3cd2727a9e57 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-12-17-04-10-35.gh-issue-142831.ee3t4L.rst @@ -0,0 +1,2 @@ +Fix a crash in the :mod:`json` module where a use-after-free could occur if +the object being encoded is modified during serialization. diff --git a/Modules/_json.c b/Modules/_json.c index 39ec9a969cf04d4..39cdb9fd4f40c84 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -1702,9 +1702,13 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, for (Py_ssize_t i = 0; i < PyList_GET_SIZE(items); i++) { PyObject *item = PyList_GET_ITEM(items, i); + // gh-142831: encoder_encode_key_value() can invoke user code + // that mutates the items list, invalidating this borrowed ref. + Py_INCREF(item); if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { PyErr_SetString(PyExc_ValueError, "items must return 2-tuples"); + Py_DECREF(item); goto bail; } @@ -1712,18 +1716,30 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, value = PyTuple_GET_ITEM(item, 1); if (encoder_encode_key_value(s, writer, &first, dct, key, value, indent_level, indent_cache, - separator) < 0) + separator) < 0) { + Py_DECREF(item); goto bail; + } + Py_DECREF(item); } Py_CLEAR(items); } else { Py_ssize_t pos = 0; while (PyDict_Next(dct, &pos, &key, &value)) { + // gh-142831: encoder_encode_key_value() can invoke user code + // that mutates the dict, invalidating these borrowed refs. + Py_INCREF(key); + Py_INCREF(value); if (encoder_encode_key_value(s, writer, &first, dct, key, value, indent_level, indent_cache, - separator) < 0) + separator) < 0) { + Py_DECREF(key); + Py_DECREF(value); goto bail; + } + Py_DECREF(key); + Py_DECREF(value); } } @@ -1800,14 +1816,21 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, } for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) { PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i); + // gh-142831: encoder_listencode_obj() can invoke user code + // that mutates the sequence, invalidating this borrowed ref. + Py_INCREF(obj); if (i) { - if (PyUnicodeWriter_WriteStr(writer, separator) < 0) + if (PyUnicodeWriter_WriteStr(writer, separator) < 0) { + Py_DECREF(obj); goto bail; + } } if (encoder_listencode_obj(s, writer, obj, indent_level, indent_cache)) { _PyErr_FormatNote("when serializing %T item %zd", seq, i); + Py_DECREF(obj); goto bail; } + Py_DECREF(obj); } if (ident != NULL) { if (PyDict_DelItem(s->markers, ident))