diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index 69235d444b915e7..28d566633eefda1 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -614,6 +614,32 @@ def test_join(self): with self.assertRaises(TypeError): dot_join([memoryview(b"ab"), "cd", b"ef"]) + def test_join_concurrent_buffer_mutation(self): + # __buffer__() can release the GIL, letting another thread concurrently + # mutate the joined sequence (simulated here by mutating in __buffer__). + # See: https://github.com/python/cpython/issues/151295 + def make_seq(mutate): + # Item is only referenced from the list slot, so mutate() frees it. + class Item: + def __buffer__(self, flags): + mutate(seq) + return memoryview(b'x') + seq = [b'a', Item(), b'c'] + return seq + + for sep in (self.type2test(b''), self.type2test(b'::')): + with self.subTest(sep=sep): + # Changing the list length is reported as a RuntimeError. + seq = make_seq(lambda seq: seq.clear()) + self.assertRaises(RuntimeError, sep.join, seq) + + # The list length is unchanged, so the size-change recheck + # cannot fire: only keeping the item alive avoids the crash. + def replace(seq): + seq[1] = b'z' + seq = make_seq(replace) + self.assertEqual(sep.join(seq), sep.join([b'a', b'x', b'c'])) + def test_count(self): b = self.type2test(b'mississippi') i = 105 diff --git a/Misc/NEWS.d/next/Library/2026-06-11-00-00-00.gh-issue-151295.NQYUzW.rst b/Misc/NEWS.d/next/Library/2026-06-11-00-00-00.gh-issue-151295.NQYUzW.rst new file mode 100644 index 000000000000000..e9012f023ff7f77 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-06-11-00-00-00.gh-issue-151295.NQYUzW.rst @@ -0,0 +1,4 @@ +Fixed a crash (use-after-free) in :meth:`bytes.join` and +:meth:`bytearray.join` that could occur if an item's +:meth:`~object.__buffer__` concurrently mutates the sequence being joined. +The mutation is now reported as a :exc:`RuntimeError` instead. diff --git a/Objects/stringlib/join.h b/Objects/stringlib/join.h index de6bd83ffe4c8bc..deebfeadc0f4fd8 100644 --- a/Objects/stringlib/join.h +++ b/Objects/stringlib/join.h @@ -68,13 +68,18 @@ STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable) buffers[i].len = PyBytes_GET_SIZE(item); } else { + /* item is only borrowed; its __buffer__() may run Python that + drops the sequence's last reference to it. */ + Py_INCREF(item); if (PyObject_GetBuffer(item, &buffers[i], PyBUF_SIMPLE) != 0) { + Py_DECREF(item); PyErr_Format(PyExc_TypeError, "sequence item %zd: expected a bytes-like object, " "%.80s found", i, Py_TYPE(item)->tp_name); goto error; } + Py_DECREF(item); /* If the backing objects are mutable, then dropping the GIL * opens up race conditions where another thread tries to modify * the object which we hold a buffer on it. Such code has data