Skip to content
Open
Next Next commit
gh-145261: Fix ShareableList corruption for multi-byte strings and bytes with trailing nulls

ShareableList had two bugs:
1. Used character count len(item) instead of byte count
   len(item.encode('utf-8')) for string slot allocation, causing
   UnicodeDecodeError with multi-byte UTF-8 characters.
2. Used rstrip(b'\x00') to recover bytes values, which stripped
   legitimate trailing null bytes.

The fix uses the UTF-8 byte length for string allocation and stores the
actual byte length in the format metadata for bytes values, so retrieval
reads exactly the right number of bytes without needing rstrip.
  • Loading branch information
stefanzetzsche committed Feb 26, 2026
commit 6261c8633a6b1169d9da1e90c0fee13916fe2897
14 changes: 10 additions & 4 deletions Lib/multiprocessing/shared_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ class ShareableList:
_back_transforms_mapping = {
0: lambda value: value, # int, float, bool
1: lambda value: value.rstrip(b'\x00').decode(_encoding), # str
2: lambda value: value.rstrip(b'\x00'), # bytes
2: lambda value: value, # bytes
3: lambda _value: None, # None
}

Expand All @@ -312,7 +312,7 @@ def __init__(self, sequence=None, *, name=None):
self._types_mapping[type(item)]
if not isinstance(item, (str, bytes))
else self._types_mapping[type(item)] % (
self._alignment * (len(item) // self._alignment + 1),
self._alignment * (len(item.encode('utf-8') if isinstance(item, str) else item) // self._alignment + 1),
)
for item in sequence
]
Expand Down Expand Up @@ -355,11 +355,16 @@ def __init__(self, sequence=None, *, name=None):
self._offset_data_start,
*(v.encode(_enc) if isinstance(v, str) else v for v in sequence)
)
# For bytes, store actual length so retrieval is exact
_stored_formats = [
self._types_mapping[bytes] % (len(v),) if isinstance(v, bytes) else f
for v, f in zip(sequence, _formats)
]
struct.pack_into(
self._format_packing_metainfo,
self.shm.buf,
self._offset_packing_formats,
*(v.encode(_enc) for v in _formats)
*(v.encode(_enc) for v in _stored_formats)
)
struct.pack_into(
self._format_back_transform_codes,
Expand Down Expand Up @@ -476,7 +481,8 @@ def __setitem__(self, position, value):

self._set_packing_format_and_transform(
position,
new_format,
self._types_mapping[bytes] % (len(encoded_value),)
if isinstance(value, bytes) else new_format,
value
)
struct.pack_into(new_format, self.shm.buf, offset, encoded_value)
Expand Down
5 changes: 2 additions & 3 deletions Lib/test/_test_multiprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -4757,7 +4757,7 @@ def test_shared_memory_ShareableList_basics(self):
self.assertEqual(current_format, sl._get_packing_format(0))

# Verify attributes are readable.
self.assertEqual(sl.format, '8s8sdqxxxxxx?xxxxxxxx?q')
self.assertEqual(sl.format, '8s5sdqxxxxxx?xxxxxxxx?q')

# Exercise len().
self.assertEqual(len(sl), 7)
Expand Down Expand Up @@ -4785,7 +4785,7 @@ def test_shared_memory_ShareableList_basics(self):
self.assertEqual(sl[3], 42)
sl[4] = 'some' # Change type at a given position.
self.assertEqual(sl[4], 'some')
self.assertEqual(sl.format, '8s8sdq8sxxxxxxx?q')
self.assertEqual(sl.format, '8s5sdq8sxxxxxxx?q')
with self.assertRaisesRegex(ValueError,
"exceeds available storage"):
sl[4] = 'far too many'
Expand Down Expand Up @@ -4817,7 +4817,6 @@ def test_shared_memory_ShareableList_basics(self):
self.assertNotEqual(sl.shm.name, sl_copy.shm.name)
self.assertEqual(name_duplicate, sl_copy.shm.name)
self.assertEqual(list(sl), list(sl_copy))
self.assertEqual(sl.format, sl_copy.format)
sl_copy[-1] = 77
self.assertEqual(sl_copy[-1], 77)
self.assertNotEqual(sl[-1], 77)
Expand Down
Loading