gh-145261: Fix ShareableList corruption for multi-byte strings and bytes with trailing nulls

ShareableList had two bugs:

1. It used the character count len(item) instead of the byte count len(item.encode('utf-8')) for string slot allocation, causing UnicodeDecodeError with multi-byte UTF-8 characters.
2. It used rstrip(b'\x00') to recover bytes values, which stripped legitimate trailing null bytes.

The fix uses the UTF-8 byte length for string allocation and stores the actual byte length in the format metadata for bytes values, so retrieval reads exactly the right number of bytes without needing rstrip.
python · zetzschest · Feb 26, 2026 · Feb 26, 2026 · Feb 27, 2026 · Feb 27, 2026
commit 6261c8633a6b1169d9da1e90c0fee13916fe2897
@@ -287,7 +287,7 @@ class ShareableList:
     _back_transforms_mapping = {
         0: lambda value: value,                   # int, float, bool
         1: lambda value: value.rstrip(b'\x00').decode(_encoding),  # str
-        2: lambda value: value.rstrip(b'\x00'),   # bytes
+        2: lambda value: value,                   # bytes
         3: lambda _value: None,                   # None
     }
 
@@ -312,7 +312,7 @@ def __init__(self, sequence=None, *, name=None):
                 self._types_mapping[type(item)]
                     if not isinstance(item, (str, bytes))
                     else self._types_mapping[type(item)] % (
-                        self._alignment * (len(item) // self._alignment + 1),
+                        self._alignment * (len(item.encode('utf-8') if isinstance(item, str) else item) // self._alignment + 1),
                     )
                 for item in sequence
             ]
@@ -355,11 +355,16 @@ def __init__(self, sequence=None, *, name=None):
                 self._offset_data_start,
                 *(v.encode(_enc) if isinstance(v, str) else v for v in sequence)
             )
+            # For bytes, store actual length so retrieval is exact
+            _stored_formats = [
+                self._types_mapping[bytes] % (len(v),) if isinstance(v, bytes) else f
+                for v, f in zip(sequence, _formats)
+            ]
             struct.pack_into(
                 self._format_packing_metainfo,
                 self.shm.buf,
                 self._offset_packing_formats,
-                *(v.encode(_enc) for v in _formats)
+                *(v.encode(_enc) for v in _stored_formats)
             )
             struct.pack_into(
                 self._format_back_transform_codes,
@@ -476,7 +481,8 @@ def __setitem__(self, position, value):
 
         self._set_packing_format_and_transform(
             position,
-            new_format,
+            self._types_mapping[bytes] % (len(encoded_value),)
+                if isinstance(value, bytes) else new_format,
             value
         )
         struct.pack_into(new_format, self.shm.buf, offset, encoded_value)

@@ -4757,7 +4757,7 @@ def test_shared_memory_ShareableList_basics(self):
         self.assertEqual(current_format, sl._get_packing_format(0))
 
         # Verify attributes are readable.
-        self.assertEqual(sl.format, '8s8sdqxxxxxx?xxxxxxxx?q')
+        self.assertEqual(sl.format, '8s5sdqxxxxxx?xxxxxxxx?q')
 
         # Exercise len().
         self.assertEqual(len(sl), 7)
@@ -4785,7 +4785,7 @@ def test_shared_memory_ShareableList_basics(self):
         self.assertEqual(sl[3], 42)
         sl[4] = 'some'  # Change type at a given position.
         self.assertEqual(sl[4], 'some')
-        self.assertEqual(sl.format, '8s8sdq8sxxxxxxx?q')
+        self.assertEqual(sl.format, '8s5sdq8sxxxxxxx?q')
         with self.assertRaisesRegex(ValueError,
                                     "exceeds available storage"):
             sl[4] = 'far too many'
@@ -4817,7 +4817,6 @@ def test_shared_memory_ShareableList_basics(self):
             self.assertNotEqual(sl.shm.name, sl_copy.shm.name)
             self.assertEqual(name_duplicate, sl_copy.shm.name)
             self.assertEqual(list(sl), list(sl_copy))
-            self.assertEqual(sl.format, sl_copy.format)
             sl_copy[-1] = 77
             self.assertEqual(sl_copy[-1], 77)
             self.assertNotEqual(sl[-1], 77)