diff --git a/Include/cpython/bytesobject.h b/Include/cpython/bytesobject.h index f284c5835df099a..6a3f2d0936c0264 100644 --- a/Include/cpython/bytesobject.h +++ b/Include/cpython/bytesobject.h @@ -5,7 +5,7 @@ typedef struct { PyObject_VAR_HEAD Py_hash_t ob_shash; - char ob_sval[1]; + char ob_sval[]; /* Invariants: * ob_sval contains space for 'ob_size+1' elements. diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 395725857b7c058..4bdb61d4a568ebe 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -1197,7 +1197,7 @@ class C(object): pass # EncodingMap import codecs, encodings.iso8859_3 x = codecs.charmap_build(encodings.iso8859_3.decoding_table) - check(x, size('32B2iB')) + check(x, size('32B2i')) # enumerate check(enumerate([]), size('n3P')) # reverse diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-03-30-14-06-40.bpo-40120.6ptcf4.rst b/Misc/NEWS.d/next/Core and Builtins/2020-03-30-14-06-40.bpo-40120.6ptcf4.rst new file mode 100644 index 000000000000000..0b0f6cdd6951034 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2020-03-30-14-06-40.bpo-40120.6ptcf4.rst @@ -0,0 +1,4 @@ +Fixed internal structure definitions such as PyBytesObject and unicode's +encoding_map so that access to their trailing unbounded character array no +longer relies on C undefined behavior; they now use the C99-approved +flexible array member syntax. diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index bd8af72ade5d3d9..1e125b101947249 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -28,6 +28,9 @@ _Py_IDENTIFIER(__bytes__); Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves 3 bytes per string allocation on a typical system. + + The + 1 accounts for the trailing \0 byte that we include as a safety + measure for code that treats the underlying char * as a C string. 
*/ #define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 3d99f11ecff6fea..a471cdc283f1854 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -8208,14 +8208,14 @@ struct encoding_map { PyObject_HEAD unsigned char level1[32]; int count2, count3; - unsigned char level23[1]; + unsigned char level23[]; }; static PyObject* encoding_map_size(PyObject *obj, PyObject* args) { struct encoding_map *map = (struct encoding_map*)obj; - return PyLong_FromLong(sizeof(*map) - 1 + 16*map->count2 + + return PyLong_FromLong(sizeof(*map) + 16*map->count2 + 128*map->count3); } @@ -8347,7 +8347,7 @@ PyUnicode_BuildEncodingMap(PyObject* string) /* Create a three-level trie */ result = PyObject_MALLOC(sizeof(struct encoding_map) + - 16*count2 + 128*count3 - 1); + 16*count2 + 128*count3); if (!result) return PyErr_NoMemory(); PyObject_Init(result, &EncodingMapType);