From 809f26381d59c4d90b986a6e1ea70ef8abf7362e Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Mon, 30 Mar 2020 13:12:52 -0700 Subject: [PATCH 1/5] Fix undefined behavior in PyBytesObject. Indexing an array beyond its declared size is undefined behavior. The fix is to use the standard C99 feature known as a "flexible array member". https://en.wikipedia.org/wiki/Flexible_array_member --- Include/cpython/bytesobject.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/cpython/bytesobject.h b/Include/cpython/bytesobject.h index f284c5835df099..6a3f2d0936c026 100644 --- a/Include/cpython/bytesobject.h +++ b/Include/cpython/bytesobject.h @@ -5,7 +5,7 @@ typedef struct { PyObject_VAR_HEAD Py_hash_t ob_shash; - char ob_sval[1]; + char ob_sval[]; /* Invariants: * ob_sval contains space for 'ob_size+1' elements. From d3329fe0c707a615a8d01507a9d9d0c75b6481a4 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Mon, 30 Mar 2020 13:53:35 -0700 Subject: [PATCH 2/5] Fix unicode's struct encoding_map similarly. 
--- Objects/unicodeobject.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 3d99f11ecff6fe..a471cdc283f185 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -8208,14 +8208,14 @@ struct encoding_map { PyObject_HEAD unsigned char level1[32]; int count2, count3; - unsigned char level23[1]; + unsigned char level23[]; }; static PyObject* encoding_map_size(PyObject *obj, PyObject* args) { struct encoding_map *map = (struct encoding_map*)obj; - return PyLong_FromLong(sizeof(*map) - 1 + 16*map->count2 + + return PyLong_FromLong(sizeof(*map) + 16*map->count2 + 128*map->count3); } @@ -8347,7 +8347,7 @@ PyUnicode_BuildEncodingMap(PyObject* string) /* Create a three-level trie */ result = PyObject_MALLOC(sizeof(struct encoding_map) + - 16*count2 + 128*count3 - 1); + 16*count2 + 128*count3); if (!result) return PyErr_NoMemory(); PyObject_Init(result, &EncodingMapType); From a1759d9920ae500a8df01c40cf3b0f804631ce90 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Mon, 30 Mar 2020 14:00:40 -0700 Subject: [PATCH 3/5] Describe the + 1 used in PyBytesObject_SIZE. --- Objects/bytesobject.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index bd8af72ade5d3d..1e125b10194724 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -28,6 +28,9 @@ _Py_IDENTIFIER(__bytes__); Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves 3 bytes per string allocation on a typical system. + + The + 1 accounts for the trailing \0 byte that we include as a safety + measure for code that treats the underlying char * as a C string. */ #define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1) From 630a26aa2151cd42051fe8ad4d62f5c82cf1659b Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Mon, 30 Mar 2020 14:14:57 -0700 Subject: [PATCH 4/5] news! 
--- .../2020-03-30-14-06-40.bpo-40120.6ptcf4.rst | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2020-03-30-14-06-40.bpo-40120.6ptcf4.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-03-30-14-06-40.bpo-40120.6ptcf4.rst b/Misc/NEWS.d/next/Core and Builtins/2020-03-30-14-06-40.bpo-40120.6ptcf4.rst new file mode 100644 index 00000000000000..0b0f6cdd695103 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2020-03-30-14-06-40.bpo-40120.6ptcf4.rst @@ -0,0 +1,4 @@ +Fixed internal structure definitions for structs such as PyBytesObject and +unicode's encoding_map to not rely on C undefined behavior for access to +their trailing unbounded character array in favor of C99 approved flexible +array member syntax. From cd483f6dcffc929c6309ae24a677a7e948b98677 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Mon, 30 Mar 2020 15:09:01 -0700 Subject: [PATCH 5/5] Fix test_sys, it encoded the struct definition. This is really nothing more than a change-detector test; it needs to match the C struct when no level23 mappings will be allocated. An empty char[] does not consume any C sizeof() space, whereas the previous `char[1]` consumed one alignment unit's worth, even when unused. --- Lib/test/test_sys.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 395725857b7c05..4bdb61d4a568eb 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -1197,7 +1197,7 @@ class C(object): pass # EncodingMap import codecs, encodings.iso8859_3 x = codecs.charmap_build(encodings.iso8859_3.decoding_table) - check(x, size('32B2iB')) + check(x, size('32B2i')) # enumerate check(enumerate([]), size('n3P')) # reverse