Skip to content

Commit 8f82506

Browse files
committed
Check the consistency of newly created strings using _PyUnicode_CheckConsistency(str, 1)
* In debug mode, fill the string data with invalid characters
* Also simplify reference counting in PyCodec_BackslashReplaceErrors() and PyCodec_XMLCharRefReplaceErrors()
1 parent 990eff0 commit 8f82506

File tree

10 files changed

+31
-14
lines changed

10 files changed

+31
-14
lines changed

Modules/_json.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,7 @@ ascii_escape_unicode(PyObject *pystr)
246246
}
247247
}
248248
output[chars++] = '"';
249+
assert(_PyUnicode_CheckConsistency(rval, 1));
249250
return rval;
250251
}
251252

Modules/md5module.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,7 @@ MD5_hexdigest(MD5object *self, PyObject *unused)
397397
c = (digest[i] & 0xf);
398398
hex_digest[j++] = Py_hexdigits[c];
399399
}
400+
assert(_PyUnicode_CheckConsistency(retval, 1));
400401
return retval;
401402
}
402403

Modules/sha1module.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,7 @@ SHA1_hexdigest(SHA1object *self, PyObject *unused)
373373
c = (digest[i] & 0xf);
374374
hex_digest[j++] = Py_hexdigits[c];
375375
}
376+
assert(_PyUnicode_CheckConsistency(retval, 1));
376377
return retval;
377378
}
378379

Modules/sha256module.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -466,6 +466,7 @@ SHA256_hexdigest(SHAobject *self, PyObject *unused)
466466
c = (digest[i] & 0xf);
467467
hex_digest[j++] = Py_hexdigits[c];
468468
}
469+
assert(_PyUnicode_CheckConsistency(retval, 1));
469470
return retval;
470471
}
471472

Modules/sha512module.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -532,6 +532,7 @@ SHA512_hexdigest(SHAobject *self, PyObject *unused)
532532
c = (digest[i] & 0xf);
533533
hex_digest[j++] = Py_hexdigits[c];
534534
}
535+
assert(_PyUnicode_CheckConsistency(retval, 1));
535536
return retval;
536537
}
537538

Objects/bytesobject.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -626,6 +626,7 @@ PyBytes_Repr(PyObject *obj, int smartquotes)
626626
*p++ = c;
627627
}
628628
*p++ = quote;
629+
assert(_PyUnicode_CheckConsistency(v, 1));
629630
return v;
630631
}
631632

Objects/unicodeobject.c

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -967,7 +967,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
967967
PyObject *obj;
968968
PyCompactUnicodeObject *unicode;
969969
void *data;
970-
int kind_state;
970+
enum PyUnicode_Kind kind;
971971
int is_sharing, is_ascii;
972972
Py_ssize_t char_size;
973973
Py_ssize_t struct_size;
@@ -986,17 +986,17 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
986986
is_sharing = 0;
987987
struct_size = sizeof(PyCompactUnicodeObject);
988988
if (maxchar < 128) {
989-
kind_state = PyUnicode_1BYTE_KIND;
989+
kind = PyUnicode_1BYTE_KIND;
990990
char_size = 1;
991991
is_ascii = 1;
992992
struct_size = sizeof(PyASCIIObject);
993993
}
994994
else if (maxchar < 256) {
995-
kind_state = PyUnicode_1BYTE_KIND;
995+
kind = PyUnicode_1BYTE_KIND;
996996
char_size = 1;
997997
}
998998
else if (maxchar < 65536) {
999-
kind_state = PyUnicode_2BYTE_KIND;
999+
kind = PyUnicode_2BYTE_KIND;
10001000
char_size = 2;
10011001
if (sizeof(wchar_t) == 2)
10021002
is_sharing = 1;
@@ -1007,7 +1007,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
10071007
"invalid maximum character passed to PyUnicode_New");
10081008
return NULL;
10091009
}
1010-
kind_state = PyUnicode_4BYTE_KIND;
1010+
kind = PyUnicode_4BYTE_KIND;
10111011
char_size = 4;
10121012
if (sizeof(wchar_t) == 4)
10131013
is_sharing = 1;
@@ -1041,27 +1041,27 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
10411041
_PyUnicode_LENGTH(unicode) = size;
10421042
_PyUnicode_HASH(unicode) = -1;
10431043
_PyUnicode_STATE(unicode).interned = 0;
1044-
_PyUnicode_STATE(unicode).kind = kind_state;
1044+
_PyUnicode_STATE(unicode).kind = kind;
10451045
_PyUnicode_STATE(unicode).compact = 1;
10461046
_PyUnicode_STATE(unicode).ready = 1;
10471047
_PyUnicode_STATE(unicode).ascii = is_ascii;
10481048
if (is_ascii) {
10491049
((char*)data)[size] = 0;
10501050
_PyUnicode_WSTR(unicode) = NULL;
10511051
}
1052-
else if (kind_state == PyUnicode_1BYTE_KIND) {
1052+
else if (kind == PyUnicode_1BYTE_KIND) {
10531053
((char*)data)[size] = 0;
10541054
_PyUnicode_WSTR(unicode) = NULL;
10551055
_PyUnicode_WSTR_LENGTH(unicode) = 0;
10561056
unicode->utf8 = NULL;
10571057
unicode->utf8_length = 0;
1058-
}
1058+
}
10591059
else {
10601060
unicode->utf8 = NULL;
10611061
unicode->utf8_length = 0;
1062-
if (kind_state == PyUnicode_2BYTE_KIND)
1062+
if (kind == PyUnicode_2BYTE_KIND)
10631063
((Py_UCS2*)data)[size] = 0;
1064-
else /* kind_state == PyUnicode_4BYTE_KIND */
1064+
else /* kind == PyUnicode_4BYTE_KIND */
10651065
((Py_UCS4*)data)[size] = 0;
10661066
if (is_sharing) {
10671067
_PyUnicode_WSTR_LENGTH(unicode) = size;
@@ -1072,6 +1072,13 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
10721072
_PyUnicode_WSTR(unicode) = NULL;
10731073
}
10741074
}
1075+
#ifdef Py_DEBUG
1076+
/* Fill the data with invalid characters to detect bugs earlier.
1077+
_PyUnicode_CheckConsistency(str, 1) detects invalid characters,
1078+
at least for ASCII and UCS-4 strings. U+00FF is invalid in ASCII
1079+
and U+FFFFFFFF is an invalid character in Unicode 6.0. */
1080+
memset(data, 0xff, size * kind);
1081+
#endif
10751082
assert(_PyUnicode_CheckConsistency((PyObject*)unicode, 0));
10761083
return obj;
10771084
}

Python/codecs.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -534,6 +534,7 @@ PyObject *PyCodec_ReplaceErrors(PyObject *exc)
534534
data = PyUnicode_DATA(res);
535535
for (i = 0; i < len; ++i)
536536
PyUnicode_WRITE(kind, data, i, '?');
537+
assert(_PyUnicode_CheckConsistency(res, 1));
537538
return Py_BuildValue("(Nn)", res, end);
538539
}
539540
else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
@@ -559,6 +560,7 @@ PyObject *PyCodec_ReplaceErrors(PyObject *exc)
559560
data = PyUnicode_DATA(res);
560561
for (i=0; i < len; i++)
561562
PyUnicode_WRITE(kind, data, i, Py_UNICODE_REPLACEMENT_CHARACTER);
563+
assert(_PyUnicode_CheckConsistency(res, 1));
562564
return Py_BuildValue("(Nn)", res, end);
563565
}
564566
else {
@@ -652,8 +654,8 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
652654
}
653655
*outp++ = ';';
654656
}
655-
restuple = Py_BuildValue("(On)", res, end);
656-
Py_DECREF(res);
657+
assert(_PyUnicode_CheckConsistency(res, 1));
658+
restuple = Py_BuildValue("(Nn)", res, end);
657659
Py_DECREF(object);
658660
return restuple;
659661
}
@@ -720,8 +722,8 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
720722
*outp++ = Py_hexdigits[c&0xf];
721723
}
722724

723-
restuple = Py_BuildValue("(On)", res, end);
724-
Py_DECREF(res);
725+
assert(_PyUnicode_CheckConsistency(res, 1));
726+
restuple = Py_BuildValue("(Nn)", res, end);
725727
Py_DECREF(object);
726728
return restuple;
727729
}

Python/compile.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,7 @@ _Py_Mangle(PyObject *privateobj, PyObject *ident)
263263
Py_DECREF(result);
264264
return NULL;
265265
}
266+
assert(_PyUnicode_CheckConsistency(result, 1));
266267
return result;
267268
}
268269

Python/import.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -992,6 +992,7 @@ make_source_pathname(PyObject *path)
992992
(j = dot0-right));
993993
PyUnicode_WRITE(kind, data, i+j, 'p');
994994
PyUnicode_WRITE(kind, data, i+j+1, 'y');
995+
assert(_PyUnicode_CheckConsistency(result, 1));
995996
return result;
996997
}
997998

0 commit comments

Comments
 (0)