@@ -967,7 +967,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
967967 PyObject * obj ;
968968 PyCompactUnicodeObject * unicode ;
969969 void * data ;
970- int kind_state ;
970+ enum PyUnicode_Kind kind ;
971971 int is_sharing , is_ascii ;
972972 Py_ssize_t char_size ;
973973 Py_ssize_t struct_size ;
@@ -986,17 +986,17 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
986986 is_sharing = 0 ;
987987 struct_size = sizeof (PyCompactUnicodeObject );
988988 if (maxchar < 128 ) {
989- kind_state = PyUnicode_1BYTE_KIND ;
989+ kind = PyUnicode_1BYTE_KIND ;
990990 char_size = 1 ;
991991 is_ascii = 1 ;
992992 struct_size = sizeof (PyASCIIObject );
993993 }
994994 else if (maxchar < 256 ) {
995- kind_state = PyUnicode_1BYTE_KIND ;
995+ kind = PyUnicode_1BYTE_KIND ;
996996 char_size = 1 ;
997997 }
998998 else if (maxchar < 65536 ) {
999- kind_state = PyUnicode_2BYTE_KIND ;
999+ kind = PyUnicode_2BYTE_KIND ;
10001000 char_size = 2 ;
10011001 if (sizeof (wchar_t ) == 2 )
10021002 is_sharing = 1 ;
@@ -1007,7 +1007,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
10071007 "invalid maximum character passed to PyUnicode_New" );
10081008 return NULL ;
10091009 }
1010- kind_state = PyUnicode_4BYTE_KIND ;
1010+ kind = PyUnicode_4BYTE_KIND ;
10111011 char_size = 4 ;
10121012 if (sizeof (wchar_t ) == 4 )
10131013 is_sharing = 1 ;
@@ -1041,27 +1041,27 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
10411041 _PyUnicode_LENGTH (unicode ) = size ;
10421042 _PyUnicode_HASH (unicode ) = -1 ;
10431043 _PyUnicode_STATE (unicode ).interned = 0 ;
1044- _PyUnicode_STATE (unicode ).kind = kind_state ;
1044+ _PyUnicode_STATE (unicode ).kind = kind ;
10451045 _PyUnicode_STATE (unicode ).compact = 1 ;
10461046 _PyUnicode_STATE (unicode ).ready = 1 ;
10471047 _PyUnicode_STATE (unicode ).ascii = is_ascii ;
10481048 if (is_ascii ) {
10491049 ((char * )data )[size ] = 0 ;
10501050 _PyUnicode_WSTR (unicode ) = NULL ;
10511051 }
1052- else if (kind_state == PyUnicode_1BYTE_KIND ) {
1052+ else if (kind == PyUnicode_1BYTE_KIND ) {
10531053 ((char * )data )[size ] = 0 ;
10541054 _PyUnicode_WSTR (unicode ) = NULL ;
10551055 _PyUnicode_WSTR_LENGTH (unicode ) = 0 ;
10561056 unicode -> utf8 = NULL ;
10571057 unicode -> utf8_length = 0 ;
1058- }
1058+ }
10591059 else {
10601060 unicode -> utf8 = NULL ;
10611061 unicode -> utf8_length = 0 ;
1062- if (kind_state == PyUnicode_2BYTE_KIND )
1062+ if (kind == PyUnicode_2BYTE_KIND )
10631063 ((Py_UCS2 * )data )[size ] = 0 ;
1064- else /* kind_state == PyUnicode_4BYTE_KIND */
1064+ else /* kind == PyUnicode_4BYTE_KIND */
10651065 ((Py_UCS4 * )data )[size ] = 0 ;
10661066 if (is_sharing ) {
10671067 _PyUnicode_WSTR_LENGTH (unicode ) = size ;
@@ -1072,6 +1072,13 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
10721072 _PyUnicode_WSTR (unicode ) = NULL ;
10731073 }
10741074 }
1075+ #ifdef Py_DEBUG
1076+ /* Fill the data with invalid characters to detect bugs earlier.
1077+ _PyUnicode_CheckConsistency(str, 1) detects invalid characters,
1078+ at least for ASCII and UCS-4 strings: 0xFF is outside the ASCII
1079+ range and 0xFFFFFFFF is above the Unicode maximum, U+10FFFF. */
1080+ memset (data , 0xff , size * kind );
1081+ #endif
10751082 assert (_PyUnicode_CheckConsistency ((PyObject * )unicode , 0 ));
10761083 return obj ;
10771084}
0 commit comments