@@ -1255,41 +1255,90 @@ save_string(Picklerobject *self, PyObject *args, int doput)
12551255/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
12561256 backslash and newline characters to \uXXXX escapes. Takes `size` Py_UNICODE units starting at `s`; returns the encoded string object, or NULL on failure. */
12571257static PyObject *
1258- modified_EncodeRawUnicodeEscape (const Py_UNICODE * s , int size )
1258+ modified_EncodeRawUnicodeEscape (const Py_UNICODE * s , Py_ssize_t size )
12591259{
1260- PyObject * repr ;
1261- char * p ;
1262- char * q ;
1260+ PyObject * repr ;
1261+ char * p ;
1262+ char * q ;
12631263
1264- static const char * hexdigit = "0123456789ABCDEF" ;
1264+ static const char * hexdigit = "0123456789abcdef" ;
1265+ #ifdef Py_UNICODE_WIDE
1266+ const Py_ssize_t expandsize = 10 ; /* longest output for one input unit: '\Uxxxxxxxx' */
1267+ #else
1268+ const Py_ssize_t expandsize = 6 ; /* '\uxxxx' per unit; a surrogate pair emits 10 bytes for 2 units */
1269+ #endif
12651270
1266- repr = PyString_FromStringAndSize (NULL , 6 * size );
1267- if (repr == NULL )
1268- return NULL ;
1269- if (size == 0 )
1270- return repr ;
1271-
1272- p = q = PyString_AS_STRING (repr );
1273- while (size -- > 0 ) {
1274- Py_UNICODE ch = * s ++ ;
1275- /* Map 16-bit characters to '\uxxxx' */
1276- if (ch >= 256 || ch == '\\' || ch == '\n' ) {
1277- * p ++ = '\\' ;
1278- * p ++ = 'u' ;
1279- * p ++ = hexdigit [(ch >> 12 ) & 0xf ];
1280- * p ++ = hexdigit [(ch >> 8 ) & 0xf ];
1281- * p ++ = hexdigit [(ch >> 4 ) & 0xf ];
1282- * p ++ = hexdigit [ch & 15 ];
1283- }
1284- /* Copy everything else as-is */
1285- else
1286- * p ++ = (char ) ch ;
1287- }
1288- * p = '\0' ;
1289- _PyString_Resize (& repr , p - q );
1271+ if (size > PY_SSIZE_T_MAX / expandsize ) /* keep expandsize * size below from overflowing Py_ssize_t */
1272+ return PyErr_NoMemory ();
1273+
1274+ repr = PyString_FromStringAndSize (NULL , expandsize * size ); /* worst-case buffer; shrunk after encoding */
1275+ if (repr == NULL )
1276+ return NULL ;
1277+ if (size == 0 )
12901278 return repr ;
1291- }
12921279
1280+ p = q = PyString_AS_STRING (repr ); /* q marks the buffer start, p is the write cursor */
1281+ while (size -- > 0 ) {
1282+ Py_UNICODE ch = * s ++ ;
1283+ #ifdef Py_UNICODE_WIDE
1284+ /* Wide builds: map characters >= 0x10000 to '\Uxxxxxxxx' */
1285+ if (ch >= 0x10000 ) {
1286+ * p ++ = '\\' ;
1287+ * p ++ = 'U' ;
1288+ * p ++ = hexdigit [(ch >> 28 ) & 0xf ];
1289+ * p ++ = hexdigit [(ch >> 24 ) & 0xf ];
1290+ * p ++ = hexdigit [(ch >> 20 ) & 0xf ];
1291+ * p ++ = hexdigit [(ch >> 16 ) & 0xf ];
1292+ * p ++ = hexdigit [(ch >> 12 ) & 0xf ];
1293+ * p ++ = hexdigit [(ch >> 8 ) & 0xf ];
1294+ * p ++ = hexdigit [(ch >> 4 ) & 0xf ];
1295+ * p ++ = hexdigit [ch & 15 ];
1296+ }
1297+ else
1298+ #else
1299+ /* Narrow builds: combine a UTF-16 surrogate pair into a single '\U00xxxxxx' escape */
1300+ if (ch >= 0xD800 && ch < 0xDC00 ) {
1301+ Py_UNICODE ch2 ;
1302+ Py_UCS4 ucs ;
1303+
1304+ ch2 = * s ++ ; /* look ahead one unit. NOTE(review): when ch is the last unit this reads index `size` of the input -- presumably relies on a terminating NUL; confirm against callers */
1305+ size -- ;
1306+ if (ch2 >= 0xDC00 && ch2 <= 0xDFFF ) {
1307+ ucs = (((ch & 0x03FF ) << 10 ) | (ch2 & 0x03FF )) + 0x00010000 ;
1308+ * p ++ = '\\' ;
1309+ * p ++ = 'U' ;
1310+ * p ++ = hexdigit [(ucs >> 28 ) & 0xf ];
1311+ * p ++ = hexdigit [(ucs >> 24 ) & 0xf ];
1312+ * p ++ = hexdigit [(ucs >> 20 ) & 0xf ];
1313+ * p ++ = hexdigit [(ucs >> 16 ) & 0xf ];
1314+ * p ++ = hexdigit [(ucs >> 12 ) & 0xf ];
1315+ * p ++ = hexdigit [(ucs >> 8 ) & 0xf ];
1316+ * p ++ = hexdigit [(ucs >> 4 ) & 0xf ];
1317+ * p ++ = hexdigit [ucs & 0xf ];
1318+ continue ;
1319+ }
1320+ /* Not a pair: undo the lookahead; the lone high surrogate falls through and is escaped as '\uxxxx' below */
1321+ s -- ;
1322+ size ++ ;
1323+ }
1324+ #endif
1325+ /* Map characters >= 256, backslash and newline to '\uxxxx' */
1326+ if (ch >= 256 || ch == '\\' || ch == '\n' ) {
1327+ * p ++ = '\\' ;
1328+ * p ++ = 'u' ;
1329+ * p ++ = hexdigit [(ch >> 12 ) & 0xf ];
1330+ * p ++ = hexdigit [(ch >> 8 ) & 0xf ];
1331+ * p ++ = hexdigit [(ch >> 4 ) & 0xf ];
1332+ * p ++ = hexdigit [ch & 15 ];
1333+ }
1334+ /* Everything else is below 256 and is copied unchanged as a single byte */
1335+ else
1336+ * p ++ = (char ) ch ;
1337+ }
1338+ * p = '\0' ;
1339+ _PyString_Resize (& repr , p - q ); /* trim to the p - q bytes actually written */
1340+ return repr ;
1341+ }
12931342
12941343static int
12951344save_unicode (Picklerobject * self , PyObject * args , int doput )
0 commit comments