@@ -263,10 +263,7 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
263263#define MAX_SHORT_UNICHARS 300 /* largest size we'll do on the stack */
264264
265265 Py_ssize_t i ; /* index into s of next input byte */
266- PyObject * result ; /* result string object */
267266 char * p ; /* next free byte in output buffer */
268- Py_ssize_t nallocated ; /* number of result bytes allocated */
269- Py_ssize_t nneeded ; /* number of result bytes needed */
270267#if STRINGLIB_SIZEOF_CHAR > 1
271268 PyObject * error_handler_obj = NULL ;
272269 PyObject * exc = NULL ;
@@ -275,39 +272,25 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
275272#endif
276273#if STRINGLIB_SIZEOF_CHAR == 1
277274 const Py_ssize_t max_char_size = 2 ;
278- char stackbuf [MAX_SHORT_UNICHARS * 2 ];
279275#elif STRINGLIB_SIZEOF_CHAR == 2
280276 const Py_ssize_t max_char_size = 3 ;
281- char stackbuf [MAX_SHORT_UNICHARS * 3 ];
282277#else /* STRINGLIB_SIZEOF_CHAR == 4 */
283278 const Py_ssize_t max_char_size = 4 ;
284- char stackbuf [MAX_SHORT_UNICHARS * 4 ];
285279#endif
280+ _PyBytesWriter writer ;
286281
287282 assert (size >= 0 );
283+ _PyBytesWriter_Init (& writer );
288284
289- if (size <= MAX_SHORT_UNICHARS ) {
290- /* Write into the stack buffer; nallocated can't overflow.
291- * At the end, we'll allocate exactly as much heap space as it
292- * turns out we need.
293- */
294- nallocated = Py_SAFE_DOWNCAST (sizeof (stackbuf ), size_t , int );
295- result = NULL ; /* will allocate after we're done */
296- p = stackbuf ;
297- }
298- else {
299- if (size > PY_SSIZE_T_MAX / max_char_size ) {
300- /* integer overflow */
301- return PyErr_NoMemory ();
302- }
303- /* Overallocate on the heap, and give the excess back at the end. */
304- nallocated = size * max_char_size ;
305- result = PyBytes_FromStringAndSize (NULL , nallocated );
306- if (result == NULL )
307- return NULL ;
308- p = PyBytes_AS_STRING (result );
285+ if (size > PY_SSIZE_T_MAX / max_char_size ) {
286+ /* integer overflow */
287+ return PyErr_NoMemory ();
309288 }
310289
290+ p = _PyBytesWriter_Alloc (& writer , size * max_char_size );
291+ if (p == NULL )
292+ return NULL ;
293+
311294 for (i = 0 ; i < size ;) {
312295 Py_UCS4 ch = data [i ++ ];
313296
@@ -338,6 +321,9 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
338321 while ((endpos < size ) && Py_UNICODE_IS_SURROGATE (data [endpos ]))
339322 endpos ++ ;
340323
324+ /* Only overallocate the buffer if it's not the last write */
325+ writer .overallocate = (endpos < size );
326+
341327 switch (error_handler )
342328 {
343329 case _Py_ERROR_REPLACE :
@@ -387,29 +373,10 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
387373 repsize = PyUnicode_GET_LENGTH (rep );
388374
389375 if (repsize > max_char_size ) {
390- Py_ssize_t offset ;
391-
392- if (result == NULL )
393- offset = p - stackbuf ;
394- else
395- offset = p - PyBytes_AS_STRING (result );
396-
397- if (nallocated > PY_SSIZE_T_MAX - repsize + max_char_size ) {
398- /* integer overflow */
399- PyErr_NoMemory ();
376+ p = _PyBytesWriter_Prepare (& writer , p ,
377+ repsize - max_char_size );
378+ if (p == NULL )
400379 goto error ;
401- }
402- nallocated += repsize - max_char_size ;
403- if (result != NULL ) {
404- if (_PyBytes_Resize (& result , nallocated ) < 0 )
405- goto error ;
406- } else {
407- result = PyBytes_FromStringAndSize (NULL , nallocated );
408- if (result == NULL )
409- goto error ;
410- Py_MEMCPY (PyBytes_AS_STRING (result ), stackbuf , offset );
411- }
412- p = PyBytes_AS_STRING (result ) + offset ;
413380 }
414381
415382 if (PyBytes_Check (rep )) {
@@ -437,6 +404,10 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
437404
438405 i = newpos ;
439406 }
407+
408+ /* If overallocation was disabled, ensure that it was the last
409+ write. Otherwise, we missed an optimization */
410+ assert (writer .overallocate || i == size );
440411 }
441412 else
442413#if STRINGLIB_SIZEOF_CHAR > 2
@@ -461,31 +432,18 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
461432#endif /* STRINGLIB_SIZEOF_CHAR > 1 */
462433 }
463434
464- if (result == NULL ) {
465- /* This was stack allocated. */
466- nneeded = p - stackbuf ;
467- assert (nneeded <= nallocated );
468- result = PyBytes_FromStringAndSize (stackbuf , nneeded );
469- }
470- else {
471- /* Cut back to size actually needed. */
472- nneeded = p - PyBytes_AS_STRING (result );
473- assert (nneeded <= nallocated );
474- _PyBytes_Resize (& result , nneeded );
475- }
476-
477435#if STRINGLIB_SIZEOF_CHAR > 1
478436 Py_XDECREF (error_handler_obj );
479437 Py_XDECREF (exc );
480438#endif
481- return result ;
439+ return _PyBytesWriter_Finish ( & writer , p ) ;
482440
483441#if STRINGLIB_SIZEOF_CHAR > 1
484442 error :
485443 Py_XDECREF (rep );
486444 Py_XDECREF (error_handler_obj );
487445 Py_XDECREF (exc );
488- Py_XDECREF ( result );
446+ _PyBytesWriter_Dealloc ( & writer );
489447 return NULL ;
490448#endif
491449
0 commit comments