@@ -12693,17 +12693,13 @@ formatlong(PyObject *val, int flags, int prec, int type)
1269312693 return result ;
1269412694}
1269512695
12696- static int
12697- formatchar (Py_UCS4 * buf ,
12698- size_t buflen ,
12699- PyObject * v )
12696+ static Py_UCS4
12697+ formatchar (PyObject * v )
1270012698{
1270112699 /* presume that the buffer is at least 3 characters long */
1270212700 if (PyUnicode_Check (v )) {
1270312701 if (PyUnicode_GET_LENGTH (v ) == 1 ) {
12704- buf [0 ] = PyUnicode_READ_CHAR (v , 0 );
12705- buf [1 ] = '\0' ;
12706- return 1 ;
12702+ return PyUnicode_READ_CHAR (v , 0 );
1270712703 }
1270812704 goto onError ;
1270912705 }
@@ -12717,38 +12713,45 @@ formatchar(Py_UCS4 *buf,
1271712713 if (x < 0 || x > 0x10ffff ) {
1271812714 PyErr_SetString (PyExc_OverflowError ,
1271912715 "%c arg not in range(0x110000)" );
12720- return -1 ;
12716+ return ( Py_UCS4 ) - 1 ;
1272112717 }
1272212718
12723- buf [0 ] = (Py_UCS4 ) x ;
12724- buf [1 ] = '\0' ;
12725- return 1 ;
12719+ return (Py_UCS4 ) x ;
1272612720 }
1272712721
1272812722 onError :
1272912723 PyErr_SetString (PyExc_TypeError ,
1273012724 "%c requires int or char" );
12731- return -1 ;
12725+ return ( Py_UCS4 ) - 1 ;
1273212726}
1273312727
12734- /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
12735- FORMATBUFLEN is the length of the buffer in which chars are formatted.
12736- */
12737- #define FORMATBUFLEN (size_t)10
12738-
1273912728PyObject *
1274012729PyUnicode_Format (PyObject * format , PyObject * args )
1274112730{
1274212731 void * fmt ;
1274312732 int fmtkind ;
1274412733 PyObject * result ;
12745- Py_UCS4 * res , * res0 ;
12746- Py_UCS4 max ;
1274712734 int kind ;
12748- Py_ssize_t fmtcnt , fmtpos , rescnt , reslen , arglen , argidx ;
12735+ int r ;
12736+ Py_ssize_t fmtcnt , fmtpos , arglen , argidx ;
1274912737 int args_owned = 0 ;
1275012738 PyObject * dict = NULL ;
12739+ PyObject * temp = NULL ;
12740+ PyObject * second = NULL ;
1275112741 PyUnicodeObject * uformat ;
12742+ _PyAccu acc ;
12743+ static PyObject * plus , * minus , * blank , * zero , * percent ;
12744+
12745+ if (!plus && !(plus = get_latin1_char ('+' )))
12746+ return NULL ;
12747+ if (!minus && !(minus = get_latin1_char ('-' )))
12748+ return NULL ;
12749+ if (!blank && !(blank = get_latin1_char (' ' )))
12750+ return NULL ;
12751+ if (!zero && !(zero = get_latin1_char ('0' )))
12752+ return NULL ;
12753+ if (!percent && !(percent = get_latin1_char ('%' )))
12754+ return NULL ;
1275212755
1275312756 if (format == NULL || args == NULL ) {
1275412757 PyErr_BadInternalCall ();
@@ -12757,18 +12760,13 @@ PyUnicode_Format(PyObject *format, PyObject *args)
1275712760 uformat = (PyUnicodeObject * )PyUnicode_FromObject (format );
1275812761 if (uformat == NULL || PyUnicode_READY (uformat ) == -1 )
1275912762 return NULL ;
12763+ if (_PyAccu_Init (& acc ))
12764+ goto onError ;
1276012765 fmt = PyUnicode_DATA (uformat );
1276112766 fmtkind = PyUnicode_KIND (uformat );
1276212767 fmtcnt = PyUnicode_GET_LENGTH (uformat );
1276312768 fmtpos = 0 ;
1276412769
12765- reslen = rescnt = fmtcnt + 100 ;
12766- res = res0 = PyMem_Malloc (reslen * sizeof (Py_UCS4 ));
12767- if (res0 == NULL ) {
12768- PyErr_NoMemory ();
12769- goto onError ;
12770- }
12771-
1277212770 if (PyTuple_Check (args )) {
1277312771 arglen = PyTuple_Size (args );
1277412772 argidx = 0 ;
@@ -12783,34 +12781,34 @@ PyUnicode_Format(PyObject *format, PyObject *args)
1278312781
1278412782 while (-- fmtcnt >= 0 ) {
1278512783 if (PyUnicode_READ (fmtkind , fmt , fmtpos ) != '%' ) {
12786- if (-- rescnt < 0 ) {
12787- rescnt = fmtcnt + 100 ;
12788- reslen += rescnt ;
12789- res0 = PyMem_Realloc (res0 , reslen * sizeof (Py_UCS4 ));
12790- if (res0 == NULL ){
12791- PyErr_NoMemory ();
12792- goto onError ;
12793- }
12794- res = res0 + reslen - rescnt ;
12795- -- rescnt ;
12796- }
12797- * res ++ = PyUnicode_READ (fmtkind , fmt , fmtpos ++ );
12784+ PyObject * nonfmt ;
12785+ Py_ssize_t nonfmtpos ;
12786+ nonfmtpos = fmtpos ++ ;
12787+ while (fmtcnt >= 0 &&
12788+ PyUnicode_READ (fmtkind , fmt , fmtpos ) != '%' ) {
12789+ fmtpos ++ ;
12790+ fmtcnt -- ;
12791+ }
12792+ nonfmt = PyUnicode_Substring ((PyObject * ) uformat , nonfmtpos , fmtpos );
12793+ if (nonfmt == NULL )
12794+ goto onError ;
12795+ r = _PyAccu_Accumulate (& acc , nonfmt );
12796+ Py_DECREF (nonfmt );
12797+ if (r )
12798+ goto onError ;
1279812799 }
1279912800 else {
1280012801 /* Got a format specifier */
1280112802 int flags = 0 ;
1280212803 Py_ssize_t width = -1 ;
1280312804 int prec = -1 ;
1280412805 Py_UCS4 c = '\0' ;
12805- Py_UCS4 fill ;
12806+ Py_UCS4 fill , sign ;
1280612807 int isnumok ;
1280712808 PyObject * v = NULL ;
12808- PyObject * temp = NULL ;
12809- void * pbuf ;
12810- Py_ssize_t pindex ;
12811- Py_UNICODE sign ;
12812- Py_ssize_t len , len1 ;
12813- Py_UCS4 formatbuf [FORMATBUFLEN ]; /* For formatchar() */
12809+ void * pbuf = NULL ;
12810+ Py_ssize_t pindex , len ;
12811+ PyObject * signobj = NULL , * fillobj = NULL ;
1281412812
1281512813 fmtpos ++ ;
1281612814 if (PyUnicode_READ (fmtkind , fmt , fmtpos ) == '(' ) {
@@ -12955,15 +12953,12 @@ PyUnicode_Format(PyObject *format, PyObject *args)
1295512953 }
1295612954 sign = 0 ;
1295712955 fill = ' ' ;
12956+ fillobj = blank ;
1295812957 switch (c ) {
1295912958
1296012959 case '%' :
12961- pbuf = formatbuf ;
12962- kind = PyUnicode_4BYTE_KIND ;
12963- /* presume that buffer length is at least 1 */
12964- PyUnicode_WRITE (kind , pbuf , 0 , '%' );
12965- len = 1 ;
12966- break ;
12960+ _PyAccu_Accumulate (& acc , percent );
12961+ continue ;
1296712962
1296812963 case 's' :
1296912964 case 'r' :
@@ -13045,8 +13040,10 @@ PyUnicode_Format(PyObject *format, PyObject *args)
1304513040 "not %.200s" , (char )c , Py_TYPE (v )-> tp_name );
1304613041 goto onError ;
1304713042 }
13048- if (flags & F_ZERO )
13043+ if (flags & F_ZERO ) {
1304913044 fill = '0' ;
13045+ fillobj = zero ;
13046+ }
1305013047 break ;
1305113048
1305213049 case 'e' :
@@ -13066,17 +13063,25 @@ PyUnicode_Format(PyObject *format, PyObject *args)
1306613063 kind = PyUnicode_KIND (temp );
1306713064 len = PyUnicode_GET_LENGTH (temp );
1306813065 sign = 1 ;
13069- if (flags & F_ZERO )
13066+ if (flags & F_ZERO ) {
1307013067 fill = '0' ;
13068+ fillobj = zero ;
13069+ }
1307113070 break ;
1307213071
1307313072 case 'c' :
13074- pbuf = formatbuf ;
13075- kind = PyUnicode_4BYTE_KIND ;
13076- len = formatchar (pbuf , Py_ARRAY_LENGTH (formatbuf ), v );
13077- if (len < 0 )
13073+ {
13074+ Py_UCS4 ch = formatchar (v );
13075+ if (ch == (Py_UCS4 ) - 1 )
13076+ goto onError ;
13077+ temp = _PyUnicode_FromUCS4 (& ch , 1 );
13078+ if (temp == NULL )
1307813079 goto onError ;
13080+ pbuf = PyUnicode_DATA (temp );
13081+ kind = PyUnicode_KIND (temp );
13082+ len = PyUnicode_GET_LENGTH (temp );
1307913083 break ;
13084+ }
1308013085
1308113086 default :
1308213087 PyErr_Format (PyExc_ValueError ,
@@ -13090,90 +13095,105 @@ PyUnicode_Format(PyObject *format, PyObject *args)
1309013095 /* pbuf is initialized here. */
1309113096 pindex = 0 ;
1309213097 if (sign ) {
13093- if (PyUnicode_READ (kind , pbuf , pindex ) == '-' ||
13094- PyUnicode_READ (kind , pbuf , pindex ) == '+' ) {
13095- sign = PyUnicode_READ (kind , pbuf , pindex ++ );
13098+ if (PyUnicode_READ (kind , pbuf , pindex ) == '-' ) {
13099+ signobj = minus ;
1309613100 len -- ;
13101+ pindex ++ ;
13102+ }
13103+ else if (PyUnicode_READ (kind , pbuf , pindex ) == '+' ) {
13104+ signobj = plus ;
13105+ len -- ;
13106+ pindex ++ ;
1309713107 }
1309813108 else if (flags & F_SIGN )
13099- sign = '+' ;
13109+ signobj = plus ;
1310013110 else if (flags & F_BLANK )
13101- sign = ' ' ;
13111+ signobj = blank ;
1310213112 else
1310313113 sign = 0 ;
1310413114 }
1310513115 if (width < len )
1310613116 width = len ;
13107- if (rescnt - (sign != 0 ) < width ) {
13108- reslen -= rescnt ;
13109- rescnt = width + fmtcnt + 100 ;
13110- reslen += rescnt ;
13111- if (reslen < 0 ) {
13112- Py_XDECREF (temp );
13113- PyErr_NoMemory ();
13114- goto onError ;
13115- }
13116- res0 = PyMem_Realloc (res0 , reslen * sizeof (Py_UCS4 ));
13117- if (res0 == 0 ) {
13118- PyErr_NoMemory ();
13119- Py_XDECREF (temp );
13120- goto onError ;
13121- }
13122- res = res0 + reslen - rescnt ;
13123- }
1312413117 if (sign ) {
13125- if (fill != ' ' )
13126- * res ++ = sign ;
13127- rescnt -- ;
13118+ if (fill != ' ' ) {
13119+ assert (signobj != NULL );
13120+ if (_PyAccu_Accumulate (& acc , signobj ))
13121+ goto onError ;
13122+ }
1312813123 if (width > len )
1312913124 width -- ;
1313013125 }
1313113126 if ((flags & F_ALT ) && (c == 'x' || c == 'X' || c == 'o' )) {
1313213127 assert (PyUnicode_READ (kind , pbuf , pindex ) == '0' );
13133- assert (PyUnicode_READ (kind , pbuf , pindex + 1 ) == c );
13128+ assert (PyUnicode_READ (kind , pbuf , pindex + 1 ) == c );
1313413129 if (fill != ' ' ) {
13135- * res ++ = PyUnicode_READ (kind , pbuf , pindex ++ );
13136- * res ++ = PyUnicode_READ (kind , pbuf , pindex ++ );
13130+ second = get_latin1_char (
13131+ PyUnicode_READ (kind , pbuf , pindex + 1 ));
13132+ pindex += 2 ;
13133+ if (second == NULL ||
13134+ _PyAccu_Accumulate (& acc , zero ) ||
13135+ _PyAccu_Accumulate (& acc , second ))
13136+ goto onError ;
13137+ Py_CLEAR (second );
1313713138 }
13138- rescnt -= 2 ;
1313913139 width -= 2 ;
1314013140 if (width < 0 )
1314113141 width = 0 ;
1314213142 len -= 2 ;
1314313143 }
1314413144 if (width > len && !(flags & F_LJUST )) {
13145+ assert (fillobj != NULL );
1314513146 do {
13146- -- rescnt ;
13147- * res ++ = fill ;
13147+ if ( _PyAccu_Accumulate ( & acc , fillobj ))
13148+ goto onError ;
1314813149 } while (-- width > len );
1314913150 }
1315013151 if (fill == ' ' ) {
13151- if (sign )
13152- * res ++ = sign ;
13152+ if (sign ) {
13153+ assert (signobj != NULL );
13154+ if (_PyAccu_Accumulate (& acc , signobj ))
13155+ goto onError ;
13156+ }
1315313157 if ((flags & F_ALT ) && (c == 'x' || c == 'X' || c == 'o' )) {
1315413158 assert (PyUnicode_READ (kind , pbuf , pindex ) == '0' );
1315513159 assert (PyUnicode_READ (kind , pbuf , pindex + 1 ) == c );
13156- * res ++ = PyUnicode_READ (kind , pbuf , pindex ++ );
13157- * res ++ = PyUnicode_READ (kind , pbuf , pindex ++ );
13160+ second = get_latin1_char (
13161+ PyUnicode_READ (kind , pbuf , pindex + 1 ));
13162+ pindex += 2 ;
13163+ if (second == NULL ||
13164+ _PyAccu_Accumulate (& acc , zero ) ||
13165+ _PyAccu_Accumulate (& acc , second ))
13166+ goto onError ;
13167+ Py_CLEAR (second );
1315813168 }
1315913169 }
1316013170 /* Copy all characters, preserving len */
13161- len1 = len ;
13162- while (len1 -- ) {
13163- * res ++ = PyUnicode_READ (kind , pbuf , pindex ++ );
13164- rescnt -- ;
13171+ if (temp != NULL ) {
13172+ assert (pbuf == PyUnicode_DATA (temp ));
13173+ v = PyUnicode_Substring (temp , pindex , pindex + len );
13174+ }
13175+ else {
13176+ const char * p = (const char * ) pbuf ;
13177+ assert (pbuf != NULL );
13178+ p = p + PyUnicode_KIND_SIZE (kind , pindex );
13179+ v = PyUnicode_FromKindAndData (kind , p , len );
1316513180 }
13181+ if (v == NULL )
13182+ goto onError ;
13183+ r = _PyAccu_Accumulate (& acc , v );
13184+ Py_DECREF (v );
13185+ if (r )
13186+ goto onError ;
1316613187 while (-- width >= len ) {
13167- -- rescnt ;
13168- * res ++ = ' ' ;
13188+ if ( _PyAccu_Accumulate ( & acc , blank ))
13189+ goto onError ;
1316913190 }
1317013191 if (dict && (argidx < arglen ) && c != '%' ) {
1317113192 PyErr_SetString (PyExc_TypeError ,
1317213193 "not all arguments converted during string formatting" );
13173- Py_XDECREF (temp );
1317413194 goto onError ;
1317513195 }
13176- Py_XDECREF (temp );
13196+ Py_CLEAR (temp );
1317713197 } /* '%' */
1317813198 } /* until end */
1317913199 if (argidx < arglen && !dict ) {
@@ -13182,27 +13202,20 @@ PyUnicode_Format(PyObject *format, PyObject *args)
1318213202 goto onError ;
1318313203 }
1318413204
13185-
13186- for (max = 0 , res = res0 ; res < res0 + reslen - rescnt ; res ++ )
13187- if (* res > max )
13188- max = * res ;
13189- result = PyUnicode_New (reslen - rescnt , max );
13190- if (!result )
13191- goto onError ;
13192- kind = PyUnicode_KIND (result );
13193- for (res = res0 ; res < res0 + reslen - rescnt ; res ++ )
13194- PyUnicode_WRITE (kind , PyUnicode_DATA (result ), res - res0 , * res );
13195- PyMem_Free (res0 );
13205+ result = _PyAccu_Finish (& acc );
1319613206 if (args_owned ) {
1319713207 Py_DECREF (args );
1319813208 }
1319913209 Py_DECREF (uformat );
13200- assert (_PyUnicode_CheckConsistency (result , 1 ));
13210+ Py_XDECREF (temp );
13211+ Py_XDECREF (second );
1320113212 return (PyObject * )result ;
1320213213
1320313214 onError :
13204- PyMem_Free (res0 );
1320513215 Py_DECREF (uformat );
13216+ Py_XDECREF (temp );
13217+ Py_XDECREF (second );
13218+ _PyAccu_Destroy (& acc );
1320613219 if (args_owned ) {
1320713220 Py_DECREF (args );
1320813221 }
0 commit comments