@@ -3096,6 +3096,29 @@ dec_repr(PyObject *dec)
30963096 return res ;
30973097}
30983098
3099+ /* Return a duplicate of src, copy embedded null characters. */
3100+ static char *
3101+ dec_strdup (const char * src , Py_ssize_t size )
3102+ {
3103+ char * dest = PyMem_Malloc (size + 1 );
3104+ if (dest == NULL ) {
3105+ return NULL ;
3106+ }
3107+
3108+ memcpy (dest , src , size );
3109+ dest [size ] = '\0' ;
3110+ return dest ;
3111+ }
3112+
/* Overwrite every 0xff placeholder byte in the NUL-terminated string 'dest'
   with '\0', in place. The scan stops at the original terminator, so the
   caller has to record the string length before calling this function. */
static void
dec_replace_fillchar(char *dest)
{
    char *p;

    for (p = dest; *p != '\0'; p++) {
        if (*p == '\xff') {
            *p = '\0';
        }
    }
}
3121+
30993122/* Convert decimal_point or thousands_sep, which may be multibyte or in
31003123 the range [128, 255], to a UTF8 string. */
31013124static PyObject *
@@ -3131,13 +3154,14 @@ dec_format(PyObject *dec, PyObject *args)
31313154 PyObject * dot = NULL ;
31323155 PyObject * sep = NULL ;
31333156 PyObject * grouping = NULL ;
3134- PyObject * fmt = NULL ;
31353157 PyObject * fmtarg ;
31363158 PyObject * context ;
31373159 mpd_spec_t spec ;
3138- char * decstring = NULL ;
3160+ char * fmt ;
3161+ char * decstring = NULL ;
31393162 uint32_t status = 0 ;
3140- size_t n ;
3163+ int replace_fillchar = 0 ;
3164+ Py_ssize_t size ;
31413165
31423166
31433167 CURRENT_CONTEXT (context );
@@ -3146,23 +3170,40 @@ dec_format(PyObject *dec, PyObject *args)
31463170 }
31473171
31483172 if (PyUnicode_Check (fmtarg )) {
3149- fmt = PyUnicode_AsUTF8String (fmtarg );
3173+ fmt = PyUnicode_AsUTF8AndSize (fmtarg , & size );
31503174 if (fmt == NULL ) {
31513175 return NULL ;
31523176 }
3177+ if (size > 0 && fmt [0 ] == '\0' ) {
3178+ /* NUL fill character: must be replaced with a valid UTF-8 char
3179+ before calling mpd_parse_fmt_str(). */
3180+ replace_fillchar = 1 ;
3181+ fmt = dec_strdup (fmt , size );
3182+ if (fmt == NULL ) {
3183+ return NULL ;
3184+ }
3185+ fmt [0 ] = '_' ;
3186+ }
31533187 }
31543188 else {
31553189 PyErr_SetString (PyExc_TypeError ,
31563190 "format arg must be str" );
31573191 return NULL ;
31583192 }
31593193
3160- if (!mpd_parse_fmt_str (& spec , PyBytes_AS_STRING (fmt ),
3161- CtxCaps (context ))) {
3194+ if (!mpd_parse_fmt_str (& spec , fmt , CtxCaps (context ))) {
31623195 PyErr_SetString (PyExc_ValueError ,
31633196 "invalid format string" );
31643197 goto finish ;
31653198 }
3199+ if (replace_fillchar ) {
3200+ /* In order to avoid clobbering parts of UTF-8 thousands separators or
3201+ decimal points when the substitution is reversed later, the actual
3202+ placeholder must be an invalid UTF-8 byte. */
3203+ spec .fill [0 ] = '\xff' ;
3204+ spec .fill [1 ] = '\0' ;
3205+ }
3206+
31663207 if (override ) {
31673208 /* Values for decimal_point, thousands_sep and grouping can
31683209 be explicitly specified in the override dict. These values
@@ -3199,7 +3240,7 @@ dec_format(PyObject *dec, PyObject *args)
31993240 }
32003241 }
32013242 else {
3202- n = strlen (spec .dot );
3243+ size_t n = strlen (spec .dot );
32033244 if (n > 1 || (n == 1 && !isascii ((uchar )spec .dot [0 ]))) {
32043245 /* fix locale dependent non-ascii characters */
32053246 dot = dotsep_as_utf8 (spec .dot );
@@ -3231,14 +3272,19 @@ dec_format(PyObject *dec, PyObject *args)
32313272 }
32323273 goto finish ;
32333274 }
3234- result = PyUnicode_DecodeUTF8 (decstring , strlen (decstring ), NULL );
3275+ size = strlen (decstring );
3276+ if (replace_fillchar ) {
3277+ dec_replace_fillchar (decstring );
3278+ }
3279+
3280+ result = PyUnicode_DecodeUTF8 (decstring , size , NULL );
32353281
32363282
32373283finish :
32383284 Py_XDECREF (grouping );
32393285 Py_XDECREF (sep );
32403286 Py_XDECREF (dot );
3241- Py_XDECREF (fmt );
3287+ if ( replace_fillchar ) PyMem_Free (fmt );
32423288 if (decstring ) mpd_free (decstring );
32433289 return result ;
32443290}
0 commit comments