Skip to content

Commit 772b2b0

Browse files
committed
Optimize bytearray % args
Issue python#25399: Don't create temporary bytes objects: modify _PyBytes_Format() to work directly on bytearray objects. * Rename _PyBytes_Format() to _PyBytes_FormatEx(), just in case something outside CPython uses it * _PyBytes_FormatEx() now uses (char*, Py_ssize_t) for the input string, so bytearray_format() doesn't need to create a temporary input bytes object * Add use_bytearray parameter to _PyBytes_FormatEx() which is passed to _PyBytesWriter, to create a bytearray buffer instead of a bytes buffer Most formatting operations are now between 2.5 and 5 times faster.
1 parent 661aacc commit 772b2b0

3 files changed

Lines changed: 33 additions & 36 deletions

File tree

Include/bytesobject.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,11 @@ PyAPI_FUNC(void) PyBytes_Concat(PyObject **, PyObject *);
6262
PyAPI_FUNC(void) PyBytes_ConcatAndDel(PyObject **, PyObject *);
6363
#ifndef Py_LIMITED_API
6464
PyAPI_FUNC(int) _PyBytes_Resize(PyObject **, Py_ssize_t);
65-
PyAPI_FUNC(PyObject *) _PyBytes_Format(PyObject *, PyObject *);
65+
PyAPI_FUNC(PyObject*) _PyBytes_FormatEx(
66+
const char *format,
67+
Py_ssize_t format_len,
68+
PyObject *args,
69+
int use_bytearray);
6670
#endif
6771
PyAPI_FUNC(PyObject *) PyBytes_DecodeEscape(const char *, Py_ssize_t,
6872
const char *, Py_ssize_t,

Objects/bytearrayobject.c

Lines changed: 5 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -282,26 +282,14 @@ PyByteArray_Concat(PyObject *a, PyObject *b)
282282
static PyObject *
283283
bytearray_format(PyByteArrayObject *self, PyObject *args)
284284
{
285-
PyObject *bytes_in, *bytes_out, *res;
286-
char *bytestring;
287-
288-
if (self == NULL || !PyByteArray_Check(self) || args == NULL) {
285+
if (self == NULL || !PyByteArray_Check(self)) {
289286
PyErr_BadInternalCall();
290287
return NULL;
291288
}
292-
bytestring = PyByteArray_AS_STRING(self);
293-
bytes_in = PyBytes_FromString(bytestring);
294-
if (bytes_in == NULL)
295-
return NULL;
296-
bytes_out = _PyBytes_Format(bytes_in, args);
297-
Py_DECREF(bytes_in);
298-
if (bytes_out == NULL)
299-
return NULL;
300-
res = PyByteArray_FromObject(bytes_out);
301-
Py_DECREF(bytes_out);
302-
if (res == NULL)
303-
return NULL;
304-
return res;
289+
290+
return _PyBytes_FormatEx(PyByteArray_AS_STRING(self),
291+
PyByteArray_GET_SIZE(self),
292+
args, 1);
305293
}
306294

307295
/* Functions stuffed into the type object */

Objects/bytesobject.c

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -568,28 +568,32 @@ format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
568568
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
569569

570570
PyObject *
571-
_PyBytes_Format(PyObject *format, PyObject *args)
571+
_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
572+
PyObject *args, int use_bytearray)
572573
{
573-
char *fmt, *res;
574+
const char *fmt;
575+
char *res;
574576
Py_ssize_t arglen, argidx;
575577
Py_ssize_t fmtcnt;
576578
int args_owned = 0;
577579
PyObject *dict = NULL;
578580
_PyBytesWriter writer;
579581

580-
if (format == NULL || !PyBytes_Check(format) || args == NULL) {
582+
if (args == NULL) {
581583
PyErr_BadInternalCall();
582584
return NULL;
583585
}
584-
fmt = PyBytes_AS_STRING(format);
585-
fmtcnt = PyBytes_GET_SIZE(format);
586+
fmt = format;
587+
fmtcnt = format_len;
586588

587589
_PyBytesWriter_Init(&writer);
590+
writer.use_bytearray = use_bytearray;
588591

589592
res = _PyBytesWriter_Alloc(&writer, fmtcnt);
590593
if (res == NULL)
591594
return NULL;
592-
writer.overallocate = 1;
595+
if (!use_bytearray)
596+
writer.overallocate = 1;
593597

594598
if (PyTuple_Check(args)) {
595599
arglen = PyTuple_GET_SIZE(args);
@@ -613,10 +617,8 @@ _PyBytes_Format(PyObject *format, PyObject *args)
613617
pos = strchr(fmt + 1, '%');
614618
if (pos != NULL)
615619
len = pos - fmt;
616-
else {
617-
len = PyBytes_GET_SIZE(format);
618-
len -= (fmt - PyBytes_AS_STRING(format));
619-
}
620+
else
621+
len = format_len - (fmt - format);
620622
assert(len != 0);
621623

622624
Py_MEMCPY(res, fmt, len);
@@ -644,7 +646,7 @@ _PyBytes_Format(PyObject *format, PyObject *args)
644646

645647
fmt++;
646648
if (*fmt == '(') {
647-
char *keystart;
649+
const char *keystart;
648650
Py_ssize_t keylen;
649651
PyObject *key;
650652
int pcount = 1;
@@ -924,8 +926,7 @@ _PyBytes_Format(PyObject *format, PyObject *args)
924926
"unsupported format character '%c' (0x%x) "
925927
"at index %zd",
926928
c, c,
927-
(Py_ssize_t)(fmt - 1 -
928-
PyBytes_AsString(format)));
929+
(Py_ssize_t)(fmt - 1 - format));
929930
goto error;
930931
}
931932

@@ -1028,7 +1029,7 @@ _PyBytes_Format(PyObject *format, PyObject *args)
10281029

10291030
/* If overallocation was disabled, ensure that it was the last
10301031
write. Otherwise, we missed an optimization */
1031-
assert(writer.overallocate || fmtcnt < 0);
1032+
assert(writer.overallocate || fmtcnt < 0 || use_bytearray);
10321033
} /* until end */
10331034

10341035
if (argidx < arglen && !dict) {
@@ -3233,11 +3234,15 @@ bytes_methods[] = {
32333234
};
32343235

32353236
static PyObject *
3236-
bytes_mod(PyObject *v, PyObject *w)
3237+
bytes_mod(PyObject *self, PyObject *args)
32373238
{
3238-
if (!PyBytes_Check(v))
3239-
Py_RETURN_NOTIMPLEMENTED;
3240-
return _PyBytes_Format(v, w);
3239+
if (self == NULL || !PyBytes_Check(self)) {
3240+
PyErr_BadInternalCall();
3241+
return NULL;
3242+
}
3243+
3244+
return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
3245+
args, 0);
32413246
}
32423247

32433248
static PyNumberMethods bytes_as_number = {

0 commit comments

Comments
 (0)