Skip to content

Commit 202fdca

Browse files
committed
Close #14716: str.format() now uses the new "unicode writer" API instead of the
PyAccu API. For example, this makes str.format() 25% to 30% faster on Linux.
1 parent 9fad160 commit 202fdca

2 files changed

Lines changed: 148 additions & 170 deletions

File tree

Objects/stringlib/unicode_format.h

Lines changed: 19 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@
22
unicode_format.h -- implementation of str.format().
33
*/
44

5-
#include "accu.h"
6-
75
/* Defines for more efficiently reallocating the string buffer */
86
#define INITIAL_SIZE_INCREMENT 100
97
#define SIZE_MULTIPLIER 2
@@ -111,33 +109,6 @@ autonumber_state_error(AutoNumberState state, int field_name_is_empty)
111109
}
112110

113111

114-
/************************************************************************/
115-
/*********** Output string management functions ****************/
116-
/************************************************************************/
117-
118-
/*
119-
output_data dumps characters into our output string
120-
buffer.
121-
122-
In some cases, it has to reallocate the string.
123-
124-
It returns a status: 0 for a failed reallocation,
125-
1 for success.
126-
*/
127-
static int
128-
output_data(_PyAccu *acc, PyObject *s, Py_ssize_t start, Py_ssize_t end)
129-
{
130-
PyObject *substring;
131-
int r;
132-
133-
substring = PyUnicode_Substring(s, start, end);
134-
if (substring == NULL)
135-
return 0;
136-
r = _PyAccu_Accumulate(acc, substring);
137-
Py_DECREF(substring);
138-
return r == 0;
139-
}
140-
141112
/************************************************************************/
142113
/*********** Format string parsing -- integers and identifiers *********/
143114
/************************************************************************/
@@ -523,7 +494,7 @@ get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
523494
appends to the output.
524495
*/
525496
static int
526-
render_field(PyObject *fieldobj, SubString *format_spec, _PyAccu *acc)
497+
render_field(PyObject *fieldobj, SubString *format_spec, unicode_writer_t *writer)
527498
{
528499
int ok = 0;
529500
PyObject *result = NULL;
@@ -566,7 +537,8 @@ render_field(PyObject *fieldobj, SubString *format_spec, _PyAccu *acc)
566537
goto done;
567538

568539
assert(PyUnicode_Check(result));
569-
ok = output_data(acc, result, 0, PyUnicode_GET_LENGTH(result));
540+
541+
ok = (unicode_writer_write_str(writer, result, 0, PyUnicode_GET_LENGTH(result)) == 0);
570542
done:
571543
Py_XDECREF(format_spec_object);
572544
Py_XDECREF(result);
@@ -831,7 +803,7 @@ do_conversion(PyObject *obj, Py_UCS4 conversion)
831803
static int
832804
output_markup(SubString *field_name, SubString *format_spec,
833805
int format_spec_needs_expanding, Py_UCS4 conversion,
834-
_PyAccu *acc, PyObject *args, PyObject *kwargs,
806+
unicode_writer_t *writer, PyObject *args, PyObject *kwargs,
835807
int recursion_depth, AutoNumber *auto_number)
836808
{
837809
PyObject *tmp = NULL;
@@ -872,7 +844,7 @@ output_markup(SubString *field_name, SubString *format_spec,
872844
else
873845
actual_format_spec = format_spec;
874846

875-
if (render_field(fieldobj, actual_format_spec, acc) == 0)
847+
if (render_field(fieldobj, actual_format_spec, writer) == 0)
876848
goto done;
877849

878850
result = 1;
@@ -892,7 +864,7 @@ output_markup(SubString *field_name, SubString *format_spec,
892864
*/
893865
static int
894866
do_markup(SubString *input, PyObject *args, PyObject *kwargs,
895-
_PyAccu *acc, int recursion_depth, AutoNumber *auto_number)
867+
unicode_writer_t *writer, int recursion_depth, AutoNumber *auto_number)
896868
{
897869
MarkupIterator iter;
898870
int format_spec_needs_expanding;
@@ -902,17 +874,21 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs,
902874
SubString field_name;
903875
SubString format_spec;
904876
Py_UCS4 conversion;
877+
int err;
905878

906879
MarkupIterator_init(&iter, input->str, input->start, input->end);
907880
while ((result = MarkupIterator_next(&iter, &literal, &field_present,
908881
&field_name, &format_spec,
909882
&conversion,
910883
&format_spec_needs_expanding)) == 2) {
911-
if (!output_data(acc, literal.str, literal.start, literal.end))
884+
err = unicode_writer_write_str(writer,
885+
literal.str, literal.start,
886+
literal.end - literal.start);
887+
if (err == -1)
912888
return 0;
913889
if (field_present)
914890
if (!output_markup(&field_name, &format_spec,
915-
format_spec_needs_expanding, conversion, acc,
891+
format_spec_needs_expanding, conversion, writer,
916892
args, kwargs, recursion_depth, auto_number))
917893
return 0;
918894
}
@@ -928,7 +904,8 @@ static PyObject *
928904
build_string(SubString *input, PyObject *args, PyObject *kwargs,
929905
int recursion_depth, AutoNumber *auto_number)
930906
{
931-
_PyAccu acc;
907+
unicode_writer_t writer;
908+
Py_ssize_t initlen;
932909

933910
/* check the recursion level */
934911
if (recursion_depth <= 0) {
@@ -937,16 +914,17 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs,
937914
return NULL;
938915
}
939916

940-
if (_PyAccu_Init(&acc))
917+
initlen = PyUnicode_GET_LENGTH(input->str) + 100;
918+
if (unicode_writer_init(&writer, initlen, 127) == -1)
941919
return NULL;
942920

943-
if (!do_markup(input, args, kwargs, &acc, recursion_depth,
921+
if (!do_markup(input, args, kwargs, &writer, recursion_depth,
944922
auto_number)) {
945-
_PyAccu_Destroy(&acc);
923+
unicode_writer_dealloc(&writer);
946924
return NULL;
947925
}
948926

949-
return _PyAccu_Finish(&acc);
927+
return unicode_writer_finish(&writer);
950928
}
951929

952930
/************************************************************************/

0 commit comments

Comments
 (0)