Skip to content

Commit 661aacc

Browse files
committed
Add use_bytearray attribute to _PyBytesWriter
Issue python#25399: Add a new use_bytearray attribute to _PyBytesWriter to use a bytearray buffer, instead of using a bytes object.
1 parent 199c9a6 commit 661aacc

2 files changed

Lines changed: 73 additions & 32 deletions

File tree

Include/bytesobject.h

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -128,17 +128,21 @@ PyAPI_FUNC(Py_ssize_t) _PyBytes_InsertThousandsGrouping(char *buffer,
128128
A _PyBytesWriter variable must be declared at the end of variables in a
129129
function to optimize the memory allocation on the stack. */
130130
typedef struct {
131-
/* bytes object */
131+
/* bytes, bytearray or NULL (when the small buffer is used) */
132132
PyObject *buffer;
133133

134-
/* Number of allocated size */
134+
/* Number of allocated bytes. */
135135
Py_ssize_t allocated;
136136

137137
/* Minimum number of allocated bytes,
138138
incremented by _PyBytesWriter_Prepare() */
139139
Py_ssize_t min_size;
140140

141-
/* If non-zero, overallocate the buffer (default: 0). */
141+
/* If non-zero, use a bytearray instead of a bytes object for buffer. */
142+
int use_bytearray;
143+
144+
/* If non-zero, overallocate the buffer (default: 0).
145+
This flag must be zero if use_bytearray is non-zero. */
142146
int overallocate;
143147

144148
/* Stack buffer */
@@ -153,7 +157,7 @@ typedef struct {
153157
PyAPI_FUNC(void) _PyBytesWriter_Init(_PyBytesWriter *writer);
154158

155159
/* Get the buffer content and reset the writer.
156-
Return a bytes object.
160+
Return a bytes object, or a bytearray object if use_bytearray is non-zero.
157161
Raise an exception and return NULL on error. */
158162
PyAPI_FUNC(PyObject *) _PyBytesWriter_Finish(_PyBytesWriter *writer,
159163
void *str);

Objects/bytesobject.c

Lines changed: 65 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -3852,11 +3852,8 @@ bytes_iter(PyObject *seq)
38523852
void
38533853
_PyBytesWriter_Init(_PyBytesWriter *writer)
38543854
{
3855-
writer->buffer = NULL;
3856-
writer->allocated = 0;
3857-
writer->min_size = 0;
3858-
writer->overallocate = 0;
3859-
writer->use_small_buffer = 0;
3855+
/* Set all attributes before small_buffer to 0 */
3856+
memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
38603857
#ifdef Py_DEBUG
38613858
memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer));
38623859
#endif
@@ -3871,13 +3868,17 @@ _PyBytesWriter_Dealloc(_PyBytesWriter *writer)
38713868
Py_LOCAL_INLINE(char*)
38723869
_PyBytesWriter_AsString(_PyBytesWriter *writer)
38733870
{
3874-
if (!writer->use_small_buffer) {
3871+
if (writer->use_small_buffer) {
3872+
assert(writer->buffer == NULL);
3873+
return writer->small_buffer;
3874+
}
3875+
else if (writer->use_bytearray) {
38753876
assert(writer->buffer != NULL);
3876-
return PyBytes_AS_STRING(writer->buffer);
3877+
return PyByteArray_AS_STRING(writer->buffer);
38773878
}
38783879
else {
3879-
assert(writer->buffer == NULL);
3880-
return writer->small_buffer;
3880+
assert(writer->buffer != NULL);
3881+
return PyBytes_AS_STRING(writer->buffer);
38813882
}
38823883
}
38833884

@@ -3897,18 +3898,28 @@ _PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
38973898
#ifdef Py_DEBUG
38983899
char *start, *end;
38993900

3900-
if (!writer->use_small_buffer) {
3901+
if (writer->use_small_buffer) {
3902+
assert(writer->buffer == NULL);
3903+
}
3904+
else {
39013905
assert(writer->buffer != NULL);
3902-
assert(PyBytes_CheckExact(writer->buffer));
3906+
if (writer->use_bytearray)
3907+
assert(PyByteArray_CheckExact(writer->buffer));
3908+
else
3909+
assert(PyBytes_CheckExact(writer->buffer));
39033910
assert(Py_REFCNT(writer->buffer) == 1);
39043911
}
3905-
else {
3906-
assert(writer->buffer == NULL);
3912+
3913+
if (writer->use_bytearray) {
3914+
/* bytearray has its own overallocation algorithm,
3915+
writer overallocation must be disabled */
3916+
assert(!writer->overallocate);
39073917
}
39083918

3909-
start = _PyBytesWriter_AsString(writer);
3919+
assert(0 <= writer->allocated);
39103920
assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
39113921
/* the last byte must always be null */
3922+
start = _PyBytesWriter_AsString(writer);
39123923
assert(start[writer->allocated] == 0);
39133924

39143925
end = start + writer->allocated;
@@ -3932,8 +3943,7 @@ _PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
39323943

39333944
if (writer->min_size > PY_SSIZE_T_MAX - size) {
39343945
PyErr_NoMemory();
3935-
_PyBytesWriter_Dealloc(writer);
3936-
return NULL;
3946+
goto error;
39373947
}
39383948
writer->min_size += size;
39393949

@@ -3950,23 +3960,38 @@ _PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
39503960

39513961
pos = _PyBytesWriter_GetPos(writer, str);
39523962
if (!writer->use_small_buffer) {
3953-
/* Note: Don't use a bytearray object because the conversion from
3954-
bytearray to bytes requires copying all bytes. */
3955-
if (_PyBytes_Resize(&writer->buffer, allocated)) {
3956-
assert(writer->buffer == NULL);
3957-
return NULL;
3963+
if (writer->use_bytearray) {
3964+
if (PyByteArray_Resize(writer->buffer, allocated))
3965+
goto error;
3966+
/* writer->allocated can be smaller than writer->buffer->ob_alloc,
3967+
but we cannot use ob_alloc because bytes may need to be moved
3968+
to use the whole buffer. bytearray uses an internal optimization
3969+
to avoid moving or copying bytes when bytes are removed at the
3970+
beginning (ex: del bytearray[:1]). */
3971+
}
3972+
else {
3973+
if (_PyBytes_Resize(&writer->buffer, allocated))
3974+
goto error;
39583975
}
39593976
}
39603977
else {
39613978
/* convert from stack buffer to bytes object buffer */
39623979
assert(writer->buffer == NULL);
39633980

3964-
writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
3981+
if (writer->use_bytearray)
3982+
writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3983+
else
3984+
writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
39653985
if (writer->buffer == NULL)
3966-
return NULL;
3986+
goto error;
39673987

39683988
if (pos != 0) {
3969-
Py_MEMCPY(PyBytes_AS_STRING(writer->buffer),
3989+
char *dest;
3990+
if (writer->use_bytearray)
3991+
dest = PyByteArray_AS_STRING(writer->buffer);
3992+
else
3993+
dest = PyBytes_AS_STRING(writer->buffer);
3994+
Py_MEMCPY(dest,
39703995
writer->small_buffer,
39713996
pos);
39723997
}
@@ -3981,6 +4006,10 @@ _PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
39814006
str = _PyBytesWriter_AsString(writer) + pos;
39824007
_PyBytesWriter_CheckConsistency(writer, str);
39834008
return str;
4009+
4010+
error:
4011+
_PyBytesWriter_Dealloc(writer);
4012+
return NULL;
39844013
}
39854014

39864015
/* Allocate the buffer to write size bytes.
@@ -4013,7 +4042,7 @@ _PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
40134042
_PyBytesWriter_CheckConsistency(writer, str);
40144043

40154044
pos = _PyBytesWriter_GetPos(writer, str);
4016-
if (pos == 0) {
4045+
if (pos == 0 && !writer->use_bytearray) {
40174046
Py_CLEAR(writer->buffer);
40184047
/* Get the empty byte string singleton */
40194048
result = PyBytes_FromStringAndSize(NULL, 0);
@@ -4026,9 +4055,17 @@ _PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
40264055
writer->buffer = NULL;
40274056

40284057
if (pos != writer->allocated) {
4029-
if (_PyBytes_Resize(&result, pos)) {
4030-
assert(result == NULL);
4031-
return NULL;
4058+
if (writer->use_bytearray) {
4059+
if (PyByteArray_Resize(result, pos)) {
4060+
Py_DECREF(result);
4061+
return NULL;
4062+
}
4063+
}
4064+
else {
4065+
if (_PyBytes_Resize(&result, pos)) {
4066+
assert(result == NULL);
4067+
return NULL;
4068+
}
40324069
}
40334070
}
40344071
}

0 commit comments

Comments
 (0)