Skip to content

Commit 18af0a1

Browse files
phenylshima and illia-v authored
Improve speed of BytesQueueBuffer.get() by using memoryview (#3711)
Co-authored-by: femshima <49227365+femshima@users.noreply.github.com> Co-authored-by: Illia Volochii <illia.volochii@gmail.com>
1 parent 1f6abac commit 18af0a1

3 files changed

Lines changed: 31 additions & 3 deletions

File tree

changelog/3710.bugfix.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Improved the performance of content decoding by optimizing ``BytesQueueBuffer`` class.

src/urllib3/response.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ class BytesQueueBuffer:
231231
"""
232232

233233
def __init__(self) -> None:
234-
self.buffer: typing.Deque[bytes] = collections.deque()
234+
self.buffer: typing.Deque[bytes | memoryview[bytes]] = collections.deque()
235235
self._size: int = 0
236236

237237
def __len__(self) -> int:
@@ -256,6 +256,7 @@ def get(self, n: int) -> bytes:
256256
chunk = self.buffer.popleft()
257257
chunk_length = len(chunk)
258258
if remaining < chunk_length:
259+
chunk = memoryview(chunk)
259260
left_chunk, right_chunk = chunk[:remaining], chunk[remaining:]
260261
ret.write(left_chunk)
261262
self.buffer.appendleft(right_chunk)
@@ -278,6 +279,8 @@ def get_all(self) -> bytes:
278279
return b""
279280
if len(buffer) == 1:
280281
result = buffer.pop()
282+
if isinstance(result, memoryview):
283+
result = result.tobytes()
281284
else:
282285
ret = io.BytesIO()
283286
ret.writelines(buffer.popleft() for _ in range(len(buffer)))

test/test_response.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,15 +108,17 @@ def test_get_all_many(self) -> None:
108108
"12.5 MB", current_thread_only=True
109109
) # assert that we're not doubling memory usage
110110
def test_memory_usage(
111-
self, get_func: typing.Callable[[BytesQueueBuffer], str]
111+
self, get_func: typing.Callable[[BytesQueueBuffer], bytes]
112112
) -> None:
113113
# Allocate 10 1MiB chunks
114114
buffer = BytesQueueBuffer()
115115
for i in range(10):
116116
# This allocates 2MiB, putting the max at around 12MiB. Not sure why.
117117
buffer.put(bytes(2**20))
118118

119-
assert len(get_func(buffer)) == 10 * 2**20
119+
result = get_func(buffer)
120+
assert type(result) is bytes
121+
assert len(result) == 10 * 2**20
120122

121123
@pytest.mark.limit_memory("10.01 MB", current_thread_only=True)
122124
def test_get_all_memory_usage_single_chunk(self) -> None:
@@ -125,6 +127,28 @@ def test_get_all_memory_usage_single_chunk(self) -> None:
125127
buffer.put(chunk)
126128
assert buffer.get_all() is chunk
127129

130+
@pytest.mark.parametrize(
131+
"finish_with_get_all",
132+
(True, False),
133+
ids=("finish_with_get_all", "finish_with_get"),
134+
)
135+
@pytest.mark.limit_memory("11.01 MB", current_thread_only=True)
136+
def test_memory_usage_splitting_chunk(self, finish_with_get_all: bool) -> None:
137+
# Allocate a single 10MiB chunk, then read it in two parts.
138+
# Verifies that splitting a chunk doesn't cause additional memory allocation.
139+
buffer = BytesQueueBuffer()
140+
chunk = bytes(10 * 2**20) # 10 MiB
141+
buffer.put(chunk)
142+
for i in range(10):
143+
if finish_with_get_all and i == 9:
144+
result = buffer.get_all()
145+
else:
146+
result = buffer.get(2**20)
147+
assert type(result) is bytes
148+
assert len(result) == 2**20
149+
del result
150+
assert len(buffer) == 0
151+
128152

129153
# A known random (i.e, not-too-compressible) payload generated with:
130154
# "".join(random.choice(string.printable) for i in range(512))

0 commit comments

Comments
 (0)