Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions Lib/pickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ def __init__(self, value):

class _Framer:

_FRAME_SIZE_MIN = 4
_FRAME_SIZE_TARGET = 64 * 1024

def __init__(self, file_write):
Expand All @@ -203,11 +204,12 @@ def commit_frame(self, force=False):
if f.tell() >= self._FRAME_SIZE_TARGET or force:
data = f.getbuffer()
write = self.file_write
# Issue a single call to the write method of the underlying
# file object for the frame opcode with the size of the
# frame. The concatenation is expected to be less expensive
# than issuing an additional call to write.
write(FRAME + pack("<Q", len(data)))
if len(data) >= self._FRAME_SIZE_MIN:
# Issue a single call to the write method of the underlying
# file object for the frame opcode with the size of the
# frame. The concatenation is expected to be less expensive
# than issuing an additional call to write.
write(FRAME + pack("<Q", len(data)))

# Issue a separate call to write to append the frame
# contents without concatenation to the above to avoid a
Expand Down
82 changes: 45 additions & 37 deletions Lib/test/pickletester.py
Original file line number Diff line number Diff line change
Expand Up @@ -2037,6 +2037,7 @@ def test_setitems_on_non_dicts(self):

# Exercise framing (proto >= 4) for significant workloads

FRAME_SIZE_MIN = 4
FRAME_SIZE_TARGET = 64 * 1024

def check_frame_opcodes(self, pickled):
Expand All @@ -2047,36 +2048,43 @@ def check_frame_opcodes(self, pickled):
framed by default and are therefore considered a frame by themselves in
the following consistency check.
"""
last_arg = last_pos = last_frame_opcode_size = None
frameless_opcode_sizes = {
'BINBYTES': 5,
'BINUNICODE': 5,
'BINBYTES8': 9,
'BINUNICODE8': 9,
}
frame_end = frameless_start = None
frameless_opcodes = {'BINBYTES', 'BINUNICODE', 'BINBYTES8', 'BINUNICODE8'}
for op, arg, pos in pickletools.genops(pickled):
if op.name in frameless_opcode_sizes:
if len(arg) > self.FRAME_SIZE_TARGET:
frame_opcode_size = frameless_opcode_sizes[op.name]
arg = len(arg)
else:
continue
elif op.name == 'FRAME':
frame_opcode_size = 9
else:
continue

if last_pos is not None:
# The previous frame's size should be equal to the number
# of bytes up to the current frame.
frame_size = pos - last_pos - last_frame_opcode_size
self.assertEqual(frame_size, last_arg)
last_arg, last_pos = arg, pos
last_frame_opcode_size = frame_opcode_size
# The last frame's size should be equal to the number of bytes up
# to the pickle's end.
frame_size = len(pickled) - last_pos - last_frame_opcode_size
self.assertEqual(frame_size, last_arg)
if frame_end is not None:
self.assertLessEqual(pos, frame_end)
if pos == frame_end:
frame_end = None

if frame_end is not None: # framed
self.assertNotEqual(op.name, 'FRAME')
if op.name in frameless_opcodes:
# Only short bytes and str objects should be written
# in a frame
self.assertLessEqual(len(arg), self.FRAME_SIZE_TARGET)

else: # not framed
if (op.name == 'FRAME' or
(op.name in frameless_opcodes and
len(arg) > self.FRAME_SIZE_TARGET)):
# Frame or large bytes or str object
if frameless_start is not None:
# Only short data should be written outside of a frame
self.assertLess(pos - frameless_start,
self.FRAME_SIZE_MIN)
frameless_start = None
elif frameless_start is None and op.name != 'PROTO':
frameless_start = pos

if op.name == 'FRAME':
self.assertGreaterEqual(arg, self.FRAME_SIZE_MIN)
frame_end = pos + 9 + arg

pos = len(pickled)
if frame_end is not None:
self.assertEqual(frame_end, pos)
elif frameless_start is not None:
self.assertLess(pos - frameless_start, self.FRAME_SIZE_MIN)

def test_framing_many_objects(self):
obj = list(range(10**5))
Expand All @@ -2095,7 +2103,8 @@ def test_framing_many_objects(self):

def test_framing_large_objects(self):
N = 1024 * 1024
obj = [b'x' * N, b'y' * N, 'z' * N]
small_items = [[i] for i in range(10)]
obj = [b'x' * N, *small_items, b'y' * N, 'z' * N]
for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
for fast in [False, True]:
with self.subTest(proto=proto, fast=fast):
Expand All @@ -2119,12 +2128,9 @@ def test_framing_large_objects(self):
# Perform full equality check if the lengths match.
self.assertEqual(obj, unpickled)
n_frames = count_opcode(pickle.FRAME, pickled)
if not fast:
# One frame per memoize for each large object.
self.assertGreaterEqual(n_frames, len(obj))
else:
# One frame at the beginning and one at the end.
self.assertGreaterEqual(n_frames, 2)
# A single frame for small objects between
# first two large objects.
self.assertEqual(n_frames, 1)
self.check_frame_opcodes(pickled)

def test_optional_frames(self):
Expand Down Expand Up @@ -2152,7 +2158,9 @@ def remove_frames(pickled, keep_frame=None):

frame_size = self.FRAME_SIZE_TARGET
num_frames = 20
obj = [bytes([i]) * frame_size for i in range(num_frames)]
# Large byte objects (dict values) intermitted with small objects
# (dict keys)
obj = {i: bytes([i]) * frame_size for i in range(num_frames)}

for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
pickled = self.dumps(obj, proto)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Pickling with protocol 4 no longer creates too small frames.
18 changes: 9 additions & 9 deletions Modules/_pickle.c
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,8 @@ enum {
/* Prefetch size when unpickling (disabled on unpeekable streams) */
PREFETCH = 8192 * 16,

FRAME_SIZE_MIN = 4,
FRAME_SIZE_TARGET = 64 * 1024,

FRAME_HEADER_SIZE = 9
};

Expand Down Expand Up @@ -949,13 +949,6 @@ _write_size64(char *out, size_t value)
}
}

static void
_Pickler_WriteFrameHeader(PicklerObject *self, char *qdata, size_t frame_len)
{
qdata[0] = FRAME;
_write_size64(qdata + 1, frame_len);
}

static int
_Pickler_CommitFrame(PicklerObject *self)
{
Expand All @@ -966,7 +959,14 @@ _Pickler_CommitFrame(PicklerObject *self)
return 0;
frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
qdata = PyBytes_AS_STRING(self->output_buffer) + self->frame_start;
_Pickler_WriteFrameHeader(self, qdata, frame_len);
if (frame_len >= FRAME_SIZE_MIN) {
qdata[0] = FRAME;
_write_size64(qdata + 1, frame_len);
}
else {
memmove(qdata, qdata + FRAME_HEADER_SIZE, frame_len);
self->output_len -= FRAME_HEADER_SIZE;
}
self->frame_start = -1;
return 0;
}
Expand Down