3636import codecs
3737import _compat_pickle
3838
39+ from _pickle import PickleBuffer
40+
3941__all__ = ["PickleError" , "PicklingError" , "UnpicklingError" , "Pickler" ,
40- "Unpickler" , "dump" , "dumps" , "load" , "loads" ]
42+ "Unpickler" , "dump" , "dumps" , "load" , "loads" , "PickleBuffer" ]
4143
4244# Shortcut for use in isinstance testing
4345bytes_types = (bytes , bytearray )
5153 "2.0" , # Protocol 2
5254 "3.0" , # Protocol 3
5355 "4.0" , # Protocol 4
56+ "5.0" , # Protocol 5
5457 ] # Old format versions we can read
5558
5659# This is the highest protocol number we know how to read.
57- HIGHEST_PROTOCOL = 4
60+ HIGHEST_PROTOCOL = 5
5861
5962# The protocol we write by default. May be less than HIGHEST_PROTOCOL.
6063# Only bump this if the oldest still supported version of Python already
@@ -167,6 +170,7 @@ def __init__(self, value):
167170SHORT_BINBYTES = b'C' # " " ; " " " " < 256 bytes
168171
169172# Protocol 4
173+
170174SHORT_BINUNICODE = b'\x8c ' # push short string; UTF-8 length < 256 bytes
171175BINUNICODE8 = b'\x8d ' # push very long string
172176BINBYTES8 = b'\x8e ' # push very long bytes string
@@ -178,6 +182,12 @@ def __init__(self, value):
178182MEMOIZE = b'\x94 ' # store top of the stack in memo
179183FRAME = b'\x95 ' # indicate the beginning of a new frame
180184
185+ # Protocol 5
186+
187+ BYTEARRAY8 = b'\x96 ' # push bytearray
188+ NEXT_BUFFER = b'\x97 ' # push next out-of-band buffer
189+ READONLY_BUFFER = b'\x98 ' # make top of stack readonly
190+
181191__all__ .extend ([x for x in dir () if re .match ("[A-Z][A-Z0-9_]+$" , x )])
182192
183193
@@ -251,6 +261,23 @@ def __init__(self, file_read, file_readline, file_tell=None):
251261 self .file_readline = file_readline
252262 self .current_frame = None
253263
def readinto(self, buf):
    """Fill *buf* with pickle data and return the number of bytes stored.

    Data comes from the current frame when one is active.  When the
    frame yields nothing for a non-empty request it is dropped and the
    data is read from the underlying file through ``self.file_read``.

    Unlike a plain ``buf[:] = data`` assignment, a short read from the
    file never resizes *buf*: only the leading ``len(data)`` bytes are
    overwritten and the real byte count is returned, so callers can
    detect truncation.

    Raises UnpicklingError if the current frame ends in the middle of
    the requested data.
    """
    wanted = len(buf)
    if self.current_frame:
        n = self.current_frame.readinto(buf)
        if n != 0 or wanted == 0:
            if n < wanted:
                raise UnpicklingError(
                    "pickle exhausted before end of frame")
            return n
        # The frame produced nothing: discard it and fall back to the file.
        self.current_frame = None
    data = self.file_read(wanted)
    n = len(data)
    # Slice by the amount actually read so a truncated stream cannot
    # shrink the caller's buffer or be reported as a full read.
    buf[:n] = data
    return n
280+
254281 def read (self , n ):
255282 if self .current_frame :
256283 data = self .current_frame .read (n )
@@ -371,7 +398,8 @@ def decode_long(data):
371398
372399class _Pickler :
373400
374- def __init__ (self , file , protocol = None , * , fix_imports = True ):
401+ def __init__ (self , file , protocol = None , * , fix_imports = True ,
402+ buffer_callback = None ):
375403 """This takes a binary file for writing a pickle data stream.
376404
377405 The optional *protocol* argument tells the pickler to use the
@@ -393,13 +421,27 @@ def __init__(self, file, protocol=None, *, fix_imports=True):
393421 will try to map the new Python 3 names to the old module names
394422 used in Python 2, so that the pickle data stream is readable
395423 with Python 2.
424+
425+ If *buffer_callback* is None (the default), buffer views are
426+ serialized into *file* as part of the pickle stream.
427+
428+ If *buffer_callback* is not None, then it can be called any number
429+ of times with a buffer view. If the callback returns a false value
430+ (such as None), the given buffer is out-of-band; otherwise the
431+ buffer is serialized in-band, i.e. inside the pickle stream.
432+
433+ It is an error if *buffer_callback* is not None and *protocol*
434+ is None or smaller than 5.
396435 """
397436 if protocol is None :
398437 protocol = DEFAULT_PROTOCOL
399438 if protocol < 0 :
400439 protocol = HIGHEST_PROTOCOL
401440 elif not 0 <= protocol <= HIGHEST_PROTOCOL :
402441 raise ValueError ("pickle protocol must be <= %d" % HIGHEST_PROTOCOL )
442+ if buffer_callback is not None and protocol < 5 :
443+ raise ValueError ("buffer_callback needs protocol >= 5" )
444+ self ._buffer_callback = buffer_callback
403445 try :
404446 self ._file_write = file .write
405447 except AttributeError :
@@ -756,6 +798,46 @@ def save_bytes(self, obj):
756798 self .memoize (obj )
757799 dispatch [bytes ] = save_bytes
758800
def save_bytearray(self, obj):
    """Pickle the bytearray *obj*.

    Below protocol 5 there is no dedicated opcode, so the object is
    saved through ``save_reduce`` as a call to ``bytearray`` (with no
    argument when *obj* is empty, otherwise with a bytes copy).

    With protocol 5 and above the data is written with the BYTEARRAY8
    opcode followed by an 8-byte little-endian length.  Payloads of at
    least ``self.framer._FRAME_SIZE_TARGET`` bytes go through
    ``self._write_large_bytes``; smaller ones are written in one piece.
    The object is then memoized, as ``save_bytes`` does, so that a
    bytearray referenced several times is pickled once and keeps its
    identity when loaded.
    """
    if self.proto < 5:
        args = (bytes(obj),) if obj else ()
        self.save_reduce(bytearray, args, obj=obj)
        return
    n = len(obj)
    header = BYTEARRAY8 + pack("<Q", n)
    if n >= self.framer._FRAME_SIZE_TARGET:
        self._write_large_bytes(header, obj)
    else:
        self.write(header + obj)
    # NOTE(review): assumes the pickler memo is the ``self.memo`` dict
    # keyed by id(), as in the stdlib pure-Python pickler.  The guard
    # covers direct callers that pass an object already in the memo
    # (e.g. a shared bytes singleton); memoizing it twice would be an
    # error.
    if id(obj) not in self.memo:
        self.memoize(obj)
813+ dispatch [bytearray ] = save_bytearray
814+
def save_picklebuffer(self, obj):
    """Pickle the PickleBuffer *obj*, in-band or out-of-band.

    The buffer is handed to ``self._buffer_callback`` when one is set.
    A false return value makes the buffer out-of-band: only a
    NEXT_BUFFER opcode is written, followed by READONLY_BUFFER when the
    underlying memory is read-only.  Otherwise (true return value, or
    no callback) the data is copied into the stream as bytes for
    read-only memory and as a bytearray for writable memory.

    Raises PicklingError if the protocol is lower than 5 or if the
    buffer is not contiguous.
    """
    if self.proto < 5:
        raise PicklingError("PickleBuffer can only be pickled with "
                            "protocol >= 5")
    with obj.raw() as m:
        if not m.contiguous:
            raise PicklingError("PickleBuffer can not be pickled when "
                                "pointing to a non-contiguous buffer")
        in_band = True
        if self._buffer_callback is not None:
            in_band = bool(self._buffer_callback(obj))
        if in_band:
            # Write data in-band
            # XXX The C implementation avoids a copy here
            if m.readonly:
                self.save_bytes(m.tobytes())
            else:
                self.save_bytearray(m.tobytes())
        else:
            # Write data out-of-band
            self.write(NEXT_BUFFER)
            if m.readonly:
                self.write(READONLY_BUFFER)
838+
839+ dispatch [PickleBuffer ] = save_picklebuffer
840+
759841 def save_str (self , obj ):
760842 if self .bin :
761843 encoded = obj .encode ('utf-8' , 'surrogatepass' )
@@ -1042,7 +1124,7 @@ def save_type(self, obj):
10421124class _Unpickler :
10431125
10441126 def __init__ (self , file , * , fix_imports = True ,
1045- encoding = "ASCII" , errors = "strict" ):
1127+ encoding = "ASCII" , errors = "strict" , buffers = None ):
10461128 """This takes a binary file for reading a pickle data stream.
10471129
10481130 The protocol version of the pickle is detected automatically, so
@@ -1061,7 +1143,17 @@ def __init__(self, file, *, fix_imports=True,
10611143 reading, a BytesIO object, or any other custom object that
10621144 meets this interface.
10631145
1064- Optional keyword arguments are *fix_imports*, *encoding* and
1146+ If *buffers* is not None, it should be an iterable of buffer-enabled
1147+ objects that is consumed each time the pickle stream references
1148+ an out-of-band buffer view. Such buffers have been given in order
1149+ to the *buffer_callback* of a Pickler object.
1150+
1151+ If *buffers* is None (the default), then the buffers are taken
1152+ from the pickle stream, assuming they are serialized there.
1153+ It is an error for *buffers* to be None if the pickle stream
1154+ was produced with a non-None *buffer_callback*.
1155+
1156+ Other optional arguments are *fix_imports*, *encoding* and
10651157 *errors*, which are used to control compatibility support for
10661158 pickle stream generated by Python 2. If *fix_imports* is True,
10671159 pickle will try to map the old Python 2 names to the new names
@@ -1070,6 +1162,7 @@ def __init__(self, file, *, fix_imports=True,
10701162 default to 'ASCII' and 'strict', respectively. *encoding* can be
10711163 'bytes' to read these 8-bit string instances as bytes objects.
10721164 """
1165+ self ._buffers = iter (buffers ) if buffers is not None else None
10731166 self ._file_readline = file .readline
10741167 self ._file_read = file .read
10751168 self .memo = {}
@@ -1090,6 +1183,7 @@ def load(self):
10901183 "%s.__init__()" % (self .__class__ .__name__ ,))
10911184 self ._unframer = _Unframer (self ._file_read , self ._file_readline )
10921185 self .read = self ._unframer .read
1186+ self .readinto = self ._unframer .readinto
10931187 self .readline = self ._unframer .readline
10941188 self .metastack = []
10951189 self .stack = []
@@ -1276,6 +1370,34 @@ def load_binbytes8(self):
12761370 self .append (self .read (len ))
12771371 dispatch [BINBYTES8 [0 ]] = load_binbytes8
12781372
def load_bytearray8(self):
    """Handle BYTEARRAY8: push a bytearray read from the stream.

    The payload is preceded by its length as an 8-byte little-endian
    unsigned integer.  Raises UnpicklingError when that length is
    larger than ``maxsize``.
    """
    (size,) = unpack('<Q', self.read(8))
    if size > maxsize:
        raise UnpicklingError("BYTEARRAY8 exceeds system's maximum size "
                              "of %d bytes" % maxsize)
    # Allocate the result first, then let the stream fill it in place.
    payload = bytearray(size)
    self.readinto(payload)
    self.append(payload)
1381+ dispatch [BYTEARRAY8 [0 ]] = load_bytearray8
1382+
def load_next_buffer(self):
    """Handle NEXT_BUFFER: push the next out-of-band buffer.

    Buffers are taken one at a time from the ``self._buffers``
    iterator.  Raises UnpicklingError when no buffers were supplied or
    when the iterator is exhausted.
    """
    source = self._buffers
    if source is None:
        raise UnpicklingError("pickle stream refers to out-of-band data "
                              "but no *buffers* argument was given")
    try:
        item = next(source)
    except StopIteration:
        raise UnpicklingError("not enough out-of-band buffers")
    else:
        self.append(item)
1392+ dispatch [NEXT_BUFFER [0 ]] = load_next_buffer
1393+
def load_readonly_buffer(self):
    """Handle READONLY_BUFFER: make the top of the stack read-only.

    If the object on top of the stack exposes writable memory, it is
    replaced by a read-only memoryview of it; an object that is already
    read-only is left untouched.
    """
    top = self.stack[-1]
    with memoryview(top) as view:
        if view.readonly:
            return
        self.stack[-1] = view.toreadonly()
1399+ dispatch [READONLY_BUFFER [0 ]] = load_readonly_buffer
1400+
12791401 def load_short_binstring (self ):
12801402 len = self .read (1 )[0 ]
12811403 data = self .read (len )
@@ -1600,25 +1722,29 @@ def load_stop(self):
16001722
16011723# Shorthands
16021724
def _dump(obj, file, protocol=None, *, fix_imports=True, buffer_callback=None):
    """Pickle *obj* to *file* with a one-shot _Pickler.

    All arguments are forwarded unchanged to the _Pickler constructor.
    """
    pickler = _Pickler(file, protocol, fix_imports=fix_imports,
                       buffer_callback=buffer_callback)
    pickler.dump(obj)
16051728
def _dumps(obj, protocol=None, *, fix_imports=True, buffer_callback=None):
    """Return the pickled representation of *obj*.

    The object is pickled into an in-memory buffer by a one-shot
    _Pickler, and the buffer's contents are returned.
    """
    sink = io.BytesIO()
    pickler = _Pickler(sink, protocol, fix_imports=fix_imports,
                       buffer_callback=buffer_callback)
    pickler.dump(obj)
    payload = sink.getvalue()
    assert isinstance(payload, bytes_types)
    return payload
16121736
def _load(file, *, fix_imports=True, encoding="ASCII", errors="strict",
          buffers=None):
    """Read one pickled object from *file* and return it.

    All keyword arguments are forwarded unchanged to the _Unpickler
    constructor.
    """
    unpickler = _Unpickler(file, fix_imports=fix_imports, buffers=buffers,
                           encoding=encoding, errors=errors)
    return unpickler.load()
16161741
def _loads(s, *, fix_imports=True, encoding="ASCII", errors="strict",
           buffers=None):
    """Unpickle and return the object stored in the bytes-like *s*.

    Raises TypeError when *s* is a str.  The data is wrapped in an
    in-memory file and handed to a one-shot _Unpickler together with
    the keyword arguments.
    """
    if isinstance(s, str):
        raise TypeError("Can't load pickle from unicode string")
    unpickler = _Unpickler(io.BytesIO(s), fix_imports=fix_imports,
                           buffers=buffers, encoding=encoding, errors=errors)
    return unpickler.load()
16231749
16241750# Use the faster _pickle if possible
0 commit comments