Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ChangeLog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Release Date: TBD

* Remove Python 2 support from ``msgpack/_cmsgpack``.
``msgpack/fallback`` still supports Python 2.
* Remove encoding and unicode_errors options from the Packer.
* Remove ``encoding`` option from the Packer.


0.6.2
Expand Down
34 changes: 27 additions & 7 deletions msgpack/_packer.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,15 @@ cdef class Packer(object):
Additionally tuples will not be serialized as lists.
This is useful when trying to implement accurate serialization
for python types.

:param str unicode_errors:
The error handler for encoding unicode. (default: 'strict')
DO NOT USE THIS!! This option is kept for very specific usage.
"""
cdef msgpack_packer pk
cdef object _default
cdef object _berrors
cdef const char *unicode_errors
cdef bint strict_types
cdef bool use_float
cdef bint autoreset
Expand All @@ -104,10 +110,8 @@ cdef class Packer(object):
self.pk.buf_size = buf_size
self.pk.length = 0

def __init__(self, default=None,
bint use_single_float=False,
bint autoreset=True,
bint use_bin_type=False,
def __init__(self, *, default=None, unicode_errors=None,
bint use_single_float=False, bint autoreset=True, bint use_bin_type=False,
bint strict_types=False):
self.use_float = use_single_float
self.strict_types = strict_types
Expand All @@ -118,6 +122,12 @@ cdef class Packer(object):
raise TypeError("default must be a callable.")
self._default = default

self._berrors = unicode_errors
if unicode_errors is None:
self.unicode_errors = NULL
else:
self.unicode_errors = self._berrors

def __dealloc__(self):
PyMem_Free(self.pk.buf)
self.pk.buf = NULL
Expand Down Expand Up @@ -183,9 +193,19 @@ cdef class Packer(object):
if ret == 0:
ret = msgpack_pack_raw_body(&self.pk, rawval, L)
elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o):
ret = msgpack_pack_unicode(&self.pk, o, ITEM_LIMIT);
if ret == -2:
raise ValueError("unicode string is too large")
if self.unicode_errors == NULL:
ret = msgpack_pack_unicode(&self.pk, o, ITEM_LIMIT);
if ret == -2:
raise ValueError("unicode string is too large")
else:
o = PyUnicode_AsEncodedString(o, NULL, self.unicode_errors)
L = Py_SIZE(o)
if L > ITEM_LIMIT:
raise ValueError("unicode string is too large")
ret = msgpack_pack_raw(&self.pk, L)
if ret == 0:
rawval = o
ret = msgpack_pack_raw_body(&self.pk, rawval, L)
elif PyDict_CheckExact(o):
d = <dict>o
L = len(d)
Expand Down
11 changes: 8 additions & 3 deletions msgpack/fallback.py
Original file line number Diff line number Diff line change
Expand Up @@ -667,7 +667,7 @@ def _unpack(self, execute=EX_CONSTRUCT):
elif self._raw:
obj = bytes(obj)
else:
obj = obj.decode('utf_8')
obj = obj.decode('utf_8', self._unicode_errors)
return obj
if typ == TYPE_EXT:
return self._ext_hook(n, bytes(obj))
Expand Down Expand Up @@ -752,14 +752,19 @@ class Packer(object):
Additionally tuples will not be serialized as lists.
This is useful when trying to implement accurate serialization
for python types.

:param str unicode_errors:
The error handler for encoding unicode. (default: 'strict')
DO NOT USE THIS!! This option is kept for very specific usage.
"""
def __init__(self, default=None,
def __init__(self, default=None, unicode_errors=None,
use_single_float=False, autoreset=True, use_bin_type=False,
strict_types=False):
self._strict_types = strict_types
self._use_float = use_single_float
self._autoreset = autoreset
self._use_bin_type = use_bin_type
self._unicode_errors = unicode_errors or "strict"
self._buffer = StringIO()
if default is not None:
if not callable(default):
Expand Down Expand Up @@ -816,7 +821,7 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT,
self._pack_bin_header(n)
return self._buffer.write(obj)
if check(obj, unicode):
obj = obj.encode("utf-8")
obj = obj.encode("utf-8", self._unicode_errors)
n = len(obj)
if n >= 2**32:
raise ValueError("String is too large")
Expand Down
16 changes: 14 additions & 2 deletions test/test_pack.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from collections import OrderedDict
from io import BytesIO
import struct
import sys

import pytest
from pytest import raises, xfail
Expand Down Expand Up @@ -54,13 +55,24 @@ def testPackByteArrays():
for td in test_data:
check(td)

@pytest.mark.skipif(sys.version_info < (3, 0), reason="Python 2 passes invalid surrogates")
def testIgnoreUnicodeErrors():
    """Unpacking with unicode_errors='ignore' drops the undecodable byte."""
    # b'\xed' is not followed by valid UTF-8 continuation bytes, so it cannot
    # be decoded; the 'ignore' handler is expected to discard it.
    packed = packb(b'abc\xeddef', use_bin_type=False)
    decoded = unpackb(packed, raw=False, unicode_errors='ignore')
    assert decoded == "abcdef"

def testStrictUnicodeUnpack():
    """Unpacking invalid UTF-8 with raw=False and no error handler must raise.

    The payload is packed exactly once; an earlier duplicate ``packb`` call
    whose result was immediately overwritten has been removed.
    """
    # use_bin_type=False is passed explicitly so the test does not depend on
    # the packer's default for that option.
    packed = packb(b'abc\xeddef', use_bin_type=False)
    with pytest.raises(UnicodeDecodeError):
        unpackb(packed, raw=False, use_list=1)

@pytest.mark.skipif(sys.version_info < (3, 0), reason="Python 2 passes invalid surrogates")
def testIgnoreErrorsPack():
    """Packing with unicode_errors='ignore' drops characters that cannot be encoded."""
    # The two lone surrogates (U+DC80, U+DCFF) cannot be encoded as UTF-8;
    # with 'ignore' only the surrounding ASCII text should survive.
    packed = packb(u"abc\uDC80\uDCFFdef", use_bin_type=True, unicode_errors='ignore')
    decoded = unpackb(packed, raw=False, use_list=1)
    assert decoded == "abcdef"

def testDecodeBinary():
    """A packed bytes payload unpacks back to the same bytes object value.

    Only one unpack call is made; a preceding call that passed
    ``encoding=None`` and whose result was immediately overwritten has been
    removed.
    """
    unpacked = unpackb(packb(b"abc"), use_list=1)
    assert unpacked == b"abc"

def testPackFloat():
Expand Down