Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Revert "Remove encoding/unicode_errors options from Packer (#378)"
This reverts commit e1ed004.
  • Loading branch information
methane committed Dec 3, 2019
commit fd83111f750a9c4ebd801a50fe847f51e6eceabd
46 changes: 39 additions & 7 deletions msgpack/_packer.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,19 @@ cdef class Packer(object):
Additionally tuples will not be serialized as lists.
This is useful when trying to implement accurate serialization
for python types.

:param str unicode_errors:
Error handler for encoding unicode. (default: 'strict')

:param str encoding:
(deprecated) Convert unicode to bytes with this encoding. (default: 'utf-8')
"""
cdef msgpack_packer pk
cdef object _default
cdef object _bencoding
cdef object _berrors
cdef const char *encoding
cdef const char *unicode_errors
cdef bint strict_types
cdef bool use_float
cdef bint autoreset
Expand All @@ -104,11 +114,11 @@ cdef class Packer(object):
self.pk.buf_size = buf_size
self.pk.length = 0

def __init__(self, default=None,
bint use_single_float=False,
bint autoreset=True,
bint use_bin_type=False,
def __init__(self, default=None, encoding=None, unicode_errors=None,
bint use_single_float=False, bint autoreset=True, bint use_bin_type=False,
bint strict_types=False):
if encoding is not None:
PyErr_WarnEx(DeprecationWarning, "encoding is deprecated.", 1)
self.use_float = use_single_float
self.strict_types = strict_types
self.autoreset = autoreset
Expand All @@ -118,6 +128,18 @@ cdef class Packer(object):
raise TypeError("default must be a callable.")
self._default = default

self._bencoding = encoding
if encoding is None:
self.encoding = 'utf-8'
else:
self.encoding = self._bencoding

self._berrors = unicode_errors
if unicode_errors is None:
self.unicode_errors = NULL
else:
self.unicode_errors = self._berrors

def __dealloc__(self):
PyMem_Free(self.pk.buf)
self.pk.buf = NULL
Expand Down Expand Up @@ -183,9 +205,19 @@ cdef class Packer(object):
if ret == 0:
ret = msgpack_pack_raw_body(&self.pk, rawval, L)
elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o):
ret = msgpack_pack_unicode(&self.pk, o, ITEM_LIMIT);
if ret == -2:
raise ValueError("unicode string is too large")
if self.encoding == NULL and self.unicode_errors == NULL:
ret = msgpack_pack_unicode(&self.pk, o, ITEM_LIMIT);
if ret == -2:
raise ValueError("unicode string is too large")
else:
o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors)
L = Py_SIZE(o)
if L > ITEM_LIMIT:
raise ValueError("unicode string is too large")
ret = msgpack_pack_raw(&self.pk, L)
if ret == 0:
rawval = o
ret = msgpack_pack_raw_body(&self.pk, rawval, L)
elif PyDict_CheckExact(o):
d = <dict>o
L = len(d)
Expand Down
26 changes: 24 additions & 2 deletions msgpack/fallback.py
Original file line number Diff line number Diff line change
Expand Up @@ -752,14 +752,32 @@ class Packer(object):
Additionally tuples will not be serialized as lists.
This is useful when trying to implement accurate serialization
for python types.

:param str encoding:
(deprecated) Convert unicode to bytes with this encoding. (default: 'utf-8')

:param str unicode_errors:
Error handler for encoding unicode. (default: 'strict')
"""
def __init__(self, default=None,
def __init__(self, default=None, encoding=None, unicode_errors=None,
use_single_float=False, autoreset=True, use_bin_type=False,
strict_types=False):
if encoding is None:
encoding = 'utf_8'
else:
warnings.warn(
"encoding is deprecated, Use raw=False instead.",
DeprecationWarning, stacklevel=2)

if unicode_errors is None:
unicode_errors = 'strict'

self._strict_types = strict_types
self._use_float = use_single_float
self._autoreset = autoreset
self._use_bin_type = use_bin_type
self._encoding = encoding
self._unicode_errors = unicode_errors
self._buffer = StringIO()
if default is not None:
if not callable(default):
Expand Down Expand Up @@ -816,7 +834,11 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT,
self._pack_bin_header(n)
return self._buffer.write(obj)
if check(obj, unicode):
obj = obj.encode("utf-8")
if self._encoding is None:
raise TypeError(
"Can't encode unicode string: "
"no encoding is specified")
obj = obj.encode(self._encoding, self._unicode_errors)
n = len(obj)
if n >= 2**32:
raise ValueError("String is too large")
Expand Down
30 changes: 30 additions & 0 deletions test/test_pack.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,21 @@ def testPackUnicode():
re = Unpacker(BytesIO(data), raw=False, use_list=1).unpack()
assert re == td

def testPackUTF32():  # deprecated
    """Round-trip values through the deprecated ``encoding='utf-32'`` option.

    Every pack/unpack round trip is expected to emit a deprecation
    warning and to give back a value equal to the input.  A LookupError
    raised anywhere in the loop is handed to ``xfail``.
    """
    samples = [
        "",
        "abcd",
        ["defgh"],
        "Русский текст",
    ]
    try:
        for sample in samples:
            # Both calls stay inside the deprecated_call() context, exactly
            # like the single nested expression they replace.
            with pytest.deprecated_call():
                packed = packb(sample, encoding='utf-32')
                roundtripped = unpackb(packed, use_list=1, encoding='utf-32')
            assert roundtripped == sample
    except LookupError as e:
        xfail(e)

def testPackBytes():
test_data = [
b"", b"abcd", (b"defgh",),
Expand All @@ -54,11 +69,26 @@ def testPackByteArrays():
for td in test_data:
check(td)

def testIgnoreUnicodeErrors():  # deprecated
    """Unpacking with ``unicode_errors='ignore'`` drops the undecodable byte.

    The payload contains the byte 0xED, which is not valid UTF-8 on its
    own; the decoded result is expected to be the text without it.
    """
    with pytest.deprecated_call():
        packed = packb(b'abc\xeddef')
        decoded = unpackb(packed, encoding='utf-8', unicode_errors='ignore', use_list=1)
    assert decoded == "abcdef"

def testStrictUnicodeUnpack():
    """Unpacking bytes that are not valid UTF-8 with ``raw=False`` must raise."""
    # Pack outside the raises() block so only the unpack call is under test.
    payload = packb(b'abc\xeddef')
    with pytest.raises(UnicodeDecodeError):
        unpackb(payload, raw=False, use_list=1)

def testStrictUnicodePack():  # deprecated
    """Packing non-ASCII text as ASCII with strict error handling must raise."""
    # A single ``with`` holding both managers is equivalent to the nested
    # form: raises() is entered first and exited last.
    with raises(UnicodeEncodeError), pytest.deprecated_call():
        packb("abc\xeddef", encoding='ascii', unicode_errors='strict')

def testIgnoreErrorsPack():  # deprecated
    """Packing as ASCII with ``unicode_errors='ignore'`` drops non-ASCII chars."""
    with pytest.deprecated_call():
        packed = packb("abcФФФdef", encoding='ascii', unicode_errors='ignore')
        unpacked = unpackb(packed, raw=False, use_list=1)
    assert unpacked == "abcdef"

def testDecodeBinary():
    """Bytes packed and then unpacked with ``encoding=None`` come back as bytes."""
    packed = packb(b"abc")
    re = unpackb(packed, encoding=None, use_list=1)
    assert re == b"abc"
Expand Down