Skip to content

Commit 47a00f3

Browse files
committed
support encoding error handlers that return bytes (closes python#16585)
1 parent aff4723 commit 47a00f3

3 files changed

Lines changed: 13 additions & 2 deletions

File tree

Lib/test/test_multibytecodec.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,10 @@ def test_errorcallback_longindex(self):
4545
self.assertRaises(IndexError, dec,
4646
b'apple\x92ham\x93spam', 'test.cjktest')
4747

48+
def test_errorhandler_returns_bytes(self):
49+
enc = "\u30fb\udc80".encode('gb18030', 'surrogateescape')
50+
self.assertEqual(enc, b'\x819\xa79\x80')
51+
4852
def test_codingspec(self):
4953
try:
5054
for enc in ALL_CJKENCODINGS:

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -98,6 +98,9 @@ Core and Builtins
9898
Library
9999
-------
100100

101+
- Issue #16585: Make CJK encoders support error handlers that return bytes per
102+
PEP 383.
103+
101104
- Issue #10182: The re module doesn't truncate indices to 32 bits anymore.
102105
Patch by Serhiy Storchaka.
103106

Modules/cjkcodecs/multibytecodec.c

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -316,15 +316,15 @@ multibytecodec_encerror(MultibyteCodec *codec,
316316
goto errorexit;
317317

318318
if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
319-
!PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) ||
319+
(!PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) && !PyBytes_Check(tobj)) ||
320320
!PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
321321
PyErr_SetString(PyExc_TypeError,
322322
"encoding error handler must return "
323323
"(str, int) tuple");
324324
goto errorexit;
325325
}
326326

327-
{
327+
if (PyUnicode_Check(tobj)) {
328328
const Py_UNICODE *uraw = PyUnicode_AS_UNICODE(tobj);
329329

330330
retstr = multibytecodec_encode(codec, state, &uraw,
@@ -333,6 +333,10 @@ multibytecodec_encerror(MultibyteCodec *codec,
333333
if (retstr == NULL)
334334
goto errorexit;
335335
}
336+
else {
337+
Py_INCREF(tobj);
338+
retstr = tobj;
339+
}
336340

337341
assert(PyBytes_Check(retstr));
338342
retstrsize = PyBytes_GET_SIZE(retstr);

0 commit comments

Comments
 (0)