Skip to content

Commit ae71592

Browse files
author
lemburg
committed
Patch #435971: UTF-7 codec by Brian Quinlan.
git-svn-id: http://svn.python.org/projects/python/trunk@23253 6015fed2-1504-0410-9fe1-9d1591cc4771
1 parent 6dfb1c6 commit ae71592

File tree

5 files changed

+392
-1
lines changed

5 files changed

+392
-1
lines changed

Include/unicodeobject.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -607,6 +607,24 @@ extern DL_IMPORT(PyObject*) PyUnicode_AsEncodedString(
607607
const char *errors /* error handling */
608608
);
609609

610+
/* --- UTF-7 Codecs ------------------------------------------------------- */
611+
612+
/* Decode a UTF-7 encoded character string.  The caller supplies the
   encoded buffer, its size, and the name of the error handling scheme;
   the result is handed back as a PyObject*. */
extern DL_IMPORT(PyObject*) PyUnicode_DecodeUTF7(
613+
const char *string, /* UTF-7 encoded string */
614+
int length, /* size of string */
615+
const char *errors /* error handling */
616+
);
617+
618+
/* Encode a buffer of Py_UNICODE characters as UTF-7.  The two integer
   flags let the caller force encoding of the RFC2152 "Set O" characters
   and of whitespace characters respectively; the result is handed back
   as a PyObject*. */
extern DL_IMPORT(PyObject*) PyUnicode_EncodeUTF7(
619+
const Py_UNICODE *data, /* Unicode char buffer */
620+
int length, /* number of Py_UNICODE chars to encode */
621+
int encodeSetO, /* force the encoder to encode characters in
622+
Set O, as described in RFC2152 */
623+
int encodeWhiteSpace, /* force the encoder to encode space, tab,
624+
carriage return and linefeed characters */
625+
const char *errors /* error handling */
626+
);
627+
610628
/* --- UTF-8 Codecs ------------------------------------------------------- */
611629

612630
extern DL_IMPORT(PyObject*) PyUnicode_DecodeUTF8(

Lib/encodings/aliases.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@
1414
'latin': 'latin_1',
1515
'latin1': 'latin_1',
1616

17+
# UTF-7
18+
'utf7': 'utf_7',
19+
'u7': 'utf_7',
20+
1721
# UTF-8
1822
'utf': 'utf_8',
1923
'utf8': 'utf_8',

Lib/test/test_unicode.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,32 @@ def test_fixup(s):
377377
# Test builtin codecs
378378
print 'Testing builtin codecs...',
379379

380+
# UTF-7 specific encoding tests:
381+
utfTests = [(u'A\u2262\u0391.', 'A+ImIDkQ.'), # RFC2152 example
382+
(u'Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'), # RFC2152 example
383+
(u'\u65E5\u672C\u8A9E', '+ZeVnLIqe-'), # RFC2152 example
384+
(u'Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
385+
(u'+', '+-'),
386+
(u'+-', '+--'),
387+
(u'+?', '+-?'),
388+
(u'\?', '+AFw?'),
389+
(u'+?', '+-?'),
390+
(ur'\\?', '+AFwAXA?'),
391+
(ur'\\\?', '+AFwAXABc?'),
392+
(ur'++--', '+-+---')]
393+
394+
for x,y in utfTests:
395+
verify( x.encode('utf-7') == y )
396+
397+
try:
398+
unicode('+3ADYAA-', 'utf-7') # surrogates not supported
399+
except UnicodeError:
400+
pass
401+
else:
402+
raise TestFailed, "unicode('+3ADYAA-', 'utf-7') failed to raise an exception"
403+
404+
verify(unicode('+3ADYAA-', 'utf-7', 'replace') == u'\ufffd')
405+
380406
# UTF-8 specific encoding tests:
381407
verify(u'\u20ac'.encode('utf-8') == \
382408
''.join((chr(0xe2), chr(0x82), chr(0xac))) )
@@ -439,6 +465,7 @@ def __str__(self):
439465
verify(unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x')
440466

441467
verify(u'hello'.encode('ascii') == 'hello')
468+
verify(u'hello'.encode('utf-7') == 'hello')
442469
verify(u'hello'.encode('utf-8') == 'hello')
443470
verify(u'hello'.encode('utf8') == 'hello')
444471
verify(u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000')
@@ -447,7 +474,7 @@ def __str__(self):
447474

448475
# Roundtrip safety for BMP (just the first 1024 chars)
449476
u = u''.join(map(unichr, range(1024)))
450-
for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
477+
for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
451478
'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
452479
verify(unicode(u.encode(encoding),encoding) == u)
453480

Modules/_codecsmodule.c

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,22 @@ unicode_internal_decode(PyObject *self,
123123
}
124124
}
125125

126+
/* _codecs.utf_7_decode(data[, errors])

   Parses a character buffer plus an optional error-handling name out of
   args, runs the buffer through PyUnicode_DecodeUTF7(), and passes the
   decoded object together with the input size to codec_tuple().
   Returns NULL if argument parsing fails. */
static PyObject *
utf_7_decode(PyObject *self, PyObject *args)
{
    const char *errors = NULL;
    const char *buf;
    int buflen;
    PyObject *decoded;

    if (PyArg_ParseTuple(args, "t#|z:utf_7_decode",
                         &buf, &buflen, &errors) == 0)
        return NULL;

    decoded = PyUnicode_DecodeUTF7(buf, buflen, errors);
    return codec_tuple(decoded, buflen);
}
141+
126142
static PyObject *
127143
utf_8_decode(PyObject *self,
128144
PyObject *args)
@@ -381,6 +397,30 @@ unicode_internal_encode(PyObject *self,
381397
}
382398
}
383399

400+
/* _codecs.utf_7_encode(obj[, errors])

   Coerces the first argument to a Unicode object, encodes its character
   buffer with PyUnicode_EncodeUTF7() (both "force encoding" flags are
   passed as 0), and passes the encoded object together with the number
   of Unicode characters to codec_tuple().  Returns NULL if argument
   parsing or the Unicode coercion fails. */
static PyObject *
utf_7_encode(PyObject *self, PyObject *args)
{
    const char *errors = NULL;
    PyObject *input;
    PyObject *unicode;
    PyObject *encoded;
    PyObject *result;

    if (PyArg_ParseTuple(args, "O|z:utf_7_encode",
                         &input, &errors) == 0)
        return NULL;

    /* PyUnicode_FromObject() hands back an object we must release below. */
    unicode = PyUnicode_FromObject(input);
    if (unicode == NULL)
        return NULL;

    encoded = PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(unicode),
                                   PyUnicode_GET_SIZE(unicode),
                                   0,
                                   0,
                                   errors);
    result = codec_tuple(encoded, PyUnicode_GET_SIZE(unicode));

    Py_DECREF(unicode);
    return result;
}
423+
384424
static PyObject *
385425
utf_8_encode(PyObject *self,
386426
PyObject *args)
@@ -632,6 +672,8 @@ static PyMethodDef _codecs_functions[] = {
632672
#ifdef Py_USING_UNICODE
633673
{"utf_8_encode", utf_8_encode, 1},
634674
{"utf_8_decode", utf_8_decode, 1},
675+
{"utf_7_encode", utf_7_encode, 1},
676+
{"utf_7_decode", utf_7_decode, 1},
635677
{"utf_16_encode", utf_16_encode, 1},
636678
{"utf_16_le_encode", utf_16_le_encode, 1},
637679
{"utf_16_be_encode", utf_16_be_encode, 1},

0 commit comments

Comments
 (0)