Skip to content

Commit cc8764c

Browse files
committed
Add C API PyUnicode_FromOrdinal() which exposes unichr() at C level.
u'%c' will now raise a ValueError in case the argument is an integer outside the valid range of Unicode code point ordinals. Closes SF bug #593581.
1 parent 078151d commit cc8764c

5 files changed

Lines changed: 82 additions & 35 deletions

File tree

Include/unicodeobject.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,18 @@ extern DL_IMPORT(int) PyUnicode_AsWideChar(
517517

518518
#endif
519519

520+
/* --- Unicode ordinals --------------------------------------------------- */
521+
522+
/* Create a Unicode Object from the given Unicode code point ordinal.
523+
524+
The ordinal must be in range(0x10000) on narrow Python builds
525+
(UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is
526+
raised in case it is not.
527+
528+
*/
529+
530+
extern DL_IMPORT(PyObject*) PyUnicode_FromOrdinal(int ordinal);
531+
520532
/* === Builtin Codecs =====================================================
521533
522534
Many of these APIs take two arguments encoding and errors. These

Lib/test/test_unicode.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -453,6 +453,14 @@ def test_fixup(s):
453453
else:
454454
verify(value == u'abc, def')
455455

456+
for ordinal in (-100, 0x20000):
457+
try:
458+
u"%c" % ordinal
459+
except ValueError:
460+
pass
461+
else:
462+
print '*** formatting u"%%c" % %i should give a ValueError' % ordinal
463+
456464
# formatting jobs delegated from the string implementation:
457465
verify('...%(foo)s...' % {'foo':u"abc"} == u'...abc...')
458466
verify('...%(foo)s...' % {'foo':"abc"} == '...abc...')

Misc/NEWS

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@ Type/class unification and new-style classes
66

77
Core and builtins
88

9+
- u'%c' will now raise a ValueError in case the argument is an
10+
integer outside the valid range of Unicode code point ordinals.
11+
912
- The tempfile module has been overhauled for enhanced security. The
1013
mktemp() function is now deprecated; new, safe replacements are
1114
mkstemp() (for files) and mkdtemp() (for directories), and the
@@ -437,6 +440,9 @@ Build
437440

438441
C API
439442

443+
- New C API PyUnicode_FromOrdinal() which exposes unichr() at C
444+
level.
445+
440446
- New functions PyErr_SetExcFromWindowsErr() and
441447
PyErr_SetExcFromWindowsErrWithFilename(). Similar to
442448
PyErr_SetFromWindowsErrWithFilename() and

Objects/unicodeobject.c

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,45 @@ int PyUnicode_AsWideChar(PyUnicodeObject *unicode,
390390

391391
#endif
392392

393+
PyObject *PyUnicode_FromOrdinal(int ordinal)
394+
{
395+
Py_UNICODE s[2];
396+
397+
#ifdef Py_UNICODE_WIDE
398+
if (ordinal < 0 || ordinal > 0x10ffff) {
399+
PyErr_SetString(PyExc_ValueError,
400+
"unichr() arg not in range(0x110000) "
401+
"(wide Python build)");
402+
return NULL;
403+
}
404+
#else
405+
if (ordinal < 0 || ordinal > 0xffff) {
406+
PyErr_SetString(PyExc_ValueError,
407+
"unichr() arg not in range(0x10000) "
408+
"(narrow Python build)");
409+
return NULL;
410+
}
411+
#endif
412+
413+
if (ordinal <= 0xffff) {
414+
/* UCS-2 character */
415+
s[0] = (Py_UNICODE) ordinal;
416+
return PyUnicode_FromUnicode(s, 1);
417+
}
418+
else {
419+
#ifndef Py_UNICODE_WIDE
420+
/* UCS-4 character. store as two surrogate characters */
421+
ordinal -= 0x10000L;
422+
s[0] = 0xD800 + (Py_UNICODE) (ordinal >> 10);
423+
s[1] = 0xDC00 + (Py_UNICODE) (ordinal & 0x03FF);
424+
return PyUnicode_FromUnicode(s, 2);
425+
#else
426+
s[0] = (Py_UNICODE)ordinal;
427+
return PyUnicode_FromUnicode(s, 1);
428+
#endif
429+
}
430+
}
431+
393432
PyObject *PyUnicode_FromObject(register PyObject *obj)
394433
{
395434
/* XXX Perhaps we should make this API an alias of
@@ -5373,7 +5412,22 @@ formatchar(Py_UNICODE *buf,
53735412
x = PyInt_AsLong(v);
53745413
if (x == -1 && PyErr_Occurred())
53755414
goto onError;
5376-
buf[0] = (char) x;
5415+
#ifdef Py_UNICODE_WIDE
5416+
if (x < 0 || x > 0x10ffff) {
5417+
PyErr_SetString(PyExc_ValueError,
5418+
"%c arg not in range(0x110000) "
5419+
"(wide Python build)");
5420+
return -1;
5421+
}
5422+
#else
5423+
if (x < 0 || x > 0xffff) {
5424+
PyErr_SetString(PyExc_ValueError,
5425+
"%c arg not in range(0x10000) "
5426+
"(narrow Python build)");
5427+
return -1;
5428+
}
5429+
#endif
5430+
buf[0] = (Py_UNICODE) x;
53775431
}
53785432
buf[1] = '\0';
53795433
return 1;

Python/bltinmodule.c

Lines changed: 1 addition & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -260,44 +260,11 @@ static PyObject *
260260
builtin_unichr(PyObject *self, PyObject *args)
261261
{
262262
long x;
263-
Py_UNICODE s[2];
264263

265264
if (!PyArg_ParseTuple(args, "l:unichr", &x))
266265
return NULL;
267266

268-
#ifdef Py_UNICODE_WIDE
269-
if (x < 0 || x > 0x10ffff) {
270-
PyErr_SetString(PyExc_ValueError,
271-
"unichr() arg not in range(0x110000) "
272-
"(wide Python build)");
273-
return NULL;
274-
}
275-
#else
276-
if (x < 0 || x > 0xffff) {
277-
PyErr_SetString(PyExc_ValueError,
278-
"unichr() arg not in range(0x10000) "
279-
"(narrow Python build)");
280-
return NULL;
281-
}
282-
#endif
283-
284-
if (x <= 0xffff) {
285-
/* UCS-2 character */
286-
s[0] = (Py_UNICODE) x;
287-
return PyUnicode_FromUnicode(s, 1);
288-
}
289-
else {
290-
#ifndef Py_UNICODE_WIDE
291-
/* UCS-4 character. store as two surrogate characters */
292-
x -= 0x10000L;
293-
s[0] = 0xD800 + (Py_UNICODE) (x >> 10);
294-
s[1] = 0xDC00 + (Py_UNICODE) (x & 0x03FF);
295-
return PyUnicode_FromUnicode(s, 2);
296-
#else
297-
s[0] = (Py_UNICODE)x;
298-
return PyUnicode_FromUnicode(s, 1);
299-
#endif
300-
}
267+
return PyUnicode_FromOrdinal(x);
301268
}
302269

303270
PyDoc_STRVAR(unichr_doc,

0 commit comments

Comments
 (0)