Skip to content

Commit 8dba4e0

Browse files
author
Victor Stinner
committed
array module uses the new Unicode API
* Use Py_UCS4* buffer instead of Py_UNICODE*
* Use "I" or "L" format, instead of "u" format
1 parent f8bb7d0 commit 8dba4e0

File tree

2 files changed

+36
-35
lines changed

2 files changed

+36
-35
lines changed

Lib/test/test_array.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -218,10 +218,14 @@ def test_buffer_info(self):
218218
self.assertEqual(bi[1], len(a))
219219

220220
def test_byteswap(self):
221-
a = array.array(self.typecode, self.example)
221+
if self.typecode == 'u':
222+
example = '\U00100100'
223+
else:
224+
example = self.example
225+
a = array.array(self.typecode, example)
222226
self.assertRaises(TypeError, a.byteswap, 42)
223227
if a.itemsize in (1, 2, 4, 8):
224-
b = array.array(self.typecode, self.example)
228+
b = array.array(self.typecode, example)
225229
b.byteswap()
226230
if a.itemsize==1:
227231
self.assertEqual(a, b)

Modules/arraymodule.c

Lines changed: 30 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -174,24 +174,25 @@ BB_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
174174
static PyObject *
175175
u_getitem(arrayobject *ap, Py_ssize_t i)
176176
{
177-
return PyUnicode_FromUnicode(&((Py_UNICODE *) ap->ob_item)[i], 1);
177+
return PyUnicode_FromOrdinal(((Py_UCS4 *) ap->ob_item)[i]);
178178
}
179179

180180
static int
181181
u_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
182182
{
183-
Py_UNICODE *p;
184-
Py_ssize_t len;
183+
PyObject *p;
185184

186-
if (!PyArg_Parse(v, "u#;array item must be unicode character", &p, &len))
185+
if (!PyArg_Parse(v, "U;array item must be unicode character", &p))
186+
return -1;
187+
if (PyUnicode_READY(p))
187188
return -1;
188-
if (len != 1) {
189+
if (PyUnicode_GET_LENGTH(p) != 1) {
189190
PyErr_SetString(PyExc_TypeError,
190191
"array item must be unicode character");
191192
return -1;
192193
}
193194
if (i >= 0)
194-
((Py_UNICODE *)ap->ob_item)[i] = p[0];
195+
((Py_UCS4 *)ap->ob_item)[i] = PyUnicode_READ_CHAR(p, 0);
195196
return 0;
196197
}
197198

@@ -443,6 +444,13 @@ d_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
443444
return 0;
444445
}
445446

447+
#if SIZEOF_INT == 4
448+
# define STRUCT_LONG_FORMAT "I"
449+
#elif SIZEOF_LONG == 4
450+
# define STRUCT_LONG_FORMAT "L"
451+
#else
452+
# error "Unable to get struct format for Py_UCS4"
453+
#endif
446454

447455
/* Description of types.
448456
*
@@ -452,7 +460,7 @@ d_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
452460
static struct arraydescr descriptors[] = {
453461
{'b', 1, b_getitem, b_setitem, "b", 1, 1},
454462
{'B', 1, BB_getitem, BB_setitem, "B", 1, 0},
455-
{'u', sizeof(Py_UNICODE), u_getitem, u_setitem, "u", 0, 0},
463+
{'u', sizeof(Py_UCS4), u_getitem, u_setitem, STRUCT_LONG_FORMAT, 0, 0},
456464
{'h', sizeof(short), h_getitem, h_setitem, "h", 1, 1},
457465
{'H', sizeof(short), HH_getitem, HH_setitem, "H", 1, 0},
458466
{'i', sizeof(int), i_getitem, i_setitem, "i", 1, 1},
@@ -1508,25 +1516,26 @@ This method is deprecated. Use tobytes instead.");
15081516
static PyObject *
15091517
array_fromunicode(arrayobject *self, PyObject *args)
15101518
{
1511-
Py_UNICODE *ustr;
1519+
PyObject *ustr;
15121520
Py_ssize_t n;
1513-
char typecode;
15141521

1515-
if (!PyArg_ParseTuple(args, "u#:fromunicode", &ustr, &n))
1522+
if (!PyArg_ParseTuple(args, "U:fromunicode", &ustr))
15161523
return NULL;
1517-
typecode = self->ob_descr->typecode;
1518-
if ((typecode != 'u')) {
1524+
if (self->ob_descr->typecode != 'u') {
15191525
PyErr_SetString(PyExc_ValueError,
15201526
"fromunicode() may only be called on "
15211527
"unicode type arrays");
15221528
return NULL;
15231529
}
1530+
if (PyUnicode_READY(ustr))
1531+
return NULL;
1532+
n = PyUnicode_GET_LENGTH(ustr);
15241533
if (n > 0) {
15251534
Py_ssize_t old_size = Py_SIZE(self);
15261535
if (array_resize(self, old_size + n) == -1)
15271536
return NULL;
1528-
memcpy(self->ob_item + old_size * sizeof(Py_UNICODE),
1529-
ustr, n * sizeof(Py_UNICODE));
1537+
if (!PyUnicode_AsUCS4(ustr, (Py_UCS4 *)self->ob_item + old_size, n, 0))
1538+
return NULL;
15301539
}
15311540

15321541
Py_INCREF(Py_None);
@@ -1545,14 +1554,14 @@ append Unicode data to an array of some other type.");
15451554
static PyObject *
15461555
array_tounicode(arrayobject *self, PyObject *unused)
15471556
{
1548-
char typecode;
1549-
typecode = self->ob_descr->typecode;
1550-
if ((typecode != 'u')) {
1557+
if (self->ob_descr->typecode != 'u') {
15511558
PyErr_SetString(PyExc_ValueError,
15521559
"tounicode() may only be called on unicode type arrays");
15531560
return NULL;
15541561
}
1555-
return PyUnicode_FromUnicode((Py_UNICODE *) self->ob_item, Py_SIZE(self));
1562+
return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1563+
(Py_UCS4 *) self->ob_item,
1564+
Py_SIZE(self));
15561565
}
15571566

15581567
PyDoc_STRVAR(tounicode_doc,
@@ -1659,13 +1668,7 @@ typecode_to_mformat_code(char typecode)
16591668
return UNSIGNED_INT8;
16601669

16611670
case 'u':
1662-
if (sizeof(Py_UNICODE) == 2) {
1663-
return UTF16_LE + is_big_endian;
1664-
}
1665-
if (sizeof(Py_UNICODE) == 4) {
1666-
return UTF32_LE + is_big_endian;
1667-
}
1668-
return UNKNOWN_FORMAT;
1671+
return UTF32_LE + is_big_endian;
16691672

16701673
case 'f':
16711674
if (sizeof(float) == 4) {
@@ -2411,14 +2414,8 @@ array_buffer_getbuf(arrayobject *self, Py_buffer *view, int flags)
24112414
view->strides = &(view->itemsize);
24122415
view->format = NULL;
24132416
view->internal = NULL;
2414-
if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT) {
2417+
if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT)
24152418
view->format = self->ob_descr->formats;
2416-
#ifdef Py_UNICODE_WIDE
2417-
if (self->ob_descr->typecode == 'u') {
2418-
view->format = "w";
2419-
}
2420-
#endif
2421-
}
24222419

24232420
finish:
24242421
self->ob_exports++;
@@ -2543,7 +2540,7 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
25432540
return NULL;
25442541
}
25452542
self->ob_item = item;
2546-
Py_SIZE(self) = n / sizeof(Py_UNICODE);
2543+
Py_SIZE(self) = n / sizeof(Py_UCS4);
25472544
memcpy(item, PyUnicode_AS_DATA(initial), n);
25482545
self->allocated = Py_SIZE(self);
25492546
}

0 commit comments

Comments
 (0)