@@ -174,24 +174,25 @@ BB_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
174174static PyObject *
175175u_getitem (arrayobject * ap , Py_ssize_t i )
176176{
177- return PyUnicode_FromUnicode ( & (( Py_UNICODE * ) ap -> ob_item )[i ], 1 );
177+ return PyUnicode_FromOrdinal ((( Py_UCS4 * ) ap -> ob_item )[i ]);
178178}
179179
180180static int
181181u_setitem (arrayobject * ap , Py_ssize_t i , PyObject * v )
182182{
183- Py_UNICODE * p ;
184- Py_ssize_t len ;
183+ PyObject * p ;
185184
186- if (!PyArg_Parse (v , "u#;array item must be unicode character" , & p , & len ))
185+ if (!PyArg_Parse (v , "U;array item must be unicode character" , & p ))
186+ return -1 ;
187+ if (PyUnicode_READY (p ))
187188 return -1 ;
188- if (len != 1 ) {
189+ if (PyUnicode_GET_LENGTH ( p ) != 1 ) {
189190 PyErr_SetString (PyExc_TypeError ,
190191 "array item must be unicode character" );
191192 return -1 ;
192193 }
193194 if (i >= 0 )
194- ((Py_UNICODE * )ap -> ob_item )[i ] = p [ 0 ] ;
195+ ((Py_UCS4 * )ap -> ob_item )[i ] = PyUnicode_READ_CHAR ( p , 0 ) ;
195196 return 0 ;
196197}
197198
@@ -443,6 +444,13 @@ d_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
443444 return 0 ;
444445}
445446
447+ #if SIZEOF_INT == 4
448+ # define STRUCT_LONG_FORMAT "I"
449+ #elif SIZEOF_LONG == 4
450+ # define STRUCT_LONG_FORMAT "L"
451+ #else
452+ # error "Unable to get struct format for Py_UCS4"
453+ #endif
446454
447455/* Description of types.
448456 *
@@ -452,7 +460,7 @@ d_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
452460static struct arraydescr descriptors [] = {
453461 {'b' , 1 , b_getitem , b_setitem , "b" , 1 , 1 },
454462 {'B' , 1 , BB_getitem , BB_setitem , "B" , 1 , 0 },
455- {'u' , sizeof (Py_UNICODE ), u_getitem , u_setitem , "u" , 0 , 0 },
463+ {'u' , sizeof (Py_UCS4 ), u_getitem , u_setitem , STRUCT_LONG_FORMAT , 0 , 0 },
456464 {'h' , sizeof (short ), h_getitem , h_setitem , "h" , 1 , 1 },
457465 {'H' , sizeof (short ), HH_getitem , HH_setitem , "H" , 1 , 0 },
458466 {'i' , sizeof (int ), i_getitem , i_setitem , "i" , 1 , 1 },
@@ -1508,25 +1516,26 @@ This method is deprecated. Use tobytes instead.");
15081516static PyObject *
15091517array_fromunicode (arrayobject * self , PyObject * args )
15101518{
1511- Py_UNICODE * ustr ;
1519+ PyObject * ustr ;
15121520 Py_ssize_t n ;
1513- char typecode ;
15141521
1515- if (!PyArg_ParseTuple (args , "u# :fromunicode" , & ustr , & n ))
1522+ if (!PyArg_ParseTuple (args , "U :fromunicode" , & ustr ))
15161523 return NULL ;
1517- typecode = self -> ob_descr -> typecode ;
1518- if ((typecode != 'u' )) {
1524+ if (self -> ob_descr -> typecode != 'u' ) {
15191525 PyErr_SetString (PyExc_ValueError ,
15201526 "fromunicode() may only be called on "
15211527 "unicode type arrays" );
15221528 return NULL ;
15231529 }
1530+ if (PyUnicode_READY (ustr ))
1531+ return NULL ;
1532+ n = PyUnicode_GET_LENGTH (ustr );
15241533 if (n > 0 ) {
15251534 Py_ssize_t old_size = Py_SIZE (self );
15261535 if (array_resize (self , old_size + n ) == -1 )
15271536 return NULL ;
1528- memcpy ( self -> ob_item + old_size * sizeof ( Py_UNICODE ),
1529- ustr , n * sizeof ( Py_UNICODE )) ;
1537+ if (! PyUnicode_AsUCS4 ( ustr , ( Py_UCS4 * ) self -> ob_item + old_size , n , 0 ))
1538+ return NULL ;
15301539 }
15311540
15321541 Py_INCREF (Py_None );
@@ -1545,14 +1554,14 @@ append Unicode data to an array of some other type.");
15451554static PyObject *
15461555array_tounicode (arrayobject * self , PyObject * unused )
15471556{
1548- char typecode ;
1549- typecode = self -> ob_descr -> typecode ;
1550- if ((typecode != 'u' )) {
1557+ if (self -> ob_descr -> typecode != 'u' ) {
15511558 PyErr_SetString (PyExc_ValueError ,
15521559 "tounicode() may only be called on unicode type arrays" );
15531560 return NULL ;
15541561 }
1555- return PyUnicode_FromUnicode ((Py_UNICODE * ) self -> ob_item , Py_SIZE (self ));
1562+ return PyUnicode_FromKindAndData (PyUnicode_4BYTE_KIND ,
1563+ (Py_UCS4 * ) self -> ob_item ,
1564+ Py_SIZE (self ));
15561565}
15571566
15581567PyDoc_STRVAR (tounicode_doc ,
@@ -1659,13 +1668,7 @@ typecode_to_mformat_code(char typecode)
16591668 return UNSIGNED_INT8 ;
16601669
16611670 case 'u' :
1662- if (sizeof (Py_UNICODE ) == 2 ) {
1663- return UTF16_LE + is_big_endian ;
1664- }
1665- if (sizeof (Py_UNICODE ) == 4 ) {
1666- return UTF32_LE + is_big_endian ;
1667- }
1668- return UNKNOWN_FORMAT ;
1671+ return UTF32_LE + is_big_endian ;
16691672
16701673 case 'f' :
16711674 if (sizeof (float ) == 4 ) {
@@ -2411,14 +2414,8 @@ array_buffer_getbuf(arrayobject *self, Py_buffer *view, int flags)
24112414 view -> strides = & (view -> itemsize );
24122415 view -> format = NULL ;
24132416 view -> internal = NULL ;
2414- if ((flags & PyBUF_FORMAT ) == PyBUF_FORMAT ) {
2417+ if ((flags & PyBUF_FORMAT ) == PyBUF_FORMAT )
24152418 view -> format = self -> ob_descr -> formats ;
2416- #ifdef Py_UNICODE_WIDE
2417- if (self -> ob_descr -> typecode == 'u' ) {
2418- view -> format = "w" ;
2419- }
2420- #endif
2421- }
24222419
24232420 finish :
24242421 self -> ob_exports ++ ;
@@ -2543,7 +2540,7 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
25432540 return NULL ;
25442541 }
25452542 self -> ob_item = item ;
2546- Py_SIZE (self ) = n / sizeof (Py_UNICODE );
2543+ Py_SIZE (self ) = n / sizeof (Py_UCS4 );
25472544 memcpy (item , PyUnicode_AS_DATA (initial ), n );
25482545 self -> allocated = Py_SIZE (self );
25492546 }
0 commit comments