File tree Expand file tree Collapse file tree 2 files changed +43
-4
lines changed
Expand file tree Collapse file tree 2 files changed +43
-4
lines changed Original file line number Diff line number Diff line change @@ -916,6 +916,13 @@ def test_isascii(self):
916916 self .checkequal (True , '\x00 \x7f ' , 'isascii' )
917917 self .checkequal (False , '\x80 ' , 'isascii' )
918918 self .checkequal (False , '\xe9 ' , 'isascii' )
919+ # bytes.isascii() and bytearray.isascii() have an optimization that
920+ # checks 4 or 8 bytes at once, so exercise several alignments.
921+ for p in range (8 ):
922+ self .checkequal (True , ' ' * p + '\x7f ' , 'isascii' )
923+ self .checkequal (False , ' ' * p + '\x80 ' , 'isascii' )
924+ self .checkequal (True , ' ' * p + '\x7f ' + ' ' * 8 , 'isascii' )
925+ self .checkequal (False , ' ' * p + '\x80 ' + ' ' * 8 , 'isascii' )
919926
920927 def test_isdigit (self ):
921928 self .checkequal (False , '' , 'isdigit' )
Original file line number Diff line number Diff line change @@ -98,19 +98,51 @@ PyDoc_STRVAR_shared(_Py_isascii__doc__,
9898Return True if B is empty or all characters in B are ASCII,\n\
9999False otherwise." );
100100
101+ // Optimization is copied from ascii_decode in unicodeobject.c
102+ /* Mask to quickly check whether a C 'long' contains any byte with
103+ the high bit set, i.e. a non-ASCII byte. */
104+ #if (SIZEOF_LONG == 8 )
105+ # define ASCII_CHAR_MASK 0x8080808080808080UL
106+ #elif (SIZEOF_LONG == 4 )
107+ # define ASCII_CHAR_MASK 0x80808080UL
108+ #else
109+ # error C 'long' size should be either 4 or 8!
110+ #endif
111+
101112PyObject *
102113_Py_bytes_isascii (const char * cptr , Py_ssize_t len )
103114{
104- const unsigned char * p = (unsigned char * ) cptr ;
105- const unsigned char * e = p + len ;
106- for (; p < e ; p ++ ) {
107- if (* p >= 128 ) {
115+ const char * p = cptr ;
116+ const char * end = p + len ;
117+ const char * aligned_end = (const char * ) _Py_ALIGN_DOWN (end , SIZEOF_LONG );
118+
119+ while (p < end ) {
120+ /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
121+ for an explanation. */
122+ if (_Py_IS_ALIGNED (p , SIZEOF_LONG )) {
123+ /* Help allocation */
124+ const char * _p = p ;
125+ while (_p < aligned_end ) {
126+ unsigned long value = * (unsigned long * ) _p ;
127+ if (value & ASCII_CHAR_MASK ) {
128+ Py_RETURN_FALSE ;
129+ }
130+ _p += SIZEOF_LONG ;
131+ }
132+ p = _p ;
133+ if (_p == end )
134+ break ;
135+ }
136+ if ((unsigned char )* p & 0x80 ) {
108137 Py_RETURN_FALSE ;
109138 }
139+ p ++ ;
110140 }
111141 Py_RETURN_TRUE ;
112142}
113143
144+ #undef ASCII_CHAR_MASK
145+
114146
115147PyDoc_STRVAR_shared (_Py_isdigit__doc__ ,
116148"B.isdigit() -> bool\n\
You can’t perform that action at this time.
0 commit comments