Skip to content

Commit a54bf86

Browse files
committed
Write an unrolled version of find_max_char
1 parent 47816f4 commit a54bf86

1 file changed

Lines changed: 38 additions & 15 deletions

File tree

Objects/stringlib/find_max_char.h

Lines changed: 38 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -20,23 +20,46 @@ Py_LOCAL_INLINE(Py_UCS4)
2020
STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)
2121
{
2222
const unsigned char *p = (const unsigned char *) begin;
23+
const unsigned char *_begin = (const unsigned char *)begin;
24+
const unsigned char *aligned_start = (const unsigned char *)(
25+
((intptr_t)_begin + (SIZEOF_SIZE_T - 1)) & ~(SIZEOF_SIZE_T - 1));
26+
const unsigned char *_end = (const unsigned char *)end;
27+
const size_t *aligned_end = (const size_t *)((intptr_t)_end & ~(SIZEOF_SIZE_T - 1));
28+
const size_t *unrolled_end = aligned_end - 3;
29+
unsigned char accumulator = 0;
30+
/* Do not test each character individually, bit use bitwise OR and test
31+
all characters at once. */
32+
while (p < _end && p < aligned_start) {
33+
accumulator |= *p;
34+
p += 1;
35+
}
36+
if (accumulator & 0x80) {
37+
return 255;
38+
} else if (p == end) {
39+
return 127;
40+
}
2341

24-
while (p < end) {
25-
if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) {
26-
/* Help register allocation */
27-
const unsigned char *_p = p;
28-
while (_p + SIZEOF_SIZE_T <= end) {
29-
size_t value = *(const size_t *) _p;
30-
if (value & UCS1_ASCII_CHAR_MASK)
31-
return 255;
32-
_p += SIZEOF_SIZE_T;
33-
}
34-
p = _p;
35-
if (p == end)
36-
break;
37-
}
38-
if (*p++ & 0x80)
42+
/* On 64-bit platforms with 128-bit vectors (x86-64, arm64) the
43+
compiler can load 4 size_t values into two 16-byte vectors and do a
44+
vector bitwise OR. */
45+
const size_t *_p = (const size_t *)p;
46+
while (_p < unrolled_end) {
47+
size_t value = _p[0] | _p[1] | _p[2] | _p[3];
48+
if (value & UCS1_ASCII_CHAR_MASK) {
3949
return 255;
50+
}
51+
_p += 4;
52+
}
53+
size_t value = 0;
54+
while (_p < aligned_end) {
55+
value |= *_p;
56+
}
57+
p = (const unsigned char *)_p;
58+
while (p < _end) {
59+
value |= *p;
60+
}
61+
if (value & UCS1_ASCII_CHAR_MASK) {
62+
return 255;
4063
}
4164
return 127;
4265
}

0 commit comments

Comments
 (0)