From 561e99c1055368a22453701b4e58dc44d8467304 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Wed, 24 Jun 2026 09:10:36 +0200 Subject: [PATCH] Speed up matching of case-insensitive character sets Handle IN_IGNORE, IN_UNI_IGNORE and IN_LOC_IGNORE in SRE(count) so that a repeated case-insensitive set (e.g. [a-z]+ with re.I) scans inline instead of falling back to the per-character match loop. About 2x faster. Co-Authored-By: Claude Opus 4.8 (1M context) --- Modules/_sre/sre_lib.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/Modules/_sre/sre_lib.h b/Modules/_sre/sre_lib.h index 6e6ae46f05a50f..71eb7541d35ba5 100644 --- a/Modules/_sre/sre_lib.h +++ b/Modules/_sre/sre_lib.h @@ -213,6 +213,29 @@ SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount) ptr++; break; + case SRE_OP_IN_IGNORE: + /* repeated set, case-insensitive (ascii) via sre_lower_ascii() */ + TRACE(("|%p|%p|COUNT IN_IGNORE\n", pattern, ptr)); + while (ptr < end && SRE(charset)(state, pattern + 2, + (SRE_CODE) sre_lower_ascii(*ptr))) + ptr++; + break; + + case SRE_OP_IN_UNI_IGNORE: + /* repeated set, case-insensitive (unicode) via sre_lower_unicode() */ + TRACE(("|%p|%p|COUNT IN_UNI_IGNORE\n", pattern, ptr)); + while (ptr < end && SRE(charset)(state, pattern + 2, + (SRE_CODE) sre_lower_unicode(*ptr))) + ptr++; + break; + + case SRE_OP_IN_LOC_IGNORE: + /* repeated set, case-insensitive (locale) via charset_loc_ignore() */ + TRACE(("|%p|%p|COUNT IN_LOC_IGNORE\n", pattern, ptr)); + while (ptr < end && SRE(charset_loc_ignore)(state, pattern + 2, *ptr)) + ptr++; + break; + case SRE_OP_ANY: /* repeated dot wildcard. */ TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));