"""Benchmark: SRE(count) fast path for case-insensitive set repeats."""
import re
import pyperf
# Length, in characters, of every scan subject built below.
N = 100

# Scan subjects: each 24-character seed is repeated, then cut to N characters.
MIXED = ("aBcDeFgHiJkLmNoPqRsTuVwX" * N)[:N]
ALNUM = ("aB3dE6gH9kLmN0pQrStUvWx1" * N)[:N]
WORD = ("aB_dE_gH_kLmN_pQrStUvW_1" * N)[:N]
NODIGIT = ("aBcDeF gHiJkL!mNoPqR.sT?" * N)[:N]

# Bytes copy of MIXED for the bytes/LOCALE pattern.
BYTES = MIXED.encode("latin1")

# (benchmark name, compiled pattern, subject) triples for the match() runs.
SCANS = [
    (
        "scan_alpha_uni",
        re.compile(r"[a-z]+", flags=re.IGNORECASE),
        MIXED,
    ),
    (
        "scan_alpha_asc",
        re.compile(r"[a-z]+", flags=re.IGNORECASE | re.ASCII),
        MIXED,
    ),
    (
        "scan_alnum_uni",
        re.compile(r"[a-z0-9]+", flags=re.IGNORECASE),
        ALNUM,
    ),
    (
        "scan_word_uni",
        re.compile(r"[a-z0-9_]+", flags=re.IGNORECASE),
        WORD,
    ),
    (
        "scan_neg_uni",
        re.compile(r"[^0-9]+", flags=re.IGNORECASE),
        NODIGIT,
    ),
    (
        "scan_vowels_uni",
        re.compile(r"[aeiou]+", flags=re.IGNORECASE),
        "aAeEiIoOuU" * (N // 10),
    ),
    (
        "scan_alpha_loc",
        re.compile(rb"[a-z]+", flags=re.LOCALE | re.IGNORECASE),
        BYTES,
    ),
]

# Longer text (one sentence repeated 50 times) for the findall() runs.
DOC = "The Quick Brown Fox jumps over 12 Lazy Dogs near IP 10_0_0_1 and Node7. " * 50

# (benchmark name, compiled pattern, subject) triples for the findall() runs.
FINDS = [
    (
        "find_words_ci",
        re.compile(r"[a-z]+", flags=re.IGNORECASE),
        DOC,
    ),
    (
        "find_ident_ci",
        re.compile(r"[a-z_][a-z0-9_]*", flags=re.IGNORECASE),
        DOC,
    ),
]
def make_scan(p, s):
    """Return a zero-argument callable that runs ``p.match(s)`` once.

    Args:
        p: Compiled pattern; only its ``match`` method is used.
        s: Subject passed to ``p.match``.

    Returns:
        A function taking no arguments. It returns ``None`` when the match
        succeeds and raises ``AssertionError`` when ``p.match(s)`` is ``None``.
    """
    def run():
        # Explicit check instead of an ``assert`` statement: asserts are
        # removed under ``python -O``, which would drop the match call
        # itself and leave nothing to time.
        if p.match(s) is None:
            raise AssertionError("pattern did not match the benchmark subject")

    return run
# Register one pyperf benchmark per entry in SCANS and FINDS, in that order.
runner = pyperf.Runner()

for name, p, s in SCANS:
    runner.bench_func(name, make_scan(p, s))

for name, p, s in FINDS:
    # Default arguments bind this iteration's pattern and subject, so each
    # registered callable keeps its own pair.
    runner.bench_func(name, lambda p=p, s=s: p.findall(s))
Feature or enhancement
Proposal:
A `REPEAT_ONE` over a case-insensitive character set — e.g. `[a-z]+` with `re.IGNORECASE` — does not use the fast `SRE(count)` path. The compiled inner opcode is `IN_IGNORE` / `IN_UNI_IGNORE` / `IN_LOC_IGNORE`, none of which has a case in `SRE(count)`. The case-sensitive `SRE_OP_IN` already has a fast case.

Adding the three `IN_*_IGNORE` cases to `SRE(count)` lets them scan inline.

Benchmark
- `[a-z]+` `re.I|re.A` (`IN_IGNORE`)
- `[a-z]+` `re.I` (`IN_UNI_IGNORE`)
- `[aeiou]+` `re.I`
- `[a-z0-9]+` `re.I`
- `[a-z0-9_]+` `re.I`
- `[a-z]+` `re.L|re.I` bytes (`IN_LOC_IGNORE`)
- findall `[a-z]+` `re.I`
- findall `[a-z_][a-z0-9_]*` `re.I`

`[^0-9]+` `re.I` is unchanged — it has no cased members, so it stays a plain `IN` (already fast).

benchmark script (pyperf)
Run under the unpatched and patched builds, then `python -m pyperf compare_to before.json after.json --table`.

Has this already been discussed elsewhere?
No response given
Links to previous discussion of this feature:
No response
Linked PRs