From 5c7b7977b76efe16da0f499758b37295c5399eac Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Wed, 24 Jun 2026 08:59:13 +0200 Subject: [PATCH 1/2] Compile single-category character sets to a bare CATEGORY opcode A character set containing exactly one category, e.g. [\d] or [^\s], now compiles to a single CATEGORY opcode (like \d or \S) instead of an IN block. The negated form maps to the complementary category. This speeds up matching and reduces the size of the compiled byte code. Co-Authored-By: Claude Opus 4.8 (1M context) --- Lib/re/_parser.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Lib/re/_parser.py b/Lib/re/_parser.py index 3c41c43409534b..b8c19cd3070c4d 100644 --- a/Lib/re/_parser.py +++ b/Lib/re/_parser.py @@ -625,6 +625,12 @@ def _parse(source, state, verbose, nested, first=False): subpatternappend((NOT_LITERAL, set[0][1])) else: subpatternappend(set[0]) + elif _len(set) == 1 and set[0][0] is CATEGORY: + # optimization: a lone category like [\d] or [^\d] + if negate: + subpatternappend((CATEGORY, CH_NEGATE[set[0][1]])) + else: + subpatternappend(set[0]) else: if negate: set.insert(0, (NEGATE, None)) From f95141e8b07f0c3971994524a973b16f28c98d5d Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 24 Jun 2026 13:40:52 +0300 Subject: [PATCH 2/2] Update What's New and add NEWS. --- Doc/whatsnew/3.16.rst | 10 ++++++---- .../2026-06-24-10-30-00.gh-issue-152056.Qk7mZ2.rst | 5 +++++ 2 files changed, 11 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2026-06-24-10-30-00.gh-issue-152056.Qk7mZ2.rst diff --git a/Doc/whatsnew/3.16.rst b/Doc/whatsnew/3.16.rst index e3f04739e3b49d..f9e54cde10afe0 100644 --- a/Doc/whatsnew/3.16.rst +++ b/Doc/whatsnew/3.16.rst @@ -288,10 +288,12 @@ re -- * Character class escapes (``\d``, ``\D``, ``\s``, ``\S``, ``\w`` and ``\W``) - outside a character set are now compiled to a single ``CATEGORY`` opcode - instead of being wrapped in an ``IN`` block. This speeds up matching of - patterns such as ``\d+`` and reduces the size of the compiled byte code. - (Contributed by Serhiy Storchaka in :gh:`152033`.) + outside a character set, and character sets containing a single such escape + (such as ``[\d]`` or ``[^\s]``), are now compiled to a single ``CATEGORY`` + opcode instead of being wrapped in an ``IN`` block. This speeds up matching + of patterns such as ``\d+`` and reduces the size of the compiled byte code. + (Contributed by Serhiy Storchaka in :gh:`152033` and Pieter Eendebak in + :gh:`152056`.) module_name ----------- diff --git a/Misc/NEWS.d/next/Library/2026-06-24-10-30-00.gh-issue-152056.Qk7mZ2.rst b/Misc/NEWS.d/next/Library/2026-06-24-10-30-00.gh-issue-152056.Qk7mZ2.rst new file mode 100644 index 00000000000000..6e71d720cd19be --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-06-24-10-30-00.gh-issue-152056.Qk7mZ2.rst @@ -0,0 +1,5 @@ +Optimize matching of a character set that contains a single character +category, such as ``[\d]`` or ``[^\s]``: it is now compiled to a single +``CATEGORY`` opcode, the same as the corresponding ``\d`` or ``\S`` escape, +instead of being wrapped in an ``IN`` block. This speeds up matching and +reduces the size of the compiled byte code. Patch by Pieter Eendebak.