diff --git a/Doc/whatsnew/3.16.rst b/Doc/whatsnew/3.16.rst index e3f04739e3b49d..f9e54cde10afe0 100644 --- a/Doc/whatsnew/3.16.rst +++ b/Doc/whatsnew/3.16.rst @@ -288,10 +288,12 @@ re -- * Character class escapes (``\d``, ``\D``, ``\s``, ``\S``, ``\w`` and ``\W``) - outside a character set are now compiled to a single ``CATEGORY`` opcode - instead of being wrapped in an ``IN`` block. This speeds up matching of - patterns such as ``\d+`` and reduces the size of the compiled byte code. - (Contributed by Serhiy Storchaka in :gh:`152033`.) + outside a character set, and character sets containing a single such escape + (such as ``[\d]`` or ``[^\s]``), are now compiled to a single ``CATEGORY`` + opcode instead of being wrapped in an ``IN`` block. This speeds up matching + of patterns such as ``\d+`` and reduces the size of the compiled byte code. + (Contributed by Serhiy Storchaka in :gh:`152033` and Pieter Eendebak in + :gh:`152056`.) module_name ----------- diff --git a/Lib/re/_parser.py b/Lib/re/_parser.py index 3c41c43409534b..b8c19cd3070c4d 100644 --- a/Lib/re/_parser.py +++ b/Lib/re/_parser.py @@ -625,6 +625,12 @@ def _parse(source, state, verbose, nested, first=False): subpatternappend((NOT_LITERAL, set[0][1])) else: subpatternappend(set[0]) + elif _len(set) == 1 and set[0][0] is CATEGORY: + # optimization: a lone category like [\d] or [^\d] + if negate: + subpatternappend((CATEGORY, CH_NEGATE[set[0][1]])) + else: + subpatternappend(set[0]) else: if negate: set.insert(0, (NEGATE, None)) diff --git a/Misc/NEWS.d/next/Library/2026-06-24-10-30-00.gh-issue-152056.Qk7mZ2.rst b/Misc/NEWS.d/next/Library/2026-06-24-10-30-00.gh-issue-152056.Qk7mZ2.rst new file mode 100644 index 00000000000000..6e71d720cd19be --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-06-24-10-30-00.gh-issue-152056.Qk7mZ2.rst @@ -0,0 +1,5 @@ +Optimize matching of a character set that contains a single character +category, such as ``[\d]`` or ``[^\s]``: it is now compiled to a single +``CATEGORY`` opcode, the same as the corresponding ``\d`` or ``\S`` escape, +instead of being wrapped in an ``IN`` block. This speeds up matching and +reduces the size of the compiled byte code. Patch by Pieter Eendebak.