Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
gh-89973: Fix re.error in the fnmatch module.
Character ranges with upper bound less than lower bound are now
interpreted as empty ranges, for compatibility with other glob
pattern implementations. Previously such a pattern raised re.error.
  • Loading branch information
serhiy-storchaka committed May 22, 2022
commit ab4b759a6f24c95548f4f37238fba93f342fa6d7
23 changes: 17 additions & 6 deletions Lib/fnmatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def translate(pat):
add('\\[')
else:
stuff = pat[i:j]
if '--' not in stuff:
if '-' not in stuff:
stuff = stuff.replace('\\', r'\\')
else:
chunks = []
Expand All @@ -115,18 +115,29 @@ def translate(pat):
i = k+1
k = k+3
chunks.append(pat[i:j])
if not chunks[-1]:
del chunks[-1]
chunks[-1] += '-'
for k in range(len(chunks)-1, 0, -1):
if chunks[k-1][-1] > chunks[k][0]:
chunks[k-1:k+1] = [chunks[k-1][:-1] + chunks[k][1:]]
# Escape backslashes and hyphens for set difference (--).
# Hyphens that create ranges shouldn't be escaped.
stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
for s in chunks)
# Escape set operations (&&, ~~ and ||).
stuff = re.sub(r'([&~|])', r'\\\1', stuff)
i = j+1
if stuff[0] == '!':
stuff = '^' + stuff[1:]
elif stuff[0] in ('^', '['):
stuff = '\\' + stuff
add(f'[{stuff}]')
if not stuff:
add(f'(?!)') # never match
elif stuff == '!':
add(f'.') # match any character
else:
if stuff[0] == '!':
stuff = '^' + stuff[1:]
elif stuff[0] in ('^', '['):
stuff = '\\' + stuff
add(f'[{stuff}]')
else:
add(re.escape(c))
assert i == n
Expand Down
71 changes: 71 additions & 0 deletions Lib/test/test_fnmatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import unittest
import os
import string
import warnings

from fnmatch import fnmatch, fnmatchcase, translate, filter
Expand Down Expand Up @@ -91,6 +92,76 @@ def test_sep(self):
check('usr/bin', 'usr\\bin', normsep)
check('usr\\bin', 'usr\\bin')

def test_char_set(self):
    """Check bracket expressions that enumerate single characters.

    Every lowercase letter, digit and punctuation character is matched
    against plain sets, negated sets, sets with a repeated member, and
    sets containing characters that are special in regular expressions.
    Unterminated or empty brackets must match themselves literally.

    Expectations are passed to ``self.check_match(name, pattern,
    expected)``, which is defined elsewhere in the test class.
    NOTE(review): the platform-dependent expectations below assume that
    check_match goes through a matcher that applies os.path.normcase()
    to both the name and the pattern -- confirm against check_match.
    """
    # True on platforms whose normcase() folds letter case.
    ignorecase = os.path.normcase('ABC') == os.path.normcase('abc')
    # True on platforms whose normcase() folds '/' into '\\' (Windows).
    normsep = os.path.normcase('\\') == os.path.normcase('/')
    check = self.check_match
    testcases = string.ascii_lowercase + string.digits + string.punctuation
    for c in testcases:
        check(c, '[az]', c in 'az')
        check(c, '[!az]', c not in 'az')
    # Case insensitive.
    for c in testcases:
        check(c, '[AZ]', (c in 'az') and ignorecase)
        check(c, '[!AZ]', (c not in 'az') or not ignorecase)
    for c in string.ascii_uppercase:
        check(c, '[az]', (c in 'AZ') and ignorecase)
        check(c, '[!az]', (c not in 'AZ') or not ignorecase)
    # Repeated same character.
    for c in testcases:
        check(c, '[aa]', c == 'a')
    # Special cases.
    for c in testcases:
        check(c, '[^az]', c in '^az')
        check(c, '[[az]', c in '[az')
        # Where separators are normalized, the name '/' is turned into
        # '\\' and would therefore match a set containing a backslash,
        # contradicting the expectation computed from the raw character.
        if not (normsep and c == '/'):
            check(c, r'[\]', c == '\\')
            check(c, r'[\az]', c in r'\az')
        check(c, r'[!]]', c != ']')
    # Brackets that do not form a complete set are literal text.
    check('[', '[')
    check('[]', '[]')
    check('[!', '[!')
    check('[!]', '[!]')
def test_range(self):
    """Check bracket expressions that contain character ranges.

    Covers ordinary and negated ranges, several ranges in one set,
    single-character ranges, hyphens in positions where they are
    literal, ranges whose bounds are regular-expression metacharacters,
    and reversed ranges (upper bound below lower bound), which must
    behave as empty ranges.

    Expectations are passed to ``self.check_match(name, pattern,
    expected)``, which is defined elsewhere in the test class.
    NOTE(review): the platform-dependent expectations below assume that
    check_match goes through a matcher that applies os.path.normcase()
    to both the name and the pattern -- confirm against check_match.
    """
    # True on platforms whose normcase() folds letter case.
    ignorecase = os.path.normcase('ABC') == os.path.normcase('abc')
    # True on platforms whose normcase() folds '/' into '\\' (Windows).
    normsep = os.path.normcase('\\') == os.path.normcase('/')
    check = self.check_match
    testcases = string.ascii_lowercase + string.digits + string.punctuation
    for c in testcases:
        check(c, '[b-d]', c in 'bcd')
        check(c, '[!b-d]', c not in 'bcd')
        check(c, '[b-dx-z]', c in 'bcdxyz')
        check(c, '[!b-dx-z]', c not in 'bcdxyz')
    # Case insensitive.
    for c in testcases:
        check(c, '[B-D]', (c in 'bcd') and ignorecase)
        check(c, '[!B-D]', (c not in 'bcd') or not ignorecase)
    for c in string.ascii_uppercase:
        check(c, '[b-d]', (c in 'BCD') and ignorecase)
        check(c, '[!b-d]', (c not in 'BCD') or not ignorecase)
    # Upper bound == lower bound.
    for c in testcases:
        check(c, '[b-b]', c == 'b')
    # Special cases.
    for c in testcases:
        check(c, '[!-#]', c not in '-#')
        check(c, '[!--.]', c not in '-.')
        # A '/' inside the pattern is itself rewritten where separators
        # are normalized, which changes the upper bound of the range.
        if not normsep:
            check(c, '[!--/]', c not in '-./')
        check(c, '[^-`]', c in '^_`')
        # These ranges contain '\\'; where separators are normalized the
        # name '/' becomes '\\' and would match against expectation.
        if not (normsep and c == '/'):
            check(c, '[[-^]', c in r'[\]^')
            check(c, r'[\-^]', c in r'\]^')
        check(c, '[b-]', c in '-b')
        check(c, '[!b-]', c not in '-b')
        check(c, '[-b]', c in '-b')
        check(c, '[!-b]', c not in '-b')
        check(c, '[-]', c in '-')
        check(c, '[!-]', c not in '-')
    # Upper bound is less than lower bound: must be treated as an empty
    # range (passing it through unchanged would be an error in the RE).
    for c in testcases:
        check(c, '[d-b]', False)
        check(c, '[!d-b]', True)
        check(c, '[d-bx-z]', c in 'xyz')
        check(c, '[!d-bx-z]', c not in 'xyz')
        check(c, '[d-b^-`]', c in '^_`')
        # Same separator caveat as above: the range '[-^' contains '\\'.
        if not (normsep and c == '/'):
            check(c, '[d-b[-^]', c in '[\\]^')

def test_warnings(self):
with warnings.catch_warnings():
warnings.simplefilter('error', Warning)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix :exc:`re.error` raised in :mod:`fnmatch` if the pattern contains a
character range with upper bound lower than lower bound (e.g. ``[c-a]``).
Comment thread
serhiy-storchaka marked this conversation as resolved.
Outdated
Now such ranges are interpreted as empty ranges.