Skip to content

Commit 90f9edb

Browse files
Fix ValidationError when using multiple `*` wildcards in a path pattern
- A change to `fnmatch` (python/cpython#19908) introduced named groups, which need to be filtered out of the `groupdict`
1 parent 167c6ea commit 90f9edb

2 files changed

Lines changed: 54 additions & 7 deletions

File tree

halfpipe/ingest/glob.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ def tag_glob(pathname, entities=None, dironly=False) -> Generator[tuple[str, dic
2727
"""
2828
dirname, basename = op.split(pathname)
2929
if not dirname:
30-
# print(repr(dirname), repr(basename))
3130
if _isrecursive(basename):
3231
dir_generator = _rlistdir(dirname, dironly)
3332
else:
@@ -40,7 +39,6 @@ def tag_glob(pathname, entities=None, dironly=False) -> Generator[tuple[str, dic
4039
else:
4140
dirs = [(dirname, dict())]
4241
for dirname, dirtagdict in dirs:
43-
# print("40", repr(dirname), repr(dirtagdict))
4442
for name, tagdict in _tag_glob_in_dir(dirname, basename, entities, dironly, dirtagdict):
4543
yield (op.join(dirname, name), _combine_tagdict(dirtagdict, tagdict))
4644

@@ -60,13 +58,16 @@ def _tag_glob_in_dir(dirname, basename, entities, dironly, parenttagdict):
6058
adapted from cpython glob
6159
only basename can contain magic
6260
"""
63-
# print("60", repr(dirname), repr(basename), repr(entities), repr(parenttagdict))
6461
assert not has_magic(dirname)
65-
match = _translate(basename, entities, parenttagdict)
62+
fullmatch = _translate(basename, entities, parenttagdict)
6663
for x in _iterdir(dirname, dironly):
67-
matchobj = match(x)
64+
matchobj = fullmatch(x)
6865
if matchobj is not None:
69-
yield x, matchobj.groupdict()
66+
yield x, {
67+
entity: value
68+
for entity, value in matchobj.groupdict().items()
69+
if entity in entities # filter out groups added by fnmatch such as "g0"
70+
}
7071

7172

7273
def get_entities_in_path(pat):
@@ -178,7 +179,6 @@ def _rlistdir(dirname, dironly):
178179
for x in names:
179180
path = op.join(dirname, x) if dirname else x
180181
yield path
181-
# print("176", repr(dirname), repr(path))
182182
yield from _rlistdir(path, dironly)
183183

184184

halfpipe/ingest/tests/test_glob.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# -*- coding: utf-8 -*-
2+
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
3+
# vi: set ft=python sts=4 ts=4 sw=4 et:
4+
5+
from string import ascii_lowercase, digits
6+
from random import seed, choices
7+
8+
from ..glob import tag_glob
9+
10+
11+
def _random_string(length: int = 5) -> str:
12+
return "".join(choices(ascii_lowercase + digits, k=length))
13+
14+
15+
def test_tag_glob(tmp_path):
16+
seed(a=0x5e6128c4)
17+
18+
ground_truth: dict[str, dict[str, int]] = dict()
19+
20+
for i in range(10):
21+
for j in range(1, 3):
22+
file_path = tmp_path / f"sub-{i:02d}" / f"{_random_string()}_T1w_{_random_string()}_run-{j:02d}.txt"
23+
file_path.parent.mkdir(exist_ok=True)
24+
file_path.touch()
25+
26+
ground_truth[str(file_path)] = dict(subject=i, run=j)
27+
28+
file_path = tmp_path / f"sub-{i:02d}" / f"{_random_string()}_T2w_{_random_string()}_run-{j:02d}.txt"
29+
file_path.parent.mkdir(exist_ok=True)
30+
file_path.touch()
31+
32+
path_pattern = str(tmp_path / "sub-{subject}" / "*_T1w_*_run-{run}.txt")
33+
34+
matched_file_paths = set()
35+
36+
for file_path, tag_dict in tag_glob(path_pattern, entities=["subject", "run"]):
37+
assert file_path in ground_truth
38+
39+
assert set(ground_truth[file_path].keys()) == set(tag_dict.keys())
40+
41+
a = set(ground_truth[file_path].items())
42+
b = set((entity, int(value)) for entity, value in tag_dict.items())
43+
assert a == b
44+
45+
matched_file_paths.add(file_path)
46+
47+
assert set(ground_truth.keys()) == matched_file_paths

0 commit comments

Comments
 (0)