Skip to content

Commit 6f65ceb

Browse files
author
jlgijsbers
committed
Patch #943206:
`glob.glob()` currently calls itself recursively to build a list of matches of the dirname part of the pattern and then filters by the basename part. This is effectively BFS. ``glob.glob('*/*/*/*/*/foo')`` will build a huge list of all directories 5 levels deep even if only a handful of them contain a ``foo`` entry. A generator-based recusion would never have to store these list at once by implementing DFS. This patch converts the `glob` function to an `iglob` recursive generator . `glob()` now just returns ``list(iglob(pattern))``. I also cleaned up the code a bit (reduced duplicate `has_magic()` checks and created a second `glob0` helper func so that the main loop need not be duplicated). Thanks to Cherniavsky Beni for the patch! git-svn-id: http://svn.python.org/projects/python/trunk@38236 6015fed2-1504-0410-9fe1-9d1591cc4771
1 parent 915795f commit 6f65ceb

3 files changed

Lines changed: 50 additions & 24 deletions

File tree

Doc/lib/libglob.tex

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,20 @@ \section{\module{glob} ---
1616
\index{filenames!pathname expansion}
1717

1818
\begin{funcdesc}{glob}{pathname}
19-
Returns a possibly-empty list of path names that match \var{pathname},
19+
Return a possibly-empty list of path names that match \var{pathname},
2020
which must be a string containing a path specification.
2121
\var{pathname} can be either absolute (like
2222
\file{/usr/src/Python-1.5/Makefile}) or relative (like
2323
\file{../../Tools/*/*.gif}), and can contain shell-style wildcards.
2424
Broken symlinks are included in the results (as in the shell).
2525
\end{funcdesc}
2626

27+
\begin{funcdesc}{iglob}{pathname}
28+
Return an iterator which yields the same values as \function{glob()}
29+
without actually storing them all simultaneously.
30+
\versionadded{2.5}
31+
\end{funcdesc}
32+
2733
For example, consider a directory containing only the following files:
2834
\file{1.gif}, \file{2.txt}, and \file{card.gif}. \function{glob()}
2935
will produce the following results. Notice how any leading components

Lib/glob.py

Lines changed: 40 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4,43 +4,50 @@
44
import fnmatch
55
import re
66

7-
__all__ = ["glob"]
7+
__all__ = ["glob", "iglob"]
88

99
def glob(pathname):
1010
"""Return a list of paths matching a pathname pattern.
1111
1212
The pattern may contain simple shell-style wildcards a la fnmatch.
1313
14+
"""
15+
return list(iglob(pathname))
16+
17+
def iglob(pathname):
18+
"""Return a list of paths matching a pathname pattern.
19+
20+
The pattern may contain simple shell-style wildcards a la fnmatch.
21+
1422
"""
1523
if not has_magic(pathname):
1624
if os.path.lexists(pathname):
17-
return [pathname]
18-
else:
19-
return []
25+
yield pathname
26+
return
2027
dirname, basename = os.path.split(pathname)
2128
if not dirname:
22-
return glob1(os.curdir, basename)
23-
elif has_magic(dirname):
24-
list = glob(dirname)
29+
for name in glob1(os.curdir, basename):
30+
yield name
31+
return
32+
if has_magic(dirname):
33+
dirs = iglob(dirname)
2534
else:
26-
list = [dirname]
27-
if not has_magic(basename):
28-
result = []
29-
for dirname in list:
30-
if basename or os.path.isdir(dirname):
31-
name = os.path.join(dirname, basename)
32-
if os.path.lexists(name):
33-
result.append(name)
35+
dirs = [dirname]
36+
if has_magic(basename):
37+
glob_in_dir = glob1
3438
else:
35-
result = []
36-
for dirname in list:
37-
sublist = glob1(dirname, basename)
38-
for name in sublist:
39-
result.append(os.path.join(dirname, name))
40-
return result
39+
glob_in_dir = glob0
40+
for dirname in dirs:
41+
for name in glob_in_dir(dirname, basename):
42+
yield os.path.join(dirname, name)
43+
44+
# These 2 helper functions non-recursively glob inside a literal directory.
45+
# They return a list of basenames. `glob1` accepts a pattern while `glob0`
46+
# takes a literal basename (so it only has to check for its existence).
4147

4248
def glob1(dirname, pattern):
43-
if not dirname: dirname = os.curdir
49+
if not dirname:
50+
dirname = os.curdir
4451
try:
4552
names = os.listdir(dirname)
4653
except os.error:
@@ -49,6 +56,17 @@ def glob1(dirname, pattern):
4956
names=filter(lambda x: x[0]!='.',names)
5057
return fnmatch.filter(names,pattern)
5158

59+
def glob0(dirname, basename):
60+
if basename == '':
61+
# `os.path.split()` returns an empty basename for paths ending with a
62+
# directory separator. 'q*x/' should match only directories.
63+
if os.isdir(dirname):
64+
return [basename]
65+
else:
66+
if os.path.lexists(os.path.join(dirname, basename)):
67+
return [basename]
68+
return []
69+
5270

5371
magic_check = re.compile('[*?[]')
5472

Lib/test/test_glob.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,9 @@ def glob(self, *parts):
6161
else:
6262
pattern = os.path.join(*parts)
6363
p = os.path.join(self.tempdir, pattern)
64-
return glob.glob(p)
64+
res = glob.glob(p)
65+
self.assertEqual(list(glob.iglob(p)), res)
66+
return res
6567

6668
def assertSequencesEqual_noorder(self, l1, l2):
6769
self.assertEqual(set(l1), set(l2))

0 commit comments

Comments
 (0)