|
| 1 | +"""Filename globbing utility.""" |
| 2 | + |
| 3 | +import os |
| 4 | +import re |
| 5 | +import fnmatch |
| 6 | + |
| 7 | +__all__ = ["glob", "iglob", "escape"] |
| 8 | + |
def glob(pathname, *, recursive=False):
    """Collect every path matching the pattern *pathname* into a list.

    This is the eager form of iglob(): the iterator iglob() returns is
    drained into a list before returning.

    Wildcards use fnmatch's shell-style syntax, with one difference:
    a filename that starts with a dot is treated specially and is not
    matched by the '*' and '?' wildcards.

    When *recursive* is true, the pattern '**' matches any files and
    zero or more directories and subdirectories.
    """
    matches = iglob(pathname, recursive=recursive)
    return list(matches)
| 21 | + |
def iglob(pathname, *, recursive=False):
    """Lazily produce the paths that match the pattern *pathname*.

    Returns an iterator rather than a list; see glob() for the eager
    variant.

    Wildcards use fnmatch's shell-style syntax, with one difference:
    a filename that starts with a dot is treated specially and is not
    matched by the '*' and '?' wildcards.

    When *recursive* is true, the pattern '**' matches any files and
    zero or more directories and subdirectories.
    """
    results = _iglob(pathname, recursive, False)
    if not (recursive and _isrecursive(pathname)):
        return results
    # For a bare '**' the first item produced is an empty string;
    # consume it here so callers never see it.
    first = next(results)
    assert not first
    return results
| 38 | + |
def _iglob(pathname, recursive, dironly):
    """Generator behind iglob(): yield the paths matching *pathname*.

    *pathname* is split into a directory part and a final component.
    The directory part is expanded first (recursively, if it contains
    wildcards) and the final component is then matched inside each
    resulting directory.

    *recursive* enables the special handling of a '**' component;
    *dironly* is forwarded to the per-directory helpers.
    """
    dirname, basename = os.path.split(pathname)

    if not has_magic(pathname):
        # Purely literal path: a single existence check decides the result.
        assert not dironly
        # A pattern ending with a slash (empty basename) should match
        # only directories.
        if basename:
            present = os.path.lexists(pathname)
        else:
            present = os.path.isdir(dirname)
        if present:
            yield pathname
        return

    if not dirname:
        # No directory part: match directly with an empty dirname.
        if recursive and _isrecursive(basename):
            finder = _glob2
        else:
            finder = _glob1
        yield from finder(dirname, basename, dironly)
        return

    # `os.path.split()` returns the argument itself as a dirname if it is a
    # drive or UNC path.  Prevent an infinite recursion if a drive or UNC
    # path contains magic characters (i.e. r'\\?\C:').
    if dirname != pathname and has_magic(dirname):
        parents = _iglob(dirname, recursive, True)
    else:
        parents = [dirname]

    # Choose how the final component is matched inside each parent.
    if not has_magic(basename):
        finder = _glob0
    elif recursive and _isrecursive(basename):
        finder = _glob2
    else:
        finder = _glob1

    for parent in parents:
        for name in finder(parent, basename, dironly):
            yield os.path.join(parent, name)
| 74 | + |
| 75 | +# These 2 helper functions non-recursively glob inside a literal directory. |
| 76 | +# They return a list of basenames. _glob1 accepts a pattern while _glob0 |
| 77 | +# takes a literal basename (so it only has to check for its existence). |
| 78 | + |
def _glob1(dirname, pattern, dironly):
    """Return the basenames inside *dirname* that match *pattern*.

    Entries whose name starts with a dot are considered only when
    *pattern* itself starts with a dot.  *dironly* is passed through
    to _iterdir().
    """
    entries = list(_iterdir(dirname, dironly))
    if _ishidden(pattern):
        candidates = entries
    else:
        candidates = [entry for entry in entries if not _ishidden(entry)]
    return fnmatch.filter(candidates, pattern)
| 84 | + |
def _glob0(dirname, basename, dironly):
    """Return [basename] if the literal *basename* exists in *dirname*.

    Returns an empty list otherwise.  An empty *basename* (produced by
    `os.path.split()` for paths ending with a directory separator)
    matches only when *dirname* is a directory, so that 'q*x/' matches
    directories only.

    *dironly* is accepted so that this helper can be called like the
    other per-directory helpers; it is not consulted here.
    """
    if basename:
        found = os.path.lexists(os.path.join(dirname, basename))
    else:
        found = os.path.isdir(dirname)
    return [basename] if found else []
| 95 | + |
| 96 | +# Following functions are not public but can be used by third-party code. |
| 97 | + |
def glob0(dirname, pattern):
    """Call _glob0() for *dirname* and *pattern* with dironly disabled."""
    return _glob0(dirname, pattern, dironly=False)
| 100 | + |
def glob1(dirname, pattern):
    """Call _glob1() for *dirname* and *pattern* with dironly disabled."""
    return _glob1(dirname, pattern, dironly=False)
| 103 | + |
| 104 | +# This helper function recursively yields relative pathnames inside a literal |
| 105 | +# directory. |
| 106 | + |
def _glob2(dirname, pattern, dironly):
    """Yield an empty name, then every relative path under *dirname*.

    *pattern* must be the recursive wildcard.  The first item is an
    empty string or bytes object of the same type as *pattern*; the
    rest come from _rlistdir().
    """
    assert _isrecursive(pattern)
    # Slicing keeps the str/bytes type of the pattern.
    empty_name = pattern[:0]
    yield empty_name
    yield from _rlistdir(dirname, dironly)
| 111 | + |
| 112 | +# If dironly is false, yields all file names inside a directory. |
| 113 | +# If dironly is true, yields only directory names. |
def _iterdir(dirname, dironly):
    """Yield the entry names found directly inside *dirname*.

    An empty *dirname* means the current directory, expressed as str
    or bytes to match the type of *dirname*.  When *dironly* is true,
    only entries for which `is_dir()` is true are yielded.

    OSError is swallowed: an entry that fails is skipped, and a
    directory that cannot be scanned yields nothing.
    """
    if not dirname:
        here = os.curdir
        dirname = bytes(here, 'ASCII') if isinstance(dirname, bytes) else here
    try:
        with os.scandir(dirname) as entries:
            for entry in entries:
                try:
                    if dironly and not entry.is_dir():
                        continue
                    yield entry.name
                except OSError:
                    pass
    except OSError:
        return
| 130 | + |
| 131 | +# Recursively yields relative pathnames inside a literal directory. |
def _rlistdir(dirname, dironly):
    """Yield paths relative to *dirname* for everything beneath it.

    Each non-hidden entry is yielded, followed by the results of
    descending into it.  Entries whose name starts with a dot are
    skipped and not descended into.  *dironly* is passed through to
    _iterdir().
    """
    # Read the whole listing before yielding anything.
    entries = list(_iterdir(dirname, dironly))
    for name in entries:
        if _ishidden(name):
            continue
        yield name
        # With an empty dirname the entry name is used as the path as-is.
        if dirname:
            subpath = os.path.join(dirname, name)
        else:
            subpath = name
        for nested in _rlistdir(subpath, dironly):
            yield os.path.join(name, nested)
| 140 | + |
| 141 | + |
# A single capturing group matching any one of the wildcard characters
# '*', '?' and '['.  The str and bytes regexes are built from the same
# pattern text.
_MAGIC_PATTERN = '([*?[])'
magic_check = re.compile(_MAGIC_PATTERN)
magic_check_bytes = re.compile(_MAGIC_PATTERN.encode('ASCII'))
| 144 | + |
def has_magic(s):
    """Return True if *s* contains any of the characters '*', '?' or '['.

    *s* may be str or bytes; the regex of the matching type is used.
    """
    checker = magic_check_bytes if isinstance(s, bytes) else magic_check
    return checker.search(s) is not None
| 151 | + |
def _ishidden(path):
    """Return True if *path* (str or bytes) starts with a dot.

    An empty *path* is not hidden.  Indexing it would raise IndexError,
    so it is handled up front.
    """
    if not path:
        return False
    # Indexing bytes gives an int, so the bytes case is compared against
    # the integer value of b'.' rather than against a bytes object.
    return path[0] in ('.', b'.'[0])
| 154 | + |
def _isrecursive(pattern):
    """Return True if *pattern* is exactly '**' (or b'**' for bytes)."""
    double_star = b'**' if isinstance(pattern, bytes) else '**'
    return pattern == double_star
| 160 | + |
def escape(pathname):
    """Escape all special characters in *pathname*.

    Each of '*', '?' and '[' is wrapped in square brackets.  The drive
    part, as split off by `os.path.splitdrive()`, is returned unchanged
    because metacharacters do not work there and shouldn't be escaped.
    Accepts str or bytes.
    """
    drive, rest = os.path.splitdrive(pathname)
    if isinstance(rest, bytes):
        regex, replacement = magic_check_bytes, br'[\1]'
    else:
        regex, replacement = magic_check, r'[\1]'
    return drive + regex.sub(replacement, rest)
0 commit comments