From 16a01b85ee778077a2f7a53d3dc488717e87a8d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Elsd=C3=B6rfer?= Date: Mon, 9 Jul 2012 12:16:24 +0200 Subject: [PATCH 01/35] Expose the whole glob module in the root namespace. --- src/glob2/impl.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/glob2/impl.py b/src/glob2/impl.py index 00828da..da4fc25 100644 --- a/src/glob2/impl.py +++ b/src/glob2/impl.py @@ -6,9 +6,6 @@ import fnmatch -__all__ = ["glob", "iglob"] - - def glob(pathname, with_matches=False): """Return a list of paths matching a pathname pattern. From 320d40c4f1589bd76596c60472d7637c5a2f5aaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Elsd=C3=B6rfer?= Date: Tue, 10 Jul 2012 12:54:12 +0200 Subject: [PATCH 02/35] Renamed BRANCH_TODO to LOCAL_TODO. --- .gitignore | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 619a31d..72639fe 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ *.pyc -/BRANCH_TODO +/LOCAL_TODO # distutils/setuptools /dist/ @@ -7,4 +7,4 @@ # IDEs *.wpr -/.idea/ \ No newline at end of file +/.idea/ From 40c1e22fd0ae18c45ca1904c494839a561759e92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Elsd=C3=B6rfer?= Date: Tue, 10 Jul 2012 12:55:01 +0200 Subject: [PATCH 03/35] Added PyCharm project files. 
--- .idea/encodings.xml | 5 +++++ .idea/modules.xml | 9 +++++++++ .idea/python-glob2.iml | 13 +++++++++++++ 3 files changed, 27 insertions(+) create mode 100644 .idea/encodings.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/python-glob2.iml diff --git a/.idea/encodings.xml b/.idea/encodings.xml new file mode 100644 index 0000000..e206d70 --- /dev/null +++ b/.idea/encodings.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..7a498d8 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,9 @@ + + + + + + + + + diff --git a/.idea/python-glob2.iml b/.idea/python-glob2.iml new file mode 100644 index 0000000..aaa9d00 --- /dev/null +++ b/.idea/python-glob2.iml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + From ea4c11aea9f498ca631ed19dee4829404f9da2f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Elsd=C3=B6rfer?= Date: Tue, 10 Jul 2012 14:28:53 +0200 Subject: [PATCH 04/35] Provide a Globber class to allow replacing the filesystem dependency. By moving the implementation to a class, and making it possible to provide custom implementations for the filesystem functions that are used, it is now possible to use the globbing code on virtual filesystem implementations. This is now clearly moving away from the original glob module, and as such, I took the liberty to refactor a bit, change some of the internal names used, and to add comments. The public interface, the glob.glob and glob.iglob functions, remains unchanged. --- src/glob2/impl.py | 225 +++++++++++++++++++++++++--------------------- 1 file changed, 125 insertions(+), 100 deletions(-) diff --git a/src/glob2/impl.py b/src/glob2/impl.py index da4fc25..b54ec41 100644 --- a/src/glob2/impl.py +++ b/src/glob2/impl.py @@ -6,107 +6,132 @@ import fnmatch -def glob(pathname, with_matches=False): - """Return a list of paths matching a pathname pattern. - - The pattern may contain simple shell-style wildcards a la fnmatch. 
- - """ - return list(iglob(pathname, with_matches)) - - -def iglob(pathname, with_matches=False): - """Return an iterator which yields the paths matching a pathname - pattern. - - The pattern may contain simple shell-style wildcards a la fnmatch. - - If ``with_matches`` is True, then for each matching path - a 2-tuple will be returned; the second element if the tuple - will be a list of the parts of the path that matched the individual - wildcards. - """ - result = iglob_internal(pathname) - if with_matches: - return result - return map(lambda s: s[0], result) - - - -def iglob_internal(pathname, _root=True): - """ - ``_root`` is required to differentiate between the user's call to - iglob(), and subsequent recursive calls, for the purposes of resolving - certain special cases of ** wildcards. Specifically, "**" is supposed - to include the current directory for purposes of globbing, but the - directory itself should never be returned. So if ** is the lastmost - part of the ``pathname`` given the user to the root call, we want to - ignore the current directory. For this, we need to know which the root - call is. - """ - if not has_magic(pathname): - if os.path.lexists(pathname): - yield pathname, () - return - dirname, basename = os.path.split(pathname) - if not dirname: - for name, groups in glob1(os.curdir, basename, not _root): - yield name, groups - return - if has_magic(dirname): - dirs = iglob_internal(dirname, _root=False) - else: - dirs = [(dirname, ())] - if has_magic(basename): - glob_in_dir = lambda dir, pat: glob1(dir, pat, not _root) - else: - glob_in_dir = glob0 - for dirname, dir_groups in dirs: - for name, groups in glob_in_dir(dirname, basename): - yield os.path.join(dirname, name), dir_groups + groups - -# These 2 helper functions non-recursively glob inside a literal directory. -# They return a list of basenames. `glob1` accepts a pattern while `glob0` -# takes a literal basename (so it only has to check for its existence). 
- -def glob1(dirname, pattern, include_root): - if not dirname: - dirname = os.curdir - if isinstance(pattern, unicode) and not isinstance(dirname, unicode): - dirname = unicode(dirname, sys.getfilesystemencoding() or - sys.getdefaultencoding()) - try: - if pattern == '**': - # Include the current directory in **, if asked; by adding - # an empty string as opposed to '.', be spare ourselves - # having to deal with os.path.normpath() later. - names = [''] if include_root else [] - for top, dirs, files in os.walk(dirname): - _mkabs = lambda s: os.path.join(top[len(dirname)+1:], s) - names.extend(map(_mkabs, dirs)) - names.extend(map(_mkabs, files)) - # Reset pattern so that fnmatch(), which does not understand - # ** specifically, will only return a single group match. - pattern = '*' +class Globber(object): + + listdir = staticmethod(os.listdir) + walk = staticmethod(os.walk) + exists = staticmethod(os.path.lexists) + + def glob(self, pathname, with_matches=False): + """Return a list of paths matching a pathname pattern. + + The pattern may contain simple shell-style wildcards a la fnmatch. + + """ + return list(self.iglob(pathname, with_matches)) + + def iglob(self, pathname, with_matches=False): + """Return an iterator which yields the paths matching a pathname + pattern. + + The pattern may contain simple shell-style wildcards a la fnmatch. + + If ``with_matches`` is True, then for each matching path + a 2-tuple will be returned; the second element if the tuple + will be a list of the parts of the path that matched the individual + wildcards. + """ + result = self._iglob(pathname) + if with_matches: + return result + return map(lambda s: s[0], result) + + def _iglob(self, pathname, rootcall=True): + """Internal implementation that backs :meth:`iglob`. + + ``rootcall`` is required to differentiate between the user's call to + iglob(), and subsequent recursive calls, for the purposes of resolving + certain special cases of ** wildcards. 
Specifically, "**" is supposed + to include the current directory for purposes of globbing, but the + directory itself should never be returned. So if ** is the lastmost + part of the ``pathname`` given the user to the root call, we want to + ignore the current directory. For this, we need to know which the root + call is. + """ + + # Short-circuit if no glob magic + if not has_magic(pathname): + if self.exists(pathname): + yield pathname, () + return + + # If no directory part is left, assume the working directory + dirname, basename = os.path.split(pathname) + + # If the directory is globbed, recurse to resolve. + # If at this point there is no directory part left, we simply + # continue with dirname="", which will search the current dir. + if dirname and has_magic(dirname): + # Note that this may return files, which will be ignored + # later when we try to use them as directories. + # Prefiltering them here would only require more IO ops. + dirs = self._iglob(dirname, rootcall=False) else: - names = os.listdir(dirname) - except os.error: - return [] - if pattern[0] != '.': - # Do not filter out the '' that we might have added earlier - names = filter(lambda x: not x or x[0] != '.', names) - return fnmatch.filter(names, pattern) - -def glob0(dirname, basename): - if basename == '': - # `os.path.split()` returns an empty basename for paths ending with a - # directory separator. 'q*x/' should match only directories. 
- if os.path.isdir(dirname): - return [(basename, ())] - else: - if os.path.lexists(os.path.join(dirname, basename)): - return [(basename, ())] - return [] + dirs = [(dirname, ())] + + # Resolve ``basename`` expr for every directory found + for dirname, dir_groups in dirs: + for name, groups in self.resolve_pattern( + dirname, basename, not rootcall): + yield os.path.join(dirname, name), dir_groups + groups + + def resolve_pattern(self, dirname, pattern, globstar_with_root): + """Apply ``pattern`` (contains no path elements) to the + literal directory`` in dirname``. + + If pattern=='', this will filter for directories. This is + a special case that happens when the user's glob expression ends + with a slash (in which case we only want directories). It simpler + and faster to filter here than in :meth:`_iglob`. + """ + + if isinstance(pattern, unicode) and not isinstance(dirname, unicode): + dirname = unicode(dirname, sys.getfilesystemencoding() or + sys.getdefaultencoding()) + + # If no magic, short-circuit, only check for existence + if not has_magic(pattern): + if pattern == '': + if os.path.isdir(dirname): + return [(pattern, ())] + else: + if self.exists(os.path.join(dirname, pattern)): + return [(pattern, ())] + return [] + + if not dirname: + dirname = os.curdir + + try: + if pattern == '**': + # Include the current directory in **, if asked; by adding + # an empty string as opposed to '.', be spare ourselves + # having to deal with os.path.normpath() later. + names = [''] if globstar_with_root else [] + for top, dirs, files in self.walk(dirname): + _mkabs = lambda s: os.path.join(top[len(dirname)+1:], s) + names.extend(map(_mkabs, dirs)) + names.extend(map(_mkabs, files)) + # Reset pattern so that fnmatch(), which does not understand + # ** specifically, will only return a single group match. 
+ pattern = '*' + else: + names = self.listdir(dirname) + except os.error: + return [] + + if pattern[0] != '.': + # Remove hidden files by default, but take care to ensure + # that the empty string we may have added earlier remains. + # Do not filter out the '' that we might have added earlier + names = filter(lambda x: not x or x[0] != '.', names) + return fnmatch.filter(names, pattern) + + +default_globber = Globber() +glob = default_globber.glob +iglob = default_globber.iglob +del default_globber magic_check = re.compile('[*?[]') From 839b040337a9fd1183a0cebc756f6e49a4ab190d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Elsd=C3=B6rfer?= Date: Tue, 10 Jul 2012 22:56:05 +0200 Subject: [PATCH 05/35] Providing a custom os.walk implementation. By using the other filesystem APIs already wrapped, a subclass does not need to implement walk() itself. Also fixed os.path.isdir() not being wrapped. --- src/glob2/impl.py | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/src/glob2/impl.py b/src/glob2/impl.py index b54ec41..e9e6e10 100644 --- a/src/glob2/impl.py +++ b/src/glob2/impl.py @@ -9,9 +9,34 @@ class Globber(object): listdir = staticmethod(os.listdir) - walk = staticmethod(os.walk) + isdir = staticmethod(os.path.isdir) + islink = staticmethod(os.path.islink) exists = staticmethod(os.path.lexists) + def walk(self, top, followlinks=False): + """A simplified version of os.walk (code copied) that uses + ``self.listdir``, and the other local filesystem methods. + + Because we don't care about file/directory distinctions, only + a single list is returned. 
+ """ + try: + names = self.listdir(top) + except os.error, err: + return + + items = [] + for name in names: + items.append(name) + + yield top, items + + for name in items: + new_path = os.path.join(top, name) + if followlinks or not self.islink(new_path): + for x in self.walk(new_path, followlinks): + yield x + def glob(self, pathname, with_matches=False): """Return a list of paths matching a pathname pattern. @@ -92,7 +117,7 @@ def resolve_pattern(self, dirname, pattern, globstar_with_root): # If no magic, short-circuit, only check for existence if not has_magic(pattern): if pattern == '': - if os.path.isdir(dirname): + if self.isdir(dirname): return [(pattern, ())] else: if self.exists(os.path.join(dirname, pattern)): @@ -108,10 +133,9 @@ def resolve_pattern(self, dirname, pattern, globstar_with_root): # an empty string as opposed to '.', be spare ourselves # having to deal with os.path.normpath() later. names = [''] if globstar_with_root else [] - for top, dirs, files in self.walk(dirname): + for top, entries in self.walk(dirname): _mkabs = lambda s: os.path.join(top[len(dirname)+1:], s) - names.extend(map(_mkabs, dirs)) - names.extend(map(_mkabs, files)) + names.extend(map(_mkabs, entries)) # Reset pattern so that fnmatch(), which does not understand # ** specifically, will only return a single group match. pattern = '*' From 1925917e2b83445a2402e9ea56e8e57f76ae7da5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Elsd=C3=B6rfer?= Date: Tue, 10 Jul 2012 23:14:55 +0200 Subject: [PATCH 06/35] Document subclassing the globber. --- README.rst | 43 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index ebe0a7c..139f3a9 100644 --- a/README.rst +++ b/README.rst @@ -10,9 +10,12 @@ This is an extended version of Python's builtin glob module - A recursive '**' globbing syntax, akin for example to the ``globstar`` option of the bash shell. 
+- The ability to replace the filesystem functions used, in order to glob + on virtual filesystems. -Example -------- + +Examples +-------- Matches being returned: @@ -40,3 +43,39 @@ desired effect. ``**`` will match ".", so ``**/*.py`` returns Python files in the current directory. If this is not wanted, ``*/**/*.py`` should be used instead. + + +Custom Globber: + +:: + + from glob2 import Globber + + class VirtualStorageGlobber(Globber): + def __init__(self, storage): + self.storage = storage + def listdir(self, path): + # Must raise os.error if path is not a directory + return self.storage.listdir(path) + def exists(self, path): + return self.storage.exists(path) + def isdir(self, path): + # Used only for trailing slash syntax (``foo/``). + return self.storage.isdir(path) + def islink(self, path): + # Used only for recursive glob (``**``). + return self.storage.islink(path) + + globber = VirtualStorageGlobber(sftp_storage) + globber.glob('/var/www/**/*.js') + + +If ``isdir`` and/or ``islink`` cannot be implemented for a storage, you can +make them return a fixed value, with the following consequences: + + - If ``isdir`` returns ``True``, a glob expression ending with a slash + will return all items, even non-directories, if it returns ``False``, + the same glob expression will return nothing. + + - Return ``islink`` ``True``, the recursive globbing syntax ** will + follow all links. If you return ``False``, it will not work at all. From 61cc98f260befe679d83aeb2948ed57bcc681893 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Elsd=C3=B6rfer?= Date: Tue, 10 Jul 2012 23:16:09 +0200 Subject: [PATCH 07/35] Improve README formatting. 
--- README.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.rst b/README.rst index 139f3a9..320b894 100644 --- a/README.rst +++ b/README.rst @@ -18,6 +18,7 @@ Examples -------- Matches being returned: +~~~~~~~~~~~~~~~~~~~~~~~ :: @@ -28,6 +29,7 @@ Matches being returned: Recursive glob: +~~~~~~~~~~~~~~~ :: @@ -46,6 +48,7 @@ instead. Custom Globber: +~~~~~~~~~~~~~~~ :: From d28e36a8b1e2a13b023e34c9101047c0d5fe3782 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Elsd=C3=B6rfer?= Date: Tue, 10 Jul 2012 23:17:31 +0200 Subject: [PATCH 08/35] Fix ReST link syntax. --- README.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index 320b894..8d0b5a2 100644 --- a/README.rst +++ b/README.rst @@ -76,9 +76,9 @@ Custom Globber: If ``isdir`` and/or ``islink`` cannot be implemented for a storage, you can make them return a fixed value, with the following consequences: - - If ``isdir`` returns ``True``, a glob expression ending with a slash - will return all items, even non-directories, if it returns ``False``, - the same glob expression will return nothing. +- If ``isdir`` returns ``True``, a glob expression ending with a slash + will return all items, even non-directories, if it returns ``False``, + the same glob expression will return nothing. - - Return ``islink`` ``True``, the recursive globbing syntax ** will - follow all links. If you return ``False``, it will not work at all. +- If ``islink`` always returns ``True``, the recursive globbing syntax ** will + not recurse into subdirectories. If it always returns ``False``, it will follow all links. From efb3ce574855121ec47d1d87ff9db1a6eec97dbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Elsd=C3=B6rfer?= Date: Wed, 8 May 2013 21:00:56 +0200 Subject: [PATCH 09/35] Add original files from Python 3.3.1. 
--- src/glob2/fnmatch.py | 45 +++++++++++++++++++------------------------- src/glob2/glob.py | 42 ++++++++++++++++++++++++++++------------- 2 files changed, 48 insertions(+), 39 deletions(-) diff --git a/src/glob2/fnmatch.py b/src/glob2/fnmatch.py index ffe99b5..6330b0c 100644 --- a/src/glob2/fnmatch.py +++ b/src/glob2/fnmatch.py @@ -9,18 +9,13 @@ The function translate(PATTERN) returns a regular expression corresponding to PATTERN. (It does not compile it.) """ - +import os +import posixpath import re +import functools __all__ = ["filter", "fnmatch", "fnmatchcase", "translate"] -_cache = {} -_MAXCACHE = 100 - -def _purge(): - """Clear the pattern cache""" - _cache.clear() - def fnmatch(name, pat): """Test whether FILENAME matches PATTERN. @@ -36,23 +31,25 @@ def fnmatch(name, pat): if the operating system requires it. If you don't want this, use fnmatchcase(FILENAME, PATTERN). """ - - import os name = os.path.normcase(name) pat = os.path.normcase(pat) return fnmatchcase(name, pat) -def filter(names, pat): - """Return the subset of the list NAMES that match PAT""" - import os,posixpath - result=[] - pat=os.path.normcase(pat) - if not pat in _cache: +@functools.lru_cache(maxsize=256, typed=True) +def _compile_pattern(pat): + if isinstance(pat, bytes): + pat_str = str(pat, 'ISO-8859-1') + res_str = translate(pat_str) + res = bytes(res_str, 'ISO-8859-1') + else: res = translate(pat) - if len(_cache) >= _MAXCACHE: - _cache.clear() - _cache[pat] = re.compile(res) - match=_cache[pat].match + return re.compile(res).match + +def filter(names, pat): + """Return the subset of the list NAMES that match PAT.""" + result = [] + pat = os.path.normcase(pat) + match = _compile_pattern(pat) if os.path is posixpath: # normcase on posix is NOP. Optimize it away from the loop. for name in names: @@ -70,13 +67,9 @@ def fnmatchcase(name, pat): This is a version of fnmatch() which doesn't case-normalize its arguments. 
""" + match = _compile_pattern(pat) + return match(name) is not None - if not pat in _cache: - res = translate(pat) - if len(_cache) >= _MAXCACHE: - _cache.clear() - _cache[pat] = re.compile(res) - return _cache[pat].match(name) is not None def translate(pat): """Translate a shell PATTERN to a regular expression. diff --git a/src/glob2/glob.py b/src/glob2/glob.py index 04364be..1f60265 100644 --- a/src/glob2/glob.py +++ b/src/glob2/glob.py @@ -1,6 +1,5 @@ """Filename globbing utility.""" -import sys import os import re import fnmatch @@ -10,7 +9,10 @@ def glob(pathname): """Return a list of paths matching a pathname pattern. - The pattern may contain simple shell-style wildcards a la fnmatch. + The pattern may contain simple shell-style wildcards a la + fnmatch. However, unlike fnmatch, filenames starting with a + dot are special cases that are not matched by '*' and '?' + patterns. """ return list(iglob(pathname)) @@ -18,7 +20,10 @@ def glob(pathname): def iglob(pathname): """Return an iterator which yields the paths matching a pathname pattern. - The pattern may contain simple shell-style wildcards a la fnmatch. + The pattern may contain simple shell-style wildcards a la + fnmatch. However, unlike fnmatch, filenames starting with a + dot are special cases that are not matched by '*' and '?' + patterns. """ if not has_magic(pathname): @@ -27,10 +32,13 @@ def iglob(pathname): return dirname, basename = os.path.split(pathname) if not dirname: - for name in glob1(os.curdir, basename): + for name in glob1(None, basename): yield name return - if has_magic(dirname): + # `os.path.split()` returns the argument itself as a dirname if it is a + # drive or UNC path. Prevent an infinite recursion if a drive or UNC path + # contains magic characters (i.e. r'\\?\C:'). 
+ if dirname != pathname and has_magic(dirname): dirs = iglob(dirname) else: dirs = [dirname] @@ -48,20 +56,20 @@ def iglob(pathname): def glob1(dirname, pattern): if not dirname: - dirname = os.curdir - if isinstance(pattern, unicode) and not isinstance(dirname, unicode): - dirname = unicode(dirname, sys.getfilesystemencoding() or - sys.getdefaultencoding()) + if isinstance(pattern, bytes): + dirname = bytes(os.curdir, 'ASCII') + else: + dirname = os.curdir try: names = os.listdir(dirname) except os.error: return [] - if pattern[0] != '.': - names = filter(lambda x: x[0] != '.', names) + if not _ishidden(pattern): + names = [x for x in names if not _ishidden(x)] return fnmatch.filter(names, pattern) def glob0(dirname, basename): - if basename == '': + if not basename: # `os.path.split()` returns an empty basename for paths ending with a # directory separator. 'q*x/' should match only directories. if os.path.isdir(dirname): @@ -73,6 +81,14 @@ def glob0(dirname, basename): magic_check = re.compile('[*?[]') +magic_check_bytes = re.compile(b'[*?[]') def has_magic(s): - return magic_check.search(s) is not None + if isinstance(s, bytes): + match = magic_check_bytes.search(s) + else: + match = magic_check.search(s) + return match is not None + +def _ishidden(path): + return path[0] in ('.', b'.'[0]) From 687667507d77d2d5fdef491daf09efa25fc0d7c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Elsd=C3=B6rfer?= Date: Wed, 8 May 2013 21:47:48 +0200 Subject: [PATCH 10/35] Make work on Python 3.3 and 2.x. Before this, we had a mix of Python 2 (the original) with the version from Python 3 merged in. 
--- src/glob2/__init__.py | 3 +- src/glob2/compat.py | 333 ++++++++++++++++++++++++++++++++++++++++++ src/glob2/fnmatch.py | 11 +- src/glob2/impl.py | 9 +- test.py | 2 +- 5 files changed, 349 insertions(+), 9 deletions(-) create mode 100644 src/glob2/compat.py diff --git a/src/glob2/__init__.py b/src/glob2/__init__.py index 83dcfe1..55bf09f 100644 --- a/src/glob2/__init__.py +++ b/src/glob2/__init__.py @@ -1,4 +1,5 @@ -from impl import * +from __future__ import absolute_import +from .impl import * __version__ = (0, 3) diff --git a/src/glob2/compat.py b/src/glob2/compat.py new file mode 100644 index 0000000..2606516 --- /dev/null +++ b/src/glob2/compat.py @@ -0,0 +1,333 @@ +# Back-port functools.lru_cache to Python 2 (and <= 3.2) +# {{{ http://code.activestate.com/recipes/578078/ (r6) + +from collections import namedtuple +from functools import update_wrapper +from threading import RLock + +_CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"]) + +class _HashedSeq(list): + __slots__ = 'hashvalue' + + def __init__(self, tup, hash=hash): + self[:] = tup + self.hashvalue = hash(tup) + + def __hash__(self): + return self.hashvalue + +def _make_key(args, kwds, typed, + kwd_mark = (object(),), + fasttypes = {int, str, frozenset, type(None)}, + sorted=sorted, tuple=tuple, type=type, len=len): + 'Make a cache key from optionally typed positional and keyword arguments' + key = args + if kwds: + sorted_items = sorted(kwds.items()) + key += kwd_mark + for item in sorted_items: + key += item + if typed: + key += tuple(type(v) for v in args) + if kwds: + key += tuple(type(v) for k, v in sorted_items) + elif len(key) == 1 and type(key[0]) in fasttypes: + return key[0] + return _HashedSeq(key) + +def lru_cache(maxsize=100, typed=False): + """Least-recently-used cache decorator. + + If *maxsize* is set to None, the LRU features are disabled and the cache + can grow without bound. 
+ + If *typed* is True, arguments of different types will be cached separately. + For example, f(3.0) and f(3) will be treated as distinct calls with + distinct results. + + Arguments to the cached function must be hashable. + + View the cache statistics named tuple (hits, misses, maxsize, currsize) with + f.cache_info(). Clear the cache and statistics with f.cache_clear(). + Access the underlying function with f.__wrapped__. + + See: http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used + + """ + + # Users should only access the lru_cache through its public API: + # cache_info, cache_clear, and f.__wrapped__ + # The internals of the lru_cache are encapsulated for thread safety and + # to allow the implementation to change (including a possible C version). + + def decorating_function(user_function): + + cache = dict() + stats = [0, 0] # make statistics updateable non-locally + HITS, MISSES = 0, 1 # names for the stats fields + make_key = _make_key + cache_get = cache.get # bound method to lookup key or return None + _len = len # localize the global len() function + lock = RLock() # because linkedlist updates aren't threadsafe + root = [] # root of the circular doubly linked list + root[:] = [root, root, None, None] # initialize by pointing to self + nonlocal_root = [root] # make updateable non-locally + PREV, NEXT, KEY, RESULT = 0, 1, 2, 3 # names for the link fields + + if maxsize == 0: + + def wrapper(*args, **kwds): + # no caching, just do a statistics update after a successful call + result = user_function(*args, **kwds) + stats[MISSES] += 1 + return result + + elif maxsize is None: + + def wrapper(*args, **kwds): + # simple caching without ordering or size limit + key = make_key(args, kwds, typed) + result = cache_get(key, root) # root used here as a unique not-found sentinel + if result is not root: + stats[HITS] += 1 + return result + result = user_function(*args, **kwds) + cache[key] = result + stats[MISSES] += 1 + return result + + else: + + 
def wrapper(*args, **kwds): + # size limited caching that tracks accesses by recency + key = make_key(args, kwds, typed) if kwds or typed else args + with lock: + link = cache_get(key) + if link is not None: + # record recent use of the key by moving it to the front of the list + root, = nonlocal_root + link_prev, link_next, key, result = link + link_prev[NEXT] = link_next + link_next[PREV] = link_prev + last = root[PREV] + last[NEXT] = root[PREV] = link + link[PREV] = last + link[NEXT] = root + stats[HITS] += 1 + return result + result = user_function(*args, **kwds) + with lock: + root, = nonlocal_root + if key in cache: + # getting here means that this same key was added to the + # cache while the lock was released. since the link + # update is already done, we need only return the + # computed result and update the count of misses. + pass + elif _len(cache) >= maxsize: + # use the old root to store the new key and result + oldroot = root + oldroot[KEY] = key + oldroot[RESULT] = result + # empty the oldest link and make it the new root + root = nonlocal_root[0] = oldroot[NEXT] + oldkey = root[KEY] + oldvalue = root[RESULT] + root[KEY] = root[RESULT] = None + # now update the cache dictionary for the new links + del cache[oldkey] + cache[key] = oldroot + else: + # put result in a new link at the front of the list + last = root[PREV] + link = [last, root, key, result] + last[NEXT] = root[PREV] = cache[key] = link + stats[MISSES] += 1 + return result + + def cache_info(): + """Report cache statistics""" + with lock: + return _CacheInfo(stats[HITS], stats[MISSES], maxsize, len(cache)) + + def cache_clear(): + """Clear the cache and cache statistics""" + with lock: + cache.clear() + root = nonlocal_root[0] + root[:] = [root, root, None, None] + stats[:] = [0, 0] + + wrapper.__wrapped__ = user_function + wrapper.cache_info = cache_info + wrapper.cache_clear = cache_clear + return update_wrapper(wrapper, user_function) + + return decorating_function +## end of 
http://code.activestate.com/recipes/578078/ }}} +## {{{ http://code.activestate.com/recipes/578078/ (r6) +from collections import namedtuple +from functools import update_wrapper +from threading import RLock + +_CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"]) + +class _HashedSeq(list): + __slots__ = 'hashvalue' + + def __init__(self, tup, hash=hash): + self[:] = tup + self.hashvalue = hash(tup) + + def __hash__(self): + return self.hashvalue + +def _make_key(args, kwds, typed, + kwd_mark = (object(),), + fasttypes = {int, str, frozenset, type(None)}, + sorted=sorted, tuple=tuple, type=type, len=len): + 'Make a cache key from optionally typed positional and keyword arguments' + key = args + if kwds: + sorted_items = sorted(kwds.items()) + key += kwd_mark + for item in sorted_items: + key += item + if typed: + key += tuple(type(v) for v in args) + if kwds: + key += tuple(type(v) for k, v in sorted_items) + elif len(key) == 1 and type(key[0]) in fasttypes: + return key[0] + return _HashedSeq(key) + +def lru_cache(maxsize=100, typed=False): + """Least-recently-used cache decorator. + + If *maxsize* is set to None, the LRU features are disabled and the cache + can grow without bound. + + If *typed* is True, arguments of different types will be cached separately. + For example, f(3.0) and f(3) will be treated as distinct calls with + distinct results. + + Arguments to the cached function must be hashable. + + View the cache statistics named tuple (hits, misses, maxsize, currsize) with + f.cache_info(). Clear the cache and statistics with f.cache_clear(). + Access the underlying function with f.__wrapped__. 
+ + See: http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used + + """ + + # Users should only access the lru_cache through its public API: + # cache_info, cache_clear, and f.__wrapped__ + # The internals of the lru_cache are encapsulated for thread safety and + # to allow the implementation to change (including a possible C version). + + def decorating_function(user_function): + + cache = dict() + stats = [0, 0] # make statistics updateable non-locally + HITS, MISSES = 0, 1 # names for the stats fields + make_key = _make_key + cache_get = cache.get # bound method to lookup key or return None + _len = len # localize the global len() function + lock = RLock() # because linkedlist updates aren't threadsafe + root = [] # root of the circular doubly linked list + root[:] = [root, root, None, None] # initialize by pointing to self + nonlocal_root = [root] # make updateable non-locally + PREV, NEXT, KEY, RESULT = 0, 1, 2, 3 # names for the link fields + + if maxsize == 0: + + def wrapper(*args, **kwds): + # no caching, just do a statistics update after a successful call + result = user_function(*args, **kwds) + stats[MISSES] += 1 + return result + + elif maxsize is None: + + def wrapper(*args, **kwds): + # simple caching without ordering or size limit + key = make_key(args, kwds, typed) + result = cache_get(key, root) # root used here as a unique not-found sentinel + if result is not root: + stats[HITS] += 1 + return result + result = user_function(*args, **kwds) + cache[key] = result + stats[MISSES] += 1 + return result + + else: + + def wrapper(*args, **kwds): + # size limited caching that tracks accesses by recency + key = make_key(args, kwds, typed) if kwds or typed else args + with lock: + link = cache_get(key) + if link is not None: + # record recent use of the key by moving it to the front of the list + root, = nonlocal_root + link_prev, link_next, key, result = link + link_prev[NEXT] = link_next + link_next[PREV] = link_prev + last = root[PREV] + 
last[NEXT] = root[PREV] = link + link[PREV] = last + link[NEXT] = root + stats[HITS] += 1 + return result + result = user_function(*args, **kwds) + with lock: + root, = nonlocal_root + if key in cache: + # getting here means that this same key was added to the + # cache while the lock was released. since the link + # update is already done, we need only return the + # computed result and update the count of misses. + pass + elif _len(cache) >= maxsize: + # use the old root to store the new key and result + oldroot = root + oldroot[KEY] = key + oldroot[RESULT] = result + # empty the oldest link and make it the new root + root = nonlocal_root[0] = oldroot[NEXT] + oldkey = root[KEY] + oldvalue = root[RESULT] + root[KEY] = root[RESULT] = None + # now update the cache dictionary for the new links + del cache[oldkey] + cache[key] = oldroot + else: + # put result in a new link at the front of the list + last = root[PREV] + link = [last, root, key, result] + last[NEXT] = root[PREV] = cache[key] = link + stats[MISSES] += 1 + return result + + def cache_info(): + """Report cache statistics""" + with lock: + return _CacheInfo(stats[HITS], stats[MISSES], maxsize, len(cache)) + + def cache_clear(): + """Clear the cache and cache statistics""" + with lock: + cache.clear() + root = nonlocal_root[0] + root[:] = [root, root, None, None] + stats[:] = [0, 0] + + wrapper.__wrapped__ = user_function + wrapper.cache_info = cache_info + wrapper.cache_clear = cache_clear + return update_wrapper(wrapper, user_function) + + return decorating_function \ No newline at end of file diff --git a/src/glob2/fnmatch.py b/src/glob2/fnmatch.py index 4ffb11a..47db550 100644 --- a/src/glob2/fnmatch.py +++ b/src/glob2/fnmatch.py @@ -12,7 +12,10 @@ import os import posixpath import re -import functools +try: + from functools import lru_cache +except ImportError: + from .compat import lru_cache __all__ = ["filter", "fnmatch", "fnmatchcase", "translate"] @@ -35,12 +38,12 @@ def fnmatch(name, pat): pat = 
os.path.normcase(pat) return fnmatchcase(name, pat) -@functools.lru_cache(maxsize=256, typed=True) +lru_cache(maxsize=256, typed=True) def _compile_pattern(pat): if isinstance(pat, bytes): - pat_str = str(pat, 'ISO-8859-1') + pat_str = pat.decode('ISO-8859-1') res_str = translate(pat_str) - res = bytes(res_str, 'ISO-8859-1') + res = res_str.encode('ISO-8859-1') else: res = translate(pat) return re.compile(res).match diff --git a/src/glob2/impl.py b/src/glob2/impl.py index 78fe9ec..0186451 100644 --- a/src/glob2/impl.py +++ b/src/glob2/impl.py @@ -1,8 +1,11 @@ """Filename globbing utility.""" +from __future__ import absolute_import + +import sys import os import re -import fnmatch +from . import fnmatch class Globber(object): @@ -21,7 +24,7 @@ def walk(self, top, followlinks=False): """ try: names = self.listdir(top) - except os.error, err: + except os.error as err: return items = [] @@ -118,7 +121,7 @@ def resolve_pattern(self, dirname, pattern, globstar_with_root): and faster to filter here than in :meth:`_iglob`. """ - if PY3: + if sys.version_info[0] == 3: if isinstance(pattern, bytes): dirname = bytes(os.curdir, 'ASCII') else: diff --git a/test.py b/test.py index f06e7bd..14aac0a 100644 --- a/test.py +++ b/test.py @@ -50,7 +50,7 @@ class BaseTest(object): def setup(self): self.basedir = tempfile.mkdtemp() - self._old_cwd = os.getcwdu() + self._old_cwd = os.getcwd() os.chdir(self.basedir) self.setup_files() From 28582f2302f7010ed2e61eb61ba63156b670aa3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Elsd=C3=B6rfer?= Date: Wed, 8 May 2013 21:49:51 +0200 Subject: [PATCH 11/35] Promote Python 3 compatibility. --- README.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.rst b/README.rst index 8d0b5a2..dca85bc 100644 --- a/README.rst +++ b/README.rst @@ -13,6 +13,8 @@ This is an extended version of Python's builtin glob module - The ability to replace the filesystem functions used, in order to glob on virtual filesystems. 
+- Compatible with Python 2 and Python 3 (tested with 3.3). + Examples -------- From a6ce4b1add0bcc664240cd63b27e460194e27c3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Elsd=C3=B6rfer?= Date: Wed, 8 May 2013 21:57:02 +0200 Subject: [PATCH 12/35] Increment version number for new release. --- src/glob2/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glob2/__init__.py b/src/glob2/__init__.py index 55bf09f..29986bf 100644 --- a/src/glob2/__init__.py +++ b/src/glob2/__init__.py @@ -2,4 +2,4 @@ from .impl import * -__version__ = (0, 3) +__version__ = (0, 4) From a90d18e04e1b12fef85ac23efccdab17aa161072 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Elsd=C3=B6rfer?= Date: Wed, 8 May 2013 22:07:21 +0200 Subject: [PATCH 13/35] Declare Python 3 support. --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index d865004..7be53a3 100755 --- a/setup.py +++ b/setup.py @@ -34,6 +34,7 @@ 'License :: OSI Approved :: BSD License', 'Operating System :: OS Independent', 'Programming Language :: Python', + 'Programming Language :: Python :: 3', 'Topic :: Software Development :: Libraries', ], packages = find_packages('src'), From 88142054d69e33f0e29687523006840857c27a24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Elsd=C3=B6rfer?= Date: Wed, 8 May 2013 22:13:45 +0200 Subject: [PATCH 14/35] In readme, specify which glob code we're based on. --- README.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.rst b/README.rst index dca85bc..4988180 100644 --- a/README.rst +++ b/README.rst @@ -15,6 +15,8 @@ This is an extended version of Python's builtin glob module - Compatible with Python 2 and Python 3 (tested with 3.3). +It's currently based on the glob code from Python 3.3.1. + Examples -------- From 9df5ad54bee5306d5c550b0f76cb9555aebacf63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Elsd=C3=B6rfer?= Date: Mon, 13 May 2013 14:48:24 +0200 Subject: [PATCH 15/35] Bring back 2.6 compatibility. 
Also, the compat module had the lru_cache recipe copied in twice. --- src/glob2/compat.py | 168 +------------------------------------------- 1 file changed, 1 insertion(+), 167 deletions(-) diff --git a/src/glob2/compat.py b/src/glob2/compat.py index 2606516..b4df988 100644 --- a/src/glob2/compat.py +++ b/src/glob2/compat.py @@ -19,173 +19,7 @@ def __hash__(self): def _make_key(args, kwds, typed, kwd_mark = (object(),), - fasttypes = {int, str, frozenset, type(None)}, - sorted=sorted, tuple=tuple, type=type, len=len): - 'Make a cache key from optionally typed positional and keyword arguments' - key = args - if kwds: - sorted_items = sorted(kwds.items()) - key += kwd_mark - for item in sorted_items: - key += item - if typed: - key += tuple(type(v) for v in args) - if kwds: - key += tuple(type(v) for k, v in sorted_items) - elif len(key) == 1 and type(key[0]) in fasttypes: - return key[0] - return _HashedSeq(key) - -def lru_cache(maxsize=100, typed=False): - """Least-recently-used cache decorator. - - If *maxsize* is set to None, the LRU features are disabled and the cache - can grow without bound. - - If *typed* is True, arguments of different types will be cached separately. - For example, f(3.0) and f(3) will be treated as distinct calls with - distinct results. - - Arguments to the cached function must be hashable. - - View the cache statistics named tuple (hits, misses, maxsize, currsize) with - f.cache_info(). Clear the cache and statistics with f.cache_clear(). - Access the underlying function with f.__wrapped__. - - See: http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used - - """ - - # Users should only access the lru_cache through its public API: - # cache_info, cache_clear, and f.__wrapped__ - # The internals of the lru_cache are encapsulated for thread safety and - # to allow the implementation to change (including a possible C version). 
- - def decorating_function(user_function): - - cache = dict() - stats = [0, 0] # make statistics updateable non-locally - HITS, MISSES = 0, 1 # names for the stats fields - make_key = _make_key - cache_get = cache.get # bound method to lookup key or return None - _len = len # localize the global len() function - lock = RLock() # because linkedlist updates aren't threadsafe - root = [] # root of the circular doubly linked list - root[:] = [root, root, None, None] # initialize by pointing to self - nonlocal_root = [root] # make updateable non-locally - PREV, NEXT, KEY, RESULT = 0, 1, 2, 3 # names for the link fields - - if maxsize == 0: - - def wrapper(*args, **kwds): - # no caching, just do a statistics update after a successful call - result = user_function(*args, **kwds) - stats[MISSES] += 1 - return result - - elif maxsize is None: - - def wrapper(*args, **kwds): - # simple caching without ordering or size limit - key = make_key(args, kwds, typed) - result = cache_get(key, root) # root used here as a unique not-found sentinel - if result is not root: - stats[HITS] += 1 - return result - result = user_function(*args, **kwds) - cache[key] = result - stats[MISSES] += 1 - return result - - else: - - def wrapper(*args, **kwds): - # size limited caching that tracks accesses by recency - key = make_key(args, kwds, typed) if kwds or typed else args - with lock: - link = cache_get(key) - if link is not None: - # record recent use of the key by moving it to the front of the list - root, = nonlocal_root - link_prev, link_next, key, result = link - link_prev[NEXT] = link_next - link_next[PREV] = link_prev - last = root[PREV] - last[NEXT] = root[PREV] = link - link[PREV] = last - link[NEXT] = root - stats[HITS] += 1 - return result - result = user_function(*args, **kwds) - with lock: - root, = nonlocal_root - if key in cache: - # getting here means that this same key was added to the - # cache while the lock was released. 
since the link - # update is already done, we need only return the - # computed result and update the count of misses. - pass - elif _len(cache) >= maxsize: - # use the old root to store the new key and result - oldroot = root - oldroot[KEY] = key - oldroot[RESULT] = result - # empty the oldest link and make it the new root - root = nonlocal_root[0] = oldroot[NEXT] - oldkey = root[KEY] - oldvalue = root[RESULT] - root[KEY] = root[RESULT] = None - # now update the cache dictionary for the new links - del cache[oldkey] - cache[key] = oldroot - else: - # put result in a new link at the front of the list - last = root[PREV] - link = [last, root, key, result] - last[NEXT] = root[PREV] = cache[key] = link - stats[MISSES] += 1 - return result - - def cache_info(): - """Report cache statistics""" - with lock: - return _CacheInfo(stats[HITS], stats[MISSES], maxsize, len(cache)) - - def cache_clear(): - """Clear the cache and cache statistics""" - with lock: - cache.clear() - root = nonlocal_root[0] - root[:] = [root, root, None, None] - stats[:] = [0, 0] - - wrapper.__wrapped__ = user_function - wrapper.cache_info = cache_info - wrapper.cache_clear = cache_clear - return update_wrapper(wrapper, user_function) - - return decorating_function -## end of http://code.activestate.com/recipes/578078/ }}} -## {{{ http://code.activestate.com/recipes/578078/ (r6) -from collections import namedtuple -from functools import update_wrapper -from threading import RLock - -_CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"]) - -class _HashedSeq(list): - __slots__ = 'hashvalue' - - def __init__(self, tup, hash=hash): - self[:] = tup - self.hashvalue = hash(tup) - - def __hash__(self): - return self.hashvalue - -def _make_key(args, kwds, typed, - kwd_mark = (object(),), - fasttypes = {int, str, frozenset, type(None)}, + fasttypes = set((int, str, frozenset, type(None))), sorted=sorted, tuple=tuple, type=type, len=len): 'Make a cache key from optionally typed 
positional and keyword arguments' key = args From 880ac00407776db1b2ac7a3b41b9ae28425b8ab7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Elsd=C3=B6rfer?= Date: Mon, 13 May 2013 14:48:56 +0200 Subject: [PATCH 16/35] Prepare new version. --- src/glob2/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glob2/__init__.py b/src/glob2/__init__.py index 29986bf..0bf6759 100644 --- a/src/glob2/__init__.py +++ b/src/glob2/__init__.py @@ -2,4 +2,4 @@ from .impl import * -__version__ = (0, 4) +__version__ = (0, 4, 1) From fec70a24f7aa37730a2582f2dc20f41ad3e40559 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Tue, 21 Jan 2014 11:08:08 +0100 Subject: [PATCH 17/35] Fixed comment typo --- src/glob2/impl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glob2/impl.py b/src/glob2/impl.py index 0186451..ee2afe9 100644 --- a/src/glob2/impl.py +++ b/src/glob2/impl.py @@ -145,7 +145,7 @@ def resolve_pattern(self, dirname, pattern, globstar_with_root): try: if pattern == '**': # Include the current directory in **, if asked; by adding - # an empty string as opposed to '.', be spare ourselves + # an empty string as opposed to '.', we spare ourselves # having to deal with os.path.normpath() later. names = [''] if globstar_with_root else [] for top, entries in self.walk(dirname): From f7e34dba118c8a77ad23aa4eb77a9384393ed791 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Elsd=C3=B6rfer?= Date: Wed, 22 Jan 2014 10:25:59 +0100 Subject: [PATCH 18/35] Add license file. --- LICENSE | 27 +++++++++++++++++++++++++++ MANIFEST.in | 2 +- setup.py | 1 + 3 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..953b038 --- /dev/null +++ b/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2008, Michael Elsdörfer +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials + provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
\ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in index 3c983d6..84339d7 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1,2 @@ -include README.rst CHANGES +include README.rst CHANGES LICENSE include test.py diff --git a/setup.py b/setup.py index 7be53a3..78931c4 100755 --- a/setup.py +++ b/setup.py @@ -27,6 +27,7 @@ 'and supports recursive wildcards', author = 'Michael Elsdoerfer', author_email = 'michael@elsdoerfer.com', + license='BSD', url = 'http://github.com/miracle2k/python-glob2/', classifiers = [ 'Development Status :: 3 - Alpha', From 0ee1841fd1d669a41f49c7a89f486a35500f16b8 Mon Sep 17 00:00:00 2001 From: Jools Wills Date: Sat, 9 May 2015 18:23:45 +0100 Subject: [PATCH 19/35] make fnmatch.translate also support globstar and make * non-greedy. eg before: pattern="/home/*/cache" would match /home/user/cache /home/user/something/cache /home/user/something/something/cache now: pattern="/home/*/cache" would match /home/user/cache pattern="/home/**/cache" would match /home/user/cache /home/user/something/cache /home/user/something/something/cache this brings it in line with the other functions in glob2 and, although it changes the functionality of a single *, it makes it match the behaviour of most shells and is more intuitive. --- src/glob2/fnmatch.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/glob2/fnmatch.py b/src/glob2/fnmatch.py index 47db550..949f8fd 100644 --- a/src/glob2/fnmatch.py +++ b/src/glob2/fnmatch.py @@ -88,7 +88,11 @@ def translate(pat): c = pat[i] i = i+1 if c == '*': - res = res + '(.*)' + if i < n and pat[i] == '*': + res = res + '(.*)' + i = i+1 + else: + res = res + '([^\\' + os.path.sep + ']*)' elif c == '?': res = res + '(.)' elif c == '[': From 471a5fcd9c942249a2fca056dd8a774006a3aba8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Elsd=C3=B6rfer?= Date: Fri, 22 May 2015 22:43:16 +0200 Subject: [PATCH 20/35] Stop using the ./src subdirectory.
--- {src/glob2 => glob2}/__init__.py | 0 {src/glob2 => glob2}/compat.py | 0 {src/glob2 => glob2}/fnmatch.py | 0 {src/glob2 => glob2}/impl.py | 0 setup.py | 5 ++--- 5 files changed, 2 insertions(+), 3 deletions(-) rename {src/glob2 => glob2}/__init__.py (100%) rename {src/glob2 => glob2}/compat.py (100%) rename {src/glob2 => glob2}/fnmatch.py (100%) rename {src/glob2 => glob2}/impl.py (100%) diff --git a/src/glob2/__init__.py b/glob2/__init__.py similarity index 100% rename from src/glob2/__init__.py rename to glob2/__init__.py diff --git a/src/glob2/compat.py b/glob2/compat.py similarity index 100% rename from src/glob2/compat.py rename to glob2/compat.py diff --git a/src/glob2/fnmatch.py b/glob2/fnmatch.py similarity index 100% rename from src/glob2/fnmatch.py rename to glob2/fnmatch.py diff --git a/src/glob2/impl.py b/glob2/impl.py similarity index 100% rename from src/glob2/impl.py rename to glob2/impl.py diff --git a/setup.py b/setup.py index 78931c4..89c65d6 100755 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ here = os.path.dirname(os.path.abspath(__file__)) version_re = re.compile( r'__version__ = (\(.*?\))') -fp = open(os.path.join(here, 'src/glob2', '__init__.py')) +fp = open(os.path.join(here, 'glob2', '__init__.py')) version = None for line in fp: match = version_re.search(line) @@ -38,6 +38,5 @@ 'Programming Language :: Python :: 3', 'Topic :: Software Development :: Libraries', ], - packages = find_packages('src'), - package_dir = {'': 'src'}, + packages = find_packages() ) From 65d0c2d7d1e874dfd26f1eab3e61cbf23ce84fe1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Elsd=C3=B6rfer?= Date: Fri, 22 May 2015 22:45:52 +0200 Subject: [PATCH 21/35] Remove nosetests dependency. You can now use other testing frameworks. 
--- test.py | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/test.py b/test.py index 14aac0a..b3e8f24 100644 --- a/test.py +++ b/test.py @@ -3,8 +3,6 @@ import shutil import tempfile -from nose.tools import assert_equals - import glob2 from glob2 import fnmatch @@ -91,69 +89,69 @@ def setup_files(self): def test_recursive(self): # ** includes the current directory - assert_equals(sorted(glob2.glob('**/*.py', True)), [ + assert sorted(glob2.glob('**/*.py', True)) == [ ('a/bar.py', ('a', 'bar')), ('a/foo/hello.py', ('a/foo', 'hello')), ('b/bar.py', ('b', 'bar')), ('file.py', ('', 'file')), - ]) + ] def test_exclude_root_directory(self): - # If files from the rot directory should not be included, + # If files from the root directory should not be included, # this is the syntax to use: - assert_equals(sorted(glob2.glob('*/**/*.py', True)), [ + assert sorted(glob2.glob('*/**/*.py', True)) == [ ('a/bar.py', ('a', '', 'bar')), ('a/foo/hello.py', ('a', 'foo', 'hello')), ('b/bar.py', ('b', '', 'bar')) - ]) + ] def test_only_directories(self): # Return directories only - assert_equals(sorted(glob2.glob('**/', True)), [ + assert sorted(glob2.glob('**/', True)) == [ ('a/', ('a',)), ('a/foo/', ('a/foo',)), ('b/', ('b',)), - ]) + ] def test_parent_dir(self): # Make sure ".." 
can be used os.chdir(path.join(self.basedir, 'b')) - assert_equals(sorted(glob2.glob('../a/**/*.py', True)), [ + assert sorted(glob2.glob('../a/**/*.py', True)), [ ('../a/bar.py', ('', 'bar')), ('../a/foo/hello.py', ('foo', 'hello')) - ]) + ] def test_fixed_basename(self): - assert_equals(sorted(glob2.glob('**/bar.py', True)), [ + assert sorted(glob2.glob('**/bar.py', True)) == [ ('a/bar.py', ('a',)), ('b/bar.py', ('b',)), - ]) + ] def test_all_files(self): # Return all files os.chdir(path.join(self.basedir, 'a')) - assert_equals(sorted(glob2.glob('**', True)), [ + assert sorted(glob2.glob('**', True)) == [ ('bar.py', ('bar.py',)), ('foo', ('foo',)), ('foo/hello.py', ('foo/hello.py',)), ('foo/world.txt', ('foo/world.txt',)), - ]) + ] def test_root_directory_not_returned(self): # Ensure that a certain codepath (when the basename is globbed # with ** as opposed to the dirname) does not cause # the root directory to be part of the result. # -> b/ is NOT in the result! - assert_equals(sorted(glob2.glob('b/**', True)), [ + assert sorted(glob2.glob('b/**', True)) == [ ('b/bar.py', ('bar.py',)), ('b/py', ('py',)), - ]) + ] def test_non_glob(self): # Test without patterns. - assert_equals(glob2.glob(__file__, True), [ + assert glob2.glob(__file__, True) == [ (__file__, ()) - ]) - assert_equals(glob2.glob(__file__), [ + ] + assert glob2.glob(__file__) == [ (__file__) - ]) + ] From 58a66cd6000ec0a07b47fb6f5cfefb54e967e019 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Elsd=C3=B6rfer?= Date: Fri, 22 May 2015 22:55:45 +0200 Subject: [PATCH 22/35] Revert "make fnmatch.translate also support globstar and make * non-greedy. eg" This reverts commit 0ee1841fd1d669a41f49c7a89f486a35500f16b8. 
--- glob2/fnmatch.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/glob2/fnmatch.py b/glob2/fnmatch.py index 949f8fd..47db550 100644 --- a/glob2/fnmatch.py +++ b/glob2/fnmatch.py @@ -88,11 +88,7 @@ def translate(pat): c = pat[i] i = i+1 if c == '*': - if i < n and pat[i] == '*': - res = res + '(.*)' - i = i+1 - else: - res = res + '([^\\' + os.path.sep + ']*)' + res = res + '(.*)' elif c == '?': res = res + '(.)' elif c == '[': From f6f40dfa43fd9440f74417f1c79749c41bd3ae29 Mon Sep 17 00:00:00 2001 From: Janne K <0x022b@gmail.com> Date: Sun, 1 Nov 2015 16:55:22 +0200 Subject: [PATCH 23/35] Add 'include_hidden' argument to include hidden files and folders --- glob2/impl.py | 25 +++++++++++++++---------- test.py | 17 +++++++++++++++++ 2 files changed, 32 insertions(+), 10 deletions(-) diff --git a/glob2/impl.py b/glob2/impl.py index ee2afe9..c577628 100644 --- a/glob2/impl.py +++ b/glob2/impl.py @@ -39,7 +39,7 @@ def walk(self, top, followlinks=False): for x in self.walk(new_path, followlinks): yield x - def glob(self, pathname, with_matches=False): + def glob(self, pathname, with_matches=False, include_hidden=False): """Return a list of paths matching a pathname pattern. The pattern may contain simple shell-style wildcards a la @@ -47,10 +47,12 @@ def glob(self, pathname, with_matches=False): dot are special cases that are not matched by '*' and '?' patterns. + If ``include_hidden`` is True, then files and folders starting with + a dot are also returned. """ - return list(self.iglob(pathname, with_matches)) + return list(self.iglob(pathname, with_matches, include_hidden)) - def iglob(self, pathname, with_matches=False): + def iglob(self, pathname, with_matches=False, include_hidden=False): """Return an iterator which yields the paths matching a pathname pattern. 
@@ -63,13 +65,16 @@ def iglob(self, pathname, with_matches=False): a 2-tuple will be returned; the second element if the tuple will be a list of the parts of the path that matched the individual wildcards. + + If ``include_hidden`` is True, then files and folders starting with + a dot are also returned. """ - result = self._iglob(pathname) + result = self._iglob(pathname, include_hidden=include_hidden) if with_matches: return result return map(lambda s: s[0], result) - def _iglob(self, pathname, rootcall=True): + def _iglob(self, pathname, rootcall=True, include_hidden=False): """Internal implementation that backs :meth:`iglob`. ``rootcall`` is required to differentiate between the user's call to @@ -101,17 +106,17 @@ def _iglob(self, pathname, rootcall=True): # Note that this may return files, which will be ignored # later when we try to use them as directories. # Prefiltering them here would only require more IO ops. - dirs = self._iglob(dirname, rootcall=False) + dirs = self._iglob(dirname, False, include_hidden) else: dirs = [(dirname, ())] # Resolve ``basename`` expr for every directory found for dirname, dir_groups in dirs: for name, groups in self.resolve_pattern( - dirname, basename, not rootcall): + dirname, basename, not rootcall, include_hidden): yield os.path.join(dirname, name), dir_groups + groups - def resolve_pattern(self, dirname, pattern, globstar_with_root): + def resolve_pattern(self, dirname, pattern, globstar_with_root, include_hidden): """Apply ``pattern`` (contains no path elements) to the literal directory`` in dirname``. @@ -159,8 +164,8 @@ def resolve_pattern(self, dirname, pattern, globstar_with_root): except os.error: return [] - if not _ishidden(pattern): - # Remove hidden files by default, but take care to ensure + if not include_hidden and not _ishidden(pattern): + # Remove hidden files, but take care to ensure # that the empty string we may have added earlier remains. 
# Do not filter out the '' that we might have added earlier names = filter(lambda x: not x or not _ishidden(x), names) diff --git a/test.py b/test.py index b3e8f24..c709ee3 100644 --- a/test.py +++ b/test.py @@ -155,3 +155,20 @@ def test_non_glob(self): assert glob2.glob(__file__) == [ (__file__) ] + + +class TestIncludeHidden(BaseTest): + + def setup_files(self): + self.makedirs('a', 'b', 'a/.foo') + self.touch('file.py', 'file.txt', 'a/.bar', 'README', 'b/py', + 'b/.bar', 'a/.foo/hello.py', 'a/.foo/world.txt') + + def test_hidden(self): + # ** includes the current directory + assert sorted(glob2.glob('*/*', True, include_hidden=True)), [ + ('a/.bar', ('a', '.bar')), + ('a/.foo', ('a', '.foo')), + ('b/.bar', ('b', '.bar')), + ('b/py', ('b', 'py')), + ] From 01441c813a03d83ca72d8cb7b5f1021ef7933bc6 Mon Sep 17 00:00:00 2001 From: Janne K <0x022b@gmail.com> Date: Sun, 1 Nov 2015 16:55:40 +0200 Subject: [PATCH 24/35] Fix argument quotation --- glob2/impl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/glob2/impl.py b/glob2/impl.py index c577628..a38715c 100644 --- a/glob2/impl.py +++ b/glob2/impl.py @@ -118,7 +118,7 @@ def _iglob(self, pathname, rootcall=True, include_hidden=False): def resolve_pattern(self, dirname, pattern, globstar_with_root, include_hidden): """Apply ``pattern`` (contains no path elements) to the - literal directory`` in dirname``. + literal directory in ``dirname``. If pattern=='', this will filter for directories. 
This is a special case that happens when the user's glob expression ends From 62fd1e342e4c3175e5ada43205a5da2ef4a7900c Mon Sep 17 00:00:00 2001 From: Janne K <0x022b@gmail.com> Date: Sun, 1 Nov 2015 16:56:02 +0200 Subject: [PATCH 25/35] Add 'desktop.ini' file to .gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 72639fe..c476452 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,6 @@ # IDEs *.wpr /.idea/ + +# Folder config file +[Dd]esktop.ini From b5a53b17fa19498db30f986de1057abaa5f78556 Mon Sep 17 00:00:00 2001 From: Benjamin van der Burgh Date: Wed, 27 Jul 2016 14:14:25 +0700 Subject: [PATCH 26/35] Fixed bug causing iglob to coerce iterator to list --- glob2/impl.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/glob2/impl.py b/glob2/impl.py index a38715c..001c09a 100644 --- a/glob2/impl.py +++ b/glob2/impl.py @@ -5,6 +5,7 @@ import sys import os import re +import itertools from . import fnmatch @@ -72,7 +73,7 @@ def iglob(self, pathname, with_matches=False, include_hidden=False): result = self._iglob(pathname, include_hidden=include_hidden) if with_matches: return result - return map(lambda s: s[0], result) + return itertools.imap(lambda s: s[0], result) def _iglob(self, pathname, rootcall=True, include_hidden=False): """Internal implementation that backs :meth:`iglob`. From 2402a671a095e5a3fbf8c6fe5db8d76ff5c30b6e Mon Sep 17 00:00:00 2001 From: Adam Chainz Date: Sun, 16 Oct 2016 11:16:19 +0100 Subject: [PATCH 27/35] Release as a universal wheel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By releasing as a [Python wheel](http://pythonwheels.com/) as well as a source distribution, you can speed up end user’s installs. After merging this command, to release you just need to run `python setup.py clean sdist bdist_wheel upload`. 
--- setup.cfg | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 setup.cfg diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..2a9acf1 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,2 @@ +[bdist_wheel] +universal = 1 From 79c50a553c38b3060f79b609af0887c4a96e7dec Mon Sep 17 00:00:00 2001 From: Pete Browne Date: Thu, 3 Nov 2016 15:33:11 -0500 Subject: [PATCH 28/35] Use map instead of imap on py35 --- glob2/impl.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/glob2/impl.py b/glob2/impl.py index 001c09a..6c0845e 100644 --- a/glob2/impl.py +++ b/glob2/impl.py @@ -5,9 +5,13 @@ import sys import os import re -import itertools from . import fnmatch +try: + from itertools import imap +except ImportError: + imap = map + class Globber(object): @@ -73,7 +77,7 @@ def iglob(self, pathname, with_matches=False, include_hidden=False): result = self._iglob(pathname, include_hidden=include_hidden) if with_matches: return result - return itertools.imap(lambda s: s[0], result) + return imap(lambda s: s[0], result) def _iglob(self, pathname, rootcall=True, include_hidden=False): """Internal implementation that backs :meth:`iglob`. From ce2391970df3f3fd10e62151a82b1d4d76fb831d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Elsd=C3=B6rfer?= Date: Fri, 4 Nov 2016 13:15:01 +0100 Subject: [PATCH 29/35] Update CHANGES for new release. --- CHANGES | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGES b/CHANGES index 4ebe2ee..d8f4c52 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,11 @@ +0.5 (2016-11-04) + - include_hidden option. + - Python 3 fixes. + - Publish a wheel. + +0.4 (2013-05-08) + - Support Python 3. + 0.3 (2012-01-19) - Fix non-glob patterns (patch by Zalan). - Don't shadow internal "glob" module. From 4851f8c82363113db551f86e612dc8d32b38692f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Elsd=C3=B6rfer?= Date: Fri, 4 Nov 2016 13:17:26 +0100 Subject: [PATCH 30/35] Update version number. 
--- glob2/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/glob2/__init__.py b/glob2/__init__.py index 0bf6759..e350b3b 100644 --- a/glob2/__init__.py +++ b/glob2/__init__.py @@ -2,4 +2,4 @@ from .impl import * -__version__ = (0, 4, 1) +__version__ = (0, 5) From 56a6213cd28b9f1ed8d6ba0d363a378caf43becc Mon Sep 17 00:00:00 2001 From: xoviat Date: Thu, 30 Mar 2017 15:24:05 -0500 Subject: [PATCH 31/35] glob: allow forwards compatibility this change is simple, but it allows the functions to be called with an additional 'recursive' argument, which allows glob2 to be swapped out with the builtin version on newer python versions. --- glob2/impl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/glob2/impl.py b/glob2/impl.py index 6c0845e..87efe2b 100644 --- a/glob2/impl.py +++ b/glob2/impl.py @@ -44,7 +44,7 @@ def walk(self, top, followlinks=False): for x in self.walk(new_path, followlinks): yield x - def glob(self, pathname, with_matches=False, include_hidden=False): + def glob(self, pathname, with_matches=False, include_hidden=False, recursive=True): """Return a list of paths matching a pathname pattern. The pattern may contain simple shell-style wildcards a la @@ -57,7 +57,7 @@ def glob(self, pathname, with_matches=False, include_hidden=False): """ return list(self.iglob(pathname, with_matches, include_hidden)) - def iglob(self, pathname, with_matches=False, include_hidden=False): + def iglob(self, pathname, with_matches=False, include_hidden=False, recursive=True): """Return an iterator which yields the paths matching a pathname pattern. 
From 48752dc8ea0540c96901966207a032c3a2739395 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Wed, 26 Apr 2017 02:11:22 +0200 Subject: [PATCH 32/35] fix(regex): enable lru_cache decorator --- glob2/fnmatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/glob2/fnmatch.py b/glob2/fnmatch.py index 47db550..b31ee04 100644 --- a/glob2/fnmatch.py +++ b/glob2/fnmatch.py @@ -38,7 +38,7 @@ def fnmatch(name, pat): pat = os.path.normcase(pat) return fnmatchcase(name, pat) -lru_cache(maxsize=256, typed=True) +@lru_cache(maxsize=256, typed=True) def _compile_pattern(pat): if isinstance(pat, bytes): pat_str = pat.decode('ISO-8859-1') From b777c1fdc28beb733f92b26d9a558832a95fabc9 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Wed, 26 Apr 2017 02:41:41 +0200 Subject: [PATCH 33/35] feat(#10): configurable norm/case-sensitive/slashes preserving matches + Performance does not seem to be affected when measured with cygwin-python-2. --- glob2/fnmatch.py | 73 +++++++++++++++++++++++++++++++++--------------- glob2/impl.py | 51 ++++++++++++++++++++++----------- 2 files changed, 85 insertions(+), 39 deletions(-) diff --git a/glob2/fnmatch.py b/glob2/fnmatch.py index b31ee04..b178bde 100644 --- a/glob2/fnmatch.py +++ b/glob2/fnmatch.py @@ -10,7 +10,6 @@ corresponding to PATTERN. (It does not compile it.) """ import os -import posixpath import re try: from functools import lru_cache @@ -19,7 +18,16 @@ __all__ = ["filter", "fnmatch", "fnmatchcase", "translate"] -def fnmatch(name, pat): + +def _norm_paths(path, norm_paths, sep): + if norm_paths is None: + path = re.sub(r'\/', sep or os.sep, path) # cached internally + elif norm_paths: + path = os.path.normcase(path) + return path + + +def fnmatch(name, pat, norm_paths=True, case_sensitive=True, sep=None): """Test whether FILENAME matches PATTERN.
Patterns are Unix shell style: @@ -33,46 +41,65 @@ def fnmatch(name, pat): Both FILENAME and PATTERN are first case-normalized if the operating system requires it. If you don't want this, use fnmatchcase(FILENAME, PATTERN). + + :param slashes: + :param norm_paths: + A tri-state boolean: + when true, invokes `os.path.normcase()` on both paths, + when `None`, just equalize slashes/backslashes to `os.sep`, + when false, does not touch paths at all. + + Note that a side-effect of `normcase()` on *Windows* is that + it converts to lower-case all matches of `?glob()` functions. + :param case_sensitive: + defines the case-sensitivity of the regex doing the matches + :param sep: + in case only slashes are replaced, what sep-char to substitute with; + if false, `os.sep` is used. + + Notice that by default, `normcase()` causes insensitive matching + on *Windows*, regardless of the `case_sensitive` param. + Set ``norm_paths=None, case_sensitive=False`` to preserve + verbatim matches. """ - name = os.path.normcase(name) - pat = os.path.normcase(pat) - return fnmatchcase(name, pat) + name, pat = [_norm_paths(p, norm_paths, sep) + for p in (name, pat)] + + return fnmatchcase(name, pat, case_sensitive=case_sensitive) + @lru_cache(maxsize=256, typed=True) -def _compile_pattern(pat): +def _compile_pattern(pat, case_sensitive): if isinstance(pat, bytes): pat_str = pat.decode('ISO-8859-1') res_str = translate(pat_str) res = res_str.encode('ISO-8859-1') else: res = translate(pat) - return re.compile(res).match + flags = 0 if case_sensitive else re.IGNORECASE + return re.compile(res, flags).match + -def filter(names, pat): +def filter(names, pat, norm_paths=True, case_sensitive=True, sep=None): """Return the subset of the list NAMES that match PAT.""" result = [] - pat = os.path.normcase(pat) - match = _compile_pattern(pat) - if os.path is posixpath: - # normcase on posix is NOP. Optimize it away from the loop.
- for name in names: - m = match(name) - if m: - result.append((name, m.groups())) - else: - for name in names: - m = match(os.path.normcase(name)) - if m: - result.append((name, m.groups())) + pat = _norm_paths(pat, norm_paths, sep) + match = _compile_pattern(pat, case_sensitive) + for name in names: + m = match(_norm_paths(name, norm_paths, sep)) + if m: + result.append((name, + tuple(_norm_paths(p, norm_paths, sep) for p in m.groups()))) return result -def fnmatchcase(name, pat): + +def fnmatchcase(name, pat, case_sensitive=True): """Test whether FILENAME matches PATTERN, including case. This is a version of fnmatch() which doesn't case-normalize its arguments. """ - match = _compile_pattern(pat) + match = _compile_pattern(pat, case_sensitive) return match(name) is not None diff --git a/glob2/impl.py b/glob2/impl.py index 87efe2b..097bde6 100644 --- a/glob2/impl.py +++ b/glob2/impl.py @@ -5,6 +5,7 @@ import sys import os import re +from os.path import join from . import fnmatch try: @@ -20,7 +21,7 @@ class Globber(object): islink = staticmethod(os.path.islink) exists = staticmethod(os.path.lexists) - def walk(self, top, followlinks=False): + def walk(self, top, followlinks=False, sep=None): """A simplified version of os.walk (code copied) that uses ``self.listdir``, and the other local filesystem methods. @@ -39,12 +40,13 @@ def walk(self, top, followlinks=False): yield top, items for name in items: - new_path = os.path.join(top, name) + new_path = _join_paths([top, name], sep=sep) if followlinks or not self.islink(new_path): for x in self.walk(new_path, followlinks): yield x - def glob(self, pathname, with_matches=False, include_hidden=False, recursive=True): + def glob(self, pathname, with_matches=False, include_hidden=False, recursive=True, + norm_paths=True, case_sensitive=True, sep=None): """Return a list of paths matching a pathname pattern. 
The pattern may contain simple shell-style wildcards a la @@ -55,9 +57,11 @@ def glob(self, pathname, with_matches=False, include_hidden=False, recursive=Tru If ``include_hidden`` is True, then files and folders starting with a dot are also returned. """ - return list(self.iglob(pathname, with_matches, include_hidden)) + return list(self.iglob(pathname, with_matches, include_hidden, + norm_paths, case_sensitive, sep)) - def iglob(self, pathname, with_matches=False, include_hidden=False, recursive=True): + def iglob(self, pathname, with_matches=False, include_hidden=False, recursive=True, + norm_paths=True, case_sensitive=True, sep=None): """Return an iterator which yields the paths matching a pathname pattern. @@ -74,12 +78,14 @@ def iglob(self, pathname, with_matches=False, include_hidden=False, recursive=Tr If ``include_hidden`` is True, then files and folders starting with a dot are also returned. """ - result = self._iglob(pathname, include_hidden=include_hidden) + result = self._iglob(pathname, True, include_hidden, + norm_paths, case_sensitive, sep) if with_matches: return result return imap(lambda s: s[0], result) - def _iglob(self, pathname, rootcall=True, include_hidden=False): + def _iglob(self, pathname, rootcall, include_hidden, + norm_paths, case_sensitive, sep): """Internal implementation that backs :meth:`iglob`. ``rootcall`` is required to differentiate between the user's call to @@ -111,17 +117,20 @@ def _iglob(self, pathname, rootcall=True, include_hidden=False): # Note that this may return files, which will be ignored # later when we try to use them as directories. # Prefiltering them here would only require more IO ops. 
- dirs = self._iglob(dirname, False, include_hidden) + dirs = self._iglob(dirname, False, include_hidden, + norm_paths, case_sensitive, sep) else: dirs = [(dirname, ())] # Resolve ``basename`` expr for every directory found for dirname, dir_groups in dirs: - for name, groups in self.resolve_pattern( - dirname, basename, not rootcall, include_hidden): - yield os.path.join(dirname, name), dir_groups + groups + for name, groups in self.resolve_pattern(dirname, basename, + not rootcall, include_hidden, + norm_paths, case_sensitive, sep): + yield _join_paths([dirname, name], sep=sep), dir_groups + groups - def resolve_pattern(self, dirname, pattern, globstar_with_root, include_hidden): + def resolve_pattern(self, dirname, pattern, globstar_with_root, include_hidden, + norm_paths, case_sensitive, sep): """Apply ``pattern`` (contains no path elements) to the literal directory in ``dirname``. @@ -145,7 +154,7 @@ def resolve_pattern(self, dirname, pattern, globstar_with_root, include_hidden): if self.isdir(dirname): return [(pattern, ())] else: - if self.exists(os.path.join(dirname, pattern)): + if self.exists(_join_paths([dirname, pattern], sep=sep)): return [(pattern, ())] return [] @@ -158,8 +167,8 @@ def resolve_pattern(self, dirname, pattern, globstar_with_root, include_hidden): # an empty string as opposed to '.', we spare ourselves # having to deal with os.path.normpath() later. names = [''] if globstar_with_root else [] - for top, entries in self.walk(dirname): - _mkabs = lambda s: os.path.join(top[len(dirname)+1:], s) + for top, entries in self.walk(dirname, sep=sep): + _mkabs = lambda s: _join_paths([top[len(dirname) + 1:], s], sep=sep) names.extend(map(_mkabs, entries)) # Reset pattern so that fnmatch(), which does not understand # ** specifically, will only return a single group match. @@ -174,7 +183,7 @@ def resolve_pattern(self, dirname, pattern, globstar_with_root, include_hidden): # that the empty string we may have added earlier remains. 
# Do not filter out the '' that we might have added earlier names = filter(lambda x: not x or not _ishidden(x), names) - return fnmatch.filter(names, pattern) + return fnmatch.filter(names, pattern, norm_paths, case_sensitive, sep) default_globber = Globber() @@ -186,6 +195,7 @@ def resolve_pattern(self, dirname, pattern, globstar_with_root, include_hidden): magic_check = re.compile('[*?[]') magic_check_bytes = re.compile(b'[*?[]') + def has_magic(s): if isinstance(s, bytes): match = magic_check_bytes.search(s) @@ -193,5 +203,14 @@ def has_magic(s): match = magic_check.search(s) return match is not None + def _ishidden(path): return path[0] in ('.', b'.'[0]) + + +def _join_paths(paths, sep=None): + path = join(*paths) + if sep: + path = re.sub(r'\/', sep, path) # cached internally + return path + From 9b9c143cdb82f00ac9b363aac881b529cb16a0d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Elsd=C3=B6rfer?= Date: Thu, 17 Aug 2017 23:12:29 +0100 Subject: [PATCH 34/35] Prepare a new release. --- CHANGES | 7 +++++++ glob2/__init__.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index d8f4c52..747f08f 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,10 @@ +0.6 (2017-08-17) + - Support dummy recursive parameter in APIs to match the new + call signature of Python 3. + - Fix lru_cache (Kostis Anagnostopoulos). + - Introduce norm_paths and case_sensitive options to optionally + change the behaviour (Kostis Anagnostopoulos). + 0.5 (2016-11-04) - include_hidden option. - Python 3 fixes. 
diff --git a/glob2/__init__.py b/glob2/__init__.py index e350b3b..390b5f3 100644 --- a/glob2/__init__.py +++ b/glob2/__init__.py @@ -2,4 +2,4 @@ from .impl import * -__version__ = (0, 5) +__version__ = (0, 6) From be6981ec1a3ec089a934dbce3717d9ee6a01584e Mon Sep 17 00:00:00 2001 From: Jeremy Hamilton Date: Wed, 9 May 2018 15:13:11 -0400 Subject: [PATCH 35/35] Fixing re flags deprecation warning in python 3.6 --- glob2/fnmatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/glob2/fnmatch.py b/glob2/fnmatch.py index b178bde..b94be65 100644 --- a/glob2/fnmatch.py +++ b/glob2/fnmatch.py @@ -138,4 +138,4 @@ def translate(pat): res = '%s([%s])' % (res, stuff) else: res = res + re.escape(c) - return res + '\Z(?ms)' + return '(?ms)' + res + '\Z'