From ba86e39431467c42a6cbfb11b2458b4085240f2f Mon Sep 17 00:00:00 2001 From: Vinay Sajip Date: Sat, 15 Feb 2020 21:44:03 +0000 Subject: [PATCH 1/3] bpo-12915: Improve Unicode support for package names and attributes. --- Lib/pkgutil.py | 13 +++++++------ Lib/test/test_pkgutil.py | 20 ++++++++++++++++++++ 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/Lib/pkgutil.py b/Lib/pkgutil.py index 4bc3083ac197eb..4c184678a29128 100644 --- a/Lib/pkgutil.py +++ b/Lib/pkgutil.py @@ -638,8 +638,8 @@ def get_data(package, resource): return loader.get_data(resource_name) -_DOTTED_WORDS = r'[a-z_]\w*(\.[a-z_]\w*)*' -_NAME_PATTERN = re.compile(f'^({_DOTTED_WORDS})(:({_DOTTED_WORDS})?)?$', re.I) +_DOTTED_WORDS = r'(?!\d)(\w+)(\.(?!\d)(\w+))*' +_NAME_PATTERN = re.compile(f'^(?P<pkg>{_DOTTED_WORDS})(?P<cln>:(?P<obj>{_DOTTED_WORDS})?)?$', re.U) del _DOTTED_WORDS def resolve_name(name): @@ -677,11 +677,12 @@ def resolve_name(name): m = _NAME_PATTERN.match(name) if not m: raise ValueError(f'invalid format: {name!r}') - groups = m.groups() - if groups[2]: + gd = m.groupdict() + if gd.get('cln'): # there is a colon - a one-step import is all that's needed - mod = importlib.import_module(groups[0]) - parts = groups[3].split('.') if groups[3] else [] + mod = importlib.import_module(gd['pkg']) + parts = gd.get('obj') + parts = parts.split('.') if parts else [] else: # no colon - have to iterate to find the package boundary parts = name.split('.') diff --git a/Lib/test/test_pkgutil.py b/Lib/test/test_pkgutil.py index 906150b10495bf..178cc25f56c0a7 100644 --- a/Lib/test/test_pkgutil.py +++ b/Lib/test/test_pkgutil.py @@ -231,6 +231,26 @@ def test_name_resolution(self): ('ZeroDivisionError', ImportError), ) + # add some Unicode package names to the mix. 
+ + unicode_words = ('\u0935\u092e\u0938', + '\u73b0\u4ee3\u6c49\u8bed\u5e38\u7528\u5b57\u8868') + + for uw in unicode_words: + d = os.path.join(self.dirname, uw) + os.makedirs(d, exist_ok=True) + # make an empty __init__.py file + f = os.path.join(d, '__init__.py') + with open(f, 'w') as f: + f.write('') + f.flush() + # now import the package we just created; clearing the caches is + # needed, otherwise the newly created package isn't found + importlib.invalidate_caches() + mod = importlib.import_module(uw) + success_cases += (uw, mod), + failure_cases += (uw[:-1], ImportError), + for s, expected in success_cases: with self.subTest(s=s): o = pkgutil.resolve_name(s) From 698895a50a776b5c725ed2500168a64aeaa718a5 Mon Sep 17 00:00:00 2001 From: Vinay Sajip Date: Sun, 16 Feb 2020 09:22:06 +0000 Subject: [PATCH 2/3] Add some more failure subtest cases, including one with a non-ASCII digit character. --- Lib/test/test_pkgutil.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Lib/test/test_pkgutil.py b/Lib/test/test_pkgutil.py index 178cc25f56c0a7..3d130fee730a5c 100644 --- a/Lib/test/test_pkgutil.py +++ b/Lib/test/test_pkgutil.py @@ -229,6 +229,8 @@ def test_name_resolution(self): ('logging.handlers:SysLogHandler.NO_SUCH_VALUE', AttributeError), ('logging.handlers.SysLogHandler.NO_SUCH_VALUE', AttributeError), ('ZeroDivisionError', ImportError), + ('os.path.9abc', ValueError), + ('9abc', ValueError), ) # add some Unicode package names to the mix. 
@@ -251,6 +253,9 @@ def test_name_resolution(self): success_cases += (uw, mod), failure_cases += (uw[:-1], ImportError), + # add an example with a Unicode digit at the start + failure_cases += ('\u0966\u0935\u092e\u0938', ValueError), + for s, expected in success_cases: with self.subTest(s=s): o = pkgutil.resolve_name(s) From 245158b3efd252e95dc5c0573c467b49279ff0ec Mon Sep 17 00:00:00 2001 From: Vinay Sajip Date: Mon, 17 Feb 2020 23:59:05 +0000 Subject: [PATCH 3/3] Added subtests with European, Korean and Japanese words. --- Lib/test/test_pkgutil.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_pkgutil.py b/Lib/test/test_pkgutil.py index 3d130fee730a5c..53456c2f7659e2 100644 --- a/Lib/test/test_pkgutil.py +++ b/Lib/test/test_pkgutil.py @@ -236,6 +236,12 @@ def test_name_resolution(self): # add some Unicode package names to the mix. unicode_words = ('\u0935\u092e\u0938', + '\xe9', '\xc8', + '\uc548\ub155\ud558\uc138\uc694', + '\u3055\u3088\u306a\u3089', + '\u3042\u308a\u304c\u3068\u3046', + '\u0425\u043e\u0440\u043e\u0448\u043e', + '\u0441\u043f\u0430\u0441\u0438\u0431\u043e', '\u73b0\u4ee3\u6c49\u8bed\u5e38\u7528\u5b57\u8868') for uw in unicode_words: @@ -251,7 +257,8 @@ def test_name_resolution(self): importlib.invalidate_caches() mod = importlib.import_module(uw) success_cases += (uw, mod), - failure_cases += (uw[:-1], ImportError), + if len(uw) > 1: + failure_cases += (uw[:-1], ImportError), # add an example with a Unicode digit at the start failure_cases += ('\u0966\u0935\u092e\u0938', ValueError),