From ba86e39431467c42a6cbfb11b2458b4085240f2f Mon Sep 17 00:00:00 2001
From: Vinay Sajip <vinay_sajip@yahoo.co.uk>
Date: Sat, 15 Feb 2020 21:44:03 +0000
Subject: [PATCH 1/3] bpo-12915: Improve Unicode support for package names and
 attributes.

---
 Lib/pkgutil.py           | 13 +++++++------
 Lib/test/test_pkgutil.py | 20 ++++++++++++++++++++
 2 files changed, 27 insertions(+), 6 deletions(-)
diff --git a/Lib/pkgutil.py b/Lib/pkgutil.py
index 4bc3083ac197eb..4c184678a29128 100644
--- a/Lib/pkgutil.py
+++ b/Lib/pkgutil.py
@@ -638,8 +638,8 @@ def get_data(package, resource):
     return loader.get_data(resource_name)
 
 
-_DOTTED_WORDS = r'[a-z_]\w*(\.[a-z_]\w*)*'
-_NAME_PATTERN = re.compile(f'^({_DOTTED_WORDS})(:({_DOTTED_WORDS})?)?$', re.I)
+_DOTTED_WORDS = r'(?!\d)(\w+)(\.(?!\d)(\w+))*'
+_NAME_PATTERN = re.compile(f'^(?P<pkg>{_DOTTED_WORDS})(?P<cln>:(?P<obj>{_DOTTED_WORDS})?)?$', re.U)
 del _DOTTED_WORDS
 
 def resolve_name(name):
@@ -677,11 +677,12 @@ def resolve_name(name):
     m = _NAME_PATTERN.match(name)
     if not m:
         raise ValueError(f'invalid format: {name!r}')
-    groups = m.groups()
-    if groups[2]:
+    gd = m.groupdict()
+    if gd.get('cln'):
         # there is a colon - a one-step import is all that's needed
-        mod = importlib.import_module(groups[0])
-        parts = groups[3].split('.') if groups[3] else []
+        mod = importlib.import_module(gd['pkg'])
+        parts = gd.get('obj')
+        parts = parts.split('.') if parts else []
     else:
         # no colon - have to iterate to find the package boundary
         parts = name.split('.')
diff --git a/Lib/test/test_pkgutil.py b/Lib/test/test_pkgutil.py
index 906150b10495bf..178cc25f56c0a7 100644
--- a/Lib/test/test_pkgutil.py
+++ b/Lib/test/test_pkgutil.py
@@ -231,6 +231,26 @@ def test_name_resolution(self):
             ('ZeroDivisionError', ImportError),
         )
 
+        # add some Unicode package names to the mix.
+
+        unicode_words = ('\u0935\u092e\u0938',
+                         '\u73b0\u4ee3\u6c49\u8bed\u5e38\u7528\u5b57\u8868')
+
+        for uw in unicode_words:
+            d = os.path.join(self.dirname, uw)
+            os.makedirs(d, exist_ok=True)
+            # make an empty __init__.py file
+            f = os.path.join(d, '__init__.py')
+            with open(f, 'w') as f:
+                f.write('')
+                f.flush()
+            # now import the package we just created; clearing the caches is
+            # needed, otherwise the newly created package isn't found
+            importlib.invalidate_caches()
+            mod = importlib.import_module(uw)
+            success_cases += (uw, mod),
+            failure_cases += (uw[:-1], ImportError),
+
         for s, expected in success_cases:
             with self.subTest(s=s):
                 o = pkgutil.resolve_name(s)

From 698895a50a776b5c725ed2500168a64aeaa718a5 Mon Sep 17 00:00:00 2001
From: Vinay Sajip <vinay_sajip@yahoo.co.uk>
Date: Sun, 16 Feb 2020 09:22:06 +0000
Subject: [PATCH 2/3] Add some more failure subtest cases, including one with a
 non-ASCII digit character.

---
 Lib/test/test_pkgutil.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Lib/test/test_pkgutil.py b/Lib/test/test_pkgutil.py
index 178cc25f56c0a7..3d130fee730a5c 100644
--- a/Lib/test/test_pkgutil.py
+++ b/Lib/test/test_pkgutil.py
@@ -229,6 +229,8 @@ def test_name_resolution(self):
             ('logging.handlers:SysLogHandler.NO_SUCH_VALUE', AttributeError),
             ('logging.handlers.SysLogHandler.NO_SUCH_VALUE', AttributeError),
             ('ZeroDivisionError', ImportError),
+            ('os.path.9abc', ValueError),
+            ('9abc', ValueError),
         )
 
         # add some Unicode package names to the mix.
@@ -251,6 +253,9 @@ def test_name_resolution(self):
             success_cases += (uw, mod),
             failure_cases += (uw[:-1], ImportError),
 
+        # add an example with a Unicode digit at the start
+        failure_cases += ('\u0966\u0935\u092e\u0938', ValueError),
+
         for s, expected in success_cases:
             with self.subTest(s=s):
                 o = pkgutil.resolve_name(s)

From 245158b3efd252e95dc5c0573c467b49279ff0ec Mon Sep 17 00:00:00 2001
From: Vinay Sajip <vinay_sajip@yahoo.co.uk>
Date: Mon, 17 Feb 2020 23:59:05 +0000
Subject: [PATCH 3/3] Add subtests with European, Korean and Japanese words.

---
 Lib/test/test_pkgutil.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/Lib/test/test_pkgutil.py b/Lib/test/test_pkgutil.py
index 3d130fee730a5c..53456c2f7659e2 100644
--- a/Lib/test/test_pkgutil.py
+++ b/Lib/test/test_pkgutil.py
@@ -236,6 +236,12 @@ def test_name_resolution(self):
         # add some Unicode package names to the mix.
 
         unicode_words = ('\u0935\u092e\u0938',
+                         '\xe9', '\xc8',
+                         '\uc548\ub155\ud558\uc138\uc694',
+                         '\u3055\u3088\u306a\u3089',
+                         '\u3042\u308a\u304c\u3068\u3046',
+                         '\u0425\u043e\u0440\u043e\u0448\u043e',
+                         '\u0441\u043f\u0430\u0441\u0438\u0431\u043e',
                          '\u73b0\u4ee3\u6c49\u8bed\u5e38\u7528\u5b57\u8868')
 
         for uw in unicode_words:
@@ -251,7 +257,8 @@ def test_name_resolution(self):
             importlib.invalidate_caches()
             mod = importlib.import_module(uw)
             success_cases += (uw, mod),
-            failure_cases += (uw[:-1], ImportError),
+            if len(uw) > 1:
+                failure_cases += (uw[:-1], ImportError),
 
         # add an example with a Unicode digit at the start
         failure_cases += ('\u0966\u0935\u092e\u0938', ValueError),