Add a few more tests.

python · serhiy-storchaka · May 14, 2024 · Aug 15, 2019 · Aug 27, 2019 · Aug 27, 2019
commit 62957fe05a1b50ad980848d35a601cb2028a0b58
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
@@ -103,15 +103,17 @@
 
 class UrlParseTestCase(unittest.TestCase):
 
-    def checkRoundtrips(self, url, parsed, split):
+    def checkRoundtrips(self, url, parsed, split, url2=None):
+        if url2 is None:
+            url2 = url
         result = urllib.parse.urlparse(url)
         self.assertSequenceEqual(result, parsed)
         t = (result.scheme, result.netloc, result.path,
              result.params, result.query, result.fragment)
         self.assertSequenceEqual(t, parsed)
         # put it back together and it should be the same
         result2 = urllib.parse.urlunparse(result)
-        self.assertSequenceEqual(result2, url)
+        self.assertSequenceEqual(result2, url2)
         self.assertSequenceEqual(result2, result.geturl())
 
         # the result of geturl() is a fixpoint; we can always parse it
@@ -137,7 +139,7 @@ def checkRoundtrips(self, url, parsed, split):
              result.query, result.fragment)
         self.assertSequenceEqual(t, split)
         result2 = urllib.parse.urlunsplit(result)
-        self.assertSequenceEqual(result2, url)
+        self.assertSequenceEqual(result2, url2)
         self.assertSequenceEqual(result2, result.geturl())
 
         # check the fixpoint property of re-parsing the result of geturl()
@@ -243,6 +245,38 @@ def _encode(t):
         for url, parsed, split in str_cases + bytes_cases:
             self.checkRoundtrips(url, parsed, split)
 
+    def test_roundtrips_normalization(self):  # cases where the rebuilt URL differs from the input
+        str_cases = [  # each case: (url, url2, parsed, split)
+            ('///path/to/file',  # url: the input that gets parsed
+             '/path/to/file',  # url2: expected urlunparse()/urlunsplit() output
+             ('', '', '/path/to/file', '', '', ''),  # parsed: expected 6 fields
+             ('', '', '/path/to/file', '', '')),  # split: expected 5 fields
+            ('scheme:///path/to/file',
+             'scheme:/path/to/file',
+             ('scheme', '', '/path/to/file', '', '', ''),
+             ('scheme', '', '/path/to/file', '', '')),
+            ('file:/tmp/junk.txt',
+             'file:///tmp/junk.txt',
+             ('file', '', '/tmp/junk.txt', '', '', ''),
+             ('file', '', '/tmp/junk.txt', '', '')),
+            ('http:/tmp/junk.txt',
+             'http:///tmp/junk.txt',
+             ('http', '', '/tmp/junk.txt', '', '', ''),
+             ('http', '', '/tmp/junk.txt', '', '')),
+            ('https:/tmp/junk.txt',
+             'https:///tmp/junk.txt',
+             ('https', '', '/tmp/junk.txt', '', '', ''),
+             ('https', '', '/tmp/junk.txt', '', '')),
+        ]
+        def _encode(t):  # build the bytes twin of one str case
+            return (t[0].encode('ascii'),
+                    t[1].encode('ascii'),
+                    tuple(x.encode('ascii') for x in t[2]),
+                    tuple(x.encode('ascii') for x in t[3]))
+        bytes_cases = [_encode(x) for x in str_cases]  # same cases, as bytes
+        for url, url2, parsed, split in str_cases + bytes_cases:
+            self.checkRoundtrips(url, parsed, split, url2)  # note: url2 is passed last
+
     def test_http_roundtrips(self):
         # urllib.parse.urlsplit treats 'http:' as an optimized special case,
         # so we test both 'http:' and 'https:' in all the following.