Further cleanups, remove urllib.parse.pathsplit().

Since pathsplit() doesn't seem like a generally useful public API, remove it. Instead, add a _request_path_split() method. This ensures that the redirect logic and the translate_path() method use the same path parsing.
python · nascheme · Jun 16, 2022 · Jun 16, 2022 · Jun 16, 2022 · Jun 16, 2022
commit 8a34cd002fef6139157aed390061d4450f4cb636
diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst
@@ -339,18 +339,6 @@ or on combining URL components into a URL string.
 
 .. _WHATWG spec: https://url.spec.whatwg.org/#concept-basic-url-parser
 
-.. function:: pathsplit(path)
-
-   Parse a path that includes an optional query and fragment. Like
-   :func:`urlsplit`, this function returns a 5-item :term:`named tuple`::
-
-      (addressing scheme, network location, path, query, fragment identifier).
-
-   The scheme and network location components will always be empty.
-
-   .. versionadded:: 3.11
-
-
 .. function:: urlunsplit(parts)
 
    Combine the elements of a tuple as returned by :func:`urlsplit` into a

diff --git a/Lib/http/server.py b/Lib/http/server.py
@@ -664,6 +664,16 @@ def do_HEAD(self):
         if f:
             f.close()
 
+    def _request_path_split(self, path):
+        """Return a SplitResult for a path with optional ?query, #fragment.
+        """
+        # We only handle the 'abs_path' case for the Request-URI part of the
+        # request line (the second word); scheme and netloc are left empty.
+        # Cut the fragment first: a '?' after '#' belongs to the fragment.
+        path, _, fragment = path.partition('#')
+        path, _, query = path.partition('?')
+        return urllib.parse.SplitResult('', '', path, query, fragment)
+
     def _get_redirect_url_for_dir(self):
         """Returns URL with trailing slash on path, if required.  If not
         required, returns None.
@@ -675,7 +685,7 @@ def _get_redirect_url_for_dir(self):
         # with a double slash should not be treated as a relative URI.  Also, a
         # path with a colon in the first component could also be parsed
         # wrongly.
-        parts = urllib.parse.pathsplit(self.path)
+        parts = self._request_path_split(self.path)
         if parts.path.endswith('/'):
             return None  # already has slash, no redirect needed
         return urllib.parse.urlunsplit(('', '', parts.path + '/', parts.query,
@@ -832,7 +842,7 @@ def translate_path(self, path):
 
         """
         # extract only path, abandon query parameters and fragment
-        path = urllib.parse.pathsplit(path).path
+        path = self._request_path_split(path).path
         # Don't forget explicit trailing slash when normalizing. Issue17324
         trailing_slash = path.rstrip().endswith('/')
         try:

diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
@@ -1116,18 +1116,8 @@ def test_urlunsplit_relative(self):
             (('', '', 'a:b', '', ''), './a:b'),
         ]
         for parts, result in cases:
-            self.assertEqual(urllib.parse.urlunsplit(parts), result)
-
-    def test_pathsplit(self):
-        cases = [
-            ('//a', ('', '', '//a', '', '')),
-            ('a:b', ('', '', 'a:b', '', '')),
-            ('/a/b?x#y', ('', '', '/a/b', 'x', 'y')),
-            ('/a/b#y', ('', '', '/a/b', '', 'y')),
-            ('/a/b?x', ('', '', '/a/b', 'x', '')),
-        ]
-        for uri, result in cases:
-            self.assertEqual(urllib.parse.pathsplit(uri), result)
+            self.assertEqual(urllib.parse.urlunsplit(parts), result,
+                             msg=f'{parts=}')
 
 
 class Utility_Tests(unittest.TestCase):

diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
@@ -36,7 +36,7 @@
 __all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
            "urlsplit", "urlunsplit", "urlencode", "parse_qs",
            "parse_qsl", "quote", "quote_plus", "quote_from_bytes",
-           "unquote", "unquote_plus", "unquote_to_bytes", "pathsplit",
+           "unquote", "unquote_plus", "unquote_to_bytes",
            "DefragResult", "ParseResult", "SplitResult",
            "DefragResultBytes", "ParseResultBytes", "SplitResultBytes"]
 
@@ -480,29 +480,6 @@ def urlsplit(url, scheme='', allow_fragments=True):
     v = SplitResult(scheme, netloc, url, query, fragment)
     return _coerce_result(v)
 
-# typed=True avoids BytesWarnings being emitted during cache key
-# comparison since this API supports both bytes and str input.
-@functools.lru_cache(typed=True)
-def pathsplit(path):
-    """Parse a path that includes an optional query and fragment.
-    The full syntax is:
-
-    <path>?<query>#<fragment>
-
-    The result is a named 5-tuple with fields set corresponding to the above.
-    It is either a SplitResult or SplitResultBytes object, depending on the
-    type of the url parameter.
-
-    Note that % escapes are not expanded.
-    """
-    path, _coerce_result = _coerce_args(path)
-    for b in _UNSAFE_URL_BYTES_TO_REMOVE:
-        path = path.replace(b, "")
-    path, _, fragment = path.partition('#')
-    path, _, query = path.partition('?')
-    v = SplitResult('', '', path, query, fragment)
-    return _coerce_result(v)
-
 def urlunparse(components):
     """Put a parsed URL back together again.  This may result in a
     slightly different, but equivalent URL, if the URL that was parsed

diff --git a/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst b/Misc/NEWS.d/next/Security/2022-06-16-12-13-55.gh-issue-87389.MS9wAR.rst
@@ -2,7 +2,6 @@
 when an URI path starts with ``//``.  Vulnerability discovered, and initial
 fix proposed, by Hamza Avvan.  Change :func:`urllib.parse.urlunsplit` to
 sanitize ``path`` argument in order to avoid confusing the first component of
-the path as a net location or scheme.  Add :func:`urllib.parse.pathsplit`
-function.
+the path as a net location or scheme.
 
-Co-authored-by: Gregory P. Smith <gps@google.com>
+Co-authored-by: Gregory P. Smith [Google LLC] <gps@google.com>