Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Further cleanups, remove urllib.parse.pathsplit().
Since pathsplit() doesn't seem like a generally useful public API,
remove it.  Instead, add a _request_path_split() method.  This ensures
that the redirect logic and the translate_path() method use the same
path parsing.
  • Loading branch information
nascheme committed Jun 16, 2022
commit 8a34cd002fef6139157aed390061d4450f4cb636
12 changes: 0 additions & 12 deletions Doc/library/urllib.parse.rst
Original file line number Diff line number Diff line change
Expand Up @@ -339,18 +339,6 @@ or on combining URL components into a URL string.

.. _WHATWG spec: https://url.spec.whatwg.org/#concept-basic-url-parser

.. function:: pathsplit(path)

Parse a path that includes an optional query and fragment. Like
:func:`urlsplit`, this function returns a 5-item :term:`named tuple`::

(addressing scheme, network location, path, query, fragment identifier).

The scheme and network location components will always be empty.

.. versionadded:: 3.11


.. function:: urlunsplit(parts)

Combine the elements of a tuple as returned by :func:`urlsplit` into a
Expand Down
14 changes: 12 additions & 2 deletions Lib/http/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -664,6 +664,16 @@ def do_HEAD(self):
if f:
f.close()

def _request_path_split(self, path):
    """Parse a request path that can include an optional query and fragment.

    Only the 'abs_path' form of the Request-URI (the second word of the
    request line) is handled: the input is never interpreted as containing
    a scheme or a network location, so a leading '//' or a colon in the
    first segment stays part of the path.

    Returns a urllib.parse.SplitResult whose scheme and netloc fields are
    always empty strings.
    """
    # The fragment must be removed first: '#' terminates the query, and a
    # '?' appearing after '#' belongs to the fragment.  Splitting on '?'
    # first would leave the fragment glued onto the query ('x#y').
    path, _, fragment = path.partition('#')
    path, _, query = path.partition('?')
    return urllib.parse.SplitResult('', '', path, query, fragment)

def _get_redirect_url_for_dir(self):
"""Returns URL with trailing slash on path, if required. If not
required, returns None.
Expand All @@ -675,7 +685,7 @@ def _get_redirect_url_for_dir(self):
# with a double slash should not be treated as a relative URI. Also, a
# path with a colon in the first component could also be parsed
# wrongly.
parts = urllib.parse.pathsplit(self.path)
parts = self._request_path_split(self.path)
if parts.path.endswith('/'):
return None # already has slash, no redirect needed
return urllib.parse.urlunsplit(('', '', parts.path + '/', parts.query,
Expand Down Expand Up @@ -832,7 +842,7 @@ def translate_path(self, path):

"""
# extract only path, abandon query parameters and fragment
path = urllib.parse.pathsplit(path).path
path = self._request_path_split(path).path
# Don't forget explicit trailing slash when normalizing. Issue17324
trailing_slash = path.rstrip().endswith('/')
try:
Expand Down
14 changes: 2 additions & 12 deletions Lib/test/test_urlparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -1116,18 +1116,8 @@ def test_urlunsplit_relative(self):
(('', '', 'a:b', '', ''), './a:b'),
]
for parts, result in cases:
self.assertEqual(urllib.parse.urlunsplit(parts), result)

def test_pathsplit(self):
    """Check pathsplit() against paths with and without query/fragment."""
    # Maps each input to the expected 5-tuple
    # (scheme, netloc, path, query, fragment).  The first two entries pin
    # that a leading '//' and a colon in the first segment stay in the path.
    expected_by_uri = {
        '//a': ('', '', '//a', '', ''),
        'a:b': ('', '', 'a:b', '', ''),
        '/a/b?x#y': ('', '', '/a/b', 'x', 'y'),
        '/a/b#y': ('', '', '/a/b', '', 'y'),
        '/a/b?x': ('', '', '/a/b', 'x', ''),
    }
    for uri, expected in expected_by_uri.items():
        self.assertEqual(urllib.parse.pathsplit(uri), expected)
self.assertEqual(urllib.parse.urlunsplit(parts), result,
msg=f'{parts=}')


class Utility_Tests(unittest.TestCase):
Expand Down
25 changes: 1 addition & 24 deletions Lib/urllib/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
"urlsplit", "urlunsplit", "urlencode", "parse_qs",
"parse_qsl", "quote", "quote_plus", "quote_from_bytes",
"unquote", "unquote_plus", "unquote_to_bytes", "pathsplit",
"unquote", "unquote_plus", "unquote_to_bytes",
"DefragResult", "ParseResult", "SplitResult",
"DefragResultBytes", "ParseResultBytes", "SplitResultBytes"]

Expand Down Expand Up @@ -480,29 +480,6 @@ def urlsplit(url, scheme='', allow_fragments=True):
v = SplitResult(scheme, netloc, url, query, fragment)
return _coerce_result(v)

# This API accepts both bytes and str input; typed=True keeps the cache
# key comparison from emitting BytesWarnings.
@functools.lru_cache(typed=True)
def pathsplit(path):
    """Split a path carrying an optional query and fragment.

    The accepted syntax is:

    <path>?<query>#<fragment>

    Returns a named 5-tuple whose scheme and netloc fields are always
    empty and whose path, query and fragment fields follow the syntax
    above.  The result is a SplitResult or SplitResultBytes object,
    depending on the type of the path argument.

    Note that % escapes are not expanded.
    """
    text, restore_type = _coerce_args(path)
    # Drop every unsafe character before looking for delimiters.
    for unsafe in _UNSAFE_URL_BYTES_TO_REMOVE:
        text = text.replace(unsafe, "")
    # Split off the fragment first, then the query from what remains.
    before_fragment, _, fragment = text.partition('#')
    bare_path, _, query = before_fragment.partition('?')
    return restore_type(SplitResult('', '', bare_path, query, fragment))

def urlunparse(components):
"""Put a parsed URL back together again. This may result in a
slightly different, but equivalent URL, if the URL that was parsed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
when a URI path starts with ``//``. Vulnerability discovered, and initial
fix proposed, by Hamza Avvan. Change :func:`urllib.parse.urlunsplit` to
sanitize ``path`` argument in order to avoid confusing the first component of
the path as a net location or scheme. Add :func:`urllib.parse.pathsplit`
function.
the path as a net location or scheme.

Co-authored-by: Gregory P. Smith <gps@google.com>
Co-authored-by: Gregory P. Smith [Google LLC] <gps@google.com>