Skip to content

Commit 91c58e8

Browse files
committed
Update urllib to CPython 3.8.10
1 parent 2924fe8 commit 91c58e8

2 files changed

Lines changed: 38 additions & 7 deletions

File tree

Lib/urllib/parse.py

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,9 @@
7777
'0123456789'
7878
'+-.')
7979

80+
# Unsafe characters (tab, CR, LF) to be removed from URLs per WHATWG spec
81+
_UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n']
82+
8083
# XXX: Consider replacing with functools.lru_cache
8184
MAX_CACHE_SIZE = 20
8285
_parse_cache = {}
@@ -414,13 +417,20 @@ def _checknetloc(netloc):
414417
raise ValueError("netloc '" + netloc + "' contains invalid " +
415418
"characters under NFKC normalization")
416419

420+
def _remove_unsafe_bytes_from_url(url):
    """Return *url* with every unsafe character deleted.

    Each entry of the module-level ``_UNSAFE_URL_BYTES_TO_REMOVE`` list is
    removed wherever it occurs in *url*, not only at the ends.  The input
    is not modified; the cleaned value is returned.
    """
    sanitized = url
    for unsafe_char in _UNSAFE_URL_BYTES_TO_REMOVE:
        # Replacing with the empty string drops all occurrences.
        sanitized = sanitized.replace(unsafe_char, "")
    return sanitized
424+
417425
def urlsplit(url, scheme='', allow_fragments=True):
418426
"""Parse a URL into 5 components:
419427
<scheme>://<netloc>/<path>?<query>#<fragment>
420428
Return a 5-tuple: (scheme, netloc, path, query, fragment).
421429
Note that we don't break the components up in smaller bits
422430
(e.g. netloc is a single string) and we don't expand % escapes."""
423431
url, scheme, _coerce_result = _coerce_args(url, scheme)
432+
url = _remove_unsafe_bytes_from_url(url)
433+
scheme = _remove_unsafe_bytes_from_url(scheme)
424434
allow_fragments = bool(allow_fragments)
425435
key = url, scheme, allow_fragments, type(url), type(scheme)
426436
cached = _parse_cache.get(key, None)
@@ -631,6 +641,8 @@ def unquote(string, encoding='utf-8', errors='replace'):
631641
632642
unquote('abc%20def') -> 'abc def'.
633643
"""
644+
if isinstance(string, bytes):
645+
raise TypeError('Expected str, got bytes')
634646
if '%' not in string:
635647
string.split
636648
return string
@@ -648,7 +660,7 @@ def unquote(string, encoding='utf-8', errors='replace'):
648660

649661

650662
def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
651-
encoding='utf-8', errors='replace', max_num_fields=None):
663+
encoding='utf-8', errors='replace', max_num_fields=None, separator='&'):
652664
"""Parse a query given as a string argument.
653665
654666
Arguments:
@@ -672,12 +684,15 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
672684
max_num_fields: int. If set, then throws a ValueError if there
673685
are more than n fields read by parse_qsl().
674686
687+
separator: str. The symbol to use for separating the query arguments.
688+
Defaults to &.
689+
675690
Returns a dictionary.
676691
"""
677692
parsed_result = {}
678693
pairs = parse_qsl(qs, keep_blank_values, strict_parsing,
679694
encoding=encoding, errors=errors,
680-
max_num_fields=max_num_fields)
695+
max_num_fields=max_num_fields, separator=separator)
681696
for name, value in pairs:
682697
if name in parsed_result:
683698
parsed_result[name].append(value)
@@ -687,7 +702,7 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
687702

688703

689704
def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
690-
encoding='utf-8', errors='replace', max_num_fields=None):
705+
encoding='utf-8', errors='replace', max_num_fields=None, separator='&'):
691706
"""Parse a query given as a string argument.
692707
693708
Arguments:
@@ -710,19 +725,26 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
710725
max_num_fields: int. If set, then throws a ValueError
711726
if there are more than n fields read by parse_qsl().
712727
728+
separator: str. The symbol to use for separating the query arguments.
729+
Defaults to &.
730+
713731
Returns a list, as G-d intended.
714732
"""
715733
qs, _coerce_result = _coerce_args(qs)
734+
separator, _ = _coerce_args(separator)
735+
736+
if not separator or (not isinstance(separator, (str, bytes))):
737+
raise ValueError("Separator must be of type string or bytes.")
716738

717739
# If max_num_fields is defined then check that the number of fields
718740
# is less than max_num_fields. This prevents a memory exhaustion DOS
719741
# attack via post bodies with many fields.
720742
if max_num_fields is not None:
721-
num_fields = 1 + qs.count('&') + qs.count(';')
743+
num_fields = 1 + qs.count(separator)
722744
if max_num_fields < num_fields:
723745
raise ValueError('Max number of fields exceeded')
724746

725-
pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
747+
pairs = [s1 for s1 in qs.split(separator)]
726748
r = []
727749
for name_value in pairs:
728750
if not name_value and not strict_parsing:

Lib/urllib/request.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -779,7 +779,11 @@ def _parse_proxy(proxy):
779779
raise ValueError("proxy URL with no authority: %r" % proxy)
780780
# We have an authority, so for RFC 3986-compliant URLs (by ss 3.
781781
# and 3.3.), path is empty or starts with '/'
782-
end = r_scheme.find("/", 2)
782+
if '@' in r_scheme:
783+
host_separator = r_scheme.find('@')
784+
end = r_scheme.find("/", host_separator)
785+
else:
786+
end = r_scheme.find("/", 2)
783787
if end == -1:
784788
end = None
785789
authority = r_scheme[2:end]
@@ -947,7 +951,7 @@ class AbstractBasicAuthHandler:
947951
# (single quotes are a violation of the RFC, but appear in the wild)
948952
rx = re.compile('(?:^|,)' # start of the string or ','
949953
'[ \t]*' # optional whitespaces
950-
'([^ \t]+)' # scheme like "Basic"
954+
'([^ \t,]+)' # scheme like "Basic"
951955
'[ \t]+' # mandatory whitespaces
952956
# realm=xxx
953957
# realm='xxx'
@@ -2604,6 +2608,11 @@ def ip2num(ipAddr):
26042608
mask = 8 * (m.group(1).count('.') + 1)
26052609
else:
26062610
mask = int(mask[1:])
2611+
2612+
if mask < 0 or mask > 32:
2613+
# System libraries ignore invalid prefix lengths
2614+
continue
2615+
26072616
mask = 32 - mask
26082617

26092618
if (hostIP >> mask) == (base >> mask):

0 commit comments

Comments
 (0)