7777 '0123456789'
7878 '+-.' )
7979
80+ # Unsafe bytes to be removed per WHATWG spec
81+ _UNSAFE_URL_BYTES_TO_REMOVE = ['\t ' , '\r ' , '\n ' ]
82+
8083# XXX: Consider replacing with functools.lru_cache
8184MAX_CACHE_SIZE = 20
8285_parse_cache = {}
@@ -414,13 +417,20 @@ def _checknetloc(netloc):
414417 raise ValueError ("netloc '" + netloc + "' contains invalid " +
415418 "characters under NFKC normalization" )
416419
def _remove_unsafe_bytes_from_url(url):
    """Return *url* with every entry of _UNSAFE_URL_BYTES_TO_REMOVE deleted.

    Each entry of the module-level list is removed wherever it occurs in
    the string, not only at the ends.  The entries are applied one after
    another, in list order.
    """
    cleaned = url
    for unsafe in _UNSAFE_URL_BYTES_TO_REMOVE:
        cleaned = cleaned.replace(unsafe, "")
    return cleaned
424+
417425def urlsplit (url , scheme = '' , allow_fragments = True ):
418426 """Parse a URL into 5 components:
419427 <scheme>://<netloc>/<path>?<query>#<fragment>
420428 Return a 5-tuple: (scheme, netloc, path, query, fragment).
421429 Note that we don't break the components up in smaller bits
422430 (e.g. netloc is a single string) and we don't expand % escapes."""
423431 url , scheme , _coerce_result = _coerce_args (url , scheme )
432+ url = _remove_unsafe_bytes_from_url (url )
433+ scheme = _remove_unsafe_bytes_from_url (scheme )
424434 allow_fragments = bool (allow_fragments )
425435 key = url , scheme , allow_fragments , type (url ), type (scheme )
426436 cached = _parse_cache .get (key , None )
@@ -631,6 +641,8 @@ def unquote(string, encoding='utf-8', errors='replace'):
631641
632642 unquote('abc%20def') -> 'abc def'.
633643 """
644+ if isinstance (string , bytes ):
645+ raise TypeError ('Expected str, got bytes' )
634646 if '%' not in string :
635647 string .split
636648 return string
@@ -648,7 +660,7 @@ def unquote(string, encoding='utf-8', errors='replace'):
648660
649661
650662def parse_qs (qs , keep_blank_values = False , strict_parsing = False ,
651- encoding = 'utf-8' , errors = 'replace' , max_num_fields = None ):
663+ encoding = 'utf-8' , errors = 'replace' , max_num_fields = None , separator = '&' ):
652664 """Parse a query given as a string argument.
653665
654666 Arguments:
@@ -672,12 +684,15 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
672684 max_num_fields: int. If set, then throws a ValueError if there
673685 are more than n fields read by parse_qsl().
674686
687+ separator: str. The symbol to use for separating the query arguments.
688+ Defaults to &.
689+
675690 Returns a dictionary.
676691 """
677692 parsed_result = {}
678693 pairs = parse_qsl (qs , keep_blank_values , strict_parsing ,
679694 encoding = encoding , errors = errors ,
680- max_num_fields = max_num_fields )
695+ max_num_fields = max_num_fields , separator = separator )
681696 for name , value in pairs :
682697 if name in parsed_result :
683698 parsed_result [name ].append (value )
@@ -687,7 +702,7 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
687702
688703
689704def parse_qsl (qs , keep_blank_values = False , strict_parsing = False ,
690- encoding = 'utf-8' , errors = 'replace' , max_num_fields = None ):
705+ encoding = 'utf-8' , errors = 'replace' , max_num_fields = None , separator = '&' ):
691706 """Parse a query given as a string argument.
692707
693708 Arguments:
@@ -710,19 +725,26 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
710725 max_num_fields: int. If set, then throws a ValueError
711726 if there are more than n fields read by parse_qsl().
712727
728+ separator: str. The symbol to use for separating the query arguments.
729+ Defaults to &.
730+
713731 Returns a list, as G-d intended.
714732 """
715733 qs , _coerce_result = _coerce_args (qs )
734+ separator , _ = _coerce_args (separator )
735+
736+ if not separator or (not isinstance (separator , (str , bytes ))):
737+ raise ValueError ("Separator must be of type string or bytes." )
716738
717739 # If max_num_fields is defined then check that the number of fields
718740 # is less than max_num_fields. This prevents a memory exhaustion DOS
719741 # attack via post bodies with many fields.
720742 if max_num_fields is not None :
721- num_fields = 1 + qs .count ('&' ) + qs . count ( ';' )
743+ num_fields = 1 + qs .count (separator )
722744 if max_num_fields < num_fields :
723745 raise ValueError ('Max number of fields exceeded' )
724746
725- pairs = [s2 for s1 in qs .split ('&' ) for s2 in s1 . split ( ';' )]
747+ pairs = [s1 for s1 in qs .split (separator )]
726748 r = []
727749 for name_value in pairs :
728750 if not name_value and not strict_parsing :
0 commit comments