1+ #!/usr/bin/env python
2+
3+ """
4+ HTTP Link Header Parsing
5+
6+ Simple routines to parse and manipulate Link headers.
7+ """
8+
9+ __license__ = """
10+ Copyright (c) 2009 Mark Nottingham
11+
12+ Permission is hereby granted, free of charge, to any person obtaining a copy
13+ of this software and associated documentation files (the "Software"), to deal
14+ in the Software without restriction, including without limitation the rights
15+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16+ copies of the Software, and to permit persons to whom the Software is
17+ furnished to do so, subject to the following conditions:
18+
19+ The above copyright notice and this permission notice shall be included in
20+ all copies or substantial portions of the Software.
21+
22+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
28+ THE SOFTWARE.
29+ """
30+
31+ import re
32+
# Regular-expression fragments for the Link header grammar. Every group is
# non-capturing so that re.findall() on a composed pattern returns whole
# matches instead of group tuples.

# A token: a run of characters other than ( ) < > @ , ; : \ " / [ ] ? = { }
# space and tab. The quantifier is lazy, so a token only grows as far as
# the rest of the enclosing pattern forces it to.
TOKEN = r'(?:[^\(\)<>@,;:\\"/\[\]\?={} \t]+?)'

# A double-quoted string; a backslash-quote pair inside does not end it.
QUOTED_STRING = r'(?:"(?:\\"|[^"])*")'

# A parameter: a token, optionally followed by "=" and a token or
# quoted-string value.
PARAMETER = r'(?:%(TOKEN)s(?:=(?:%(TOKEN)s|%(QUOTED_STRING)s))?)' % {
    'TOKEN': TOKEN,
    'QUOTED_STRING': QUOTED_STRING,
}

# A single link: "<...>" followed by any number of ";"-introduced
# parameters (the parameter after a ";" may be absent).
LINK = r'<[^>]*>\s*(?:;\s*%(PARAMETER)s?\s*)*' % {'PARAMETER': PARAMETER}

# One or more commas, each with optional surrounding whitespace.
COMMA = r'(?:\s*(?:,\s*)+)'

# A link that is followed by a comma separator or by the end of the input.
LINK_SPLIT = r'%s(?=%s|\s*$)' % (LINK, COMMA)
39+
def _unquotestring(instr):
    """
    Remove the surrounding double quotes from a quoted-string and undo its
    backslash escapes (each backslash-character pair becomes the character).

    Input that is not wrapped in a pair of double quotes is returned
    unchanged. That includes the empty string and a lone '"', neither of
    which can be a quoted-string; indexing the empty string previously
    raised IndexError.
    """
    if len(instr) >= 2 and instr[0] == instr[-1] == '"':
        # Drop the delimiters first so the closing quote is never mistaken
        # for the target of an escape.
        instr = re.sub(r'\\(.)', r'\1', instr[1:-1])
    return instr
def _splitstring(instr, item, split):
    """
    Return the occurrences of the regex `item` found in `instr`, each with
    surrounding whitespace stripped.

    An occurrence only counts when it is immediately followed by the regex
    `split`, or by optional whitespace and the end of the string. A false
    `instr` (empty string or None) yields an empty list.

    `item` is expected to contain no capturing groups, because the matches
    are collected with re.findall().
    """
    if not instr:
        return []
    pattern = r'%s(?=%s|\s*$)' % (item, split)
    pieces = []
    for found in re.findall(pattern, instr):
        pieces.append(found.strip())
    return pieces
49+
# Compiled once at import time; parse_link_value() uses it to pull the
# individual links out of a header value.
link_splitter = re.compile(LINK_SPLIT)
51+
def parse_link_value(instr):
    r"""
    Parse the value of a Link header and return a dictionary whose keys are
    link URLs and whose values are dictionaries of the parameters for
    their associated links.

    The value may contain several links separated by commas; each one is
    picked out with `link_splitter`. If the same URL appears more than
    once, the parameters of its last occurrence replace the earlier ones.

    Parameter names are lower-cased. Quoted parameter values are unquoted;
    a parameter given without a value maps to None. Note that
    internationalised parameters (e.g., title*) are NOT percent-decoded.

    Also, only the last instance of a given parameter will be included.

    A false `instr` (empty string or None) yields an empty dictionary.

    For example,

    >>> parse_link_value('</foo>; rel="self"; title*=utf-8\'de\'letztes%20Kapitel') == {
    ...     '/foo': {'rel': 'self', 'title*': "utf-8'de'letztes%20Kapitel"}}
    True

    """
    out = {}
    if not instr:
        return out
    for match in link_splitter.findall(instr):
        # Every match has the shape "<url>params", so the split on the
        # first ">" always yields exactly two parts.
        url, params = match.strip().split(">", 1)
        url = url[1:]  # drop the leading "<"
        param_dict = {}
        for param in _splitstring(params, PARAMETER, r"\s*;\s*"):
            if "=" in param:
                name, value = param.split("=", 1)
                param_dict[name.lower()] = _unquotestring(value)
            else:
                # A bare parameter (no "=value") is recorded with None.
                param_dict[param.lower()] = None
        out[url] = param_dict
    return out
84+
85+
if __name__ == '__main__':
    import sys
    # Command-line use: parse the first argument as a Link header value and
    # print the resulting dictionary. print() with a single argument
    # behaves the same on Python 2 and Python 3.
    if len(sys.argv) > 1:
        print(parse_link_value(sys.argv[1]))
0 commit comments