1+ '''
2+ Created on 12 Jan 2012
3+
4+ @author: rwilkinson
5+ '''
6+ import base64
7+ import socket
8+ import urlparse
9+ from urllib import unquote , addinfourl
10+ from urllib2 import _parse_proxy , URLError , HTTPError
11+ from urllib2 import (AbstractHTTPHandler as _AbstractHTTPHandler ,
12+ BaseHandler as _BaseHandler ,
13+ HTTPRedirectHandler as _HTTPRedirectHandler ,
14+ Request as _Request ,
15+ OpenerDirector as _OpenerDirector )
16+
17+ from ndg .httpsclient .httplib_proxy import HTTPConnection
18+
19+
class Request(_Request):
    """urllib2.Request subclass supporting HTTPS tunnelling via a proxy.

    Instead of rewriting an HTTPS request into a proxy-style request,
    set_proxy() remembers the real end-server host (``_tunnel_host``) so a
    CONNECT tunnel can later be opened to it through the proxy.

    NOTE: because this class is also named ``Request``, the private
    ``self.__r_host`` / ``self.__original`` references below mangle to the
    same ``_Request__*`` attribute names used by the urllib2 base class —
    do not rename this class without revisiting those accesses.
    """

    def __init__(self, *args, **kw):
        # _tunnel_host is None until set_proxy() redirects self.host to a
        # proxy for an HTTPS request; it then holds the original host.
        _Request.__init__(self, *args, **kw)
        self._tunnel_host = None

    def set_proxy(self, host, type):
        """Route this request via the proxy at *host*.

        For the first HTTPS call, leave self.type untouched and stash the
        original host for tunnelling; otherwise behave like
        urllib2.Request.set_proxy() and rewrite the type and selector.
        """
        if self.type == 'https' and not self._tunnel_host:
            self._tunnel_host = self.host
        else:
            self.type = type
            # Proxy-style request: the selector becomes the full URL.
            self.__r_host = self.__original
        self.host = host
33+
34+
class BaseHandler(_BaseHandler):
    """Handler that marks proxied HTTPS requests for tunnelling (CONNECT)
    rather than rewriting them as plain proxy requests.
    """

    def proxy_open(self, req, proxy, type):
        """Prepare *req* for proxying; parameters follow
        urllib2.ProxyHandler.proxy_open():

        req -- Request being opened
        proxy -- proxy URL string, e.g. 'http://user:pass@host:port'
        type -- scheme the proxy serves

        For HTTPS, attach any proxy credentials and point the request at
        the proxy host, then return None so the normal handler chain opens
        the connection (set_proxy() on the local Request class records the
        original host as the tunnel target).
        """
        if req.get_type() == 'https':
            orig_type = req.get_type()
            proxy_type, user, password, hostport = _parse_proxy(proxy)
            if proxy_type is None:
                proxy_type = orig_type
            # Basic-auth credentials embedded in the proxy URL.
            if user and password:
                user_pass = '%s:%s' % (unquote(user), unquote(password))
                creds = base64.b64encode(user_pass).strip()
                req.add_header('Proxy-authorization', 'Basic ' + creds)
            hostport = unquote(hostport)
            req.set_proxy(hostport, proxy_type)
            # let other handlers take care of it
            return None
        else:
            # NOTE(review): urllib2.BaseHandler defines no proxy_open(), so
            # this delegation would raise AttributeError if ever reached for
            # a non-HTTPS scheme — the intended base was presumably
            # urllib2.ProxyHandler; confirm before relying on this path.
            return _BaseHandler.proxy_open(self, req, proxy, type)
52+
53+ class AbstractHTTPHandler (_AbstractHTTPHandler ):
54+ def do_open (self , http_class , req ):
55+ """Return an addinfourl object for the request, using http_class.
56+
57+ http_class must implement the HTTPConnection API from httplib.
58+ The addinfourl return value is a file-like object. It also
59+ has methods and attributes including:
60+ - info(): return a mimetools.Message object for the headers
61+ - geturl(): return the original request URL
62+ - code: HTTP status code
63+ """
64+ host = req .get_host ()
65+ if not host :
66+ raise URLError ('no host given' )
67+
68+ h = http_class (host , timeout = req .timeout ) # will parse host:port
69+ h .set_debuglevel (self ._debuglevel )
70+
71+ headers = dict (req .headers )
72+ headers .update (req .unredirected_hdrs )
73+ # We want to make an HTTP/1.1 request, but the addinfourl
74+ # class isn't prepared to deal with a persistent connection.
75+ # It will try to read all remaining data from the socket,
76+ # which will block while the server waits for the next request.
77+ # So make sure the connection gets closed after the (only)
78+ # request.
79+ headers ["Connection" ] = "close"
80+ headers = dict (
81+ (name .title (), val ) for name , val in headers .items ())
82+
83+ if not hasattr (req , '_tunnel_host' ):
84+ pass
85+
86+ if req ._tunnel_host :
87+ h .set_tunnel (req ._tunnel_host )
88+ try :
89+ h .request (req .get_method (), req .get_selector (), req .data , headers )
90+ r = h .getresponse ()
91+ except socket .error , err : # XXX what error?
92+ raise URLError (err )
93+
94+ # Pick apart the HTTPResponse object to get the addinfourl
95+ # object initialized properly.
96+
97+ # Wrap the HTTPResponse object in socket's file object adapter
98+ # for Windows. That adapter calls recv(), so delegate recv()
99+ # to read(). This weird wrapping allows the returned object to
100+ # have readline() and readlines() methods.
101+
102+ # XXX It might be better to extract the read buffering code
103+ # out of socket._fileobject() and into a base class.
104+
105+ r .recv = r .read
106+ fp = socket ._fileobject (r , close = True )
107+
108+ resp = addinfourl (fp , r .msg , req .get_full_url ())
109+ resp .code = r .status
110+ resp .msg = r .reason
111+ return resp
112+
113+
class HTTPHandler(AbstractHTTPHandler):
    """Plain-HTTP opener handler built on the tunnel-aware do_open()."""

    def http_open(self, req):
        # Delegate to do_open() with the tunnelling-capable connection class.
        return self.do_open(HTTPConnection, req)

    # Standard urllib2 pattern: reuse do_request_ as the request
    # pre-processor for the 'http' scheme.
    http_request = AbstractHTTPHandler.do_request_
120+
121+ #if hasattr(httplib, 'HTTPS'):
122+ # class HTTPSHandler(AbstractHTTPHandler):
123+ #
124+ # def https_open(self, req):
125+ # return self.do_open(httplib.HTTPSConnection, req)
126+ #
127+ # https_request = AbstractHTTPHandler.do_request_
128+
129+
class HTTPRedirectHandler(BaseHandler):
    """Redirect handler (derived from urllib2.HTTPRedirectHandler) built on
    the local, tunnel-aware BaseHandler/Request classes so that redirects
    continue to work when requests go through an HTTPS CONNECT proxy.
    """
    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        if (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
            or code in (301, 302, 303) and m == "POST"):
            # Strictly (according to RFC 2616), 301 or 302 in response
            # to a POST MUST NOT cause a redirection without confirmation
            # from the user (of urllib2, in this case).  In practice,
            # essentially all clients do redirect in this case, so we
            # do the same.
            # be conciliant with URIs containing a space
            newurl = newurl.replace(' ', '%20')
            # Drop entity headers: the redirected request carries no body.
            newheaders = dict((k, v) for k, v in req.headers.items()
                              if k.lower() not in ("content-length", "content-type")
                             )
            # Build the local (tunnel-aware) Request class, not urllib2's.
            return Request(newurl,
                           headers=newheaders,
                           origin_req_host=req.get_origin_req_host(),
                           unverifiable=True)
        else:
            raise HTTPError(req.get_full_url(), code, msg, headers, fp)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Handle a 30x response: build and open the follow-up request."""
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        if 'location' in headers:
            newurl = headers.getheaders('location')[0]
        elif 'uri' in headers:
            newurl = headers.getheaders('uri')[0]
        else:
            # No redirect target supplied; let other handlers deal with it.
            return

        # fix a possible malformed URL
        urlparts = urlparse.urlparse(newurl)
        if not urlparts.path:
            urlparts = list(urlparts)
            urlparts[2] = "/"
            newurl = urlparse.urlunparse(urlparts)

        # Resolve relative redirect targets against the current URL.
        newurl = urlparse.urljoin(req.get_full_url(), newurl)

        # For security reasons we do not allow redirects to protocols
        # other than HTTP, HTTPS or FTP.
        newurl_lower = newurl.lower()
        if not (newurl_lower.startswith('http://') or
                newurl_lower.startswith('https://') or
                newurl_lower.startswith('ftp://')):
            raise HTTPError(newurl, code,
                            msg + " - Redirection to url '%s' is not allowed" %
                            newurl,
                            headers, fp)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
                raise HTTPError(req.get_full_url(), code,
                                self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new, timeout=req.timeout)

    # All 30x codes share the same follow-the-Location logic.
    http_error_301 = http_error_303 = http_error_307 = http_error_302

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"
233+
234+
class OpenerDirector(_OpenerDirector):
    """OpenerDirector whose open() builds the local tunnel-aware Request
    class for URL strings, so HTTPS-through-proxy requests tunnel
    correctly.
    """

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Open *fullurl* (a URL string or a Request object), optionally
        POSTing *data*, and return the response object.
        """
        # Accept either a URL string or a ready-made Request object;
        # strings are wrapped in the tunnelling-capable Request class.
        if isinstance(fullurl, basestring):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.add_data(data)

        req.timeout = timeout
        scheme = req.get_type()

        # Run the scheme-specific request pre-processors in order; each
        # may replace the request object.
        for processor in self.process_request.get(scheme, []):
            req = getattr(processor, scheme + "_request")(req)

        response = self._open(req, data)

        # Run the scheme-specific response post-processors in order; each
        # may replace the response object.
        for processor in self.process_response.get(scheme, []):
            response = getattr(processor, scheme + "_response")(req, response)

        return response