Skip to content

Commit 8018eaf

Browse files
author
pjkersha
committed
* More tidying ready for release
git-svn-id: http://proj.badc.rl.ac.uk/svn/ndg-security/trunk/ndg_httpsclient@7988 051b1e3e-aa0c-0410-b6c2-bfbade6052be
1 parent 87b8985 commit 8018eaf

9 files changed

Lines changed: 331 additions & 15 deletions

File tree

.pydevproject

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
<?eclipse-pydev version="1.0"?>
33

44
<pydev_project>
5-
<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
6-
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.6</pydev_property>
5+
<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">ndg-httpsclient-py2.7</pydev_property>
6+
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
77
<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
88
<path>/ndg_httpsclient</path>
99
</pydev_pathproperty>

MANIFEST.in

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#
2+
# MANIFEST.in file to enable inclusion of unit test data files and config
3+
#
4+
# NDG HTTPS Client Package
5+
#
6+
# P J Kershaw 17/01/12
7+
#
8+
# Copyright (C) 2012 STFC
9+
#
10+
# Licence: BSD - See LICENCE file for details
11+
recursive-include ndg/ *.crt *.key *.pem README

ndg/httpsclient/https.py

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
"""urllib2pyopenssl HTTPS module containing PyOpenSSL implementation of
1+
"""ndg_httpsclient HTTPS module containing PyOpenSSL implementation of
22
httplib.HTTPSConnection
33
44
PyOpenSSL utility to make a httplib-like interface suitable for use with
@@ -12,8 +12,15 @@
1212
__revision__ = '$Id$'
1313
import logging
1414
import socket
15-
from httplib import HTTPConnection, HTTPS_PORT
16-
from urllib2 import AbstractHTTPHandler
15+
import sys
16+
from httplib import HTTPS_PORT
17+
if sys.version_info < (2, 6, 2):
18+
from ndg.httpsclient.httplib_proxy import HTTPConnection
19+
from ndg.httpsclient.urllib2_proxy import AbstractHTTPHandler
20+
else:
21+
from httplib import HTTPConnection
22+
from urllib2 import AbstractHTTPHandler
23+
1724

1825
from OpenSSL import SSL
1926

@@ -49,15 +56,23 @@ def connect(self):
4956
"""Create SSL socket and connect to peer
5057
"""
5158
if getattr(self, 'ssl_context', None):
59+
if not isinstance(self.ssl_context, SSL.Context):
60+
raise TypeError('Expecting OpenSSL.SSL.Context type for "'
61+
'ssl_context" keyword; got %r instead' %
62+
self.ssl_context)
5263
ssl_context = self.ssl_context
5364
else:
5465
ssl_context = SSL.Context(self.__class__.default_ssl_method)
5566

5667
sock = socket.create_connection((self.host, self.port), self.timeout)
68+
69+
# Tunnel if using a proxy - ONLY available for Python 2.6.2 and above
5770
if getattr(self, '_tunnel_host', None):
5871
self.sock = sock
5972
self._tunnel()
73+
6074
self.sock = SSLSocket(ssl_context, sock)
75+
6176
# Go to client mode.
6277
self.sock.set_connect_state()
6378

@@ -82,6 +97,10 @@ def __init__(self, ssl_context, debuglevel=0):
8297
AbstractHTTPHandler.__init__(self, debuglevel)
8398

8499
if ssl_context is not None:
100+
if not isinstance(ssl_context, SSL.Context):
101+
raise TypeError('Expecting OpenSSL.SSL.Context type for "'
102+
'ssl_context" keyword; got %r instead' %
103+
ssl_context)
85104
self.ssl_context = ssl_context
86105
else:
87106
self.ssl_context = SSL.Context(SSL.SSLv23_METHOD)

ndg/httpsclient/test/__init__.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
"""unit tests package for urllib2pyopenssl
1+
"""unit tests package for ndg_httpsclient
22
33
PyOpenSSL utility to make a httplib-like interface suitable for use with
44
urllib2
@@ -9,9 +9,20 @@
99
__license__ = "BSD - see LICENSE file in top-level directory"
1010
__contact__ = "Philip.Kershaw@stfc.ac.uk"
1111
__revision__ = '$Id$'
12+
import os
13+
import unittest
14+
1215
class Constants(object):
    '''Convenience base class from which other unit tests can extend.  It
    sets the generic data directory path and the locations of the test
    SSL certificate and private key files.
    '''
    # Ports and host used to build the test service URIs
    PORT = 4443
    PORT2 = 4444
    HOSTNAME = 'localhost'
    TEST_URI = 'https://%s:%d' % (HOSTNAME, PORT)
    TEST_URI2 = 'https://%s:%d' % (HOSTNAME, PORT2)

    # Directory containing this module.  __file__ must be used here:
    # __path__ is a package's *list* of search directories and cannot be
    # passed to os.path.abspath().
    UNITTEST_DIR = os.path.dirname(os.path.abspath(__file__))

    # PKI test fixtures are looked up in the 'pki' sub-directory
    SSL_CERT_FILENAME = 'localhost.crt'
    SSL_CERT_FILEPATH = os.path.join(UNITTEST_DIR, 'pki', SSL_CERT_FILENAME)
    SSL_PRIKEY_FILENAME = 'localhost.key'
    SSL_PRIKEY_FILEPATH = os.path.join(UNITTEST_DIR, 'pki', SSL_PRIKEY_FILENAME)

ndg/httpsclient/urllib2_build_opener.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,17 @@
77
__license__ = "BSD - see LICENSE file in top-level directory"
88
__contact__ = "Philip.Kershaw@stfc.ac.uk"
99
__revision__ = '$Id: pyopenssl.py 7929 2011-08-16 16:39:13Z pjkersha $'
10-
1110
import logging
12-
from urllib2 import (OpenerDirector, ProxyHandler, UnknownHandler, HTTPHandler,
13-
HTTPDefaultErrorHandler, HTTPRedirectHandler,
11+
from urllib2 import (ProxyHandler, UnknownHandler, HTTPDefaultErrorHandler,
1412
FTPHandler, FileHandler, HTTPErrorProcessor)
1513

14+
import sys
15+
if sys.version_info < (2, 6, 2):
16+
from ndg.httpsclient.urllib2_proxy import (HTTPHandler, OpenerDirector,
17+
HTTPRedirectHandler)
18+
else:
19+
from urllib2 import HTTPHandler, OpenerDirector, HTTPRedirectHandler
20+
1621
from ndg.httpsclient.https import HTTPSContextHandler
1722

1823
log = logging.getLogger(__name__)

ndg/httpsclient/urllib2_proxy.py

Lines changed: 262 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,262 @@
1+
'''
2+
Created on 12 Jan 2012
3+
4+
@author: rwilkinson
5+
'''
6+
import base64
7+
import socket
8+
import urlparse
9+
from urllib import unquote, addinfourl
10+
from urllib2 import _parse_proxy, URLError, HTTPError
11+
from urllib2 import (AbstractHTTPHandler as _AbstractHTTPHandler,
12+
BaseHandler as _BaseHandler,
13+
HTTPRedirectHandler as _HTTPRedirectHandler,
14+
Request as _Request,
15+
OpenerDirector as _OpenerDirector)
16+
17+
from ndg.httpsclient.httplib_proxy import HTTPConnection
18+
19+
20+
class Request(_Request):
    '''urllib2 Request extended with a ``_tunnel_host`` attribute so that
    an https request sent via a proxy can record the host to tunnel to.
    '''

    def __init__(self, *args, **kw):
        '''Initialise the base request and start with no tunnel host set.

        All arguments are passed through unchanged to the base class.
        '''
        _Request.__init__(self, *args, **kw)
        self._tunnel_host = None

    def set_proxy(self, host, type):
        '''Route this request via the proxy ``host``.

        For an https request with no tunnel host recorded yet, the current
        host is remembered as the tunnel target.  Otherwise the request
        type is switched to ``type`` and the selector host is reset to the
        original URL.  In both cases ``self.host`` becomes ``host``.
        '''
        needs_tunnel = self.type == 'https' and not self._tunnel_host
        if needs_tunnel:
            self._tunnel_host = self.host
        else:
            self.type = type
            # NOTE(review): these double-underscore names are mangled with
            # this class's name ("Request"); presumably they are meant to
            # hit the base class's private attributes, which only works
            # while both classes share that name - confirm before renaming.
            self.__r_host = self.__original

        self.host = host
33+
34+
35+
class BaseHandler(_BaseHandler):
    '''Handler base class adding https-aware proxy handling.'''

    def proxy_open(self, req, proxy, type):
        '''Prepare ``req`` to be sent via ``proxy``.

        For https requests the proxy URL is parsed, a Basic
        ``Proxy-authorization`` header is added when both user and
        password are present, and the request is pointed at the proxy.
        None is returned so that other handlers carry on processing.

        Any other request type is delegated to the base class.
        '''
        request_type = req.get_type()
        if request_type != 'https':
            # NOTE(review): assumes the base class provides proxy_open();
            # verify against the urllib2 version in use.
            return _BaseHandler.proxy_open(self, req, proxy, type)

        proxy_type, user, password, hostport = _parse_proxy(proxy)
        if proxy_type is None:
            # No scheme given in the proxy setting: fall back to the
            # scheme of the request itself
            proxy_type = request_type

        if user and password:
            credentials = '%s:%s' % (unquote(user), unquote(password))
            encoded = base64.b64encode(credentials).strip()
            req.add_header('Proxy-authorization', 'Basic ' + encoded)

        req.set_proxy(unquote(hostport), proxy_type)

        # let other handlers take care of it
        return None
52+
53+
class AbstractHTTPHandler(_AbstractHTTPHandler):
    '''HTTP handler base class whose do_open() sets up a tunnel on the
    connection when the request carries a ``_tunnel_host``.
    '''

    def do_open(self, http_class, req):
        """Return an addinfourl object for the request, using http_class.

        http_class must implement the HTTPConnection API from httplib.
        The addinfourl return value is a file-like object.  It also
        has methods and attributes including:
            - info(): return a mimetools.Message object for the headers
            - geturl(): return the original request URL
            - code: HTTP status code

        Raises URLError if the request has no host or if a socket error
        occurs while sending the request or reading the response.
        """
        host = req.get_host()
        if not host:
            raise URLError('no host given')

        h = http_class(host, timeout=req.timeout) # will parse host:port
        h.set_debuglevel(self._debuglevel)

        headers = dict(req.headers)
        headers.update(req.unredirected_hdrs)
        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        headers = dict(
            (name.title(), val) for name, val in headers.items())

        # Request objects that do not define _tunnel_host are treated as
        # not requiring a tunnel rather than failing on attribute access.
        tunnel_host = getattr(req, '_tunnel_host', None)
        if tunnel_host:
            h.set_tunnel(tunnel_host)

        try:
            h.request(req.get_method(), req.get_selector(), req.data, headers)
            r = h.getresponse()
        except socket.error as err: # XXX what error?
            raise URLError(err)

        # Pick apart the HTTPResponse object to get the addinfourl
        # object initialized properly.

        # Wrap the HTTPResponse object in socket's file object adapter
        # for Windows.  That adapter calls recv(), so delegate recv()
        # to read().  This weird wrapping allows the returned object to
        # have readline() and readlines() methods.

        # XXX It might be better to extract the read buffering code
        # out of socket._fileobject() and into a base class.

        r.recv = r.read
        fp = socket._fileobject(r, close=True)

        resp = addinfourl(fp, r.msg, req.get_full_url())
        resp.code = r.status
        resp.msg = r.reason
        return resp
112+
113+
114+
class HTTPHandler(AbstractHTTPHandler):
    '''Handler for plain http requests, built on this module's
    AbstractHTTPHandler and the HTTPConnection class imported above.
    '''

    def http_open(self, req):
        '''Open ``req`` with HTTPConnection and return the result of
        do_open().
        '''
        return self.do_open(HTTPConnection, req)

    # Request pre-processing hook is taken as-is from the parent class
    http_request = AbstractHTTPHandler.do_request_
120+
121+
#if hasattr(httplib, 'HTTPS'):
122+
# class HTTPSHandler(AbstractHTTPHandler):
123+
#
124+
# def https_open(self, req):
125+
# return self.do_open(httplib.HTTPSConnection, req)
126+
#
127+
# https_request = AbstractHTTPHandler.do_request_
128+
129+
130+
class HTTPRedirectHandler(BaseHandler):
    '''Follow 301, 302, 303 and 307 redirects, creating the replacement
    request with this module's Request class.
    '''
    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    # Prefix of the error message raised when a redirect loop is detected
    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        method = req.get_method()
        may_redirect = (
            code in (301, 302, 303, 307) and method in ("GET", "HEAD")
            or code in (301, 302, 303) and method == "POST")
        if not may_redirect:
            raise HTTPError(req.get_full_url(), code, msg, headers, fp)

        # Strictly (according to RFC 2616), 301 or 302 in response
        # to a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib2, in this case).  In practice,
        # essentially all clients do redirect in this case, so we
        # do the same.
        # be conciliant with URIs containing a space
        newurl = newurl.replace(' ', '%20')

        # Headers describing the original body are not carried over
        body_headers = ("content-length", "content-type")
        newheaders = dict((key, value)
                          for key, value in req.headers.items()
                          if key.lower() not in body_headers)

        return Request(newurl,
                       headers=newheaders,
                       origin_req_host=req.get_origin_req_host(),
                       unverifiable=True)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        '''Handle a redirect response by opening the redirect target.

        Returns None when the response names no target or when
        redirect_request() declines; raises HTTPError for a disallowed
        target scheme or a detected redirect loop; otherwise returns the
        result of opening the new request via the parent opener.
        '''
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        if 'location' in headers:
            newurl = headers.getheaders('location')[0]
        elif 'uri' in headers:
            newurl = headers.getheaders('uri')[0]
        else:
            return

        # fix a possible malformed URL
        urlparts = urlparse.urlparse(newurl)
        if not urlparts.path:
            urlparts = list(urlparts)
            urlparts[2] = "/"
        newurl = urlparse.urlunparse(urlparts)

        newurl = urlparse.urljoin(req.get_full_url(), newurl)

        # For security reasons we do not allow redirects to protocols
        # other than HTTP, HTTPS or FTP.
        allowed_prefixes = ('http://', 'https://', 'ftp://')
        if not newurl.lower().startswith(allowed_prefixes):
            raise HTTPError(newurl, code,
                            msg + " - Redirection to url '%s' is not allowed" %
                            newurl,
                            headers, fp)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            too_many_repeats = visited.get(newurl, 0) >= self.max_repeats
            if too_many_repeats or len(visited) >= self.max_redirections:
                raise HTTPError(req.get_full_url(), code,
                                self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new, timeout=req.timeout)

    # The same handling applies to the other redirect status codes
    http_error_301 = http_error_303 = http_error_307 = http_error_302
233+
234+
235+
class OpenerDirector(_OpenerDirector):
    '''OpenerDirector whose open() wraps URL strings in this module's
    Request class.
    '''

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        '''Open ``fullurl`` and return the post-processed response.

        ``fullurl`` may be a URL string or a request object.  A string is
        wrapped in a Request together with ``data``; for an existing
        request, ``data`` is attached only when it is not None.
        ``timeout`` is stored on the request before processing starts.
        '''
        # accept a URL or a Request object
        if isinstance(fullurl, basestring):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.add_data(data)

        req.timeout = timeout
        protocol = req.get_type()

        # pre-process request
        request_hook = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            req = getattr(processor, request_hook)(req)

        response = self._open(req, data)

        # post-process response
        response_hook = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            response = getattr(processor, response_hook)(req, response)

        return response

0 commit comments

Comments
 (0)