|
| 1 | +# _*_ coding: utf-8 _*_ |
| 2 | + |
| 3 | +""" |
| 4 | +python_requests.py by xianhu |
| 5 | +""" |
| 6 | + |
| 7 | +import requests |
| 8 | + |
# Fetch a web page; the returned r is a Response object.
r = requests.get("https://github.com/timeline.json")
response_type = type(r)
print(response_type)

# Each HTTP verb has a helper of the same name, so the request type is
# obvious at the call site.
r1 = requests.post("http://httpbin.org/post")
r2 = requests.put("http://httpbin.org/put")
r3 = requests.delete("http://httpbin.org/delete")
r4 = requests.head("http://httpbin.org/get")
r5 = requests.options("http://httpbin.org/get")
| 19 | + |
# Passing URL parameters: a plain dict becomes the query string.
payload = {
    "key1": "value1",
    "key2": "value2",
}
r = requests.get("http://httpbin.org/get", params=payload)
final_url = r.url
print(final_url)  # e.g. http://httpbin.org/get?key1=value1&key2=value2

# Passing URL parameters: a list value repeats the key once per item.
payload = {
    "key1": "value1",
    "key2": ["value2", "value3"],
}
r = requests.get("http://httpbin.org/get", params=payload)
final_url = r.url
print(final_url)  # http://httpbin.org/get?key1=value1&key2=value2&key2=value3
| 29 | + |
# Reading the response body as text.
r = requests.get("https://github.com/timeline.json")
decoded_body = r.text
print(decoded_body)  # "[{"repository":{"open_issues":0,"url":"https://github.com/...
detected_encoding = r.encoding
print(detected_encoding)  # "utf-8"

# Requests decodes the body automatically (making an educated guess about
# the encoding from the HTTP headers); you can also override the encoding.
r.encoding = "ISO-8859-1"
redecoded_body = r.text  # r.text now uses the newly assigned r.encoding
print(redecoded_body)

# Binary response content; gzip and deflate transfer encodings are decoded
# for you automatically.
raw_bytes = r.content
print(raw_bytes)  # b"[{"repository":{"open_issues":0,"url":"https://github.com/...

# JSON response content: Requests ships with a built-in JSON decoder.
parsed = r.json()
print(parsed)  # [{u"repository": {u"open_issues": 0, u"url": "https://github.com/...
| 44 | + |
# Raw response content: read the raw socket response from the server.
# stream=True is required so the body is not consumed up front.
r = requests.get("https://github.com/timeline.json", stream=True)
print(r.raw)  # <requests.packages.urllib3.response.HTTPResponse object at 0x101194810>
print(r.raw.read(10))  # "\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\x03"
# Release the connection; this response has been partially consumed above.
r.close()

# In general, though, a streamed body should be saved to a file with the
# pattern below. A fresh request is issued because r.raw.read(10) already
# consumed the first bytes of the previous body (the gzip header in the
# sample output above), so iterating that response would produce a
# truncated or undecodable file.
r = requests.get("https://github.com/timeline.json", stream=True)
with open("test", "wb") as fd:
    for chunk in r.iter_content(chunk_size=256):
        fd.write(chunk)
r.close()
| 54 | + |
# Custom request headers: pass a dict.
headers = {
    "user-agent": "my-app/0.0.1",
}
r = requests.get("https://api.github.com/some/endpoint", headers=headers)
sent_headers = r.request.headers  # headers of the request that was sent
print(sent_headers)

# A more complex POST request: form-encoded data.
payload = {
    "key1": "value1",
    "key2": "value2",
}
r = requests.post("http://httpbin.org/post", data=payload)
form_echo = r.text
print(form_echo)
| 64 | + |
# POST a multipart-encoded file.
# The file is opened in a with-block so the handle is closed once the
# upload has finished instead of being leaked.
with open("report.xls", "rb") as report:
    files = {"file": report}
    r = requests.post("http://httpbin.org/post", files=files)
print(r.text)

# The file name, content type and extra headers can be set explicitly.
with open("report.xls", "rb") as report:
    files = {"file": ("report.xls", report, "application/vnd.ms-excel", {"Expires": "0"})}
    r = requests.post("http://httpbin.org/post", files=files)
print(r.text)

# A plain string can also be sent so that it is received as a file.
files = {"file": ("report.csv", "some,data,to,send\nanother,row,to,send\n")}
r = requests.post("http://httpbin.org/post", files=files)
print(r.text)
| 79 | + |
# Response status codes.
r = requests.get("http://httpbin.org/get")
print(r.status_code)  # 200
print(r.status_code == requests.codes.ok)  # True; requests.codes is a status-code lookup

# A bad request (4XX client error or 5XX server error response) can be
# turned into an exception with Response.raise_for_status().
bad_r = requests.get("http://httpbin.org/status/404")
print(bad_r.status_code)  # 404
try:
    bad_r.raise_for_status()
except requests.exceptions.HTTPError as err:
    # Caught so that the rest of this demo script still runs; uncaught, the
    # traceback would end with:
    #   requests.exceptions.HTTPError: 404 Client Error
    print(err)
| 93 | + |
# Response headers: the server's headers presented as a dict-like object.
# HTTP header names are case-insensitive. Here r is the response from the
# preceding GET in this script.
response_headers = r.headers
print(response_headers)
# {
#     "content-encoding": "gzip",
#     "transfer-encoding": "chunked",
#     "connection": "close",
#     "server": "nginx/1.0.4",
#     "x-runtime": "148ms",
#     "etag": "e1ca502697e5c9317743dc078f67693f",
#     "content-type": "application/json"
# }
print(response_headers["Content-Type"])  # "application/json"
print(response_headers.get("content-type"))  # "application/json"

# Cookies: reading a cookie when the response contains some.
r = requests.get("http://example.com/some/cookie/setting/url")
received_cookies = r.cookies
print(received_cookies["example_cookie_name"])  # "example_cookie_value"

# To send your own cookies to the server, pass a dict via the cookies parameter.
cookies = {"cookies_are": "working"}
r = requests.get("http://httpbin.org/cookies", cookies=cookies)
cookie_echo = r.text
print(cookie_echo)
| 116 | + |
# Session objects let you persist certain parameters across requests; they
# also keep cookies across all requests made from the same Session instance.
s = requests.Session()
s.get("http://httpbin.org/cookies/set/sessioncookie/123456789")
r = s.get("http://httpbin.org/cookies")
session_cookies_echo = r.text
print(session_cookies_echo)  # '{"cookies": {"sessioncookie": "123456789"}}'

# A session can also provide default data for request methods, by setting
# attributes on the session object.
s = requests.Session()
s.auth = ("user", "pass")
default_headers = {"x-test": "true"}
s.headers.update(default_headers)
# Both "x-test" and "x-test2" are sent.
s.get("http://httpbin.org/headers", headers={"x-test2": "true"})

# Note, however, that method-level parameters are NOT persisted across
# requests, even when a session is used. In the example below the cookie is
# sent with the first request only, not the second.
s = requests.Session()
r = s.get("http://httpbin.org/cookies", cookies={"from-my": "browser"})
first_echo = r.text
print(first_echo)  # '{"cookies": {"from-my": "browser"}}'
r = s.get("http://httpbin.org/cookies")
second_echo = r.text
print(second_echo)  # '{"cookies": {}}'
# To add cookies to a session manually, use the cookie utility functions to
# manipulate Session.cookies.

# Sessions can also be used as context managers.
with requests.Session() as s:
    s.get("http://httpbin.org/cookies/set/sessioncookie/123456789")
| 141 | + |
# Redirection and history: by default Requests follows redirects for every
# verb except HEAD. The response's history attribute tracks the redirects.
# Response.history is a list of the Response objects that were created in
# order to complete the request, sorted from oldest to most recent.
r = requests.get("http://github.com")
final_status = r.status_code
print(final_status)  # 200
redirect_chain = r.history
print(redirect_chain)  # [<Response [301]>]

# With GET, OPTIONS, POST, PUT, PATCH or DELETE, redirect handling can be
# disabled through the allow_redirects parameter.
r = requests.get("http://github.com", allow_redirects=False)
final_status = r.status_code
print(final_status)  # 301
redirect_chain = r.history
print(redirect_chain)  # []

# With HEAD, redirect handling can be enabled the same way.
r = requests.head("http://github.com", allow_redirects=True)
redirect_chain = r.history
print(redirect_chain)  # [<Response [301]>]
| 157 | + |
# Timeouts: set the timeout parameter.
try:
    # Deliberately tiny timeout to demonstrate the failure mode.
    requests.get("http://github.com", timeout=0.001)
except requests.exceptions.Timeout as err:
    # Caught so that the rest of this demo script still runs; uncaught, the
    # traceback would end with something like:
    #   requests.exceptions.Timeout: HTTPConnectionPool(host="github.com", port=80):
    #   Request timed out. (timeout=0.001)
    print(err)

# Note: timeout is not a limit on the time taken to download the whole
# response body. An exception is raised if the server has not answered
# within timeout seconds; more precisely, if no bytes have been received on
# the underlying socket for timeout seconds.
requests.get("https://github.com", timeout=5)
# A single timeout value is used for both the connect and the read timeout.
# To set them separately, pass a (connect, read) tuple.
requests.get("https://github.com", timeout=(3.05, 27))
| 171 | + |
# Errors and exceptions:
# - a network problem (DNS failure, refused connection, ...) raises ConnectionError
# - Response.raise_for_status() raises HTTPError for an unsuccessful status code
# - a request that times out raises Timeout
# - exceeding the configured maximum number of redirects raises TooManyRedirects
# Every exception Requests raises explicitly inherits from
# requests.exceptions.RequestException.

# SSL certificate verification: verify=True checks the certificate,
# verify=False ignores it.
try:
    # Per the original note, this host had no matching SSL certificate, so
    # verification fails.
    requests.get("https://kennethreitz.com", verify=True)
except requests.exceptions.SSLError as err:
    # Caught so that the rest of this demo script still runs. Example message:
    #   hostname "kennethreitz.com" doesn't match either of "*.herokuapp.com", "herokuapp.com"
    print(err)
requests.get("https://github.com", verify=True)  # <Response [200]>; certificate is valid
# For private certificates, verify can also be given the path to a CA_BUNDLE file.

# A local certificate can be supplied as the client certificate, either as a
# single file (containing the private key and the certificate) or as a tuple
# of both file paths. The paths below are placeholders, so these calls are
# expected to fail, e.g. with:
#   SSLError: [Errno 336265225] _ssl.c:347: error:140B0009:SSL routines:SSL_CTX_use_PrivateKey_file:PEM lib
# NOTE(review): depending on the Requests version a missing certificate file
# may surface as SSLError or as a plain IOError -- both are caught here.
for client_cert in (("/path/server.crt", "/path/key"), "/wrong_path/server.pem"):
    try:
        requests.get("https://kennethreitz.com", cert=client_cert)
    except (requests.exceptions.RequestException, IOError) as err:
        print(err)
# Warning: the private key of a local certificate must be unencrypted;
# Requests does not currently support encrypted keys.
| 189 | + |
# Streaming uploads: send a large stream or file without first reading it
# into memory by passing a file-like object as the body.
# The file is opened in binary mode: Requests may derive Content-Length from
# the file's size in bytes, which can be wrong for a text-mode handle.
with open("massive-body", "rb") as f:
    requests.post("http://some.url/streamed", data=f)
| 193 | + |
| 194 | + |
# Event hooks. Available hook: response (the response generated from a request).
# Pass a {hook_name: callback_function} dict as the hooks request parameter to
# assign a hook function on a per-request basis.
def print_url(resp, *args, **kwargs):
    """Response hook that prints the URL of the response it receives.

    Requests invokes response hooks with the response plus extra keyword
    arguments, so the callback accepts and ignores ``*args`` / ``**kwargs``;
    a one-argument signature would raise TypeError when called that way.

    Args:
        resp: Object exposing a ``url`` attribute (a Response when used as a hook).
        *args: Ignored.
        **kwargs: Ignored.

    Returns:
        None.
    """
    print(resp.url)
# Attach print_url as the response hook for this single request.
response_hooks = {"response": print_url}
requests.get("http://httpbin.org", hooks=response_hooks)

# Proxies: map each URL scheme to the proxy that should handle it.
proxies = dict(
    http="http://10.10.1.10:3128",
    https="http://10.10.1.10:1080",
)
requests.get("http://example.org", proxies=proxies)
# If the proxy needs HTTP Basic Auth, use the http://user:password@host:port/
# form, for example "http": "http://user:pass@10.10.1.10:3128/".

# Besides plain HTTP proxies, Requests also supports SOCKS proxies:
# $ pip install requests[socks]
proxies = dict(
    http="socks5://user:pass@host:port",
    https="socks5://user:pass@host:port",
)
requests.get("http://example.org", proxies=proxies)
0 commit comments