-
Notifications
You must be signed in to change notification settings - Fork 15
Expand file tree
/
Copy pathserpapi.py
More file actions
210 lines (174 loc) · 6.21 KB
/
serpapi.py
File metadata and controls
210 lines (174 loc) · 6.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
"""Client for SerpApi.com"""
import json
import urllib3
from .error import SerpApiException
from ._version import __version__
class HttpClient:
    """Simple HTTP client wrapper around urllib3.

    Holds a ``urllib3.PoolManager`` together with the default query
    parameters that are merged into every request sent to ``BACKEND``.
    """

    BACKEND = "https://serpapi.com"
    SUPPORTED_DECODER = ["json", "html"]

    def __init__(self, parameter: dict = None):
        """Initialize a SerpApi client with the default parameters provided.

        The keys ``timeout`` and ``retries`` are urllib3 settings rather than
        query parameters: they are taken out of the defaults and stored on
        the instance.

        Parameters
        ---
        parameter: dict
            default query parameters sent with every request; may also hold
            - timeout: forwarded to urllib3 as the request timeout
              (60.0 when absent)
            - retries: forwarded to urllib3 as the retry setting
              (False, i.e. no retry, when absent)
        """
        # initialize the http client
        self.http = urllib3.PoolManager()
        # Work on a private copy: the caller's dict must not lose its
        # timeout/retries keys, and None (the declared default) is accepted.
        defaults = {} if parameter is None else dict(parameter)
        # urllib3 configurations, removed from the query defaults
        self.timeout = defaults.pop("timeout", 60.0)
        self.retries = defaults.pop("retries", False)
        # default client parameter
        self.parameter = defaults

    def start(self, path: str, parameter: dict = None, decoder: str = "json"):
        """Start an HTTP GET request and decode the response.

        The response is decoded using the selected decoder:
        - html: raw HTML response
        - json: deep dict containing the search results

        Parameters
        ---
        path: str
            HTTP endpoint path under serpapi.com/<path>
        parameter: dict
            search query; its entries override the client defaults as well
            as the ``source`` and ``output`` fields. None means no override.
        decoder: str
            defines how to post process the HTTP response.
            for example: json -> convert the response to a dict
            using the default JSON parser from Python

        Returns
        ---
        dict|str
            decoded HTTP response

        Raises
        ---
        SerpApiException
            on a non-200 response or an unknown decoder
        """
        # Build the query on a copy so that one call never alters the
        # defaults seen by the next call.
        fields = dict(self.parameter)
        # track client language
        fields["source"] = "serpapi-python:" + __version__
        fields["output"] = decoder
        # per-call overrides win over everything above
        if parameter is not None:
            fields.update(parameter)
        # execute HTTP get request
        response = self.http.request(
            "GET",
            self.BACKEND + path,
            fields=fields,
            timeout=self.timeout,
            retries=self.retries,
        )
        # decode response
        return self.decode(response, decoder)

    def decode(self, response, decoder: str):
        """Decode an HTTP response using the given decoder.

        Parameters
        ---
        response
            object exposing ``status`` (int) and ``data`` (bytes), as
            returned by ``self.http.request``
        decoder: str
            "json" or "html"

        Returns
        ---
        dict|str
            parsed JSON for "json", the body text for "html"

        Raises
        ---
        SerpApiException
            - status is not 200: the message is the ``error`` field of the
              JSON body, or the body text itself when it is not such JSON
            - decoder is neither "json" nor "html"
        """
        # handle HTTP error
        if response.status != 200:
            # errors="replace": an undecodable error body must still produce
            # a readable message instead of a second failure.
            raw = response.data.decode("utf-8", errors="replace")
            try:
                message = json.loads(raw)["error"]
            except (ValueError, KeyError, TypeError) as ex:
                # not the expected {"error": ...} document: report it verbatim
                raise SerpApiException(raw) from ex
            # Raised outside the try block so the parsed message is not
            # caught and replaced by the raw body.
            raise SerpApiException(message)
        # HTTP success 200
        payload = response.data.decode("utf-8")
        # successful response decoding
        if decoder == "json":
            return json.loads(payload)
        if decoder == "html":
            return payload
        raise SerpApiException(f"Invalid decoder: {decoder}, available: json, html")
class Client(HttpClient):
    """
    Client performs HTTP queries to serpapi.com using urllib3 under the hood.

    The HTTP connection can be tuned through the default parameters:
    - retries : attempt to reconnect if the connection fails, by default: False
    - timeout : connection timeout, by default 60s

    for more details: https://urllib3.readthedocs.io/en/stable/user-guide.html
    """

    def __init__(self, parameter: dict = None):
        """
        Create a client with optional default parameters

        Parameters
        ----------
        parameter : dict
            default query parameters (plus optional timeout / retries);
            None is treated as an empty dict
        """
        # define default parameter
        if parameter is None:
            parameter = {}
        # initialize HTTP client
        super().__init__(parameter)

    def search(self, parameter: dict = None, decoder: str = "json"):
        """
        make search then decode the output
        decoder supported "json", "html"

        Parameters
        ----------
        parameter : dict
            search query; None means "client defaults only"
        decoder : str
            set decoder to convert the data structure received from the API

        Returns
        -------
        dict|str
            search results returned as :
            dict if decoder = "json"
            str if decoder = "html"
        """
        # start() merges this dict into the defaults, so None is replaced
        # by an empty override here.
        query = {} if parameter is None else parameter
        return self.start(path="/search", parameter=query, decoder=decoder)

    def html(self, parameter: dict = None):
        """
        html search

        Parameters
        ----------
        parameter : dict
            search query see: https://serpapi.com/search-api
            None means "client defaults only"

        Returns
        -------
        str
            raw html search results directly from the search engine
        """
        query = {} if parameter is None else parameter
        return self.start("/search", query, "html")

    def location(self, parameter: dict = None):
        """
        Get location using Location API

        Parameters
        ----------
        parameter : dict
            location query like: {q: "Austin", limit: 5}
            see: https://serpapi.com/locations-api
            None means "client defaults only"

        Returns
        -------
        array
            list of matching locations
        """
        query = {} if parameter is None else parameter
        return self.start("/locations.json", query, "json")

    def search_archive(self, search_id: str, decoder: str = "json"):
        """
        Retrieve search results from the Search Archive API

        Parameters
        ----------
        search_id : str
            id from a previous search. in the JSON search response it is
            search_metadata.id
        decoder : str
            "json" or "html"; also used as the extension of the request path

        Returns
        -------
        dict|str
            archived search results decoded with the selected decoder

        Raises
        ------
        SerpApiException
            if decoder is not one of SUPPORTED_DECODER
        """
        # reject unknown decoders before any request is sent
        if decoder not in self.SUPPORTED_DECODER:
            raise SerpApiException(f"Invalid decoder: {decoder}, available: json, html. ")
        path = "/searches/" + str(search_id) + "." + decoder
        return self.start(path, {}, decoder)

    def account(self, api_key: str = None):
        """
        Get account information using Account API

        Parameters
        ----------
        api_key : str
            secret user key provided by serpapi.com.
            When given it is stored in the client default parameters, so it
            is also sent with every later request made by this client.

        Returns
        -------
        dict
            user account information
        """
        if api_key is not None:
            self.parameter["api_key"] = api_key
        # start() already sends the client defaults (api_key included);
        # no per-call override is needed.
        return self.start("/account", {}, "json")