forked from lisa-lab/DeepLearningTutorials
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpy_bing_search.py
More file actions
100 lines (84 loc) · 3.45 KB
/
py_bing_search.py
File metadata and controls
100 lines (84 loc) · 3.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import urllib2
import requests
import pdb
import time
class PyBingException(Exception):
    '''Error type reserved for failures reported by the Bing search client.'''
class PyBingSearch(object):
    '''
    Small client for the Bing Search API hosted on Azure DataMarket.

    QUERY_URL currently targets the Image endpoint; the Composite and Web
    endpoints are kept below, commented out, for reference.
    '''
    # QUERY_URL = 'https://api.datamarket.azure.com/Bing/Search/v1/Composite' \
    #     + '?Sources={}&Query={}&$top={}&$skip={}&$format={}'
    QUERY_URL = 'https://api.datamarket.azure.com/Bing/Search/v1/Image' \
        + '?Query={}&$top={}&$skip={}&$format={}'
    # QUERY_URL = 'https://api.datamarket.azure.com/Bing/Search/v1/Web' \
    #     + '?Query={}&$top={}&$skip={}&$format={}'

    # proxies = {
    #     "http": "http://privateproxy.gq"
    # }

    def __init__(self, api_key, safe=False):
        '''
        api_key: account key, sent as the password of HTTP basic auth.
        safe: when True, a response that is not valid JSON triggers a
              5 second pause before the (empty) result is returned.
        '''
        self.api_key = api_key
        self.safe = safe

    def search(self, query, limit=50, offset=0, format='json'):
        '''
        Fetch a single page of results.

        Returns the same (results, next_link) tuple as _search().
        '''
        return self._search(query, limit, offset, format)

    def search_all(self, query, limit=50, format='json'):
        '''
        Fetch successive pages until `limit` results have been collected
        or the service reports no further page.

        Returns a flat list of at most `limit` result objects.
        '''
        # _search() returns a (results, next_link) tuple; the previous code
        # treated it as a list with a `.total` attribute and always failed.
        page, next_link = self._search(query, limit, 0, format)
        results = list(page)
        # An empty page covers both "no more data" and the error path, where
        # the second tuple element is an HTTP status code, not a link.
        while page and next_link is not None and len(results) < limit:
            remaining = limit - len(results)
            page, next_link = self._search(query, remaining, len(results), format)
            results += page
            print(len(results))
        return results[:limit]

    def _search(self, query, limit, offset, format):
        '''
        Run one request against QUERY_URL.

        Returns a tuple (results, next_link):
          - on success, `results` is a list of Result objects and
            `next_link` is the '__next' url from the response, or None
            when the response carries no such key;
          - when the body is not valid JSON, returns ([], status_code).

        Raises KeyError if the JSON lacks the ['d']['results'] entry.
        '''
        url = self.QUERY_URL.format(
            urllib2.quote("'{}'".format(query)), limit, offset, format)
        # r = requests.get(url, auth=("", self.api_key), proxies=self.proxies)
        r = requests.get(url, auth=("", self.api_key))
        print(url)
        try:
            json_results = r.json()
        except ValueError:
            if self.safe:
                print("[ERROR] Request returned with code %s, error msg: %s. \nContinuing in 5 seconds." % (r.status_code, r.text))
                # Was `time.slee(5)`, which raised AttributeError.
                time.sleep(5)
            else:
                print("Request returned with code %s, error msg: %s" % (r.status_code, r.text))
                # raise PyBingException("Request returned with code %s, error msg: %s" % (r.status_code, r.text))
            # Both modes hand back an empty page; previously safe mode fell
            # through and hit a NameError on the unbound json_results.
            return [], r.status_code
        try:
            next_link = json_results['d']['__next']
        except KeyError as kE:
            print("Couldn't extract next_link: KeyError: %s" % kE)
            next_link = None
        return [Result(single_result_json) for single_result_json in json_results['d']['results']], next_link
class Result(object):
    '''
    A SINGLE search result built from one raw result mapping.

    Attributes set by the constructor:
    source_url: value stored under the 'SourceUrl' key
    media_url: value stored under the 'MediaUrl' key
    title: value stored under the 'Title' key

    The nested _Meta helper can hold a metadata mapping's 'type' and 'uri',
    but the constructor does not attach one (that call is disabled).
    '''

    class _Meta(object):
        '''
        Keeps the 'type' and 'uri' entries of a metadata mapping.
        '''

        def __init__(self, meta):
            self.type, self.uri = meta['type'], meta['uri']

    def __init__(self, result):
        # Lookups run left to right, so a missing key surfaces as KeyError
        # in the same order as three separate assignments would.
        self.source_url, self.media_url, self.title = (
            result['SourceUrl'],
            result['MediaUrl'],
            result['Title'],
        )
        # self.meta = self._Meta(result['__metadata'])
# def __init__(self, result):
# self.url = result['Url']
# self.title = result['Title']
# self.description = result['Description']
# self.id = result['ID']
# self.meta = self._Meta(result['__metadata'])