forked from Alfred1984/interesting-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathget_data.py
More file actions
43 lines (36 loc) · 1.89 KB
/
get_data.py
File metadata and controls
43 lines (36 loc) · 1.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import requests
import pandas as pd
from pymongo import MongoClient
class DataCrawler(object):
    """Crawl Fliggy scenic-ticket search results per city and store them in MongoDB.

    City names are read from the ``city`` column of a CSV file. Every auction
    entry returned by the search endpoint is tagged with the city it was
    found for and inserted into the ``ticket`` collection of the
    ``Laborday`` database.
    """

    # Search endpoint; the query string is supplied via ``params`` so that
    # requests URL-encodes the city keyword instead of it being spliced in raw.
    SEARCH_URL = 'https://travelsearch.fliggy.com/async/queryItemResult.do'

    # Seconds to wait on connect/read. requests applies no timeout unless one
    # is given, so a single stalled connection would block the crawl forever.
    REQUEST_TIMEOUT = 10

    def __init__(self, city_file='city_data.csv', host='localhost', port=27017):
        """Load the city list and open the target MongoDB collection.

        Args:
            city_file: Path of a CSV file that has a ``city`` column.
            host: MongoDB host name.
            port: MongoDB port.
        """
        self.cities = list(pd.read_csv(city_file)['city'])
        client = MongoClient(host=host, port=port)
        db = client.Laborday
        self.col = db.ticket

    def get_city_trip(self):
        """Fetch every result page for every city and store the tickets.

        Cities whose first response carries no ``itemPagenum`` entry are
        skipped. Network, HTTP-status and response-shape errors propagate to
        the caller rather than being swallowed.
        """
        for city in self.cities:
            print('正在爬取城市:{}的数据!'.format(city))
            first_page = self._fetch_page(city, 1)
            item_pagenum = first_page['data']['data'].get('itemPagenum')
            if item_pagenum is None:
                # No pagination info in the response: nothing is stored for this city.
                continue
            page_count = item_pagenum['data']['count']
            self._store_page(city, 1, first_page)
            # range() is empty when page_count <= 1, so no extra guard is needed.
            for page in range(2, page_count + 1):
                self._store_page(city, page, self._fetch_page(city, page))

    def _fetch_page(self, city, page):
        """Return the decoded JSON payload of one search result page."""
        res = requests.get(
            self.SEARCH_URL,
            params=[
                ('searchType', 'product'),
                ('keyword', city),
                ('category', 'SCENIC'),
                ('pagenum', page),
            ],
            timeout=self.REQUEST_TIMEOUT,
        )
        # Fail with a clear HTTP error instead of an obscure JSON/KeyError later.
        res.raise_for_status()
        return res.json()

    def _store_page(self, city, page, payload):
        """Tag each auction in ``payload`` with ``city`` and insert it."""
        products = payload['data']['data']['itemProducts']['data']['list']
        # An empty product list means the page holds no auctions to store.
        auctions = products[0]['auctions'] if products else []
        for ticket in auctions:
            ticket['city'] = city
            self.col.insert_one(ticket)
        print('成功爬取城市:{}的第{}页数据!'.format(city, page))
if __name__ == '__main__':
    # Script entry point: build the crawler and run the crawl right away.
    DataCrawler().get_city_trip()