Skip to content

Commit fdf4a96

Browse files
committed
change README and add file
1 parent 59fc6d8 commit fdf4a96

7 files changed

Lines changed: 334 additions & 0 deletions

File tree

PROXIES/db.conf

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
[mysql]
2+
3+
HOST = 172.20.6.100
4+
PORT = 3306
5+
USER = root
6+
PASSWD = mysqladmin
7+
DB = pydb
8+
TABLE = pytab
9+
CHARSET = utf8
10+
11+
12+
[redis]
13+
14+
HOST = 172.20.6.100
15+
PORT = 6379
16+
PASSWD = redisadmin
17+
18+
19+
[memcache]
20+
21+
HOST = 172.20.6.100
22+
PORT = 11211
23+
24+
25+
[mongodb]
26+
27+
HOST = 172.20.6.100
28+
PORT = 27017
29+
DB = db1
30+
USER = mongoadmin
31+
PASSWD = mongopwd

PROXIES/save_memcache.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#!/bin/env python
2+
# -*- coding:utf-8 -*-
3+
# _author:kaliarch
4+
5+
import memcache
6+
import random
7+
import configparser
8+
import spider
9+
10+
class MemcacheOper:
    """Store proxy entries in memcached and read one back at random.

    Connection settings come from the ``[memcache]`` section of ``db.conf``,
    which is looked up relative to the current working directory.
    """

    # memcache_gain historically drew from the keys "0".."10"; keep that as
    # the minimum probe range so entries written by earlier runs stay reachable.
    DEFAULT_KEYSPACE = 11

    def __init__(self):
        """Read the memcache host and port from db.conf and build the client."""
        config = configparser.ConfigParser()
        config.read('db.conf')
        self.host = config['memcache']['HOST']
        self.port = config['memcache']['PORT']
        self.mcoper = memcache.Client([self.host + ':' + self.port], debug=True)
        # Number of entries written by the last memcache_save() call.
        self._saved = 0

    def memcache_save(self, result_list):
        """Store every entry of result_list under its index as the key.

        :param result_list: iterable of values; they are written to the keys
            "0", "1", ... in iteration order
        :return: None
        """
        count = 0
        for num, cont in enumerate(result_list):
            self.mcoper.set(str(num), cont)
            count = num + 1
        self._saved = count

    def memcache_gain(self):
        """Return one stored entry chosen at random.

        The candidate keys are probed in random order and the first one that
        holds a value wins, so a missing key no longer produces a spurious
        None while other keys are populated.  The probe range covers at least
        ``DEFAULT_KEYSPACE`` keys and grows to the number of entries written
        by the last memcache_save() on this instance.

        :return: a stored value, or None when none of the probed keys exists
        """
        # getattr keeps this usable on instances that never ran __init__.
        keyspace = max(getattr(self, '_saved', 0), self.DEFAULT_KEYSPACE)
        for num in random.sample(range(keyspace), keyspace):
            ip = self.mcoper.get(str(num))
            if ip is not None:
                return ip
        return None
39+
40+
if __name__ == '__main__':
    # Smoke test: collect proxies with the spider, keep the ones that pass
    # its check, cache them, then print one entry read back from memcached.
    crawler = spider.GetProxyIP(2)
    candidates = crawler.get_ip()
    working = crawler.right_proxies(candidates)
    store = MemcacheOper()
    store.memcache_save(working)
    print(store.memcache_gain())

PROXIES/save_mongodb.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
#!/bin/env python
2+
# -*- coding:utf-8 -*-
3+
# _author:kaliarch
4+
5+
#!/bin/env python
6+
# -*- coding:utf-8 -*-
7+
# _author:kaliarch
8+
9+
10+
11+
import configparser
12+
import spider
13+
from pymongo import MongoClient
14+
15+
class MongodbOper:
    """Store proxy entries in a MongoDB collection and read one back.

    Connection settings come from the ``[mongodb]`` section of ``db.conf``,
    which is looked up relative to the current working directory.  Documents
    go into the ``myset`` collection of the configured database.
    """

    def __init__(self):
        """Read the MongoDB settings from db.conf and open the client."""
        config = configparser.ConfigParser()
        config.read('db.conf')
        self.host = config['mongodb']['HOST']
        self.port = config['mongodb']['PORT']
        self.db = config['mongodb']['DB']
        self.user = config['mongodb']['USER']
        self.pwd = config['mongodb']['PASSWD']
        # Database.authenticate() no longer exists in PyMongo 4, so the
        # credentials are handed to the client instead.  authSource='admin'
        # keeps authenticating against the admin database as before.  The
        # client connects lazily, so bad credentials surface on the first
        # operation rather than here.
        self.client = MongoClient(
            self.host,
            int(self.port),
            username=self.user,
            password=self.pwd,
            authSource='admin',
        )
        # Kept so code that reads this attribute keeps working.
        self.db_auth = self.client.admin
        self.DB = self.client[self.db]
        self.collection = self.DB.myset

    def mongodb_save(self, result_list):
        """Insert every entry of result_list into the collection.

        :param result_list: iterable of dict documents
        :return: None
        """
        documents = list(result_list)
        # insert_many() rejects an empty batch, so skip the call entirely.
        if documents:
            self.collection.insert_many(documents)

    def mongodb_gain(self):
        """Return a single document from the collection.

        :return: the result of ``find_one()`` (None when nothing matches)
        """
        return self.collection.find_one()
51+
52+
if __name__ == '__main__':
    # Smoke test: collect proxies with the spider, keep the ones that pass
    # its check, insert them, then print one document read back from MongoDB.
    crawler = spider.GetProxyIP(2)
    candidates = crawler.get_ip()
    working = crawler.right_proxies(candidates)
    store = MongodbOper()
    store.mongodb_save(working)
    print(store.mongodb_gain())

PROXIES/save_mysql.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
#!/bin/env python
2+
# -*- coding:utf-8 -*-
3+
# _author:kaliarch
4+
5+
import pymysql
6+
import configparser
7+
import spider
8+
9+
10+
class MysqlOper:
    """Write proxy entries into the MySQL table configured in db.conf.

    Connection settings come from the ``[mysql]`` section of ``db.conf``,
    which is looked up relative to the current working directory.
    """

    def __init__(self, result_list):
        """Read the MySQL settings from db.conf and remember the data to save.

        :param result_list: iterable of dicts mapping protocol -> proxy URL
        """
        config = configparser.ConfigParser()
        config.read('db.conf')
        self.host = config['mysql']['HOST']
        self.port = int(config['mysql']['PORT'])
        self.user = config['mysql']['USER']
        self.passwd = config['mysql']['PASSWD']
        self.db = config['mysql']['DB']
        self.table = config['mysql']['TABLE']
        self.charset = config['mysql']['CHARSET']
        self.result_list = result_list

    def mysql_save(self):
        """Create the table if needed and insert every (protocol, content) pair.

        Rows are inserted one by one so that a single failing row is reported
        and skipped instead of aborting the batch.  The inserts are committed
        once at the end, and the connection is always closed.

        :return: None
        :raises SystemExit: with status 1 when the connection cannot be opened
        """
        try:
            # Keyword arguments: PyMySQL 1.0 no longer accepts positional ones.
            conn = pymysql.connect(
                host=self.host,
                user=self.user,
                password=self.passwd,
                database=self.db,
                port=self.port,
                charset=self.charset,
            )
        except Exception as e:
            print("connect dbserver fail,Please see information:")
            print(e)
            raise SystemExit(1)

        # Identifiers cannot be bound as query parameters, so the configured
        # table name is backtick-quoted by hand (embedded backticks doubled).
        table = '`' + self.table.replace('`', '``') + '`'
        create_sql = (
            'create table if not exists ' + table +
            ' (id int unsigned not null primary key auto_increment,'
            ' protocol varchar(10),content varchar(50))'
        )
        insert_sql = 'insert into ' + table + ' (protocol,content) values (%s,%s)'

        try:
            cursor = conn.cursor()
            try:
                cursor.execute(create_sql)
                for values in self.result_list:
                    for prot, cont in values.items():
                        try:
                            cursor.execute(insert_sql, [prot, cont])
                        except pymysql.MySQLError as e:
                            print("insert db occer error", e)
                # PyMySQL does not autocommit by default; without this the
                # inserted rows are discarded when the connection closes.
                conn.commit()
            finally:
                cursor.close()
        finally:
            conn.close()
57+
58+
59+
if __name__ == "__main__":
    # Collect proxies with the spider, keep the ones that pass its check,
    # and write them to MySQL.
    crawler = spider.GetProxyIP(3)
    candidates = crawler.get_ip()
    working = crawler.right_proxies(candidates)
    MysqlOper(working).mysql_save()

PROXIES/save_redis.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
#!/bin/env python
2+
# -*- coding:utf-8 -*-
3+
# _author:kaliarch
4+
5+
import redis
6+
import random
7+
import configparser
8+
import spider
9+
10+
class RedisOper:
    """Store proxy entries in Redis and read one back at random.

    Connection settings come from the ``[redis]`` section of ``db.conf``,
    which is looked up relative to the current working directory.
    """

    # redis_gain historically drew from the keys 0..10; keep that as the
    # minimum probe range so entries written by earlier runs stay reachable.
    DEFAULT_KEYSPACE = 11

    def __init__(self):
        """Read the Redis settings from db.conf and build the client objects."""
        config = configparser.ConfigParser()
        config.read('db.conf')
        self.host = config['redis']['HOST']
        self.port = config['redis']['PORT']
        self.passwd = config['redis']['PASSWD']
        self.pool = redis.ConnectionPool(
            host=self.host, port=int(self.port), password=self.passwd)
        self.redis_helper = redis.Redis(connection_pool=self.pool)
        self.pipe = self.redis_helper.pipeline(transaction=True)
        # Number of entries written by the last redis_save() call.
        self._saved = 0

    def redis_save(self, result_list):
        """Store every entry of result_list under its index as the key.

        Entries are JSON-encoded because current redis-py only accepts
        bytes, str, int and float values and rejects dicts.  The SET commands
        are queued on the pipeline and sent together by ``execute()``;
        previously they bypassed the pipeline, which made ``execute()`` a
        no-op.

        :param result_list: iterable of JSON-serialisable values
        :return: None
        """
        import json

        count = 0
        for num, cont in enumerate(result_list):
            self.pipe.set(num, json.dumps(cont))
            count = num + 1
        self.pipe.execute()
        self._saved = count

    def redis_gain(self):
        """Return one stored entry chosen at random.

        The candidate keys are probed in random order and the first one that
        holds a value wins.  The probe range covers at least
        ``DEFAULT_KEYSPACE`` keys and grows to the number of entries written
        by the last redis_save() on this instance.

        :return: the decoded entry; the raw stored value when it is not valid
            JSON (e.g. written by an older version); None when none of the
            probed keys exists
        """
        import json

        # getattr keeps this usable on instances that never ran __init__.
        keyspace = max(getattr(self, '_saved', 0), self.DEFAULT_KEYSPACE)
        for num in random.sample(range(keyspace), keyspace):
            raw = self.redis_helper.get(num)
            if raw is None:
                continue
            try:
                text = raw.decode('utf-8') if isinstance(raw, bytes) else raw
                return json.loads(text)
            except ValueError:
                # Covers both undecodable bytes and non-JSON payloads.
                return raw
        return None
44+
45+
if __name__ == '__main__':
    # The scrape-and-save steps are disabled here; this only reads one entry
    # back from Redis and prints it.
    store = RedisOper()
    print(store.redis_gain())

PROXIES/spider.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
#!/bin/env python
2+
# -*- coding:utf-8 -*-
3+
# _author:kaliarch
4+
5+
import requests
6+
from bs4 import BeautifulSoup
7+
import random
8+
9+
class GetProxyIP:
    """Scrape proxy addresses from the listing site and keep the working ones."""

    # Check targets are grouped by scheme: requests only routes a request
    # through a proxy whose mapping key matches the target URL's scheme, so
    # a mismatched pair would "succeed" without ever touching the proxy.
    HTTP_CHECK_URLS = ('http://www.baidu.com', 'http://www.taobao.com')
    HTTPS_CHECK_URLS = ('https://cloud.tencent.com/',)
    # Seconds to wait for a listing page before giving up.
    PAGE_TIMEOUT = 10

    def __init__(self, page=10):
        """
        :param page: number of listing pages to scrape, starting at page 1
        """
        self._page = page
        self.url_head = 'http://www.xicidaili.com/wt/'

    def get_ip(self):
        """
        Scrape the listing pages and build the raw proxy pool.

        :return: list of ``scheme://host:port`` strings
        :raises requests.RequestException: when a listing page cannot be fetched
        """
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36"}
        res_pool = []
        # +1 so that page=N really covers pages 1..N (page=1 used to fetch nothing).
        for pagenum in range(1, self._page + 1):
            url = self.url_head + str(pagenum)
            response = requests.get(url, headers=headers, timeout=self.PAGE_TIMEOUT)
            soup = BeautifulSoup(response.text, "html.parser")
            for row in soup.find_all('tr'):
                cells = row.find_all('td')
                # Rows with fewer than six cells (e.g. headers) carry no proxy.
                if len(cells) < 6:
                    continue
                # Cell 5 is used as the scheme, cells 1 and 2 as host and port.
                scheme = cells[5].text.strip().lower()
                host = cells[1].text.strip()
                port = cells[2].text.strip()
                res_pool.append(scheme + '://' + host + ':' + port)
        return res_pool

    def right_proxies(self, res_pool):
        """
        Keep only the proxies that answer a test request successfully.

        :param res_pool: iterable of ``scheme://host:port`` strings
        :return: list of requests-style proxy mappings, ``{'http': url}`` or
            ``{'https': url}`` depending on the URL's scheme
        """
        right_pool = []
        for ip in res_pool:
            if ip.startswith('https://'):
                proxies = {'https': ip}
                check_urls = self.HTTPS_CHECK_URLS
            else:
                proxies = {'http': ip}
                check_urls = self.HTTP_CHECK_URLS
            try:
                response = requests.get(random.choice(check_urls), proxies=proxies, timeout=1)
            except requests.RequestException:
                # Unreachable or too slow: drop this proxy.
                continue
            # status_code is always truthy, so test for a non-error status.
            if response.ok:
                right_pool.append(proxies)
                print('add ip %s' % proxies)
        return right_pool
56+
57+
if __name__ == '__main__':
    # Smoke test: scrape the listing, check the proxies, and print the
    # ones that passed.
    crawler = GetProxyIP(2)
    candidates = crawler.get_ip()
    working = crawler.right_proxies(candidates)
    print(working)

README.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,21 @@
11
# my-python-code
2+
## 翻译小工具
3+
> Python实现翻译小工具
4+
5+
blog:[Python实现翻译小工具](http://blog.51cto.com/kaliarch/2072150)
6+
7+
## 汽车票查询
8+
> python搜索汽车票
9+
10+
blog:[python搜索汽车票](http://blog.51cto.com/kaliarch/2071288)
11+
12+
## 电影爬取
13+
> 爬取电影并存储到excel
14+
15+
blog:[爬取搜索出来的电影的下载地址并保存到excel](http://blog.51cto.com/kaliarch/2069544)
16+
17+
## 爬取推荐博客
18+
> 爬取推荐博客
19+
20+
blog:[利用Python搜索51CTO推荐博客并保存至Excel](http://blog.51cto.com/kaliarch/2067103)
21+

0 commit comments

Comments
 (0)