diff --git a/PROXIES/db.conf b/PROXIES/db.conf
new file mode 100644
index 0000000..1066d95
--- /dev/null
+++ b/PROXIES/db.conf
@@ -0,0 +1,31 @@
+[mysql]
+
+HOST = 172.20.6.100
+PORT = 3306
+USER = root
+PASSWD = mysqladmin
+DB = pydb
+TABLE = pytab
+CHARSET = utf8
+
+
+[redis]
+
+HOST = 172.20.6.100
+PORT = 6379
+PASSWD = redisadmin
+
+
+[memcache]
+
+HOST = 172.20.6.100
+PORT = 11211
+
+
+[mongodb]
+
+HOST = 172.20.6.100
+PORT = 27017
+DB = db1
+USER = mongoadmin
+PASSWD = mongopwd
diff --git a/PROXIES/save_memcache.py b/PROXIES/save_memcache.py
new file mode 100644
index 0000000..8edf889
--- /dev/null
+++ b/PROXIES/save_memcache.py
@@ -0,0 +1,62 @@
+#!/bin/env python
+# -*- coding:utf-8 -*-
+# _author:kaliarch
+
+import os
+import memcache
+import random
+import configparser
+import spider
+
+# db.conf lives next to this script; resolve it so the cwd does not matter
+CONF_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'db.conf')
+# key holding how many proxies the last memcache_save wrote
+COUNT_KEY = 'proxy_count'
+
+class MemcacheOper:
+
+    def __init__(self):
+        """
+        read the [memcache] section of db.conf and create the client
+        :param
+        """
+        config = configparser.ConfigParser()
+        config.read(CONF_PATH)
+        self.host = config['memcache']['HOST']
+        self.port = config['memcache']['PORT']
+        self.mcoper = memcache.Client([self.host+':'+self.port], debug = True)
+
+    def memcache_save(self,result_list):
+        """
+        save each proxy under its index ('0', '1', ...) and record the total
+        :param result_list: proxies to store
+        :return:None
+        """
+        count = 0
+        for num,cont in enumerate(result_list):
+            self.mcoper.set(str(num),cont)
+            count = num + 1
+        # memcache_gain uses the total to pick only keys that were written
+        self.mcoper.set(COUNT_KEY,count)
+
+    def memcache_gain(self):
+        """
+        gain one random stored proxy
+        :return: proxies, or None when nothing is stored
+        """
+        count = self.mcoper.get(COUNT_KEY)
+        if count is None:
+            # data saved before the total was recorded: keep the old 0-10 guess
+            count = 11
+        if count < 1:
+            return None
+        return self.mcoper.get(str(random.randrange(count)))
+
+if __name__ == '__main__':
+    proxyhelper = spider.GetProxyIP(2)
+    res_pool = proxyhelper.get_ip()
+    proxy_ip = proxyhelper.right_proxies(res_pool)
+    dbhelper = MemcacheOper()
+    dbhelper.memcache_save(proxy_ip)
+    ip = dbhelper.memcache_gain()
+    print(ip)
diff --git a/PROXIES/save_mongodb.py b/PROXIES/save_mongodb.py
new file mode 100644
index 0000000..0f76994
--- /dev/null
+++ b/PROXIES/save_mongodb.py
@@ -0,0 +1,59 @@
+#!/bin/env python
+# -*- coding:utf-8 -*-
+# _author:kaliarch
+
+#!/bin/env python
+# -*- coding:utf-8 -*-
+# _author:kaliarch
+
+
+
+import os
+import configparser
+import spider
+from pymongo import MongoClient
+
+class MongodbOper:
+
+    def __init__(self):
+        """
+        read the [mongodb] section of db.conf and connect to the server
+        """
+        conf = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'db.conf')
+        config = configparser.ConfigParser()
+        config.read(conf)
+        self.host = config['mongodb']['HOST']
+        self.port = config['mongodb']['PORT']
+        self.db = config['mongodb']['DB']
+        self.user = config['mongodb']['USER']
+        self.pwd = config['mongodb']['PASSWD']
+        # Database.authenticate() was removed in pymongo 4; pass the credentials here
+        self.client = MongoClient(self.host, int(self.port), username=self.user,
+                                  password=self.pwd, authSource='admin')
+        self.db_auth = self.client.admin
+        self.DB = self.client[self.db]
+        self.collection = self.DB.myset
+
+    def mongodb_save(self,result_list):
+        """
+        save every proxy as one document
+        :return:None
+        """
+        for values in result_list:
+            # insert_one adds an _id key to its argument, so hand it a copy
+            self.collection.insert_one(dict(values))
+
+    def mongodb_gain(self):
+        """
+        gain one stored proxy without its _id field, None when the collection is empty
+        """
+        return self.collection.find_one({}, {'_id': 0})
+
+if __name__ == '__main__':
+    proxyhelper = spider.GetProxyIP(2)
+    res_pool = proxyhelper.get_ip()
+    proxy_ip = proxyhelper.right_proxies(res_pool)
+    dbhelper = MongodbOper()
+    dbhelper.mongodb_save(proxy_ip)
+    ip = dbhelper.mongodb_gain()
+    print(ip)
diff --git a/PROXIES/save_mysql.py b/PROXIES/save_mysql.py
new file mode 100644
index 0000000..f7c40c7
--- /dev/null
+++ b/PROXIES/save_mysql.py
@@ -0,0 +1,81 @@
+#!/bin/env python
+# -*- coding:utf-8 -*-
+# _author:kaliarch
+
+import os
+import pymysql
+import configparser
+import spider
+
+
+class MysqlOper:
+    # initial database information
+    def __init__(self, result_list):
+        """
+        read the [mysql] section of db.conf and keep the proxies to save
+        :param result_list: list of proxies dicts ({protocol: address})
+        """
+        conf = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'db.conf')
+        config = configparser.ConfigParser()
+        config.read(conf)
+        self.host = config['mysql']['HOST']
+        self.port = int(config['mysql']['PORT'])
+        self.user = config['mysql']['USER']
+        self.passwd = config['mysql']['PASSWD']
+        self.db = config['mysql']['DB']
+        self.table = config['mysql']['TABLE']
+        self.charset = config['mysql']['CHARSET']
+        self.result_list = result_list
+
+    def mysql_save(self):
+        """
+        create the table and insert every proxy into it
+        :return: 0 when the table already exists (nothing is written), else None
+        """
+        # create db cursor
+        try:
+            # keyword arguments: PyMySQL 1.0 no longer accepts positional ones
+            DB = pymysql.connect(host=self.host, user=self.user, password=self.passwd,
+                                 database=self.db, port=self.port, charset=self.charset)
+            cursor = DB.cursor()
+        except Exception as e:
+            print("connect dbserver fail,Please see information:")
+            print(e)
+            raise SystemExit(1)
+
+        try:
+            # check and create tables (names come from db.conf, not hard-coded)
+            cursor.execute('show tables in `%s`' % self.db)
+            tables = cursor.fetchall()
+            flag = True
+            for tab in tables:
+                if self.table in tab:
+                    flag = False
+                    print('%s is exist' % self.table)
+            print(flag)
+            if not flag:
+                return 0
+            cursor.execute(
+                '''create table `%s` (id int unsigned not null primary key auto_increment, protocol varchar(10),content varchar(50))''' % self.table)
+
+            # write database
+            insert_sql = "insert into `%s` (protocol,content) value (%%s,%%s);" % self.table
+            for values in self.result_list:
+                for prot, cont in values.items():
+                    try:
+                        cursor.execute(insert_sql, [prot, cont])
+                    except Exception as e:
+                        print("insert db occer error", e)
+            # PyMySQL does not autocommit; without this the inserts are rolled back
+            DB.commit()
+        finally:
+            # release the connection on every path, including the early return
+            DB.close()
+
+
+if __name__ == "__main__":
+    proxyhelper = spider.GetProxyIP(3)
+    res_pool = proxyhelper.get_ip()
+    proxy_ip = proxyhelper.right_proxies(res_pool)
+    dbhelper = MysqlOper(proxy_ip)
+    dbhelper.mysql_save()
diff --git a/PROXIES/save_redis.py b/PROXIES/save_redis.py
new file mode 100644
index 0000000..5c7bb91
--- /dev/null
+++ b/PROXIES/save_redis.py
@@ -0,0 +1,73 @@
+#!/bin/env python
+# -*- coding:utf-8 -*-
+# _author:kaliarch
+
+import os
+import json
+import redis
+import random
+import configparser
+import spider
+
+# key holding how many proxies the last redis_save wrote
+COUNT_KEY = 'proxy_count'
+
+class RedisOper:
+
+    def __init__(self):
+        """
+        read the [redis] section of db.conf and build the connection pool
+        :param
+        """
+        conf = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'db.conf')
+        config = configparser.ConfigParser()
+        config.read(conf)
+        self.host = config['redis']['HOST']
+        self.port = config['redis']['PORT']
+        self.passwd = config['redis']['PASSWD']
+        self.pool = redis.ConnectionPool(host=self.host,port=int(self.port),password=self.passwd)
+        self.redis_helper = redis.Redis(connection_pool=self.pool)
+        self.pipe = self.redis_helper.pipeline(transaction=True)
+
+    def redis_save(self,result_list):
+        """
+        save each proxy as JSON under its index and record the total
+        :param result_list: proxies to store
+        :return:None
+        """
+        count = 0
+        for num,cont in enumerate(result_list):
+            # redis-py 3+ rejects dict values, so store a JSON string
+            self.pipe.set(num,json.dumps(cont))
+            count = num + 1
+        self.pipe.set(COUNT_KEY,count)
+        # commands are queued on the pipeline and sent as one transaction
+        self.pipe.execute()
+
+    def redis_gain(self):
+        """
+        gain one random stored proxy
+        :return: proxies, or None when nothing is stored
+        """
+        count = self.redis_helper.get(COUNT_KEY)
+        # no recorded total means older data: keep the previous 0-10 guess
+        count = 11 if count is None else int(count)
+        if count < 1:
+            return None
+        ip = self.redis_helper.get(random.randrange(count))
+        if ip is None:
+            return None
+        try:
+            return json.loads(ip.decode('utf-8'))
+        except ValueError:
+            # not JSON (written by an older version): return the raw value
+            return ip
+
+if __name__ == '__main__':
+    # proxyhelper = spider.GetProxyIP(2)
+    # res_pool = proxyhelper.get_ip()
+    # proxy_ip = proxyhelper.right_proxies(res_pool)
+    dbhelper = RedisOper()
+    # dbhelper.redis_save(proxy_ip)
+    ip = dbhelper.redis_gain()
+    print(ip)
diff --git a/PROXIES/spider.py b/PROXIES/spider.py
new file mode 100644
index 0000000..9b538c6
--- /dev/null
+++ b/PROXIES/spider.py
@@ -0,0 +1,74 @@
+#!/bin/env python
+# -*- coding:utf-8 -*-
+# _author:kaliarch
+
+import requests
+from bs4 import BeautifulSoup
+import random
+
+class GetProxyIP:
+
+    def __init__(self,page=10):
+        """
+        :param page: number of listing pages to crawl
+        """
+        self._page = page
+        self.url_head = 'http://www.xicidaili.com/wt/'
+
+    def get_ip(self):
+        """
+        get resouce proxy ip pool
+        :return: res_pool list of 'protocol://ip:port' strings
+        """
+        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36"}
+        res_pool = []
+        # pages are numbered from 1; +1 so that `page` pages are really crawled
+        for pagenum in range(1,self._page + 1):
+            url = self.url_head + str(pagenum)
+            try:
+                # a timeout keeps one stalled page from hanging the whole crawl
+                response = requests.get(url, headers=headers, timeout=10)
+            except requests.RequestException as e:
+                print('get %s fail: %s' % (url, e))
+                continue
+            soup = BeautifulSoup(response.text, "html.parser")
+            for item in soup.find_all('tr'):
+                soup_td = item.find_all('td')
+                # rows with fewer than 6 cells carry no proxy entry; skip them
+                if len(soup_td) < 6:
+                    continue
+                res_pool.append(soup_td[5].text.lower() + '://' + soup_td[1].text + ':' + soup_td[2].text)
+        return res_pool
+
+    def right_proxies(self,res_pool,timeout=1):
+        """
+        check available ip
+        :param res_pool: list of 'protocol://ip:port' strings
+        :param timeout: seconds to wait for each check request
+        :return:right_pool list
+        """
+        # requests only routes a url through the proxy registered for that url's
+        # scheme, so every proxy is checked against a url of its own scheme
+        check_urls = {
+            'http': ['http://www.baidu.com', 'http://www.taobao.com'],
+            'https': ['https://cloud.tencent.com/'],
+        }
+        right_pool = []
+        for ip in res_pool:
+            scheme = 'https' if 'https' in ip else 'http'
+            proxies = {scheme: ip}
+            try:
+                response = requests.get(random.choice(check_urls[scheme]), proxies=proxies, timeout = timeout)
+            except Exception:
+                # best effort: any failure just means this proxy is unusable
+                continue
+            # status_code is always truthy, so the old test accepted error pages
+            if response.status_code == 200:
+                right_pool.append(proxies)
+                print('add ip %s' % proxies)
+        return right_pool
+
+if __name__ == '__main__':
+    proxyhelper = GetProxyIP(2)
+    res_pool = proxyhelper.get_ip()
+    proxy_ip =proxyhelper.right_proxies(res_pool)
+    print(proxy_ip)
diff --git a/README.md b/README.md
index 082ea2b..bac47fe 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,25 @@
 # my-python-code
+## 翻译小工具(FANYI)
+> Python实现翻译小工具
+
+blog:[Python实现翻译小工具](http://blog.51cto.com/kaliarch/2072150)
+
+## 汽车票查询(XAGLKP)
+> python搜索汽车票
+
+blog:[python搜索汽车票](http://blog.51cto.com/kaliarch/2071288)
+
+## 电影爬取(DYTT8)
+> 爬取电影并存储到excel
+
+blog:[爬取搜索出来的电影的下载地址并保存到excel](http://blog.51cto.com/kaliarch/2069544)
+
+## 爬取推荐博客(51BLOG)
+> 爬取推荐博客
+
+blog:[利用Python搜索51CTO推荐博客并保存至Excel](http://blog.51cto.com/kaliarch/2067103)
+
+## 构建自己的代理库(PROXIES)
+> Python构建自己的代理库
+
+blog:[Python构建自己的代理库](http://blog.51cto.com/kaliarch/2083997)