Skip to content

Commit 7a7430e

Browse files
committed
Merge pull request egrcc#15 from LuoZijun/patch-3
独立出身份模块以及改善zhihu.py中的身份信息读取
2 parents c828f71 + 55a7159 commit 7a7430e

3 files changed

Lines changed: 293 additions & 273 deletions

File tree

auth.py

Lines changed: 231 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,231 @@
1+
#!/usr/bin/env python
2+
#-*- coding:utf-8 -*-
3+
4+
# Built-in / Std
5+
import os, sys, time, platform, random
6+
import re, json, cookielib
7+
8+
# requirements
9+
import requests, termcolor
10+
# try:
11+
# from bs4 import BeautifulSoup
12+
# except:
13+
# import BeautifulSoup
14+
# Darwin platform
15+
# BeautifulSoup = BeautifulSoup.BeautifulSoup
16+
17+
# Rebind the name `requests` from the imported module to one shared Session,
# so every request made by this module reuses the same cookie jar.
requests = requests.Session()
# Cookies are persisted in LWP format in a file named 'cookies' (relative to
# the current working directory).
requests.cookies = cookielib.LWPCookieJar('cookies')
try:
    requests.cookies.load(ignore_discard=True)
except IOError:
    # Best effort: the cookie file does not exist yet or cannot be parsed
    # (cookielib.LoadError is a subclass of IOError).  Start with an empty
    # jar instead of failing at import time.
    pass
23+
24+
class Logging:
    """Minimal colorized console logger used by this module.

    Every public method prints one line of the form ``<LEVEL>: <msg>`` to
    stdout, with the level label colored per level and the message in white
    (coloring is done by ``termcolor``).  Nothing is printed while
    ``Logging.flag`` is false.
    """

    # Global on/off switch consulted by every logging method.
    flag = True

    @staticmethod
    def _emit(label, color, msg):
        """Print one colorized ``<label>: <msg>`` line if logging is enabled."""
        if Logging.flag:
            print("".join([termcolor.colored(label, color), ": ", termcolor.colored(msg, "white")]))

    @staticmethod
    def error(msg):
        """Log *msg* with a red ERROR label."""
        Logging._emit("ERROR", "red", msg)

    @staticmethod
    def warn(msg):
        """Log *msg* with a yellow WARN label."""
        Logging._emit("WARN", "yellow", msg)

    @staticmethod
    def info(msg):
        """Log *msg* with a magenta INFO label."""
        Logging._emit("INFO", "magenta", msg)

    @staticmethod
    def debug(msg):
        """Log *msg* with a magenta DEBUG label."""
        Logging._emit("DEBUG", "magenta", msg)

    @staticmethod
    def success(msg):
        """Log *msg* with a green SUCCESS label."""
        # The label was previously misspelled "SUCCES".
        Logging._emit("SUCCESS", "green", msg)
48+
49+
# Setting Logging
50+
# Logging is enabled by default; set this to False to silence every Logging.* call.
Logging.flag = True
51+
52+
class LoginPasswordError(Exception):
    """Error for a wrong account/password combination.

    The text is stored on ``self.message`` and is also written to the console
    through ``Logging.error`` when the exception is constructed.

    Args:
        message: Human-readable description (byte or unicode string).  Any
            other type, or an empty string, selects the default message.
    """

    def __init__(self, message=None):
        # Accept byte *and* unicode strings.  The previous check,
        # ``type(message) != type("")``, rejected unicode literals on
        # Python 2 and silently replaced them with the default text.
        if not isinstance(message, (type(""), type(u""))) or not message:
            message = u"帐号密码错误"
        self.message = message
        # Populate ``args`` so the text is visible in tracebacks and repr().
        Exception.__init__(self, message)
        Logging.error(self.message)
57+
58+
class NetworkError(Exception):
    """Error for an HTTP request that did not succeed.

    The text is stored on ``self.message`` and is also written to the console
    through ``Logging.error`` when the exception is constructed.

    Args:
        message: Human-readable description (byte or unicode string).  Any
            other type, or an empty string, selects the default message.
    """

    def __init__(self, message=None):
        # Accept byte *and* unicode strings.  The previous check,
        # ``type(message) != type("")``, rejected unicode literals on
        # Python 2 and silently replaced them with the default text.
        if not isinstance(message, (type(""), type(u""))) or not message:
            message = u"网络异常"
        self.message = message
        # Populate ``args`` so the text is visible in tracebacks and repr().
        Exception.__init__(self, message)
        Logging.error(self.message)
63+
class AccountError(Exception):
    """Error for an account name that is neither a phone number nor an email.

    The text is stored on ``self.message`` and is also written to the console
    through ``Logging.error`` when the exception is constructed.

    Args:
        message: Human-readable description (byte or unicode string).  Any
            other type, or an empty string, selects the default message.
    """

    def __init__(self, message=None):
        # Accept byte *and* unicode strings.  The previous check,
        # ``type(message) != type("")``, rejected unicode literals on
        # Python 2 and silently replaced them with the default text.
        if not isinstance(message, (type(""), type(u""))) or not message:
            message = u"帐号类型错误"
        self.message = message
        # Populate ``args`` so the text is visible in tracebacks and repr().
        Exception.__init__(self, message)
        Logging.error(self.message)
68+
69+
70+
71+
72+
73+
def download_captcha():
    """Download a captcha image, show it to the user and return their answer.

    The image is fetched through the shared session, written to
    ``verify.<ext>`` in the current working directory and handed to a
    platform-specific viewer.  If no viewer can be started, the user is told
    where the file is so it can be opened by hand.

    Returns:
        The text typed by the user at the prompt.

    Raises:
        NetworkError: if the captcha request does not return HTTP 200.
    """
    # Local import: only this function needs subprocess.
    import subprocess

    url = "http://www.zhihu.com/captcha.gif"
    r = requests.get(url, params={"r": random.random()})
    if int(r.status_code) != 200:
        raise NetworkError(u"验证码请求失败")

    # The extension comes from a server-controlled header: drop parameters
    # such as "; charset=..." and keep only alphanumerics so it can never
    # inject path separators or extra command text.
    content_type = r.headers.get('content-type', '')
    subtype = content_type.split(";")[0].strip().partition("/")[2]
    extension = re.sub(r"[^0-9A-Za-z]", "", subtype) or "gif"
    image_name = u"verify." + extension

    with open(image_name, "wb") as image_file:
        image_file.write(r.content)

    # System platform: https://docs.python.org/2/library/platform.html
    viewers = {
        "Linux": "see",
        "Darwin": "open",
        "SunOS": "open",
        "FreeBSD": "open",
        "Unix": "open",
        "OpenBSD": "open",
        "NetBSD": "open",
    }
    system = platform.system()
    opened = False
    try:
        if system == "Windows":
            # "open" is not a Windows command; let the shell association
            # pick the default image viewer instead.
            os.startfile(image_name)
            opened = True
        elif system in viewers:
            # Argument list + no shell: the file name is never parsed as
            # shell syntax.  Popen returns immediately, like the former "&".
            with open(os.devnull, "rb") as devnull:
                subprocess.Popen([viewers[system], image_name], stdin=devnull)
            opened = True
    except OSError:
        # Viewer command missing or not executable: fall back to the hint.
        opened = False

    if not opened:
        Logging.info(u"我们无法探测你的作业系统,请自行打开验证码 %s 文件,并输入验证码。" % os.path.join(os.getcwd(), image_name) )

    captcha_code = raw_input( termcolor.colored("请输入验证码: ", "cyan") )
    return captcha_code
104+
105+
def search_xsrf():
    """Fetch the zhihu.com front page and extract the ``_xsrf`` form token.

    Returns:
        The token string taken from the first hidden ``_xsrf`` input, or
        ``None`` (after logging a notice) when no such input is found.

    Raises:
        NetworkError: if the front page request does not return HTTP 200.
    """
    url = "http://www.zhihu.com/"
    r = requests.get(url)
    if int(r.status_code) != 200:
        # The previous message was copied from the captcha download and
        # wrongly reported a captcha failure.
        raise NetworkError(u"首页请求失败,无法提取 XSRF 代码")

    # Capture up to the closing quote only; a greedy \S+ could swallow
    # following attributes when the markup has no whitespace after the value.
    match = re.search(r'<input\stype="hidden"\sname="_xsrf"\svalue="([^"\s]+)"', r.text)
    if match is None:
        Logging.info(u"提取XSRF 代码失败" )
        return None
    return match.group(1)
115+
116+
def build_form(account, password):
    """Assemble the login form for *account* / *password*.

    An account made of exactly 11 digits is sent under the "phone" key;
    one shaped like ``name@host.tld`` is sent under the "email" key.  The
    XSRF token and a user-entered captcha are then added, in that order.

    Returns:
        A dict with the account field, "password", "remember_me", "_xsrf"
        and "captcha".

    Raises:
        AccountError: if *account* is neither a phone number nor an email.
    """
    if re.match(r"^\d{11}$", account):
        login_field = "phone"
    elif re.match(r"^\S+\@\S+\.\S+$", account):
        login_field = "email"
    else:
        raise AccountError(u"帐号类型错误")

    payload = {"password": password, "remember_me": True}
    payload[login_field] = account

    # Token first, captcha second: the captcha step prompts the user.
    payload['_xsrf'] = search_xsrf()
    payload['captcha'] = download_captcha()
    return payload
127+
128+
def upload_form(form):
    """POST the login *form* and translate the server reply into a dict.

    Returns:
        ``{"result": True}`` when the server answers ``r == 0``;
        ``{"error": {"code", "message", "data"}}`` when it answers ``r == 1``;
        ``{"error": {"code": -1, ...}}`` for any other JSON answer;
        ``{"error": {"code": -2, ...}}`` when the reply is not usable JSON.

    Raises:
        NetworkError: if the POST does not return HTTP 200.
    """
    # NOTE(review): build_form may store the account under "phone", yet the
    # form is always posted to the /login/email endpoint -- confirm that this
    # endpoint accepts phone logins.
    url = "http://www.zhihu.com/login/email"
    headers = {
        'User-Agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36",
        'Host': "www.zhihu.com",
        'Origin': "http://www.zhihu.com",
        'Pragma': "no-cache",
        'Referer': "http://www.zhihu.com/",
        'X-Requested-With': "XMLHttpRequest"
    }

    r = requests.post(url, data=form, headers=headers)
    if int(r.status_code) != 200:
        raise NetworkError(u"表单上传失败!")

    # Compare the media type only, so "application/json; charset=UTF-8"
    # is still recognised as JSON.
    media_type = r.headers.get('content-type', '').split(";")[0].strip().lower()
    result = None
    if media_type == "application/json":
        try:
            result = r.json()
        except ValueError:
            # Body announced as JSON but not decodable.
            result = None

    if not isinstance(result, dict):
        Logging.warn(u"无法解析服务器的响应内容: \n \t %s " % r.text )
        return {"error": {"code": -2, "message": u"parse error"} }

    status = result.get("r")
    if status == 0:
        Logging.success(u"登录成功!" )
        return {"result": True}
    if status == 1:
        # A failed login is an error, not a success message.
        Logging.error(u"登录失败!" )
        return {
            "error": {
                "code": int(result.get('errcode', -1)),
                "message": result.get('msg', u""),
                "data": result.get('data'),
            }
        }

    Logging.warn(u"表单上传出现未知错误: \n \t %s )" % ( str(result) ) )
    return {"error": {"code": -1, "message": u"unknow error"} }
157+
158+
159+
def islogin():
    """Report whether the shared session is currently logged in.

    Requests the profile settings page without following redirects.

    Returns:
        True on HTTP 200, False on a 301/302 redirect (not logged in), and
        None (after logging a warning) for any other status code.
    """
    response = requests.get("http://www.zhihu.com/settings/profile", allow_redirects=False)
    code = int(response.status_code)

    if code == 200:
        return True
    if code in (301, 302):
        # Redirected away from the settings page: not logged in.
        return False

    Logging.warn(u"网络故障")
    return None
172+
173+
174+
def read_account_from_config_file(config_file="config.ini"):
    """Read the login credentials from the ``[info]`` section of an INI file.

    Args:
        config_file: Path of the INI file holding ``email`` and ``password``
            options under an ``[info]`` section.

    Returns:
        ``(email, password)`` when both options are present and non-empty;
        ``(None, None)`` when the file is missing, cannot be parsed, lacks
        the section/options, or either value is empty.
    """
    # NOTE: The ConfigParser module has been renamed to configparser in Python 3.
    # https://docs.python.org/2/library/configparser.html
    try:
        from ConfigParser import ConfigParser, Error as ConfigError
    except ImportError:
        from configparser import ConfigParser, Error as ConfigError

    # isfile() is already False for paths that do not exist.
    if not os.path.isfile(config_file):
        Logging.error(u"配置文件加载失败!")
        return (None, None)

    Logging.info(u"正在加载配置文件 ...")
    cf = ConfigParser()
    try:
        cf.read(config_file)
        # raw=True: credentials are literal text, so a '%' in a password
        # must not be treated as an interpolation marker.
        email = cf.get("info", "email", raw=True)
        password = cf.get("info", "password", raw=True)
    except ConfigError:
        # Malformed file, or missing [info] section / option.
        Logging.warn(u"帐号信息无效")
        return (None, None)

    if email == "" or password == "":
        Logging.warn(u"帐号信息无效")
        return (None, None)
    return (email, password)
193+
194+
195+
196+
197+
def login(account=None, password=None):
    """Log the shared session in and persist its cookies.

    Credentials come from the arguments; when *account* is None they are
    read from the config file, and if that yields nothing the user is
    prompted.  A rejected captcha triggers another attempt with the same
    credentials.

    Args:
        account: Email address or 11-digit phone number, or None.
        password: Password for *account*, or None.

    Returns:
        True when already logged in or when the login succeeds, False when
        the server reports any error other than a wrong captcha.

    Raises:
        AccountError: if the account is neither a phone number nor an email.
        NetworkError: if one of the underlying HTTP requests fails.
    """
    if islogin():
        Logging.success(u"你已经登录过咯")
        # Nothing left to do: do not go through the captcha flow again.
        return True

    if account is None:
        (account, password) = read_account_from_config_file()
    if account is None:
        account = raw_input("请输入登录帐号: ")
        password = raw_input("请输入登录密码: ")

    # Loop instead of recursing so that a captcha retry keeps the
    # credentials that were passed in or already entered.
    while True:
        form_data = build_form(account, password)
        # Shape of `result`:
        #   {"result": True}
        #   {"error": {"code": 19855555, "message": "unknow.", "data": "data" } }
        #   {"error": {"code": -1, "message": u"unknow error"} }
        result = upload_form(form_data)

        if "error" in result:
            if result["error"]['code'] == 1991829:
                # Wrong captcha: fetch a new one and try again.
                Logging.error(u"验证码输入错误,请准备重新输入。" )
                continue
            Logging.warn(u"unknow error." )
            return False

        if result.get('result') == True:
            # Login succeeded: persist the session cookies.
            Logging.success(u"登录成功!" )
            requests.cookies.save()
            return True

        # Unexpected result shape: report failure explicitly.
        return False
229+
230+
# Script entry point: run the interactive login when executed directly.
if __name__ == "__main__":
    login()

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
requests
22
beautifulsoup4
3-
html2text
3+
html2text
4+
termcolor

0 commit comments

Comments
 (0)