Skip to content

Commit 5a95335

Browse files
author
Luo
committed
修正登录问题,支持验证码输入
1 parent c828f71 commit 5a95335

2 files changed

Lines changed: 77 additions & 18 deletions

File tree

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
11
requests
22
beautifulsoup4
3-
html2text
3+
html2text
4+
termcolor

zhihu.py

Lines changed: 75 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -60,13 +60,14 @@
6060
from bs4 import BeautifulSoup
6161
import sys
6262

63+
import termcolor
64+
6365
reload(sys)
6466
sys.setdefaultencoding('utf8')
6567
session = None
6668

6769
cookies = {}
6870

69-
7071
def create_session():
7172
global session
7273
global cookies
@@ -77,31 +78,88 @@ def create_session():
7778
email = cf.get("info", "email")
7879
password = cf.get("info", "password")
7980
cookies = dict(cookies)
80-
81+
xsrf = None
82+
captcha = None
83+
8184
s = requests.session()
82-
login_data = {"email": email, "password": password}
85+
# Fetch XSRF
86+
r = s.get(u"http://www.zhihu.com/")
87+
if int(r.status_code) == 200:
88+
_xsrf = re.compile(r"\<input\stype=\"hidden\"\sname=\"_xsrf\"\svalue=\"(\S+)\"", re.DOTALL).findall(r.text)
89+
if len(_xsrf) > 0:
90+
xsrf = _xsrf[0]
91+
else:
92+
print "".join( [ termcolor.colored(u"DEBUG", u"yellow"), ": ", termcolor.colored(u"XSRF代码提取失败", u"yellow") ] )
93+
else:
94+
raise Exception(u"链接失败!")
95+
96+
# Login
97+
login_data = {"email": email, "password": password, "remember_me": True, "_xsrf": xsrf }
98+
print "".join( [ termcolor.colored(u"LOGGING", u"green"), ": ", termcolor.colored(u"发送登录数据 " + str(login_data) , u"yellow") ] )
99+
83100
header = {
84-
'User-Agent': "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:34.0) Gecko/20100101 Firefox/34.0",
101+
'User-Agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36",
85102
'Host': "www.zhihu.com",
103+
'Origin': "http://www.zhihu.com",
104+
'Pragma': "no-cache",
86105
'Referer': "http://www.zhihu.com/",
106+
'Content-Type': "application/x-www-form-urlencoded; charset=UTF-8",
87107
'X-Requested-With': "XMLHttpRequest"
88108
}
89109

90110
r = s.post('http://www.zhihu.com/login/email', data=login_data, headers=header)
91-
if r.json()["r"] == 1:
92-
print "Login Failed, reason is:"
93-
for m in r.json()["data"]:
94-
print r.json()["data"][m]
95-
print "Use cookies"
96-
has_cookies = False
97-
for key in cookies:
98-
if key != '__name__' and cookies[key] != '':
99-
has_cookies = True
100-
break
101-
if has_cookies == False:
102-
raise ValueError("请填写config.ini文件中的cookies项.")
103-
session = s
111+
if int(r.status_code) == 200 and r.headers['content-type'].lower() == "application/json":
112+
result = r.json()
113+
if result["r"] == 1:
114+
if int(result['errcode']) == 1991829:
115+
# 需要输入验证码
116+
print "".join( [ termcolor.colored(u"LOGGING", u"green"), ": ", termcolor.colored(u"正在下载验证码... " , u"yellow") ] )
117+
import random
118+
_r = s.get("http://www.zhihu.com/captcha.gif?r=" + str(random.random()) )
119+
if int(_r.status_code) == 200:
120+
_ext = _r.headers['content-type'].split("/")[1]
121+
open("verify."+ _ext, "wb").write(_r.content)
122+
import platform
123+
if platform.uname()[0] == "Linux":
124+
os.system("see verify."+_ext + " &")
125+
else:
126+
# OSX 平台?
127+
os.system("open verify."+_ext + " &")
128+
captcha = raw_input(termcolor.colored(u"请输入验证码: ", "cyan") )
129+
# 第二次登录,使用验证码
130+
login_data = {"email": email, "password": password, "remember_me": True, "_xsrf": xsrf, "captcha": captcha }
131+
print "".join( [ termcolor.colored(u"LOGGING", u"green"), ": ", termcolor.colored(u"发送登录数据 " + str(login_data) , u"yellow") ] )
132+
r2 = s.post('http://www.zhihu.com/login/email', data=login_data, headers=header)
133+
if int(r2.status_code) == 200 and r2.headers['content-type'].lower() == "application/json":
134+
result2 = r2.json()
135+
if result2["r"] == 0:
136+
print "".join( [ termcolor.colored(u"INFO", u"green"), ": ", termcolor.colored(u"登录成功 " , u"white", attrs=['reverse', 'blink']) ] )
137+
else:
138+
print "".join( [ termcolor.colored(u"DEBUG", u"red"), ": ", termcolor.colored(u"验证码下载失败!" , u"yellow") ] )
139+
raise Exception(u"验证码下载失败!")
140+
else:
141+
# 未知登录错误, 加载 config.ini 读取cookie 信息
142+
print "".join( [ termcolor.colored(u"ERROR", u"red"), ": ", termcolor.colored(u"Login Failed, reason is: " + str(result) , u"white") ] )
143+
print "".join( [ termcolor.colored(u"INFO", u"yellow"), ": ", termcolor.colored(u"Use cookies from " , u"white"), termcolor.colored(u"config.ini" , u"green")] )
144+
has_cookies = False
145+
for key in cookies:
146+
if key != '__name__' and cookies[key] != '':
147+
has_cookies = True
148+
break
149+
if has_cookies == False:
150+
raise ValueError(u"请填写config.ini文件中的cookies项.")
104151

152+
elif int(result['r']) == 0:
153+
print "".join( [ termcolor.colored(u"INFO", u"green"), ": ", termcolor.colored(u"登录成功 " , u"white", attrs=['reverse', 'blink']) ] )
154+
else:
155+
raise Exception(u"未知错误.")
156+
else:
157+
# HTTP CODE ERROR.
158+
raise Exception(u"登录失败!")
159+
160+
# The End.
161+
session = s
162+
return True
105163

106164
class Question:
107165
url = None

0 commit comments

Comments
 (0)