6060from bs4 import BeautifulSoup
6161import sys
6262
63+ import termcolor
64+
# Python 2 only: reload() re-exposes sys.setdefaultencoding (site.py removes
# it at startup) so that implicit str <-> unicode conversions use UTF-8
# instead of ASCII. The Chinese log messages below rely on this.
reload(sys)
sys.setdefaultencoding('utf8')

# Shared HTTP session; stays None until create_session() assigns it.
session = None

# Cookie name -> value mapping; create_session() checks it for non-empty
# entries when the e-mail/password login is rejected.
cookies = {}
6870
69-
def _log(level, level_color, message, message_color=u"yellow", attrs=None):
    """Print one coloured ``LEVEL: message`` line to the terminal."""
    print("".join([
        termcolor.colored(level, level_color),
        ": ",
        termcolor.colored(message, message_color, attrs=attrs),
    ]))


def _redact(login_data):
    """Return a copy of the login form that is safe to print (password masked)."""
    safe = dict(login_data)
    if "password" in safe:
        safe["password"] = "******"
    return safe


def _is_json_ok(response):
    """Return True for an HTTP 200 response whose media type is JSON."""
    # Compare only the media type: servers commonly append "; charset=...".
    content_type = response.headers.get('content-type', '')
    return (int(response.status_code) == 200
            and content_type.lower().startswith("application/json"))


def _captcha_extension(content_type):
    """Derive a safe file extension from a Content-Type header value."""
    subtype = content_type.split(";")[0].split("/")[-1].strip().lower()
    # The header is server-controlled; never let it inject path or shell
    # metacharacters into the file name.
    return subtype if subtype.isalnum() else "gif"


def _show_image(path):
    """Best-effort: open ``path`` in the platform's default image viewer."""
    import os
    import platform
    import subprocess

    system = platform.uname()[0]
    try:
        # Argument lists (no shell) so the file name is never shell-parsed.
        if system == "Linux":
            subprocess.Popen(["see", path])
        elif system == "Windows":
            os.startfile(path)
        else:
            # Assumed to be OS X, as in the original code.
            subprocess.Popen(["open", path])
    except OSError:
        # A missing viewer must not abort the login; the user can still open
        # the file by hand and type the captcha.
        _log(u"INFO", u"yellow",
             u"Could not launch an image viewer, please open " + path + u" manually",
             u"white")


def _fetch_xsrf(s):
    """Load the home page and return its ``_xsrf`` token, or None if absent.

    Raises:
        Exception: if the home page does not answer with HTTP 200.
    """
    r = s.get(u"http://www.zhihu.com/")
    if int(r.status_code) != 200:
        raise Exception(u"链接失败!")
    found = re.compile(r"\<input\stype=\"hidden\"\sname=\"_xsrf\"\svalue=\"(\S+)\"", re.DOTALL).findall(r.text)
    if found:
        return found[0]
    _log(u"DEBUG", u"yellow", u"XSRF代码提取失败")
    return None


def _post_login(s, header, login_data):
    """POST the login form; return the decoded JSON body, or None on a bad reply."""
    # Never echo the real password to the terminal.
    _log(u"LOGGING", u"green", u"发送登录数据 " + str(_redact(login_data)))
    r = s.post('http://www.zhihu.com/login/email', data=login_data, headers=header)
    if not _is_json_ok(r):
        return None
    return r.json()


def _prompt_for_captcha(s):
    """Download the captcha image, display it and return what the user types.

    Raises:
        Exception: if the captcha image cannot be downloaded.
    """
    import random

    _log(u"LOGGING", u"green", u"正在下载验证码... ")
    r = s.get("http://www.zhihu.com/captcha.gif?r=" + str(random.random()))
    if int(r.status_code) != 200:
        _log(u"DEBUG", u"red", u"验证码下载失败!")
        raise Exception(u"验证码下载失败!")
    path = "verify." + _captcha_extension(r.headers.get('content-type', ''))
    with open(path, "wb") as image_file:
        image_file.write(r.content)
    _show_image(path)
    return raw_input(termcolor.colored(u"请输入验证码: ", "cyan"))


def _fall_back_to_cookies(cookie_map, reason):
    """Report a failed login and require usable cookies from config.ini.

    Raises:
        ValueError: if ``cookie_map`` holds no non-empty cookie value.
    """
    _log(u"ERROR", u"red", u"Login Failed, reason is: " + str(reason), u"white")
    print("".join([
        termcolor.colored(u"INFO", u"yellow"),
        ": ",
        termcolor.colored(u"Use cookies from ", u"white"),
        termcolor.colored(u"config.ini", u"green"),
    ]))
    # '__name__' is ConfigParser bookkeeping, not a real cookie.
    usable = any(key != '__name__' and value != ''
                 for key, value in cookie_map.items())
    if not usable:
        raise ValueError(u"请填写config.ini文件中的cookies项.")


def create_session():
    """Log in to zhihu.com and publish the HTTP session in the module globals.

    Reads the e-mail and password from the ``info`` section of config.ini,
    fetches the XSRF token, posts the login form and, when the server asks
    for one (errcode 1991829), downloads a captcha and retries with the
    user's answer. When the login is rejected, the cookies configured in
    config.ini are required instead.

    Side effects: assigns the module globals ``session`` and ``cookies``;
    may write ``verify.<ext>`` to the working directory and prompt on stdin.

    Returns:
        True once ``session`` has been set.

    Raises:
        Exception: the home page or login endpoint gave an unexpected
            response, or the captcha image could not be downloaded.
        ValueError: the login was rejected and config.ini has no cookies.
    """
    global session
    global cookies

    # NOTE(review): the lines that create `cf` and load `cookies` sit behind
    # a diff hunk boundary and are not visible here. They are reconstructed
    # from the visible `cf.get("info", ...)` calls and the config.ini
    # messages - confirm against the full file.
    import ConfigParser
    cf = ConfigParser.ConfigParser()
    cf.read("config.ini")
    if cf.has_section("cookies"):
        # raw=True: cookie values may contain '%', which must not be
        # treated as ConfigParser interpolation.
        cookies = dict(cf.items("cookies", raw=True))

    email = cf.get("info", "email")
    password = cf.get("info", "password")
    cookies = dict(cookies)

    s = requests.session()
    xsrf = _fetch_xsrf(s)

    header = {
        'User-Agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 ",
        'Host': "www.zhihu.com",
        'Origin': "http://www.zhihu.com",
        'Pragma': "no-cache",
        'Referer': "http://www.zhihu.com/",
        'Content-Type': "application/x-www-form-urlencoded; charset=UTF-8",
        'X-Requested-With': "XMLHttpRequest"
    }
    login_data = {"email": email, "password": password, "remember_me": True, "_xsrf": xsrf}

    result = _post_login(s, header, login_data)
    if result is None:
        # Wrong HTTP status or a non-JSON body.
        raise Exception(u"登录失败!")

    if result["r"] == 1:
        if str(result.get('errcode')) == "1991829":
            # The server demands a captcha: ask the user and log in again.
            captcha = _prompt_for_captcha(s)
            login_data = dict(login_data, captcha=captcha)
            result2 = _post_login(s, header, login_data)
            if result2 is not None and result2["r"] == 0:
                _log(u"INFO", u"green", u"登录成功 ", u"white", attrs=['reverse', 'blink'])
            else:
                # A rejected retry used to be ignored silently; treat it like
                # any other failed login.
                reason = result2 if result2 is not None else u"unexpected HTTP response"
                _fall_back_to_cookies(cookies, reason)
        else:
            # Unknown login error: rely on the cookies from config.ini.
            _fall_back_to_cookies(cookies, result)
    elif int(result['r']) == 0:
        _log(u"INFO", u"green", u"登录成功 ", u"white", attrs=['reverse', 'blink'])
    else:
        raise Exception(u"未知错误.")

    session = s
    return True
105163
106164class Question :
107165 url = None
0 commit comments