@@ -654,6 +654,22 @@ def get_followers_num(self):
654654 .find_all ("a" )[1 ].strong .string )
655655 return followers_num
656656
def get_topics_num(self):
    """Return the number of topics this user follows.

    For an anonymous user (``self.user_url`` is None) a notice is printed
    and 0 is returned.  Otherwise the count is read from the second
    ``div.zm-profile-side-section-title`` element of the parsed profile
    page, calling ``self.parser()`` first when ``self.soup`` has not been
    populated yet.

    Returns:
        int: the digits found in the section title, or 0 when the title
        contains no digits at all.
    """
    if self.user_url is None:
        # Call form prints the same text under the Python 2 print statement.
        print("I'm anonymous user.")
        return 0

    if self.soup is None:
        self.parser()

    section_titles = self.soup.find_all(
        "div", class_="zm-profile-side-section-title")
    label = section_titles[1].strong.string

    # Keep ASCII digits only; the surrounding title text is discarded.
    digits = "".join(ch for ch in label if ch in "0123456789")

    # int("") raises ValueError, so a digit-free title is reported as zero.
    return int(digits) if digits else 0
672+
657673 def get_agree_num (self ):
658674 if self .user_url == None :
659675 print "I'm anonymous user."
@@ -812,6 +828,55 @@ def get_followers(self):
812828 user_link = follower_soup .find ("h2" , class_ = "zm-list-content-title" ).a
813829 yield User (user_link ["href" ], user_link .string .encode ("utf-8" ))
814830
def get_topics(self):
    """Yield the names of the topics this user follows.

    Nothing is yielded for an anonymous user (``self.user_url`` is None,
    a notice is printed) or when ``self.get_topics_num()`` reports 0.

    The first 20 names are read from a GET of ``<user_url>/topics``.
    Each further batch of 20 is requested with a POST to the same URL,
    carrying the page's ``_xsrf`` token and an ``offset``; element 1 of the
    ``"msg"`` entry in the JSON reply is parsed as HTML.

    Yields:
        The text of each topic's ``<strong>`` element, encoded as UTF-8.
    """
    if self.user_url is None:
        # Call form prints the same text under the Python 2 print statement.
        print("I'm anonymous user.")
        return

    topics_num = self.get_topics_num()
    if topics_num == 0:
        return

    page_size = 20
    item_class = "zm-profile-section-item zg-clear"
    topics_url = self.user_url + "/topics"
    headers = {
        'User-Agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36",
        'Host': "www.zhihu.com",
        'Origin': "http://www.zhihu.com",
        'Pragma': "no-cache",
        'Referer': "http://www.zhihu.com/"
    }
    # NOTE(review): verify=False turns off TLS certificate verification;
    # kept as in the original, confirm this is intended.
    r = requests.get(topics_url, headers=headers, verify=False)
    soup = BeautifulSoup(r.content, "lxml")

    # First page: the topics are embedded in the HTML of the GET response.
    # Slicing (rather than indexing) tolerates a page that holds fewer
    # items than the reported count.
    topic_list = soup.find_all("div", class_=item_class)
    for topic in topic_list[:min(topics_num, page_size)]:
        yield topic.find("strong").string.encode("utf-8")

    # Floor division keeps the page count an integer on Python 2 and 3.
    page_count = (topics_num - 1) // page_size + 1
    if page_count < 2:
        return

    # The token and the POST headers are the same for every later page.
    _xsrf = soup.find("input", attrs={'name': '_xsrf'})["value"]
    header = {
        'User-Agent': "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:34.0) Gecko/20100101 Firefox/34.0",
        'Host': "www.zhihu.com",
        'Referer': topics_url
    }
    for page in range(1, page_count):
        offset = page * page_size
        data = {
            '_xsrf': _xsrf,
            'offset': offset,
            'start': 0
        }
        r_post = requests.post(topics_url, data=data, headers=header, verify=False)

        topic_data = r_post.json()["msg"][1]
        topic_soup = BeautifulSoup(topic_data, "lxml")
        topic_list = topic_soup.find_all("div", class_=item_class)
        # The last page may be partial: cap at the topics still unread.
        for topic in topic_list[:min(topics_num - offset, page_size)]:
            yield topic.find("strong").string.encode("utf-8")
879+
815880 def get_asks (self ):
816881 """
817882 By ecsys (https://github.com/ecsys)
0 commit comments