Skip to content

Commit 2b43364

Browse files
committed
topics
1 parent bc7491f commit 2b43364

2 files changed

Lines changed: 69 additions & 0 deletions

File tree

test.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ def user_test(user_url):
128128
followees = user.get_followees()
129129
# 获取关注该用户的人
130130
followers = user.get_followers()
131+
topics = user.get_topics()
131132
# 获取该用户提的问题
132133
asks = user.get_asks()
133134
# 获取该用户回答的问题的答案
@@ -166,6 +167,9 @@ def user_test(user_url):
166167
if i == 41:
167168
break
168169

170+
for topic in topics:
171+
print topic
172+
169173
print asks
170174
# <generator object get_ask at 0x7ffcab9db780>
171175
# 代表该用户提的所有问题的生成器对象

zhihu.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -654,6 +654,22 @@ def get_followers_num(self):
654654
.find_all("a")[1].strong.string)
655655
return followers_num
656656

657+
def get_topics_num(self):
    """Return the topic count displayed on the user's profile page.

    The count is read from the ``<strong>`` text of the second
    ``div.zm-profile-side-section-title`` element of the parsed profile
    page (presumably the "topics" sidebar section -- confirm against the
    live page layout). Every ASCII digit in that text is kept, so
    separators or surrounding words are ignored.

    Returns:
        int: the parsed count; 0 for an anonymous user (``user_url`` is
        None) or when the text contains no digits at all.
    """
    if self.user_url is None:
        # Parenthesised form prints the same text on Python 2 and 3.
        print("I'm anonymous user.")
        return 0

    # Parse the profile page lazily, only if it has not been loaded yet.
    if self.soup is None:
        self.parser()

    titles = self.soup.find_all("div", class_="zm-profile-side-section-title")
    label = titles[1].strong.string

    # Keep only ASCII digits; membership in a literal works for both
    # unicode and byte strings, unlike iterating over encoded bytes.
    digits = "".join(ch for ch in label if ch in "0123456789")

    # int("") would raise ValueError, so treat "no digits" as zero topics.
    return int(digits) if digits else 0
672+
657673
def get_agree_num(self):
658674
if self.user_url == None:
659675
print "I'm anonymous user."
@@ -812,6 +828,55 @@ def get_followers(self):
812828
user_link = follower_soup.find("h2", class_="zm-list-content-title").a
813829
yield User(user_link["href"], user_link.string.encode("utf-8"))
814830

831+
def get_topics(self):
    """Yield the names of the topics listed on this user's ``/topics`` page.

    The expected total comes from ``get_topics_num()``. The first batch of
    up to 20 names is read from a GET of ``user_url + "/topics"``; every
    further batch of 20 is fetched by POSTing an ``offset`` (plus the
    page's ``_xsrf`` token) to the same URL and parsing the HTML fragment
    stored at ``["msg"][1]`` of the JSON reply.

    Yields:
        The UTF-8 encoded text of each topic's ``<strong>`` element.
        Nothing is yielded for an anonymous user (``user_url`` is None) or
        when the topic count is 0.
    """
    if self.user_url is None:
        # Parenthesised form prints the same text on Python 2 and 3.
        print("I'm anonymous user.")
        return

    topics_num = self.get_topics_num()
    if topics_num == 0:
        return

    page_size = 20
    item_class = "zm-profile-section-item zg-clear"
    topics_url = self.user_url + "/topics"
    headers = {
        'User-Agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36",
        'Host': "www.zhihu.com",
        'Origin': "http://www.zhihu.com",
        'Pragma': "no-cache",
        'Referer': "http://www.zhihu.com/"
    }
    # NOTE(review): verify=False disables TLS certificate verification in
    # requests; kept unchanged here so existing behaviour is preserved.
    r = requests.get(topics_url, headers=headers, verify=False)
    soup = BeautifulSoup(r.content, "lxml")

    # First batch comes straight from the page. Slicing (instead of
    # indexing 0..n-1) avoids an IndexError when the page contains fewer
    # items than the advertised count, while still capping at that count.
    first_batch = soup.find_all("div", class_=item_class)
    for item in first_batch[:min(topics_num, page_size)]:
        yield item.find("strong").string.encode("utf-8")

    if topics_num <= page_size:
        return

    # The token and headers are identical for every follow-up request, so
    # build them once. The token is only looked up when paging is needed.
    _xsrf = soup.find("input", attrs={'name': '_xsrf'})["value"]
    post_headers = {
        'User-Agent': "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:34.0) Gecko/20100101 Firefox/34.0",
        'Host': "www.zhihu.com",
        'Referer': topics_url
    }

    # Offsets 20, 40, ... below topics_num: one POST per remaining batch.
    for offset in range(page_size, topics_num, page_size):
        data = {
            '_xsrf': _xsrf,
            'offset': offset,
            'start': 0
        }
        r_post = requests.post(topics_url, data=data, headers=post_headers, verify=False)

        topic_data = r_post.json()["msg"][1]
        topic_soup = BeautifulSoup(topic_data, "lxml")
        topic_list = topic_soup.find_all("div", class_=item_class)
        for item in topic_list[:min(topics_num - offset, page_size)]:
            yield item.find("strong").string.encode("utf-8")
879+
815880
def get_asks(self):
816881
"""
817882
By ecsys (https://github.com/ecsys)

0 commit comments

Comments
 (0)