Skip to content

Commit 1a72049

Browse files
committed
Merge pull request egrcc#29 from ecsys/master
增加了获取某用户所有赞过答案的功能
2 parents aa678e6 + 0335af4 commit 1a72049

1 file changed

Lines changed: 56 additions & 0 deletions

File tree

zhihu.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -599,6 +599,62 @@ def get_collections(self):
599599
yield Collection(url, name, self)
600600

601601

602+
def get_likes(self):
    """Yield an ``Answer`` for each answer this user has upvoted.

    The user's profile page is fetched first to inspect the first activity
    item and to obtain the ``_xsrf`` token.  The remaining activities are
    then paged through by POSTing to ``<user_url>/activities``; the
    ``data-time`` value of the last item of each page is sent as the
    ``start`` cursor of the next request, until the server reports an
    empty page.

    Only upvoted answers are handled, zhuanlan articles are not included.
    Within one fetched page duplicate answers are dropped, keeping the
    order in which they first appear.

    Yields:
        Answer: one object per upvoted answer, built from its absolute URL.

    Notes:
        * If ``self.user_url`` is None (anonymous user) a message is
          printed and nothing is yielded.
        * If the profile page contains no activity item, nothing is
          yielded.
        * If the number of ``data-time`` attributes found in a page does
          not match the item count reported by the server, a message is
          printed and iteration stops, because the paging cursor can no
          longer be trusted.
    """
    # This function only handles liked answers, not including zhuanlan articles
    if self.user_url is None:
        print("I'm an anonymous user.")
        return

    site_root = "http://www.zhihu.com"
    liked_marker = u"\u8d5e\u540c\u4e86\u56de\u7b54"
    item_class = 'zm-profile-section-item zm-item clearfix'
    main_class = ('zm-profile-section-main zm-profile-section-activity-main '
                  'zm-profile-activity-page-item-main')

    r = requests.get(self.user_url)
    soup = BeautifulSoup(r.content)

    # Handle the first liked item
    first_item = soup.find("div", attrs={'class': item_class})
    if first_item is None:
        # No activity on the profile page: there is nothing to page
        # through, and no data-time cursor to start from.
        return
    first_main = first_item.find("div", attrs={'class': main_class})
    # Render the tag as text (unicode on Python 2) before the membership
    # test; comparing a unicode marker against a UTF-8 byte string relies
    # on the interpreter's default encoding.
    if liked_marker in u"%s" % (first_main,):
        first_like = first_main.find("a")['href']
        yield Answer(site_root + first_like)

    # Handle the rest liked items
    post_url = self.user_url + "/activities"
    _xsrf = soup.find("input", attrs={'name': '_xsrf'})["value"]
    header = {
        'Host': "www.zhihu.com",
        'Referer': self.user_url,
        'User-Agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36",
    }
    # Capture the answer path directly instead of slicing the match at a
    # fixed offset, and accept ids of any length: a fixed \d{8} truncates
    # longer ids into a wrong URL and misses shorter ones.
    liked_answer_re = re.compile(
        liked_marker
        + r'\n\n<a class="question_link" target="_blank" '
          r'href="(/question/\d+/answer/\d+)'
    )
    data_time_re = re.compile(r'data-time="(\d+)"')

    start_time = first_item["data-time"]
    while True:
        data = {
            'start': start_time,
            '_xsrf': _xsrf,
        }
        r = requests.post(post_url, data=data, headers=header)
        msg = r.json()["msg"]  # parse the response body only once
        response_size = msg[0]
        response_html = msg[1]
        if not response_size > 0:
            return

        # Per-page, order-preserving de-duplication.
        seen = set()
        for answer_path in liked_answer_re.findall(response_html):
            if answer_path in seen:
                continue
            seen.add(answer_path)
            yield Answer(site_root + answer_path)

        data_times = data_time_re.findall(response_html)
        if len(data_times) != response_size:
            print("读取activities栏时间信息时发生错误,可能因为某答案中包含data-time信息")
            return
        # The last item's timestamp is the cursor for the next page.
        start_time = data_times[response_size - 1]
655+
656+
657+
602658
class Answer:
603659
answer_url = None
604660
# session = None

0 commit comments

Comments
 (0)