@@ -18,9 +18,12 @@ class Question:
1818
def __init__(self, url, title=None):
    """Bind the question to its URL and, when supplied, its title.

    url   -- must be "http://www.zhihu.com/question/" followed by exactly
             eight more characters; anything else raises ValueError.
    title -- optional; stored as self.title only when it is not None, so
             the attribute stays unset when no title is given.
    """
    prefix = "http://www.zhihu.com/question/"
    # Equivalent to comparing url[0:len(url) - 8] with the prefix, but it
    # spells the rule out: the fixed prefix plus an 8-character tail.
    if not (url.startswith(prefix) and len(url) == len(prefix) + 8):
        raise ValueError("\" " + url + "\" " + " : it isn't a question url.")

    self.url = url
    if title is not None:
        self.title = title
2427
2528 def create_session (self ):
2629 cf = ConfigParser .ConfigParser ()
@@ -84,45 +87,45 @@ def get_topics(self):
8487 topics .append (i .contents [0 ].encode ("utf-8" ).replace ("\n " , "" ))
8588 return topics
8689
87- def get_top_answer (self ):
88-
89- if self .get_answers_num () == 0 :
90- print "No answer."
91- return
92- else :
93- if self .soup == None :
94- self .parser ()
95- soup = BeautifulSoup (self .soup .encode ("utf-8" ))
96- author = None
97- if soup .find ("h3" , class_ = "zm-item-answer-author-wrap" ) == u"匿名用户" :
98- author_url = None
99- author = User (author_url )
100- else :
101- author_tag = soup .find ("h3" , class_ = "zm-item-answer-author-wrap" ).find_all ("a" )[1 ]
102- author_id = author_tag .string .encode ("utf-8" )
103- author_url = "http://www.zhihu.com" + author_tag ["href" ]
104- author = User (author_url , author_id )
105-
106- count = soup .find ("span" , class_ = "count" ).string
107- if count [- 1 ] == "K" :
108- upvote = int (count [0 :(len (count ) - 1 )]) * 1000
109- elif count [- 1 ] == "W" :
110- upvote = int (count [0 :(len (count ) - 1 )]) * 10000
111- else :
112- upvote = int (count )
113-
114- answer_url = "http://www.zhihu.com" + soup .find ("a" , class_ = "answer-date-link" )["href" ]
90+ # def get_top_answer(self):
11591
116- top_answer = soup .find ("div" , class_ = " zm-editable-content clearfix" )
117- soup .body .extract ()
118- soup .head .insert_after (soup .new_tag ("body" , ** {'class' :'zhi' }))
119- soup .body .append (top_answer )
120- img_list = soup .find_all ("img" , class_ = "content_image lazy" )
121- for img in img_list :
122- img ["src" ] = img ["data-actualsrc" ]
123- content = soup
124- answer = Answer (answer_url , self , author , upvote , content )
125- return answer
92+ # if self.get_answers_num() == 0:
93+ # print "No answer."
94+ # return
95+ # else:
96+ # if self.soup == None:
97+ # self.parser()
98+ # soup = BeautifulSoup(self.soup.encode("utf-8"))
99+ # author = None
100+ # if soup.find("h3", class_ = "zm-item-answer-author-wrap") == u"匿名用户":
101+ # author_url = None
102+ # author = User(author_url)
103+ # else:
104+ # author_tag = soup.find("h3", class_ = "zm-item-answer-author-wrap").find_all("a")[1]
105+ # author_id = author_tag.string.encode("utf-8")
106+ # author_url = "http://www.zhihu.com" + author_tag["href"]
107+ # author = User(author_url, author_id)
108+
109+ # count = soup.find("span", class_ = "count").string
110+ # if count[-1] == "K":
111+ # upvote = int(count[0:(len(count) - 1)]) * 1000
112+ # elif count[-1] == "W":
113+ # upvote = int(count[0:(len(count) - 1)]) * 10000
114+ # else:
115+ # upvote = int(count)
116+
117+ # answer_url = "http://www.zhihu.com" + soup.find("a", class_ = "answer-date-link")["href"]
118+
119+ # top_answer = soup.find("div", class_ = " zm-editable-content clearfix")
120+ # soup.body.extract()
121+ # soup.head.insert_after(soup.new_tag("body", **{'class':'zhi'}))
122+ # soup.body.append(top_answer)
123+ # img_list = soup.find_all("img", class_ = "content_image lazy")
124+ # for img in img_list:
125+ # img["src"] = img["data-actualsrc"]
126+ # content = soup
127+ # answer = Answer(answer_url, self, author, upvote, content)
128+ # return answer
126129
127130 def get_all_answers (self ):
128131 if self .get_answers_num () == 0 :
@@ -222,6 +225,21 @@ def get_all_answers(self):
222225 answer = Answer (answer_url , self , author , upvote , content )
223226 yield answer
224227
def get_top_i_answers(self, i):
    """Yield at most the first i answers produced by get_all_answers().

    If fewer than i answers are available, all of them are yielded; a
    zero or negative i yields nothing.

    The underlying generator is advanced exactly as many times as answers
    are yielded. The earlier counter-based loop pulled one extra answer
    (and pulled one even for i <= 0) before stopping.
    """
    # Local import keeps this method self-contained.
    from itertools import islice

    # islice rejects negative or non-integer stops, so normalise first.
    limit = max(int(i), 0)
    for answer in islice(self.get_all_answers(), limit):
        yield answer
238+
def get_top_answer(self):
    """Return the first answer from get_top_i_answers(1), or None if it produces none."""
    first_only = self.get_top_i_answers(1)
    return next(iter(first_only), None)
242+
225243
226244class User :
227245
@@ -232,6 +250,8 @@ class User:
232250 def __init__ (self , user_url , user_id = None ):
233251 if user_url == None :
234252 self .user_id = "匿名用户"
253+ elif user_url [0 :28 ] != "http://www.zhihu.com/people/" :
254+ raise ValueError ("\" " + user_url + "\" " + " : it isn't a user url." )
235255 else :
236256 self .user_url = user_url
237257 if user_id != None :
@@ -257,7 +277,7 @@ def parser(self):
257277
258278 def get_user_id (self ):
259279 if self .user_url == None :
260- print "I'm anonymous user."
280+ # print "I'm anonymous user."
261281 return "匿名用户"
262282 else :
263283 if hasattr (self , "user_id" ):
@@ -627,6 +647,7 @@ def to_txt(self):
627647 if not os .path .isdir (os .path .join (os .path .join (os .getcwd (), "text" ))):
628648 os .makedirs (os .path .join (os .path .join (os .getcwd (), "text" )))
629649 file_name = self .get_question ().get_title () + "--" + self .get_author ().get_user_id () + "的回答.txt"
650+ print file_name
630651 if os .path .exists (os .path .join (os .path .join (os .getcwd (), "text" ), file_name )):
631652 f = open (os .path .join (os .path .join (os .getcwd (), "text" ), file_name ), "a" )
632653 f .write ("\n \n " )
@@ -637,6 +658,7 @@ def to_txt(self):
637658 if not os .path .isdir (os .path .join (os .path .join (os .getcwd (), "text" ))):
638659 os .makedirs (os .path .join (os .path .join (os .getcwd (), "text" )))
639660 file_name = self .get_question ().get_title () + "--" + self .get_author ().get_user_id () + "的回答.txt"
661+ print file_name
640662 f = open (os .path .join (os .path .join (os .getcwd (), "text" ), file_name ), "wt" )
641663 f .write (self .get_question ().get_title () + "\n \n " )
642664 f .write ("作者: " + self .get_author ().get_user_id () + " 赞同: " + str (self .get_upvote ()) + "\n \n " )
@@ -657,6 +679,7 @@ def to_md(self):
657679 content = self .get_content ()
658680 if self .get_author ().get_user_id () == "匿名用户" :
659681 file_name = self .get_question ().get_title () + "--" + self .get_author ().get_user_id () + "的回答.md"
682+ print file_name
660683 if not os .path .isdir (os .path .join (os .path .join (os .getcwd (), "markdown" ))):
661684 os .makedirs (os .path .join (os .path .join (os .getcwd (), "markdown" )))
662685 if os .path .exists (os .path .join (os .path .join (os .getcwd (), "markdown" ), file_name )):
@@ -669,6 +692,7 @@ def to_md(self):
669692 if not os .path .isdir (os .path .join (os .path .join (os .getcwd (), "markdown" ))):
670693 os .makedirs (os .path .join (os .path .join (os .getcwd (), "markdown" )))
671694 file_name = self .get_question ().get_title () + "--" + self .get_author ().get_user_id () + "的回答.md"
695+ print file_name
672696 f = open (os .path .join (os .path .join (os .getcwd (), "markdown" ), file_name ), "wt" )
673697 f .write ("# " + self .get_question ().get_title () + "\n " )
674698 f .write ("## 作者: " + self .get_author ().get_user_id () + " 赞同: " + str (self .get_upvote ()) + "\n " )
@@ -684,12 +708,16 @@ class Collection:
684708 session = None
685709 soup = None
686710
def __init__(self, url, name=None, creator=None):
    """Bind the collection to its URL and any already-known metadata.

    url     -- must be "http://www.zhihu.com/collection/" followed by
               exactly eight more characters; anything else raises
               ValueError.
    name    -- optional; stored as self.name only when not None.
    creator -- optional; stored as self.creator only when not None.
    """
    prefix = "http://www.zhihu.com/collection/"
    # Equivalent to comparing url[0:len(url) - 8] with the prefix, but it
    # spells the rule out: the fixed prefix plus an 8-character tail.
    if not (url.startswith(prefix) and len(url) == len(prefix) + 8):
        raise ValueError("\" " + url + "\" " + " : it isn't a collection url.")

    self.url = url
    if name is not None:
        self.name = name
    if creator is not None:
        self.creator = creator
693721
694722 def create_session (self ):
695723 cf = ConfigParser .ConfigParser ()
@@ -790,4 +818,11 @@ def get_all_answers(self):
790818 yield Answer (answer_url , question , author )
791819 i = i + 1
792820
793-
def get_top_i_answers(self, i):
    """Yield at most the first i answers produced by get_all_answers().

    If fewer than i answers are available, all of them are yielded; a
    zero or negative i yields nothing.

    Iteration stops as soon as the i-th answer has been yielded, so the
    underlying generator is never asked for an answer that would be
    thrown away, as the earlier counter-based loop did.
    """
    # Local import keeps this method self-contained.
    from itertools import islice

    # islice rejects negative or non-integer stops, so normalise first.
    wanted = max(int(i), 0)
    for answer in islice(self.get_all_answers(), wanted):
        yield answer
0 commit comments