Skip to content

Commit 7c07235

Browse files
committed
Add new feature Post and Column for zhihu
1 parent e283907 commit 7c07235

1 file changed

Lines changed: 168 additions & 0 deletions

File tree

zhihu.py

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,174 @@
9292
# Python 2 only: reload the sys module ahead of the sys.setdefaultencoding
# call that follows. Per the Python 2 `sys` documentation that function is
# removed from the module's namespace once `site` has run, so the reload is
# presumably here to bring it back.
reload(sys)
9393
# Python 2 only: make UTF-8 the interpreter-wide default codec for implicit
# str <-> unicode conversions.
# NOTE(review): the Windows branches in this file call .decode('utf-8') on
# values that may already be unicode; they presumably rely on this default --
# confirm before removing this line.
sys.setdefaultencoding('utf8')
9494

95+
class Post:
    """A single article addressed by ``http(s)://zhuanlan.zhihu.com/p/<id>``.

    The post's JSON metadata is fetched lazily by :meth:`parser` the first
    time an accessor needs it and is then kept in ``self.meta``.
    """

    url = None
    meta = None
    slug = None

    # Dots are escaped so only the literal host matches, and the numeric id
    # may have any number of digits: a fixed 8-digit pattern silently cut a
    # longer id down to its first 8 digits and addressed the wrong post.
    _URL_PATTERN = re.compile(r"https?://zhuanlan\.zhihu\.com/p/(\d+)")

    def __init__(self, url):
        """Validate *url* and remember it together with its numeric slug.

        Raises:
            ValueError: if *url* does not start with a Zhuanlan post URL.
        """
        matched = self._URL_PATTERN.match(url)
        if matched is None:
            raise ValueError("\"" + url + "\"" + " : it isn't a post url.")
        self.url = url
        self.slug = matched.group(1)

    def parser(self):
        """Fetch this post's JSON from the posts API into ``self.meta``."""
        r = requests.get('https://zhuanlan.zhihu.com/api/posts/' + self.slug)
        self.meta = r.json()

    def _load_meta(self):
        """Return ``self.meta``, calling :meth:`parser` first if it is unset."""
        if self.meta is None:
            self.parser()
        return self.meta

    @staticmethod
    def _for_console(text):
        """Return *text* as GBK-encoded bytes on Windows, unchanged elsewhere."""
        if platform.system() != 'Windows':
            return text
        # Only byte strings need decoding; calling .decode() on text that is
        # already unicode would depend on the process-wide default encoding.
        if isinstance(text, bytes):
            text = text.decode('utf-8')
        return text.encode('gbk')

    def get_title(self):
        """Return the post title (``meta['title']``), cached on ``self.title``."""
        if not hasattr(self, "title"):
            self.title = self._load_meta()['title']
        return self._for_console(self.title)

    def get_content(self):
        """Return the post body as stored under ``meta['content']``."""
        return self._for_console(self._load_meta()['content'])

    def get_author(self):
        """Return the author as a ``User``, built once and then reused."""
        if not hasattr(self, "author"):
            author_tag = self._load_meta()['author']
            # Store the result so the hasattr() check above can actually hit
            # on later calls.
            self.author = User(author_tag['profileUrl'], author_tag['slug'])
        return self.author

    def get_column(self):
        """Return the ``Column`` named by ``meta['column']['slug']``."""
        column_slug = self._load_meta()['column']['slug']
        column_url = 'https://zhuanlan.zhihu.com/' + column_slug
        return Column(column_url, column_slug)

    def get_likes(self):
        """Return ``meta['likesCount']`` converted to ``int``."""
        return int(self._load_meta()["likesCount"])

    def get_topics(self):
        """Yield each entry of ``meta['topics']`` in order."""
        for topic in self._load_meta()['topics']:
            yield topic
172+
173+
class Column:
    """A column addressed by ``http(s)://zhuanlan.zhihu.com/<slug>``.

    The column's JSON metadata is fetched lazily by :meth:`parser` the first
    time an accessor needs it and is then kept in ``self.meta``.
    """

    url = None
    meta = None

    # Dots are escaped so only the literal host matches. '_' and '-' are
    # accepted in the slug so that a slug containing them is no longer cut
    # short at the first such character.
    _URL_PATTERN = re.compile(r"https?://zhuanlan\.zhihu\.com/([0-9a-zA-Z_-]+)")

    # Number of posts requested per call to the column posts API.
    _PAGE_SIZE = 20

    def __init__(self, url, slug=None):
        """Validate *url* and remember it together with the column slug.

        Args:
            url: Column URL.
            slug: Optional slug to use instead of the one parsed from *url*.

        Raises:
            ValueError: if *url* does not start with a Zhuanlan column URL.
        """
        matched = self._URL_PATTERN.match(url)
        if matched is None:
            raise ValueError("\"" + url + "\"" + " : it isn't a column url.")
        self.url = url
        self.slug = matched.group(1) if slug is None else slug

    def parser(self):
        """Fetch this column's JSON from the columns API into ``self.meta``."""
        r = requests.get('https://zhuanlan.zhihu.com/api/columns/' + self.slug)
        self.meta = r.json()

    def _load_meta(self):
        """Return ``self.meta``, calling :meth:`parser` first if it is unset."""
        if self.meta is None:
            self.parser()
        return self.meta

    @staticmethod
    def _for_console(text):
        """Return *text* as GBK-encoded bytes on Windows, unchanged elsewhere."""
        if platform.system() != 'Windows':
            return text
        # Only byte strings need decoding; calling .decode() on text that is
        # already unicode would depend on the process-wide default encoding.
        if isinstance(text, bytes):
            text = text.decode('utf-8')
        return text.encode('gbk')

    def get_title(self):
        """Return the column name (``meta['name']``), cached on ``self.title``."""
        if not hasattr(self, "title"):
            self.title = self._load_meta()['name']
        return self._for_console(self.title)

    def get_description(self):
        """Return the text stored under ``meta['description']``."""
        return self._for_console(self._load_meta()['description'])

    def get_followers_num(self):
        """Return ``meta['followersCount']`` converted to ``int``."""
        return int(self._load_meta()['followersCount'])

    def get_posts_num(self):
        """Return ``meta['postsCount']`` converted to ``int``."""
        return int(self._load_meta()['postsCount'])

    def get_creator(self):
        """Return the creator as a ``User``, built once and then reused."""
        if not hasattr(self, "creator"):
            creator_tag = self._load_meta()['creator']
            # Store the result so the hasattr() check above can actually hit
            # on later calls.
            self.creator = User(creator_tag['profileUrl'], creator_tag['slug'])
        return self.creator

    def get_all_posts(self):
        """Yield a ``Post`` for every entry the column posts API returns.

        Pages of ``_PAGE_SIZE`` posts are requested one at a time while the
        generator is consumed. When the column reports zero posts,
        "No posts." is printed and nothing is yielded.
        """
        posts_num = self.get_posts_num()
        if posts_num == 0:
            print("No posts.")
            return
        url = 'https://zhuanlan.zhihu.com/api/columns/' + self.slug + '/posts'
        # Ceiling division: one request per (possibly partial) page.
        page_count = (posts_num - 1) // self._PAGE_SIZE + 1
        for i in range(page_count):
            parm = {'limit': self._PAGE_SIZE, 'offset': self._PAGE_SIZE * i}
            r = requests.get(url, params=parm)
            for p in r.json():
                yield Post('https://zhuanlan.zhihu.com/p/' + str(p['slug']))
262+
95263
class Question:
96264
url = None
97265
soup = None

0 commit comments

Comments
 (0)