|
92 | 92 | reload(sys) |
93 | 93 | sys.setdefaultencoding('utf8') |
94 | 94 |
|
class Post:
    """A single zhuanlan.zhihu.com post, read through the posts JSON API.

    Metadata is fetched lazily: the first getter that needs it calls
    ``parser()`` and the decoded JSON response is kept in ``self.meta``.
    """

    url = None
    meta = None
    slug = None

    # The dots are escaped so that only the literal host matches, and the
    # digit run is "eight or more" so that longer ids are kept whole instead
    # of being cut down to their first eight digits.
    _URL_PATTERN = re.compile(r"https?://zhuanlan\.zhihu\.com/p/(\d{8,})")

    def __init__(self, url):
        """Validate ``url`` and extract the numeric post slug from it.

        Raises:
            ValueError: if ``url`` does not start with a zhuanlan post URL.
        """
        matched = self._URL_PATTERN.match(url)
        if matched is None:
            raise ValueError("\"" + url + "\"" + " : it isn't a post url.")
        self.url = url
        self.slug = matched.group(1)

    def parser(self):
        """Fetch this post's metadata and store the decoded JSON in ``self.meta``."""
        r = requests.get('https://zhuanlan.zhihu.com/api/posts/' + self.slug)
        self.meta = r.json()

    def _load_meta(self):
        """Return ``self.meta``, calling ``parser()`` first if it is still unset."""
        if self.meta is None:
            self.parser()
        return self.meta

    def _encode_for_platform(self, text):
        """Return ``text`` re-encoded as GBK on Windows, unchanged elsewhere."""
        if platform.system() != 'Windows':
            return text
        # Only byte strings need decoding; text that is already unicode is
        # encoded directly, without relying on the process default encoding.
        if isinstance(text, bytes):
            text = text.decode('utf-8')
        return text.encode('gbk')

    def get_title(self):
        """Return the post title (GBK-encoded on Windows).

        The raw title is cached on ``self.title`` after the first lookup.
        """
        if not hasattr(self, "title"):
            self.title = self._load_meta()['title']
        return self._encode_for_platform(self.title)

    def get_content(self):
        """Return the post's ``content`` field (GBK-encoded on Windows)."""
        return self._encode_for_platform(self._load_meta()['content'])

    def get_author(self):
        """Return the post's author as a ``User``.

        The ``User`` is built once from the ``author`` entry of the metadata
        and cached on ``self.author`` so later calls return the same object.
        """
        if not hasattr(self, "author"):
            author_tag = self._load_meta()['author']
            self.author = User(author_tag['profileUrl'], author_tag['slug'])
        return self.author

    def get_column(self):
        """Return the ``Column`` named by the metadata's ``column`` slug."""
        column_slug = self._load_meta()['column']['slug']
        column_url = 'https://zhuanlan.zhihu.com/' + column_slug
        return Column(column_url, column_slug)

    def get_likes(self):
        """Return the metadata's ``likesCount`` as an int."""
        return int(self._load_meta()["likesCount"])

    def get_topics(self):
        """Yield each entry of the metadata's ``topics`` list.

        This is a generator, so the metadata is only fetched once iteration
        actually starts.
        """
        for topic in self._load_meta()['topics']:
            yield topic
| 172 | + |
class Column:
    """A zhuanlan.zhihu.com column, read through the columns JSON API.

    Metadata is fetched lazily: the first getter that needs it calls
    ``parser()`` and the decoded JSON response is kept in ``self.meta``.
    """

    url = None
    meta = None

    # The dots are escaped so that only the literal host matches.
    # NOTE(review): a slug containing characters outside [0-9a-zA-Z] would be
    # cut at the first such character when it is derived from the URL --
    # confirm whether such slugs exist.
    _URL_PATTERN = re.compile(r"https?://zhuanlan\.zhihu\.com/([0-9a-zA-Z]+)")

    # Number of posts requested per page in get_all_posts().
    _PAGE_SIZE = 20

    def __init__(self, url, slug=None):
        """Validate ``url`` and record the column slug.

        Args:
            url: column URL; must start with a zhuanlan column address.
            slug: optional explicit slug. When omitted, the slug is taken
                from the path segment matched in ``url``.

        Raises:
            ValueError: if ``url`` does not start with a zhuanlan column URL.
        """
        matched = self._URL_PATTERN.match(url)
        if matched is None:
            raise ValueError("\"" + url + "\"" + " : it isn't a column url.")
        self.url = url
        self.slug = matched.group(1) if slug is None else slug

    def parser(self):
        """Fetch this column's metadata and store the decoded JSON in ``self.meta``."""
        r = requests.get('https://zhuanlan.zhihu.com/api/columns/' + self.slug)
        self.meta = r.json()

    def _load_meta(self):
        """Return ``self.meta``, calling ``parser()`` first if it is still unset."""
        if self.meta is None:
            self.parser()
        return self.meta

    def _encode_for_platform(self, text):
        """Return ``text`` re-encoded as GBK on Windows, unchanged elsewhere."""
        if platform.system() != 'Windows':
            return text
        # Only byte strings need decoding; text that is already unicode is
        # encoded directly, without relying on the process default encoding.
        if isinstance(text, bytes):
            text = text.decode('utf-8')
        return text.encode('gbk')

    def get_title(self):
        """Return the column name (GBK-encoded on Windows).

        The raw name is cached on ``self.title`` after the first lookup.
        """
        if not hasattr(self, "title"):
            self.title = self._load_meta()['name']
        return self._encode_for_platform(self.title)

    def get_description(self):
        """Return the column's ``description`` field (GBK-encoded on Windows)."""
        return self._encode_for_platform(self._load_meta()['description'])

    def get_followers_num(self):
        """Return the metadata's ``followersCount`` as an int."""
        return int(self._load_meta()['followersCount'])

    def get_posts_num(self):
        """Return the metadata's ``postsCount`` as an int."""
        return int(self._load_meta()['postsCount'])

    def get_creator(self):
        """Return the column's creator as a ``User``.

        The ``User`` is built once from the ``creator`` entry of the metadata
        and cached on ``self.creator`` so later calls return the same object.
        """
        if not hasattr(self, "creator"):
            creator_tag = self._load_meta()['creator']
            self.creator = User(creator_tag['profileUrl'], creator_tag['slug'])
        return self.creator

    def get_all_posts(self):
        """Yield a ``Post`` for every entry returned by the column's posts API.

        Posts are requested in pages of ``_PAGE_SIZE``; the number of pages
        is derived from ``get_posts_num()``. When the column reports zero
        posts, "No posts." is printed and nothing is yielded.
        """
        posts_num = self.get_posts_num()
        if posts_num == 0:
            # A single parenthesised string prints identically whether
            # ``print`` is a statement or a function.
            print("No posts.")
            return

        url = 'https://zhuanlan.zhihu.com/api/columns/' + self.slug + '/posts'
        # Floor division keeps the page count an integer on every interpreter.
        page_count = (posts_num - 1) // self._PAGE_SIZE + 1
        for page in range(page_count):
            parm = {'limit': self._PAGE_SIZE, 'offset': self._PAGE_SIZE * page}
            r = requests.get(url, params=parm)
            for p in r.json():
                post_url = 'https://zhuanlan.zhihu.com/p/' + str(p['slug'])
                yield Post(post_url)
| 262 | + |
95 | 263 | class Question: |
96 | 264 | url = None |
97 | 265 | soup = None |
|
0 commit comments