Skip to content

Commit b95877c

Browse files
JINGJING
authored and committed
分析漫画首页信息
1 parent e0980d1 commit b95877c

1 file changed

Lines changed: 60 additions & 0 deletions

File tree

comic-index.py

Lines changed: 60 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,60 @@
#!/usr/bin/env python
# coding: utf-8
"""Parse the index page of a comic hosted on manhua.dmzj.com.

The script downloads one comic index page, extracts the comic's
introduction, chapter list, author, serialization status, category, name
and latest update with regular expressions, and writes the result to
``output.txt``.

Works on Python 2.7 and Python 3.3+.
"""

import contextlib
import io
import re
import urllib

try:
    import urllib2
except ImportError:  # Python 3 folded urllib2 into urllib.request
    import urllib.request as urllib2


# Example index URL; not used by the script itself.
url_index = "http://manhua.dmzj.com/lxjgs/"

# Comic introduction (content of the "description" meta tag).
regex = re.compile(r"<meta\sname=\'description\'\scontent=\"([^\"]+)\"")
# Author.
regex2 = re.compile(r"<td><a\shref=\'[^\']+\'>(.+)</a><br/></td>")
# Comic name.
regex3 = re.compile(r"g_comic_name\s=\s\"([^\"]+)\"")
# Serialization status.
regex4 = re.compile(r"<td><a\shref=\"[^\"]+\"\salt=\"[^\"]+\">(.+)</a></td>")
# Category.
regex5 = re.compile(r"<td><a\stitle=\'[^\']+\'\shref=\'[^\']+\'>(.+)</a></td>")
# Latest update: captures (href, title) of the chapter link marked color_red.
regex6 = re.compile(r"<li><a\stitle=\"[^\"]+\"\shref=\"([^\"]+)\"\s+class=\"color_red\">(.+)</a></li>")
# Chapters: captures (href, title) of every plain chapter link.
regex7 = re.compile(r"<li><a\stitle=\"[^\"]+\"\shref=\"([^\"]+)\"\s*>(.+)</a></li>")

# Page that is actually downloaded and parsed.
url_test = "http://manhua.dmzj.com/jqzd/"

# Text in the meta description that precedes the real introduction.
INTRO_MARKER = u"漫画介绍:"

# NOTE(review): the page is assumed to be UTF-8 -- the original matched
# UTF-8 literals against the raw response bytes. Confirm against the site.
PAGE_ENCODING = "utf-8"


def _match_at(pattern, text, label, index=0):
    """Return the ``index``-th ``findall`` match of ``pattern`` in ``text``.

    Raises:
        IndexError: if there are fewer than ``index + 1`` matches. The type
            matches what plain list indexing raised before, but the message
            now names the missing field.
    """
    matches = pattern.findall(text)
    if index >= len(matches):
        raise IndexError(
            "cannot find %s: need at least %d match(es), found %d"
            % (label, index + 1, len(matches))
        )
    return matches[index]


def strip_intro_prefix(description):
    """Return the part of ``description`` after the first ``INTRO_MARKER``.

    When the marker is absent the description is returned unchanged. The
    previous ``find``-based slicing silently dropped leading characters in
    that case because ``find`` returned -1.
    """
    _, marker, tail = description.partition(INTRO_MARKER)
    return tail if marker else description


def fetch_page(url, timeout=30):
    """Download ``url`` and return its body decoded as text.

    The response is always closed, even if reading fails. Undecodable
    bytes are replaced rather than aborting the whole run.
    """
    with contextlib.closing(urllib2.urlopen(url, timeout=timeout)) as response:
        raw = response.read()
    return raw.decode(PAGE_ENCODING, "replace")


def parse_index(content):
    """Extract the comic's fields from the HTML text ``content``.

    Returns:
        A dict with the keys ``name``, ``author``, ``category``, ``intro``,
        ``status`` (strings), ``lastupdate`` (an ``(href, title)`` tuple) and
        ``chapters`` (a list of ``(href, title)`` tuples ending with the
        latest update).

    Raises:
        IndexError: if any required field cannot be found.
    """
    latest = _match_at(regex6, content, "latest update")
    return {
        "intro": strip_intro_prefix(_match_at(regex, content, "introduction")),
        "author": _match_at(regex2, content, "author"),
        "name": _match_at(regex3, content, "comic name"),
        # The second match is used on purpose, as in the original script;
        # presumably the first matching link is a different field -- TODO
        # confirm against the live page.
        "status": _match_at(regex4, content, "serialization status", index=1),
        "category": _match_at(regex5, content, "category"),
        "lastupdate": latest,
        # The latest chapter carries an extra class attribute, so regex7
        # does not pick it up; it is appended explicitly.
        "chapters": regex7.findall(content) + [latest],
    }


def format_report(info):
    """Render the dict returned by ``parse_index`` as the output text."""
    lines = [
        u"漫画名 - " + info["name"],
        u"作者 - " + info["author"],
        u"分类 - " + info["category"],
        u"漫画简介 - " + info["intro"],
        u"连载状态 - " + info["status"],
        u"最新更新 - " + info["lastupdate"][1],
    ]
    lines.extend(u" ".join(chapter) for chapter in info["chapters"])
    return u"\n".join(lines) + u"\n"


def main():
    """Fetch ``url_test``, parse it and write the report to output.txt."""
    info = parse_index(fetch_page(url_test))
    with io.open("output.txt", "w", encoding="utf-8") as f:
        f.write(format_report(info))


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)