#!/usr/bin/env python
# coding: utf-8
| 3 | + |
'''
Parse the main page of a comic.
Extracts the synopsis, chapter list, author, serialization status, category,
comic title and latest update.
'''
| 8 | + |
import contextlib
import re
import urllib
import urllib2
| 10 | + |
# Comic main-page URL. Not referenced anywhere else in this script;
# url_test below is the page that actually gets downloaded.
url_index = "http://manhua.dmzj.com/lxjgs/"


# Synopsis: the content attribute of the description <meta> tag.
regex = re.compile(r"<meta\sname=\'description\'\scontent=\"([^\"]+)\"")
# Author: text of a single-quoted link followed by <br/> inside a <td>.
regex2 = re.compile(r"<td><a\shref=\'[^\']+\'>(.+)</a><br/></td>")
# Comic title: value assigned to the g_comic_name variable in the page.
regex3 = re.compile(r"g_comic_name\s=\s\"([^\"]+)\"")
# Serialization status: text of a <td> link that has href and alt attributes.
regex4 = re.compile(r"<td><a\shref=\"[^\"]+\"\salt=\"[^\"]+\">(.+)</a></td>")
# Category: text of a <td> link that has title and href attributes.
regex5 = re.compile(r"<td><a\stitle=\'[^\']+\'\shref=\'[^\']+\'>(.+)</a></td>")
# Latest update: (href, text) of the <li> link carrying class="color_red".
regex6 = re.compile(r"<li><a\stitle=\"[^\"]+\"\shref=\"([^\"]+)\"\s+class=\"color_red\">(.+)</a></li>")
# Chapters: (href, text) of <li> links with no attribute after href, so the
# color_red link matched by regex6 is not included here.
regex7 = re.compile(r"<li><a\stitle=\"[^\"]+\"\shref=\"([^\"]+)\"\s*>(.+)</a></li>")

# Page fetched by this script.
url_test = "http://manhua.dmzj.com/jqzd/"
| 30 | + |
# Download the raw page. closing() releases the connection even when read()
# raises part-way through, and the handle is no longer called `socket`, which
# shadowed the name of the standard-library module.
with contextlib.closing(urllib2.urlopen(url_test)) as response:
    content = response.read()
| 34 | + |
def _match_at(pattern, text, index, label):
    """Return item ``index`` of ``pattern.findall(text)``.

    Raises IndexError, naming ``label``, when the text yields too few
    matches, so a failed extraction is reported per field instead of as a
    bare "list index out of range".
    """
    matches = pattern.findall(text)
    if len(matches) <= index:
        raise IndexError(
            "%s: expected at least %d match(es), found %d"
            % (label, index + 1, len(matches))
        )
    return matches[index]


# Pull every field out of the downloaded page.
intro = _match_at(regex, content, 0, "intro")
author = _match_at(regex2, content, 0, "author")
name = _match_at(regex3, content, 0, "name")
# The second match is used as the status; presumably the first match is an
# unrelated table cell -- confirm against the live page.
status = _match_at(regex4, content, 1, "status")
category = _match_at(regex5, content, 0, "category")
# regex6 has two groups, so this is an (href, title) tuple.
lastupdate = _match_at(regex6, content, 0, "lastupdate")
chapters0 = regex7.findall(content)

# Keep only the text after the marker. When the marker is missing the
# synopsis is left untouched; the old find()-based slice used -1 as an
# offset in that case and cut off the beginning of the text.
_before, _marker, _after = intro.partition("漫画介绍:")
if _marker:
    intro = _after

# Chapter list as (href, title) pairs, with the latest update appended last.
chapters = [(href, title) for href, title in chapters0]
chapters.append((lastupdate[0], lastupdate[1]))
| 51 | + |
# Write the report: one labelled line per field, then one "href title" line
# per chapter. The with/for bodies are indented again so the block parses.
with open("output.txt", "w") as f:
    f.writelines([
        "漫画名 - " + name + "\n",
        "作者 - " + author + "\n",
        "分类 - " + category + "\n",
        "漫画简介 - " + intro + "\n",
        "连载状态 - " + status + "\n",
        "最新更新 - " + lastupdate[1] + "\n",
    ])
    f.writelines(
        " ".join((href, title)) + "\n" for href, title in chapters
    )
0 commit comments