|
| 1 | +#!/usr/bin/env python |
| 2 | +#encoding:utf-8 |
'''
Multi-threaded download of all the images of one comic chapter.

Depends on the spidermonkey library.
'''
| 8 | + |
| 9 | +import threading, re, urllib, urllib2, ConfigParser, string, os, time |
| 10 | +from spidermonkey import Runtime |
| 11 | + |
# Captures everything that follows "eval" on a line; that text is evaluated
# later on to obtain the image URL list.
regex = re.compile(r"eval(.+)")
# Captures the comic name assigned to g_comic_name.
regex2 = re.compile(r"g_comic_name\s=\s\"([^\"]+)\"")
# Captures the chapter name assigned to g_chapter_name.
regex3 = re.compile(r"g_chapter_name\s=\s\"([^\"]+)\"")
# Captures runs of word characters; the last run found in an image URL is
# used as the image type.
regex4 = re.compile(r"(\w+)")
| 20 | + |
# Read the "conf_comic" file and copy every option of its "dmzj" section
# into the plain dict `cfg`.
section = "dmzj"
conf = ConfigParser.ConfigParser()
conf.read("conf_comic")
cfg = dict(
    (option, conf.get(section, option))
    for option in conf.options(section)
)
| 27 | + |
| 28 | + |
# Fetch the chapter page. The connection is closed in `finally` so that it
# is released even when read() raises.
response = urllib2.urlopen(cfg["test_url_img"])
try:
    content = response.read()
finally:
    response.close()

# NOTE(review): the text captured below comes from the remote page and is
# executed inside the spidermonkey context -- only point "test_url_img" at
# a site you trust.
rt = Runtime()
cx = rt.new_context()
rs = regex.findall(content)[0]

# The captured script is evaluated first (presumably it defines `pages`),
# then `pages` itself is evaluated; each resulting entry is appended to the
# configured image root to form the full image URL.
urls_img = [
    cfg["root_img"] + x
    for x in cx.eval_script("eval(" + rs + ");eval(pages);")
]
name_comic = regex2.findall(content)[0]
name_chapter = regex3.findall(content)[0]
| 41 | + |
| 42 | + |
class ComicDownload(threading.Thread):
    """Thread that downloads one image URL to a local file.

    Args:
        url: Address of the image; also used as the thread name.
        localfile: Path the image is written to.
        delay: Seconds to wait before the download starts. Defaults to 5,
            the pause that was previously hard-coded in run().
    """

    def __init__(self, url, localfile, delay=5):
        threading.Thread.__init__(self, name=url)
        self.url = url
        self.localfile = localfile
        self.delay = delay

    def run(self):
        """Wait ``delay`` seconds, then retrieve ``url`` into ``localfile``.

        Best effort: a failure is printed together with the URL that failed
        and is not propagated out of the thread.
        """
        try:
            time.sleep(self.delay)
            urllib.urlretrieve(self.url, self.localfile)
        except Exception as e:
            # %r keeps the message printable even for non-ASCII URLs.
            print("%r: %s" % (self.url, e))
| 55 | + |
# Download every image of the chapter into <comic name>/<chapter name>/,
# using one ComicDownload thread per image.
thread_pool = []
try:
    path = "/".join((name_comic, name_chapter))
    # os.mkdir() raised as soon as the comic directory already existed
    # (e.g. left over from an earlier chapter), which aborted the whole
    # run; create only the directories that are still missing.
    if not os.path.isdir(path):
        os.makedirs(path)

    for i, x in enumerate(urls_img):
        # The last run of word characters in the URL is taken as the
        # image type; files are numbered from 1 in list order.
        type_img = regex4.findall(x)[-1]
        filename = ".".join((str(i + 1), type_img))
        thread_pool.append(ComicDownload(x, "/".join((path, filename))))

    # Plain loops rather than map(): the calls are made only for their
    # side effects. Start every thread first, then wait for all of them.
    for t in thread_pool:
        t.start()
    for t in thread_pool:
        t.join()

except Exception as e:
    # Top-level boundary of the script: report the problem and stop.
    print(e)
0 commit comments