File tree Expand file tree Collapse file tree 1 file changed +42
-0
lines changed
Expand file tree Collapse file tree 1 file changed +42
-0
lines changed Original file line number Diff line number Diff line change 1+ # _*_ coding: utf-8 _*_
2+
3+ """
4+ 使用Splash服务器抓取Ajax渲染页面
5+ """
6+
7+ import json
8+ import requests
9+
10+ # Docker安装: https://splash.readthedocs.io/en/latest/install.html
11+ CRAWLER_URL = "http://weixin.sogou.com/weixin?page=1&type=2&query=%E4%B8%AD%E5%9B%BD"
12+
13+
14+ # render.html
def test_1(url):
    """Fetch the rendered HTML of ``url`` through Splash's ``render.html`` endpoint.

    Posts a JSON document of rendering options to the Splash server, then
    prints the target URL with the HTTP status code, followed by the
    response text.

    :param url: address of the page that Splash should load and render.
    :return: None; the outcome is only printed.
    """
    render = "http://xx.xx.xx.xx:8050/render.html"
    body = json.dumps({
        "url": url,
        "wait": 0.5,  # page-load wait time
        "images": 0,  # whether to fetch images
        "timeout": 3,  # render timeout
        # "allowed_domains": ["sogou.com", ],  # restrict to these domains
        "allowed_content_types": "text/html; charset=utf-8"
    })
    headers = {"Content-Type": "application/json"}

    # requests has no default timeout; without one an unreachable or stalled
    # Splash host would block this call forever. 10 s leaves headroom over
    # the 3 s render timeout requested above.
    response = requests.post(url=render, headers=headers, data=body, timeout=10)
    print(url, response.status_code)
    print(response.text)
31+
32+ # test_1(CRAWLER_URL)
33+
34+
35+ # render.png
def test_2(url):
    """Request a PNG screenshot of ``url`` from Splash's ``render.png`` endpoint.

    Prints the target URL together with the HTTP status code of the Splash
    response; the image bytes themselves are not used.

    :param url: address of the page that Splash should load and render.
    :return: None; the outcome is only printed.
    """
    render = "http://xx.xx.xx.xx:8050/render.png"
    # Pass the arguments via ``params`` so requests percent-encodes them.
    # Interpolating the raw URL into the query string lets any "&key=value"
    # pairs inside ``url`` be parsed as separate Splash arguments, which
    # truncates the page address that Splash actually loads.
    query = {"url": url, "timeout": 5}
    # Client-side timeout so a stalled Splash host cannot block forever;
    # 10 s leaves headroom over the 5 s render timeout requested above.
    response = requests.get(url=render, params=query, timeout=10)
    print(url, response.status_code)
41+
42+ # test_2(CRAWLER_URL)
You can’t perform that action at this time.
0 commit comments