Skip to content

Commit 4b83e9f

Browse files
committed
add Chapter10
1 parent 5c90446 commit 4b83e9f

3 files changed

Lines changed: 59 additions & 0 deletions

File tree

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
from selenium import webdriver
2+
import time
3+
from selenium.webdriver.remote.webelement import WebElement
4+
from selenium.common.exceptions import StaleElementReferenceException
5+
6+
7+
'''
8+
我们可以用一种智能的方法来检测客户端重定向是否完成,首先从页面开始加载时就“监视”DOM 中的一个元素,
9+
然后重复调用这个元素直到 Selenium
10+
抛出一个 StaleElementReferenceException 异常;
11+
也就是说,元素不在页面的 DOM 里了,说明这时网站已经跳转:
12+
'''
13+
def waitForLoad(driver, timeout=10, poll_interval=0.5):
    """Block until the document currently loaded in ``driver`` is replaced.

    Grabs the current ``<html>`` element and polls it until Selenium reports
    it as stale (no longer attached to the DOM), which indicates the browser
    has moved on to a new document, e.g. after a client-side redirect.

    Args:
        driver: A Selenium WebDriver that has already started loading a page.
        timeout: Maximum number of seconds to wait before giving up.
        poll_interval: Seconds to sleep between two staleness checks.

    Returns:
        None. If the timeout expires first, a message is printed instead.
    """
    elem = driver.find_element_by_tag_name("html")
    deadline = time.monotonic() + timeout
    while True:
        if time.monotonic() >= deadline:
            print("Timing out after {} seconds and returning".format(timeout))
            return
        time.sleep(poll_interval)
        try:
            # Comparing two WebElements with ``==`` may be a purely local id
            # check that never raises; query the element itself so that a
            # detached element surfaces as StaleElementReferenceException.
            elem.is_enabled()
        except StaleElementReferenceException:
            # The watched element is gone: the page has been replaced.
            return
26+
27+
# Load the redirect demo page, wait for the client-side redirect to finish,
# then print the source of the page the browser ended up on.
driver = webdriver.PhantomJS(executable_path='<Path to Phantom JS>')
try:
    driver.get("http://pythonscraping.com/pages/javascript/redirectDemo1.html")
    waitForLoad(driver)
    print(driver.page_source)
finally:
    # Always release the browser, even if loading or waiting fails.
    driver.close()
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
from selenium import webdriver
2+
import time
3+
4+
5+
'''
6+
那些使用了 Ajax 或 DHTML 技术改变 / 加载内容的页面,可能有一些采集手段,但是用 Python 解决这个问题只有两种途径:
7+
直接从 JavaScript 代码里采集内容,或者用 Python 的第三方库运行 JavaScript,直接采集你在浏览器里看到的页面。
8+
9+
PhantomJS 无头浏览器
10+
11+
把 Selenium 和 PhantomJS 结合在一起,就可以运行一个非常强大的网络爬虫了,
12+
可以处理 cookie、JavaScript、header,以及任何你需要做的事情。
13+
'''
14+
# Load the Ajax demo page, pause, then print the text of the "content" element.
driver = webdriver.PhantomJS(executable_path='')
try:
    driver.get("http://pythonscraping.com/pages/javascript/ajaxDemo.html")
    # Fixed pause intended to let the page finish updating before it is read.
    time.sleep(3)
    print(driver.find_element_by_id("content").text)
finally:
    # Close the browser even if the page load or the lookup above raises.
    driver.close()
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
4+
5+
# Load the Ajax demo page and wait (up to 10 seconds) for the element with id
# "loadedButton" to appear, rather than sleeping for a fixed amount of time.
driver = webdriver.PhantomJS(executable_path='')
try:
    driver.get("http://pythonscraping.com/pages/javascript/ajaxDemo.html")
    try:
        element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "loadedButton")))
    finally:
        # Print the content whether or not the wait succeeded.
        print(driver.find_element_by_id("content").text)
finally:
    # Close the browser even if the page load, the wait or the lookup raises.
    driver.close()

0 commit comments

Comments
 (0)