安裝
pip install selenium
官方文檔:
https://www.seleniumhq.org/docs/
測試頁面:
http://www.pythonscraping.com/pages/javascript/ajaxDemo.html
報錯: warnings.warn('Selenium support for PhantomJS has been deprecated, please use headless versions of Chrome or Firefox instead')
解決: 安裝版本2
pip install "selenium < 3"
簡單示例:解析 JavaScript
from selenium import webdriver
from bs4 import BeautifulSoup
import time
# Fixed wait: sleep for a set time so the page's JavaScript can finish loading.
def getPage1():
    """Load the AJAX demo page, pause briefly, and print its "content" element.

    The element is printed twice: first its text as read through Selenium,
    then the tag as parsed by BeautifulSoup from the driver's page source.
    The browser session is always shut down, even when a step raises.
    """
    url = "http://www.pythonscraping.com/pages/javascript/ajaxDemo.html"
    driver = webdriver.PhantomJS()
    try:
        driver.get(url)
        time.sleep(1)  # fixed pause before reading the page

        # Read the content through Selenium.
        content = driver.find_element_by_id("content")
        print(content.text)

        # Parse the same page with BeautifulSoup.
        html = driver.page_source  # page source as a string
        soup = BeautifulSoup(html, "html.parser")
        tag = soup.find(id="content")
        print(tag)
    finally:
        # quit() ends the whole session and its browser process;
        # close() would only close the current window.
        driver.quit()
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Explicit wait: poll until a marker element shows that the page has loaded.
def getPage2():
    """Load the AJAX demo page, wait for its marker element, and print the content.

    Waits up to 10 seconds for the element with id "loadedButton" to be
    present. The text of the "content" element is printed whether or not
    the wait succeeded; a wait timeout still propagates to the caller
    afterwards. The browser session is always shut down.
    """
    url = "http://www.pythonscraping.com/pages/javascript/ajaxDemo.html"
    driver = webdriver.PhantomJS(executable_path="phantomjs")
    try:
        driver.get(url)
        try:
            # Use a clearly identifiable element as the "page is ready" marker.
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.ID, "loadedButton"))
            )
        finally:
            # Print whatever content is there, even if the wait timed out.
            content = driver.find_element(By.ID, "content")
            print(content.text)
    finally:
        # quit() ends the whole session and its browser process;
        # close() would only close the current window.
        driver.quit()
# Run the marker-element wait demo.
getPage2()
點選百度自動送出
# -*- coding:utf-8 -*-
from selenium import webdriver
import time
def clickBaidu(pause=5):
    """Open Baidu in Chrome, submit a search for "百度", and save a screenshot.

    The screenshot is written to "baidu_shot.png". The browser session is
    always shut down, even when a step raises.

    Args:
        pause: Seconds to sleep between steps so the automation can be
            watched. Pass 0 to run without delays.
    """
    # Imported locally so this snippet brings in every name it relies on.
    from selenium.webdriver.common.by import By

    # Chrome is used so the run is visible on screen.
    driver = webdriver.Chrome()
    try:
        time.sleep(pause)
        baidu = "http://www.baidu.com"
        driver.get(baidu)
        time.sleep(pause)
        # find_element(By.ID, ...) is used instead of find_element_by_id,
        # which newer Selenium releases no longer provide.
        driver.find_element(By.ID, "kw").send_keys("百度")
        time.sleep(pause)
        driver.find_element(By.ID, "su").click()
        time.sleep(pause)
        # Take a screenshot of the results page.
        driver.get_screenshot_as_file("baidu_shot.png")
        time.sleep(pause)
    finally:
        # quit() ends the whole session and its browser process;
        # close() would only close the current window.
        driver.quit()
clickBaidu()
# Note: the time.sleep() calls are only there so the whole browser automation can be watched; they can be removed in real use.
滑鼠動作
# Mouse actions. Only click() is a WebElement method; the other gestures are
# built with ActionChains and are sent to the browser by perform().
# Assumes `driver` is an open WebDriver and `element` is an already located element.
from selenium.webdriver.common.action_chains import ActionChains

element.click()
ActionChains(driver).click_and_hold(element).perform()
ActionChains(driver).release(element).perform()
ActionChains(driver).double_click(element).perform()
參考:
python selenium啟動浏覽器打開百度搜尋