代碼實作
from typing import Sized
from docx import Document
import time
from docxtpl import DocxTemplate,InlineImage,RichText
from docx.shared import Mm
from PIL import Image
from selenium import webdriver
import ssl
import sys
import json
import base64
# Initialize the WebDriver: start a Chrome session, request a 1280x800 window
# (passing the first window handle), then maximize the window.
# NOTE(review): maximize_window() runs right after set_window_size(), so the
# explicit size presumably does not persist -- confirm which one is intended.
driver = webdriver.Chrome()
driver.set_window_size(1280, 800, driver.window_handles[0])
driver.maximize_window()
# Capture the CAPTCHA image
def getimage():
    """Click the CAPTCHA element and save a screenshot of it to ``vcode.png``.

    Locates the element with ``id="captchaImgU"`` through the module-level
    ``driver``, clicks it, waits two seconds, and writes a screenshot of the
    element to ``vcode.png`` in the current working directory.
    """
    # Imported locally so the function does not rely on a file-level import.
    from selenium.webdriver.common.by import By

    # find_element(By.XPATH, ...) is used instead of find_element_by_xpath,
    # which was removed in Selenium 4.3; this form also works on older versions.
    ele_vcode = driver.find_element(By.XPATH, "//*[@id='captchaImgU']")
    ele_vcode.click()
    # Presumably the click reloads the CAPTCHA; wait before capturing it.
    time.sleep(2)
    ele_vcode.screenshot('vcode.png')
# Baidu OCR API recognition settings
# Parameters used by the POST requests below
# NOTE(review): this replaces the default HTTPS context factory with the
# unverified one, so urllib HTTPS requests made after this line skip
# certificate verification for the whole process -- confirm this is intended.
ssl._create_default_https_context = ssl._create_unverified_context
# NOTE(review): the credentials are hard-coded in source; consider loading
# them from the environment or a config file that is not committed.
API_KEY = 'fqe83vwceOl3A87umYHATbaB'
SECRET_KEY = 'UFjtlGbBvhLAh1VSDok1apCuDx6AceRG'
# Endpoint the CAPTCHA image is posted to for text recognition.
OCR_URL = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic"
# Endpoint that exchanges API_KEY / SECRET_KEY for an access token.
TOKEN_URL = 'https://aip.baidubce.com/oauth/2.0/token'
# Python 2 / Python 3 switch.
# The urllib names below are bound only when running on Python 3; no
# Python 2 fallback imports are provided, so on Python 2 they stay undefined.
IS_PY3 = sys.version_info[0] == 3
if IS_PY3:
    from urllib.error import URLError
    from urllib.parse import quote_plus, urlencode
    from urllib.request import Request, urlopen
# Fetch the access token
def fetch_token():
    """Request an access token from ``TOKEN_URL`` and return it.

    Posts ``API_KEY`` and ``SECRET_KEY`` with the ``client_credentials``
    grant type and parses the JSON response.

    Returns:
        The value of the ``access_token`` field of the response.

    Raises:
        SystemExit: (exit status 1, after printing a message) if the request
            raises ``URLError``, if the response lacks ``access_token`` or
            ``scope``, or if the granted scope does not include
            ``brain_all_scope``.
    """
    params = {
        'grant_type': 'client_credentials',
        'client_id': API_KEY,
        'client_secret': SECRET_KEY,
    }
    post_data = urlencode(params)
    if IS_PY3:
        post_data = post_data.encode('utf-8')
    req = Request(TOKEN_URL, post_data)
    try:
        f = urlopen(req, timeout=5)
        try:
            result_str = f.read()
        finally:
            # Always release the connection, even if reading fails.
            f.close()
    except URLError as err:
        # Without a response body there is nothing to parse, so stop here
        # instead of continuing with result_str unbound.
        print(err)
        sys.exit(1)
    if IS_PY3:
        result_str = result_str.decode()
    result = json.loads(result_str)
    if 'access_token' not in result or 'scope' not in result:
        print('please overwrite the correct API_KEY and SECRET_KEY')
        sys.exit(1)
    if 'brain_all_scope' not in result['scope'].split(' '):
        print('please ensure has check the ability')
        sys.exit(1)
    return result['access_token']
# Read a file
def read_file(image_path):
    """Return the raw bytes of the file at *image_path*, or ``None`` on failure.

    Args:
        image_path: Path of the file to read; it is opened in binary mode.

    Returns:
        The file content as bytes, or ``None`` (after printing
        ``read image file fail``) when the file cannot be opened or read.
    """
    try:
        # The context manager closes the file on every exit path.
        with open(image_path, 'rb') as f:
            return f.read()
    # Only I/O failures and invalid path arguments are treated as
    # "cannot read"; KeyboardInterrupt/SystemExit are no longer swallowed.
    except (IOError, OSError, TypeError, ValueError):
        print('read image file fail')
        return None
# Call the remote service
def request(url, data):
    """POST *data* to *url* and return the response body.

    Args:
        url: Target URL.
        data: Request body as text; it is UTF-8 encoded before sending.

    Returns:
        The response body (decoded to text on Python 3), or ``None`` when the
        request raises ``URLError`` (the error is printed).
    """
    req = Request(url, data.encode('utf-8'))
    try:
        f = urlopen(req)
        try:
            result_str = f.read()
        finally:
            # Always release the connection, even if reading fails.
            f.close()
    except URLError as err:
        print(err)
        return None
    if IS_PY3:
        result_str = result_str.decode()
    return result_str
# Recognize the CAPTCHA
def get_code():
    """Send ``vcode.png`` to the OCR service and return the recognized text.

    Fetches an access token, posts the base64-encoded content of
    ``vcode.png`` to ``OCR_URL``, and joins the ``words`` value of every
    entry in the response's ``words_result`` list.

    Returns:
        The recognized text, or an empty string when the image cannot be
        read, the request fails, or the response has no ``words_result``.
    """
    # Fetch the access token
    token = fetch_token()
    # Build the high-accuracy general text recognition URL
    image_url = OCR_URL + "?access_token=" + token
    # Read the CAPTCHA image
    file_content = read_file('vcode.png')
    if file_content is None:
        # read_file already reported the failure; there is nothing to send.
        return ""
    # Call the text recognition service
    result = request(image_url, urlencode({'image': base64.b64encode(file_content)}))
    if result is None:
        # request already printed the error.
        return ""
    result_json = json.loads(result)
    words_results = result_json.get("words_result")
    if words_results is None:
        # A response without words_result carries no recognized text; print
        # it so the reason is visible.
        print('OCR response has no words_result: ' + result)
        return ""
    return "".join(entry["words"] for entry in words_results)
# Perform the login
def phsc_login(username="username", password="password"):
    """Open the login page and submit the form until the page title changes.

    Each attempt captures the CAPTCHA with ``getimage()``, recognizes it with
    ``get_code()``, fills in the user, password and CAPTCHA fields, clicks
    the login button, and waits five seconds.

    Args:
        username: Value typed into the ``user`` field.
        password: Value typed into the ``pass`` field.
    """
    # Imported locally so the function does not rely on a file-level import.
    from selenium.webdriver.common.by import By

    def _fill(xpath, value):
        # Clear the input located by *xpath*, then type *value* into it.
        # find_element(By.XPATH, ...) is used because find_element_by_xpath
        # was removed in Selenium 4.3.
        field = driver.find_element(By.XPATH, xpath)
        field.clear()
        field.send_keys(value)

    driver.get("https://www.shgt.com/trade-web/login")
    time.sleep(5)
    # Keep retrying while the page still shows the login title: a failed
    # login fetches a fresh CAPTCHA and submits again.
    while driver.title == '登入':
        getimage()
        vcode = get_code()
        _fill("//*[@name='user']", username)
        _fill("//*[@name='pass']", password)
        _fill("//*[@name='validateCode']", vcode)
        driver.find_element(
            By.XPATH, "//*[@class='el-button btn_login el-button--button']"
        ).click()
        time.sleep(5)
# Run the login flow; the browser is always closed, even if the login raises.
try:
    phsc_login()
finally:
    driver.quit()
參考文章:
百度OCR接口入門:https://ai.baidu.com/ai-doc/OCR/dk3iqnq51
如何用代碼調用百度OCR服務:https://cloud.baidu.com/doc/OCR/s/Pkrwx9ye4
【Python+selenium】帶圖檔驗證碼的登入自動化實戰:https://www.jianshu.com/p/6755a40d961f
5行Python實作驗證碼識别(識别率一般):https://jishuin.proginn.com/p/763bfbd60bb1