在使用 Python 和 tesseract-ocr 识别网站验证码的基础上,进行登录并获取 token(已加入循环重试,保证获取到的 token 可用)。
具体实现代码如下。代码中与项目组件相关的内容可以忽略,重点是实现登录的代码,以及循环调用登录接口以保证获取到的 token 一定可用。
# -*- coding: utf-8 -*-
"""
@Time : 2021/4/25 20:35
@Auth : supassxu
@File :get_token.py
@IDE :PyCharm
@Motto:work steadily
"""
from public.base_requests import Requests
from public.log import Log
import pytesseract
from PIL import Image # 图形处理的库
# 注意:如果是"data:image/jpg:base64,",那你保存的就要以png格式,如果是"data:image/png:base64,"那你保存的时候就以jpg格式。
import json
import base64
# 请求url
import requests
from tools.read_config import Read_conf
log = Log()
# Parse the ini file once and reuse the result; the three values below all
# come from the same 'test.ini', so re-reading it per value is wasted work.
_config = Read_conf().load_ini('test.ini')
# Full URL of the login endpoint (configured root URL + fixed API path).
login_url = _config['ROOT_URL']['root_url'] + "/api/preposition/system/login"
# Credentials sent as 'jobNumber' / 'password' in the login payload.
username = _config['ACCOUNT_PASSWORD']['account']
password = _config['ACCOUNT_PASSWORD']['password']
class Before:
    """Log in with an OCR-decoded captcha and expose the resulting token.

    Constructing an instance performs one login attempt and stores the HTTP
    response in ``self.response`` (``None`` if the attempt raised).
    ``get_token`` then retries the login, each time with a freshly fetched
    captcha, until the server reports success or the attempt budget is spent.
    """

    # Upper bound on login attempts (the one made by __init__ included), so a
    # captcha that OCR can never read does not loop forever.
    MAX_LOGIN_ATTEMPTS = 20
    # Seconds to wait for each HTTP call before giving up on it.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        """Perform the first login attempt; see ``self.response`` for its outcome."""
        self.response = None
        self._login()

    def _login(self):
        """Fetch a new captcha, post the credentials and store the response.

        Any exception is logged and leaves ``self.response`` set to ``None``,
        which ``get_token`` treats as a failed attempt.
        """
        try:
            payload = {
                'jobNumber': username,
                'password': password,
                "verifyCode": self.get_pngcode(),
            }
            header = {
                "Content-Type": "application/json;charset=UTF-8",
                "Connection": "keep-alive",
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
            }
            # The original posted to login_url without the payload or headers,
            # so the credentials and captcha never reached the server.
            # NOTE(review): the project's Requests().post wrapper signature is
            # not visible here, so the plain `requests` API (already used by
            # get_pngcode) is called directly - switch back if the wrapper
            # accepts a body and headers.
            self.response = requests.post(
                login_url,
                json=payload,
                headers=header,
                timeout=Before.REQUEST_TIMEOUT,
            )
        except Exception as e:
            self.response = None
            log.error('初始化登录失败:{}'.format(e))

    def _login_succeeded(self):
        """Return True when the stored response is JSON with a truthy 'flag'."""
        if self.response is None:
            return False
        try:
            return bool(self.response.json()['flag'])
        except (ValueError, KeyError, TypeError):
            # Non-JSON body or unexpected shape: treat as a failed login.
            return False

    def get_token(self):
        """Return the login token, retrying the login while it keeps failing.

        A wrong OCR reading of the captcha makes the login fail, so the whole
        captcha-fetch + login sequence is repeated until the response carries
        a truthy 'flag'.

        Returns:
            The value at ``['data']['token']`` of the successful response.

        Raises:
            RuntimeError: if no attempt succeeds within MAX_LOGIN_ATTEMPTS.
        """
        attempts = 1  # __init__ already made the first attempt
        while not self._login_succeeded():
            if attempts >= self.MAX_LOGIN_ATTEMPTS:
                raise RuntimeError(
                    'login failed after {} attempts'.format(attempts)
                )
            self._login()
            attempts += 1
        token = self.response.json()['data']['token']
        print(token)
        return token

    def get_pngcode(self):
        """Download the captcha image and return the text OCR reads from it.

        The endpoint's JSON 'data' field is base64-decoded and written to
        '001.png'; the binarised image handed to Tesseract is saved as
        'aa.png'. Both files land in the current working directory.

        Returns:
            The recognised text with all whitespace removed.
        """
        # Captcha endpoint, built from the root URL in the config file.
        url = Read_conf().load_ini('test.ini')['ROOT_URL']['root_url'] + "/api/preposition/system/image"
        headers = {
            "Accept": "*/*",
            "User-Agent": "python-requests/2.9.1",
            "Connection": "keep-alive",
            "Content-Type": "text/plain;charset=UTF-8",
        }
        r = requests.get(url=url, headers=headers, timeout=Before.REQUEST_TIMEOUT)
        result = json.loads(r.text)
        img_data = base64.b64decode(result['data'])
        with open('001.png', 'wb') as f:
            f.write(img_data)
        # Image.open keeps the file handle open until the image is closed, so
        # use it as a context manager; convert() returns an independent copy.
        with Image.open('001.png') as img2:
            gray = img2.convert('L')  # 'L' = 8-bit greyscale
        # Binarise: pixels darker than 180 become black (0), everything else
        # white (255), leaving dark characters on a white background.
        bw = gray.point(lambda x: 0 if x < 180 else 255, '1')
        bw.save('aa.png')
        # '--psm 6' (single uniform block of text) is the flag spelling
        # current Tesseract releases accept; the old '-psm' form is rejected.
        raw = pytesseract.image_to_string(bw, lang="eng", config="--psm 6")
        # Drop every whitespace character, not only '\n' and ' ', so stray
        # form feeds or tabs in the OCR output cannot corrupt the code.
        vcode = "".join(raw.split())
        return vcode
if __name__ == '__main__':
    # Script entry point: log in, fetch the token, then print the Before
    # instance itself.
    before = Before()
    before.get_token()
    print(before)
执行效果如下图: