天天看點

爬取有道翻譯

剛入門學習爬蟲,嘗試寫了一段爬取有道翻譯的代碼。

import urllib.request as ur
import urllib.parse as up
import chardet
import json
# Endpoint the form is POSTed to.
URL = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'

# Browser-like User-Agent sent with the request instead of urllib's default.
USER_AGENT = ('Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
              '(KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36')

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10


def build_form_data(text):
    """Return the URL-encoded, UTF-8 encoded POST body for *text*.

    Args:
        text: The words to translate.

    Returns:
        ``bytes`` suitable for the ``data`` argument of ``ur.Request``.
    """
    fields = {
        'i': text,
        'from': 'AUTO',
        'to': 'AUTO',
        'smartresult': 'dict',
        'client': 'fanyideskweb',
        # NOTE(review): 'salt' and 'sign' are fixed values, not derived from
        # *text*; presumably this endpoint does not validate them -- confirm.
        'salt': '1536587001028',
        'sign': '9fe501a15b60074aa1fbbdc15baeac93',
        'doctype': 'json',
        'version': '2.1',
        'keyfrom': 'fanyi.web',
        'action': 'FY_BY_REALTIME',
        'typoResult': 'false',
    }
    return up.urlencode(fields).encode('utf-8')


def decode_body(raw):
    """Decode the raw response bytes to ``str``.

    The body is parsed as JSON afterwards, so UTF-8 is tried first; chardet
    is consulted only when the bytes are not valid UTF-8.

    Raises:
        ValueError: If the bytes are not UTF-8 and chardet cannot name an
            encoding (``chardet.detect`` reports ``None``).
    """
    try:
        return raw.decode('utf-8')
    except UnicodeDecodeError:
        pass
    detected = chardet.detect(raw)['encoding']
    if detected is None:
        raise ValueError('cannot determine the encoding of the response body')
    return raw.decode(detected)


def extract_translation(payload):
    """Return the first translated segment from the decoded JSON *payload*.

    Reads ``payload['translateResult'][0][0]['tgt']``.

    Raises:
        ValueError: If *payload* does not have that structure.
    """
    try:
        return payload['translateResult'][0][0]['tgt']
    except (KeyError, IndexError, TypeError) as err:
        raise ValueError(
            'unexpected response from translation service: {!r}'.format(payload)
        ) from err


def translate(text):
    """POST *text* to ``URL`` and return the translated string.

    Raises:
        urllib.error.URLError: On network failure or timeout.
        ValueError: If the response cannot be decoded or lacks a translation.
    """
    # Headers given as a dict must go through the Request constructor.
    request = ur.Request(URL, build_form_data(text), {'User-Agent': USER_AGENT})
    # The context manager closes the connection even if read() fails.
    with ur.urlopen(request, timeout=REQUEST_TIMEOUT) as response:
        raw = response.read()
    return extract_translation(json.loads(decode_body(raw)))


def main():
    """Prompt for text on stdin, translate it and print the result."""
    # Original author's note: using a Chinese prompt string in input() raised
    # an error in their environment; the cause was not established.
    string = input('please enter the words needing to translate:')
    print(translate(string))


if __name__ == '__main__':
    main()
           

繼續閱讀