使用阿裡雲ECS建立聊天機器人

初次釋出于2018年1月

前期考慮使用了圖靈機器人，有兩種實作方式綁定公衆号和加入群聊。圖靈機器人可以直接綁定公衆号，使用過程還是不錯的，不用明确@，可以直接進行對話，上下文邏輯比微軟小冰要好，有1000條/天的限制。第二種方案則是使用第三方插件，綁定微信号，用機器人運作微信号，缺點就是需要一直運作微信不能退出，邏輯不明顯。好吧，邏輯都不是很明顯。

接下來考慮了Sam Gu調用itchat的方式[1]。Sam 在谷歌雲平台上整合了ItChat微信聊天機器人和深度人工智能應用。要在阿裡雲ECS上重現他的做法，首先安裝Jupyter Notebook。

登陸伺服器，可以通過控制台，或者putty, mobaxterm等工具，前者簡潔後者提供檔案操作界面。

mkdir anaconda #              建立安裝目錄

cd anaconda #              将進入安裝目錄

wget         https://repo.continuum.io/archive/Anaconda3-5.0.1-Linux-x86_64.sh                #              下載下傳安裝包

bash Anaconda3-5.0.1-Linux-x86_64.sh # 安裝；日後若要解除安裝，執行 rm -rf anaconda3

jupyter notebook --generate-config --allow-root

ipython

from notebook.auth import passwd

passwd() # 設定密碼後會生成 hashed password，複制下來

vi ~/.jupyter/jupyter_notebook_config.py

複制粘貼以下幾行，然後運作。具體操作可以參考翼起小飛在社群的博文，即[2]。

c.NotebookApp.ip='*'

c.NotebookApp.password = u'              把上面的文本粘貼到這裡              '

c.NotebookApp.open_browser = False

c.NotebookApp.port =8888

nohup jupyter notebook --allow-root

然後運作Jupyter Notebook，并在浏覽器登入。Sam是在谷歌雲平台（第一年免費，贈送300美元，當然阿裡雲也有學生優惠）上建立的，基本原理是一緻的，他的視訊裡講的也是非常清楚，這裡把這些内容搬運過來。登陸Jupyter Notebook後，建立一個檔案夾存放相關檔案，然後建立Python3檔案，在其指令行裡複制并運作以下指令，第二條需要運作兩次（第一次運作 %load 隻是把腳本内容載入到單元格，第二次才真正執行）。

!git clone         https://github.com/telescopeuser/workshop_blog.git

%load workshop_blog/setup_cloud.py

運作lesson 1就可以開始了。當然這些指令也可以在伺服器直接調用，Jupyter Notebook在這裡更多的是互動界面的作用。接下來的圖形識别要調用機器學習，Sam使用了Google Cloud Platform's Machine Learning APIs，直接調用就可以，複制粘貼API，然後再運作lesson 2。Lesson3 主要講了語音的識别，翻譯。Lesson4就高深一點，涉及到情感語義分析，接下來的lesson5涉及到從視訊中提取資訊。按照參考文獻[1]的知識進行操作即可。但是，這樣調用谷歌API，和之前挂用圖靈機器人，最大的差別就是多了幾行代碼，Well，很多行代碼，和我預想的還是有差距的。

然後在warmheartli的項目裡面發現了些有趣的内容，不過我要準備去擠火車了……

更新于2018年9月17日星期一：收集聊天語料

莫名其妙的更新被删除掉，好在以前的都還留着，重新再發。語料的收集不再考慮使用字幕，改為從微網誌爬取資料，作為日後的語料和機器學習用。爬蟲的方法參考nghuyong，但nghuyong的方法在驗證登入的時候存在無法識别的問題，所以滑塊驗證的方法參考LiuXingMing/WeiboSliderCode；Python3WebSpider/CrackWeiboSlide；bone_ace/article/details/71056741。當然他們的方法也并不總是有效，嘗試了很多方法後，最簡單粗暴的做法是把驗證圖形的矩陣直接放在cookies.py裡面。

#!/usr/bin/env python

# encoding: utf-8

import datetime

import json

import base64

from time import sleep

import os

import time

import random

import io

from PIL import Image

from math import sqrt

import pymongo

from selenium import webdriver

from selenium.webdriver import ActionChains

from selenium.common.exceptions import TimeoutException

from selenium.webdriver.common.by import By

from selenium.webdriver.support.ui import WebDriverWait

from selenium.webdriver.support import expected_conditions as EC

from selenium.webdriver.remote.command import Command

# Weibo accounts whose login cookies are collected by the __main__ section.
# The values below are placeholders: replace them with real credentials.
# (The password literal previously used typographic quotes, which is a
# syntax error in Python; plain ASCII quotes are required.)
WeiBoAccounts = [
    {'username': '登陸賬号', 'password': '密碼'},
]

# Cookie lists read back from MongoDB by init_cookies().
cookies = list()

# Connection to the local MongoDB server. The "userAccount" collection of
# the "Sina" database holds one document per account (see the __main__
# section, which inserts them keyed by username).
client = pymongo.MongoClient(host="localhost", port=27017)
db = client["Sina"]
userAccount = db["userAccount"]

# Screen coordinates filled in by getType() and indexed by draw().
PIXELS = list()

def getExactly(im):
    """Return the crop box that trims the all-white margin around a drawing.

    Args:
        im: A greyscale image-like object exposing ``size`` as
            ``(width, height)`` and ``load()`` returning pixel access that
            is indexed as ``[x, y]``. A pixel value of exactly 255 is
            treated as background; anything else counts as ink.

    Returns:
        A ``(left, upper, right, lower)`` tuple. ``left``/``upper`` are the
        first column/row containing ink and ``right``/``lower`` are one past
        the last column/row containing ink. For an image with no ink at all
        the result is ``(width, height, 0, 0)``.
    """
    # Fetch the pixel accessor once instead of on every single pixel read.
    pixels = im.load()
    width, height = im.size[0], im.size[1]
    imin = imax = jmin = jmax = -1

    # Horizontal extent: imin tracks the last blank column seen before any
    # ink, imax tracks the last column that contains ink.
    for i in range(width):
        if any(pixels[i, j] != 255 for j in range(height)):
            imax = i
        elif imax == -1:
            imin = i

    # Vertical extent, scanned the same way row by row. The non-white test
    # was missing here, which made the box always span the full height.
    for j in range(height):
        if any(pixels[i, j] != 255 for i in range(width)):
            jmax = j
        elif jmax == -1:
            jmin = j

    return (imin + 1, jmin + 1, imax + 1, jmax + 1)

def getType(browser):
    """Identify which reference pattern the on-screen captcha shows.

    Takes a screenshot, crops it to the drawing inside the
    ``patternCaptchaHolder`` element, and compares the crop pixel by pixel
    with every reference matrix in the module-level ``ims`` mapping.

    ``ims`` is not defined in the visible part of this file and must be
    provided at module level; from its use here it maps a pattern name to a
    matrix indexed as ``[x][y]`` of greyscale values.

    Side effect: ``PIXELS`` is replaced in place with the four screen
    coordinates derived from the captcha position. It is reset rather than
    appended to, so a second call does not leave stale coordinates from an
    earlier call at indices 0-3.

    Args:
        browser: Selenium WebDriver showing the captcha.

    Returns:
        The key of the first matching entry in ``ims``, or ``''`` when no
        reference matches (or the cropped drawing is empty).
    """
    ttype = ''
    time.sleep(3.5)  # fixed pause before the screenshot is taken
    im0 = Image.open(io.BytesIO(browser.get_screenshot_as_png()))
    # find_element(By.ID, ...) is equivalent to find_element_by_id and is
    # also available in Selenium releases that dropped the latter.
    box = browser.find_element(By.ID, 'patternCaptchaHolder')
    left = int(box.location['x'])
    top = int(box.location['y'])
    im = im0.crop((
        left + 10,
        top + 100,
        left + box.size['width'] - 10,
        top + box.size['height'] - 10,
    )).convert('L')
    newBox = getExactly(im)
    im = im.crop(newBox)
    width = im.size[0]
    height = im.size[1]
    pixels = im.load()  # fetched once instead of per pixel

    def differs(shot, ref):
        # 245 is the threshold: >= 245 is blank, < 245 is a line. The extra
        # "> 10" tolerance ignores noise sitting right on the 245 boundary.
        return ((shot >= 245) != (ref >= 245)) and abs(ref - shot) > 10

    if width > 0 and height > 0:
        for png in ims.keys():
            template = ims[png]
            matched = not any(
                differs(pixels[i, j], template[i][j])
                for i in range(width)
                for j in range(height)
            )
            # Stop at the first template that matches everywhere; otherwise
            # move on to the next one. (The mismatch branch had lost its
            # `break`, so only the first template was ever compared.)
            if matched:
                ttype = png
                break

    px0_x = box.location['x'] + 40 + newBox[0]
    px1_y = box.location['y'] + 130 + newBox[1]
    PIXELS[:] = [
        (px0_x, px1_y),
        (px0_x + 100, px1_y),
        (px0_x, px1_y + 100),
        (px0_x + 100, px1_y + 100),
    ]
    return ttype

def move(browser, coordinate, coordinate0):
    """Move the mouse from ``coordinate0`` to ``coordinate`` in small hops.

    Each hop is preceded by a 0.05 s pause and covers a random 3-5 px along
    the straight line to the target. Once the remaining distance is below
    4 px, the rest is covered in one final hop.

    Args:
        browser: Selenium WebDriver whose pointer is moved.
        coordinate: ``(x, y)`` destination.
        coordinate0: ``(x, y)`` current pointer position.
    """
    current_x, current_y = coordinate0[0], coordinate0[1]
    while True:
        time.sleep(0.05)
        delta_x = coordinate[0] - current_x
        delta_y = coordinate[1] - current_y
        distance = sqrt(delta_x ** 2 + delta_y ** 2)  # straight-line distance
        if distance < 4:
            # Close enough: finish with a single direct hop.
            ActionChains(browser).move_by_offset(delta_x, delta_y).perform()
            return
        stride = random.randint(3, 5)
        # Split the stride between the axes in proportion to the direction.
        hop_x = int(stride * delta_x / distance)
        hop_y = int(stride * delta_y / distance)
        ActionChains(browser).move_by_offset(hop_x, hop_y).perform()
        current_x += hop_x
        current_y += hop_y

def draw(browser, ttype):
    """Drag the mouse through the four points named by ``ttype``.

    Args:
        browser: Selenium WebDriver showing the captcha.
        ttype: Four-character string; each character is a 1-based index
            into ``PIXELS``. Any other length is reported as a failure and
            nothing is sent to the browser.
    """
    if len(ttype) != 4:
        print('Sorry! Failed! Maybe you need to update the code.')
        return

    # Resolve all four points first so a bad index fails before the mouse
    # button is pressed.
    points = [PIXELS[int(digit) - 1] for digit in ttype]

    # `login` was referenced without ever being defined (NameError). The
    # login button is used as the anchor: move to its centre, then offset to
    # the first point.
    login = browser.find_element(By.ID, 'loginAction')
    start = points[0]
    ActionChains(browser).move_to_element(login).move_by_offset(
        start[0] - login.location['x'] - int(login.size['width'] / 2),
        start[1] - login.location['y'] - int(login.size['height'] / 2),
    ).perform()

    browser.execute(Command.MOUSE_DOWN, {})
    for previous, current in zip(points, points[1:]):
        move(browser, (current[0], current[1]), previous)
    browser.execute(Command.MOUSE_UP, {})

def get_cookie_from_weibo(username, password):
    """Log in to weibo.cn with Chrome and return the session cookies.

    Opens the site, submits the credentials, solves the pattern captcha if
    one appears within 20 seconds, waits 10 seconds, and reads the cookies.

    Args:
        username: Account name typed into the ``loginName`` field.
        password: Password typed into the ``loginPassword`` field.

    Returns:
        Whatever ``browser.get_cookies()`` returns for the session.

    Raises:
        AssertionError: If the page title does not contain the expected text.
    """
    browser = webdriver.Chrome()
    try:
        # NOTE(review): getType() adds fixed pixel offsets, so the window
        # size presumably has to stay at this value; confirm before changing.
        browser.set_window_size(1050, 840)
        browser.get('https://weibo.cn/')
        time.sleep(1)
        # NOTE(review): the two Chinese literals below may have been altered
        # by a Simplified-to-Traditional text conversion of this article;
        # verify them against the live page title and login link text.
        assert "微網誌" in browser.title
        login_link = browser.find_element(By.LINK_TEXT, '登入')
        ActionChains(browser).move_to_element(login_link).click().perform()

        login_name = WebDriverWait(browser, 10).until(
            EC.visibility_of_element_located((By.ID, "loginName")))
        login_password = browser.find_element(By.ID, "loginPassword")
        login_name.send_keys(username)
        login_password.send_keys(password)
        browser.find_element(By.ID, "loginAction").click()

        try:
            WebDriverWait(browser, 20).until(
                EC.presence_of_element_located((By.CLASS_NAME, 'patt-shadow')))
        except TimeoutException:
            # No captcha was shown, so there is nothing to solve. The old
            # code called self.open() here, which raises NameError in a
            # plain function, and then tried to solve a captcha anyway.
            print('No verification codes')
        else:
            ttype = getType(browser)  # identify the graphic path
            print('Result: %s!' % ttype)
            draw(browser, ttype)  # slide along the path

        # Pause for 10 seconds so the login can complete (and be observed
        # in the Chrome window) before the cookies are read.
        time.sleep(10)
        return browser.get_cookies()
    finally:
        # Always end the session so a failed login does not leak a browser.
        browser.quit()

def init_cookies():
    """Append the ``cookie`` field of every ``userAccount`` document to ``cookies``."""
    cookies.extend(record['cookie'] for record in userAccount.find())

if __name__ == "__main__":
    # Start from an empty collection so cookies from earlier runs are not
    # kept. The `try:` line was missing, leaving a dangling `except`.
    try:
        userAccount.drop()
    except Exception as e:
        # Best effort: report the problem instead of hiding it, then go on.
        print('Could not drop the userAccount collection: %s' % e)

    # Log in with each configured account and store its cookies under the
    # account's username.
    for account in WeiBoAccounts:
        cookie = get_cookie_from_weibo(account["username"], account["password"])
        userAccount.insert_one({"_id": account["username"], "cookie": cookie})

TO BE CONTINUED

參考文獻：

[1] Telescopeuser, workshop_blog, (n.d.). https://github.com/telescopeuser/workshop_blog.

[2] 翼起小飛, 如何在阿裡ECS雲端運作Jupyter Notebook進行機器/深度學習？-部落格-雲栖社群-阿裡雲, (n.d.). https://yq.aliyun.com/articles/98527 (accessed February 5, 2018).

使用阿裡雲ECS建立聊天機器人

`passwd() #` `設定密碼會生成` `hashed password` `，複制`

繼續閱讀

無法解析的外部符号 wmain，該符号在函數 "void cdecl mainCRTStartupHelper(struct HINSTANCE *,unsigned short con......

TestLink導出用例轉換工具(XML2Excel)

解碼器用于語義分割：資料依賴的解碼可以實作靈活的特征聚合

YAML簡介和PyYAML安全操作YAML支援的類型YAML的優點：yaml的基本文法python操作

Small tricks

libsvm for python 安裝

學習軟體測試基礎測試第七天

Zeppelin 配置通路 REST APIApache Zeppelin Configuration REST API

【Torch】最簡潔logging使用指南

27. Remove Element(清單)題目代碼

Cloud Studio初體驗

使用 ctypes 進行 Python 和 C 的混合程式設計

【python】【資料處理】畫多元資料分布圖

【python】netconf協定對接管理裝置

「Python 網絡自動化」NETCONF —— Python 使用 NETCONF 管理配置 H3C 網絡裝置

在python中建立excel并寫入

使用阿裡雲ECS建立聊天機器人

passwd() # 設定密碼會生成 hashed password ，複制

繼續閱讀

`passwd() #` `設定密碼會生成` `hashed password` `，複制`