一、頭檔案
基本模組導入
import pandas as pd
import numpy as np
import cv2
import os
import matplotlib.pyplot as plt
import re
機器學習模組:
from sklearn.cross_validation import train_test_split #分資料
import mglearn
from sklearn.linear_model import Ridge
from sklearn import datasets, linear_model
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from mglearn import plot_2d_separator
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
深度學習模組:
from keras.models import Sequential
from keras.layers import Dense,Dropout,Flatten,Conv2D,MaxPooling2D
from keras.models import load_model
讀取檔案方法,好多包都可讀取檔案:
C:/pypractise/fangzhen2/cj.csv
text = open('test_txt/14.txt','r')
q = text.read()
data=pd.read_csv('C:/pypractise/3/Advertising.csv')
讀取檔案清單,以便依次遍歷檔案
def get_imglist(path, ext='.jpg'):
    """Return full paths of the files directly under *path* whose names
    end with *ext*.

    Parameters
    ----------
    path : str
        Directory to scan (non-recursive).
    ext : str, optional
        Filename suffix filter; defaults to '.jpg', preserving the
        original hard-coded behaviour.

    Returns
    -------
    list of str
        Paths in ``os.listdir`` order (arbitrary / OS dependent).
    """
    return [os.path.join(path, f) for f in os.listdir(path) if f.endswith(ext)]
寫入 xlsx,大檔案可用
def data_w(datas):
    """Write two series into column 0 of 'demo1.xlsx', interleaved by row.

    datas[0] fills the even rows (0, 2, 4, ...) and datas[1] the odd rows
    (1, 3, 5, ...).  xlsxwriter streams, so this works for large files.
    """
    book = xlsxwriter.Workbook('demo1.xlsx')  # create the xlsx file
    sheet = book.add_worksheet(u'sheet1')
    for row, value in enumerate(datas[0]):
        sheet.write(2 * row, 0, value)        # even rows: 0, 2, 4, ...
    for row, value in enumerate(datas[1]):
        sheet.write(2 * row + 1, 0, value)    # odd rows: 1, 3, 5, ...
    book.close()
將資料寫入 Excel 表格中:
def data_write(datas):
    """Write each element of *datas* into column 0 of an .xls workbook,
    one element per row, then save the workbook as 'ex.xls'.
    """
    book = xlwt.Workbook()
    # sheet is named 'ex.xls' (kept exactly as the original);
    # cell_overwrite_ok lets a cell be written more than once
    sheet = book.add_sheet(u'ex.xls', cell_overwrite_ok=True)
    # row index comes from enumerate; column is always 0
    for row, value in enumerate(datas):
        sheet.write(row, 0, value)
    book.save('ex.xls')  # persist the workbook
將 txt 檔案寫到 csv 中:
def data_write(txt_path='che.txt', csv_path='shuju.csv'):
    """Convert a space-separated text file into a CSV file.

    Each line of *txt_path* is split on single spaces and written as one
    CSV row.  The default arguments keep the original hard-coded file
    names, so the zero-argument call still behaves as before.

    Parameters
    ----------
    txt_path : str, optional
        Input text file (utf-8), one record per line.
    csv_path : str, optional
        Output CSV file.
    """
    import csv  # stdlib; imported locally -- no top-level csv import is visible in this file
    # explicit utf-8 on the output so non-ASCII fields don't depend on
    # the platform default encoding (the original omitted it)
    with open(csv_path, 'w+', newline='', encoding='utf-8') as csvfile:
        spamwriter = csv.writer(csvfile, dialect='excel')
        with open(txt_path, 'r', encoding='utf-8') as filein:
            for line in filein:
                # NOTE(review): the original comment said fields were
                # separated by '@@@', but the code splits on a single
                # space -- the code's behaviour is kept; confirm which
                # format the data actually uses.
                spamwriter.writerow(line.strip('\n').split(' '))
正則語句切分. 文獻去頭尾;
#(\.(?=\s+(?:[A-Z])))|(\). )|(\.[ 0-9|A-Z])|(ACKNOWLEDGMENTS.*$)|(.*AUTHOR INFORMATION*$)|(REFERENCES.*$)|(References.*$)|(\?+)|(^.*ABSTRACT:)|(^.*INTRODUCTION )| (^.*CONSPECTUS:)
#正則規則去除開頭結尾和*?
# 切分 \.(?=[\d+](?=\s+(?:[A-Z])))|\.(?=\s+(?:[A-Z])))
# 删除開頭結尾 (ACKNOWLEDGMENTS.*$)|(AUTHOR.INFORMATION.*$)|(REFERENCES.*$)|(References.*$)|(^.*ABSTRACT:)|(^.*INTRODUCTION )| (^.*CONSPECTUS:)|(ASSOCIATED.CONTENT.*$)
#替換 ?和* (\?+)|(\*+)
# Strip every match of pattern1 from the text q (result is discarded here;
# assign it to keep it, e.g. q = re.sub(...)).
# NOTE(review): pattern1 and q must already be defined in the session --
# as written this line raises NameError otherwise.
re.sub(pattern1,'',q)
# Compile a single-space pattern; the compiled object is discarded
# (kept as a reference snippet).
re.compile(r' ')
調用百度翻譯 API:
appid 與 secretKey 填自己的帳號
def fanyi(yuju):
    """Translate *yuju* from English to Chinese via the Baidu fanyi API
    (field-translate endpoint, 'medicine' domain).

    Parameters
    ----------
    yuju : str
        Source text to translate.

    Returns
    -------
    dict or None
        Parsed JSON response (translations live under
        ``html["trans_result"][i]["src"/"dst"]``), or None when the
        request fails (the exception is printed, not raised).
    """
    import hashlib
    import http.client
    import json
    import random
    from urllib import parse

    myurl = '/api/trans/vip/fieldtranslate'  # API path; host is fixed below
    appid = ' '        # fill in your own Baidu appid
    secretKey = ' '    # fill in your own secret key
    q = yuju
    salt = str(random.randint(32768, 65536))
    domain = 'medicine'
    # md5 signature over appid + query + salt + domain + secretKey,
    # as the Baidu API requires.  (The original also called
    # q.encode(...) and discarded the result -- that no-op is removed.)
    sign = hashlib.md5(
        (appid + q + salt + domain + secretKey).encode(encoding='utf-8')
    ).hexdigest()
    fromLang = 'en'
    toLang = 'zh'
    myurl = (myurl + '?q=' + parse.quote(q) + '&from=' + fromLang
             + '&to=' + toLang + '&appid=' + appid + '&salt=' + salt
             + '&domain=' + 'medicine' + '&sign=' + sign)
    httpClient = None
    # initialize so a failed request returns None instead of raising
    # NameError at the return statement (bug in the original)
    html = None
    try:
        httpClient = http.client.HTTPConnection('api.fanyi.baidu.com')  # API host
        httpClient.request('GET', myurl)
        response = httpClient.getresponse()
        html = json.loads(response.read().decode('utf-8'))  # parsed result
        # ds1t = html["trans_result"][0]["dst"]
        # src1 = html["trans_result"][0]["src"]
    except Exception as e:
        print(e)
    finally:
        if httpClient:
            httpClient.close()
    return html
畫圖方法:
一般的畫出來:
# Plotting snippet: predicted vs. test values on one axes.
# NOTE(review): y_pred and y_test must already be defined in the session.
# NOTE(review): title/xlim/ylim below are applied to the CURRENT figure
# before plt.figure() opens a new, empty one -- confirm the intended order
# (usually plt.figure() comes first).
plt.title("ATP curve") #set the title
plt.xlim(xmax=2500,xmin=-250) #set the x/y axis ranges
plt.ylim(ymax=220,ymin=0)
plt.figure()
plt.plot(range(len(y_pred)),y_pred,'b',label='predict')
plt.plot(range(len(y_pred)),y_test,'r',label='test')
plt.legend(loc="upper right") #legend position
plt.xlabel("the number of sales")
plt.ylabel("value of sales")
plt.show()