首先先來觀察一下這個式子,感覺似曾相識。他和L2 regularization很像,對于L2 regularization,首先是先要計算Ein的最小值,所謂的Ein就是該模型在目前的訓練資料上犯錯誤的期望值。然後再正則化,是以L2是Minimizing Ein and Regularized L2 Paradigms;而支援向量機正好相反,他是先假設我這個平面是分類正确的,然後minimize W方:
還有對數損失函數交叉熵等等。logistics用的是交叉熵,SVM就是用的hinge loss function。支援向量機就是一個結構風險最小化的近似實作,結構風險相當于期望風險(Eout)的一個上界,它是經驗風險(Ein)和置信區間(Ω模型複雜度)的和,經驗風險依賴于決策函數f的選取,但是置信區間是F的VC維的增函數,兩者是沖突的。沖突展現在:當VC維數變大的時候可以選到更好的f使得經驗風險比較小,但是此時的置信區間比較大。這就是對應了VC bound理論。還好去聽了台灣大學林軒田老師課程,對這些機器學習理論基礎有了解。
# Fetch the two synthetic point clouds and visualise them before training.
positive , negative , data = get_positive_and_negative()
show_picture(positive , negative)
最後調用一些看看這些點是什麼:
支援向量機(Support Vector Machine)支援向量機
還有一些是對α的限制和一些工具函數:
def select_jrand(i , m):
    """Pick a random partner index j in [0, m) that differs from i.

    Used by the simplified SMO step to choose the second alpha at random.

    Args:
        i: index that must NOT be returned.
        m: exclusive upper bound for the index range.

    Returns:
        An int j with 0 <= j < m and j != i.
    """
    j = i
    # random.uniform(0, m) yields a float in [0, m); int() truncates to 0..m-1.
    while j == i:
        j = int(random.uniform(0 , m))
    return j
def clip_alpha(aj , H , L):
    """Clip alpha value *aj* into the box constraint [L, H].

    Args:
        aj: candidate alpha value.
        H: upper bound of the feasible interval.
        L: lower bound of the feasible interval.

    Returns:
        aj clamped so that L <= result <= H.
    """
    if aj > H:
        return H
    if aj < L:
        return L
    return aj
接下來就是實作支援向量機了:
def _plot_svm_result(data_mat, label_mat, alphas, b, w_best, m):
    """Scatter both classes, mark support vectors (alpha > 0) and draw
    the separating line w.x + b = 0 over x in [0, 100]."""
    support_x, support_y = [], []
    class1_x, class1_y = [], []
    class01_x, class01_y = [], []
    for i in range(m):
        if alphas[i] > 0.0:
            support_x.append(data_mat[i, 0])
            support_y.append(data_mat[i, 1])
    for i in range(m):
        if label_mat[i] == 1:
            class1_x.append(data_mat[i, 0])
            class1_y.append(data_mat[i, 1])
        else:
            class01_x.append(data_mat[i, 0])
            class01_y.append(data_mat[i, 1])
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.scatter(support_x, support_y, s=100, c="y", marker="v", label="support_v")
    ax.scatter(class1_x, class1_y, s=30, c="b", marker="o", label="class 1")
    ax.scatter(class01_x, class01_y, s=30, c="r", marker="x", label="class -1")
    lin_x = np.linspace(0, 100)
    lin_y = (-float(b) - w_best[0, 0] * lin_x) / w_best[0, 1]
    plt.plot(lin_x, lin_y, color="black")
    ax.legend()
    ax.set_xlabel("factor1")
    ax.set_ylabel("factor2")
    plt.show()


def SVM(data_mat, class_label, C, tolar, max_iter):
    """Train a linear soft-margin SVM with the simplified SMO algorithm.

    Args:
        data_mat: (m, n) sample matrix (list of lists or array-like).
        class_label: m labels in {+1, -1}.
        C: box-constraint penalty for slack.
        tolar: KKT violation tolerance.
        max_iter: number of consecutive passes without any alpha change
            required before stopping.

    Returns:
        (b, alphas): the bias and the (m, 1) array of Lagrange multipliers.
    """
    data_mat = np.mat(data_mat)
    # BUG FIX: labels must be a column vector (m, 1); without .transpose()
    # label_mat[i] indexes rows of a 1-by-m matrix and the elementwise
    # product with alphas broadcasts to the wrong shape.
    label_mat = np.mat(class_label).transpose()
    b = 0
    m, n = np.shape(data_mat)
    alphas = np.zeros((m, 1))
    stable_passes = 0  # consecutive full passes with no alpha update
    while stable_passes < max_iter:
        alpha_pairs_changed = 0
        for i in range(m):
            # f(x_i) = w^T x_i + b with w = sum_k alpha_k * y_k * x_k
            w_t = np.dot(np.multiply(alphas, label_mat).T, data_mat)
            f_xi = float(np.dot(w_t, data_mat[i, :].T)) + b
            Ei = f_xi - float(label_mat[i])
            # Optimize i only when it violates the KKT conditions by > tolar.
            if ((label_mat[i] * Ei < -tolar) and (alphas[i] < C)) or \
               ((label_mat[i] * Ei > tolar) and (alphas[i] > 0)):
                j = Tools.select_jrand(i, m)
                # w has not changed since f_xi, so reuse w_t for f(x_j).
                f_xj = float(np.dot(w_t, data_mat[j, :].T)) + b
                Ej = f_xj - float(label_mat[j])
                alpha_iold = alphas[i].copy()
                alpha_jold = alphas[j].copy()
                # Bounds keeping the pair on its constraint line inside [0, C].
                if label_mat[i] != label_mat[j]:
                    L = max(0, alphas[j] - alphas[i])
                    H = min(C, C + alphas[j] - alphas[i])
                else:
                    L = max(0, alphas[j] + alphas[i] - C)
                    H = min(C, alphas[j] + alphas[i])
                if H == L:
                    continue
                # eta = 2*K12 - K11 - K22 must be negative for a maximum.
                eta = 2.0 * data_mat[i, :] * data_mat[j, :].T \
                    - data_mat[i, :] * data_mat[i, :].T \
                    - data_mat[j, :] * data_mat[j, :].T
                if eta >= 0:
                    continue
                # BUG FIX: the analytic update is
                #     alpha_j <- alpha_j - y_j * (Ei - Ej) / eta
                # The original divided (alpha_j - y_j*(Ei - Ej)) by eta,
                # i.e. it wrongly scaled alpha_j itself by 1/eta.
                alphas[j] -= label_mat[j] * (Ei - Ej) / eta
                alphas[j] = Tools.clip_alpha(alphas[j], H, L)
                if abs(alphas[j] - alpha_jold) < 0.00001:
                    continue
                # alpha_i moves oppositely so sum(alpha*y) stays constant.
                alphas[i] += label_mat[j] * label_mat[i] * (alpha_jold - alphas[j])
                b1 = b - Ei + label_mat[i] * (alpha_iold - alphas[i]) * np.dot(data_mat[i, :], data_mat[i, :].T) + \
                    label_mat[j] * (alpha_jold - alphas[j]) * np.dot(data_mat[i, :], data_mat[j, :].T)
                b2 = b - Ej + label_mat[i] * (alpha_iold - alphas[i]) * np.dot(data_mat[i, :], data_mat[j, :].T) + \
                    label_mat[j] * (alpha_jold - alphas[j]) * np.dot(data_mat[j, :], data_mat[j, :].T)
                if (0 < alphas[i]) and (C > alphas[i]):
                    b = b1
                elif (0 < alphas[j]) and (C > alphas[j]):
                    b = b2
                else:
                    b = (b1 + b2) / 2.0
                print(b)
                alpha_pairs_changed += 1
        if alpha_pairs_changed == 0:
            stable_passes += 1
        else:
            stable_passes = 0
    w_best = np.dot(np.multiply(alphas, label_mat).T, data_mat)
    _plot_svm_result(data_mat, label_mat, alphas, b, w_best, m)
    return b, alphas
# Load the toy data set and train: C=0.6 (penalty), tolerance=0.001,
# stop after 10 consecutive passes without any alpha-pair change.
datamat , labelmat = dataSet.load_data_set()
b, alphas = SVM(datamat , labelmat , 0.6 , 0.001 , 10)
print(b , alphas)
首先傳入的後面幾個參數分別是懲罰力度C、容忍度tolar和最大疊代次數max_iter。比較重要的應該是這一句:
if ((label_mat[i]*Ei < -tolar) and (alphas[i] < C)) or ((label_mat[i]*Ei > tolar) and (alphas[i] > 0)):
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import random
import Tool
import smo_class
import KernelTransform
def innerL(i ,os):
    """Attempt one SMO optimization step for sample *i* (linear version).

    Picks a partner index j (via Tool.selectj, largest |Ei - Ej|), solves
    the two-variable subproblem analytically, clips alpha_j to its box
    constraint, updates the error cache and the bias os.b.

    Args:
        i: index of the first alpha to optimize.
        os: optimization struct holding x, labels, alphas, b, C, toler,
            eCache (see smo_class.optStruct).

    Returns:
        1 if the alpha pair was actually updated, otherwise 0.
    """
    Ei = Tool.calculateEi(os , i)
    # Optimize i only when it violates the KKT conditions by more than toler.
    if ((os.labels[i]*Ei < -os.toler) and (os.alphas[i] < os.C)) or \
            ((os.labels[i]*Ei > os.toler) and (os.alphas[i] > 0)):
        j , Ej = Tool.selectj(i , os , Ei)
        alphaIold = os.alphas[i].copy()
        alphaJold = os.alphas[j].copy()
        # Bounds L, H keep the pair on its constraint line inside [0, C].
        if (os.labels[i] != os.labels[j]):
            L = max(0 , os.alphas[j] - os.alphas[i])
            H = min(os.C , os.C + os.alphas[j] - os.alphas[i])
        else:
            L = max(0 , os.alphas[j] + os.alphas[i] - os.C)
            H = min(os.C , os.alphas[j] + os.alphas[i])
        if L == H:
            return 0
        # eta = 2*K12 - K11 - K22; must be negative for a proper maximum.
        eta = 2.0*os.x[i,:]*os.x[j,:].T - os.x[i,:]*os.x[i,:].T - os.x[j,:]*os.x[j,:].T
        if eta >= 0:
            print('η> 0,the kernel matrix is not semi-positive definite')
            return 0
        # Analytic update of alpha_j, then clip into [L, H].
        os.alphas[j] -= os.labels[j]*(Ei - Ej)/eta
        os.alphas[j] = Tool.rangeSelectionForAlpha(os.alphas[j] , H , L)
        Tool.updateEk(os , j)
        if (abs(os.alphas[j] - alphaJold) < 0.00001):
            print("j not moving enough")
            return 0
        # alpha_i moves oppositely so that sum(alpha * y) stays constant.
        os.alphas[i] += os.labels[j] * os.labels[i] * (alphaJold - os.alphas[j])
        Tool.updateEk(os , i)
        b1 = os.b - Ei - os.labels[i] * (os.alphas[i] - alphaIold) * \
            os.x[i, :] * os.x[i, :].T - os.labels[j] * \
            (os.alphas[j] - alphaJold) * os.x[i, :] * os.x[j, :].T
        b2 = os.b - Ej - os.labels[i] * (os.alphas[i] - alphaIold) * \
            os.x[i, :] * os.x[j, :].T - os.labels[j] * \
            (os.alphas[j] - alphaJold) * os.x[j, :] * os.x[j, :].T
        # b is exact when the corresponding alpha is strictly inside (0, C).
        if (0 < os.alphas[i]) and (os.C > os.alphas[i]):
            os.b = b1
        elif (0 < os.alphas[j]) and (os.C > os.alphas[j]):
            os.b = b2
        else:
            os.b = (b1 + b2) / 2.0
        return 1
    else:
        return 0
def smo(data,labels,C = 0.6,toler = 0.001,maxIter = 40 , kernel = True):
    """Full Platt SMO outer loop.

    Alternates between sweeping the entire data set and sweeping only the
    non-bound samples (0 < alpha < C) until maxIter passes elapse or no
    alpha changes in a non-bound sweep.

    Args:
        data: (m, n) samples.
        labels: m class labels in {+1, -1}.
        C: box-constraint penalty.
        toler: KKT violation tolerance.
        maxIter: maximum number of outer passes.
        kernel: when True use the kernelised innerL from KernelTransform,
            otherwise the linear innerL defined in this module.

    Returns:
        (oS.b, oS.alphas): trained bias and multipliers.
    """
    oS = smo_class.optStruct(np.mat(data),np.mat(labels).transpose(),C,toler)
    num_passes = 0  # renamed from `iter` to avoid shadowing the builtin
    entireSet = True
    alphaPairsChanged = 0
    while (num_passes < maxIter) and ((alphaPairsChanged > 0) or (entireSet)):
        alphaPairsChanged = 0
        if entireSet:
            # Full sweep over every sample index.
            for i in range(oS.m):
                if kernel == True:
                    alphaPairsChanged += KernelTransform.innerL(i,oS)
                else:
                    alphaPairsChanged += innerL(i, oS)
                print("fullSet,iter: %d i: %d,pairs changed %d" %\
                      (num_passes,i,alphaPairsChanged))
            num_passes += 1
        else:
            # Non-zero product selects indices with 0 < alpha < C,
            # e.g. [0,1,1,0,0]*[1,1,0,1,0] = [0,1,0,0,0].
            nonBoundIs = np.nonzero((oS.alphas.A > 0)*(oS.alphas.A < C))[0]
            for i in nonBoundIs:
                alphaPairsChanged += innerL(i,oS)
                print("nou-bound,iter: %d i:%d,pairs changed %d" % (num_passes,i,alphaPairsChanged))
            num_passes += 1
        # entireSet toggles the alternating sweep strategy: after a full
        # sweep go non-bound; return to full only when nothing moved.
        if entireSet:
            entireSet = False
        elif (alphaPairsChanged == 0):
            entireSet = True
        print("iteration number:%d" % num_passes)
    return oS.b,oS.alphas
entireSet 就是全集遍歷與非邊界遍歷交替策略的標誌。貌似沒有什麼好說的。
之後就是執行函數這些了:
import Tool
import SMO
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import KernelTransform
def calculateW(alphas , data , labels):
    """Recover the primal weight vector w = sum_i alpha_i * y_i * x_i.

    Only samples with alpha != 0 (the support vectors) contribute to w.

    Args:
        alphas: m trained Lagrange multipliers (list, array or matrix).
        data: (m, n) samples.
        labels: m class labels in {+1, -1}.

    Returns:
        An (n, 1) numpy array holding w.
    """
    x = np.mat(data)
    label = np.mat(labels).transpose()
    # One matrix product instead of a per-sample Python loop:
    # X^T (alpha ∘ y) == sum_i alpha_i y_i x_i
    coef = np.multiply(np.mat(alphas).reshape(-1, 1), label)
    return np.asarray(x.T * coef)
if __name__ == '__main__':
    # Linear case: train without a kernel, recover w and draw the boundary.
    samples, targets = Tool.loadDataSet('../Data/testSet.txt')
    b, alphas = SMO.smo(samples, targets, kernel=False)
    weight = calculateW(alphas, samples, targets)
    print(weight)
    grid = np.arange(0, 11)
    boundary = (-b - weight[0] * grid) / weight[1]
    Tool.drawDataset(samples, targets, grid, boundary.tolist()[0], line=True, alphas=alphas)
    # RBF case: non-linear data through the kernelised SMO; no straight
    # boundary to draw, just highlight the support vectors.
    samples, targets = Tool.loadDataSet('../Data/testSetRBF.txt')
    b, alphas = SMO.smo(samples, targets, kernel=True, maxIter=100)
    sv_indices = np.nonzero(alphas.A > 0)[0]
    Tool.drawDataset(samples, targets, line=False, alphas=alphas)
有一個是kernel function的,先不用管。
支援向量機(Support Vector Machine)支援向量機
圈起來的是支援向量點,好很多了。
⑩算法實作——version 3
kernel function加上,先看看原來的資料:
支援向量機(Support Vector Machine)支援向量機
需要改的其實就是內積就可以了,到處看看哪裡有內積就改改它,修改過後的innerL和smo:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import Tool
def kernelTrans(X,A,kTup):
    """Evaluate the kernel between every row of X and the single row A.

    Args:
        X: (m, n) sample matrix.
        A: (1, n) single sample.
        kTup: tuple (name, sigma); 'lin' is the plain inner product,
            'rbf' a Gaussian kernel of width kTup[1].

    Returns:
        (m, 1) column matrix K with K[j] = k(X[j], A).

    Raises:
        ValueError: if kTup[0] names an unsupported kernel.
    """
    m,n = np.shape(X)
    K = np.mat(np.zeros((m,1)))
    if kTup[0]=='lin':
        K = X*A.T
    elif kTup[0] =='rbf':
        for j in range(m):
            deltRow = X[j,:]-A
            K[j] = deltRow*deltRow.T            # squared euclidean distance
        K = np.exp(K/(-1*kTup[1]**2))           # exp(-||x - a||^2 / sigma^2)
    else:
        # Fail loudly instead of silently returning the zero column.
        raise ValueError('unsupported kernel type: %r' % (kTup[0],))
    return K
def innerL(i ,os):
    """Attempt one SMO optimization step for sample *i* (kernel version).

    Identical to the linear innerL except every inner product is replaced
    by a lookup in the precomputed kernel matrix os.K.

    Args:
        i: index of the first alpha to optimize.
        os: optimization struct holding K, labels, alphas, b, C, toler,
            eCache.

    Returns:
        1 if the alpha pair was actually updated, otherwise 0.
    """
    Ei = calculateEi(os , i)
    # Optimize i only when it violates the KKT conditions by more than toler.
    if ((os.labels[i]*Ei < -os.toler) and (os.alphas[i] < os.C)) or \
            ((os.labels[i]*Ei > os.toler) and (os.alphas[i] > 0)):
        j , Ej = Tool.selectj(i , os , Ei)
        alphaIold = os.alphas[i].copy()
        alphaJold = os.alphas[j].copy()
        # Bounds L, H keep the pair on its constraint line inside [0, C].
        if (os.labels[i] != os.labels[j]):
            L = max(0 , os.alphas[j] - os.alphas[i])
            H = min(os.C , os.C + os.alphas[j] - os.alphas[i])
        else:
            L = max(0 , os.alphas[j] + os.alphas[i] - os.C)
            H = min(os.C , os.alphas[j] + os.alphas[i])
        if L == H:
            return 0
        # eta = 2*K_ij - K_ii - K_jj; must be negative for a proper maximum.
        eta = 2.0 * os.K[i, j] - os.K[i, i] - os.K[j, j]
        if eta >= 0:
            print('η> 0,the kernel matrix is not semi-positive definite')
            return 0
        # Analytic update of alpha_j, then clip into [L, H].
        os.alphas[j] -= os.labels[j]*(Ei - Ej)/eta
        os.alphas[j] = Tool.rangeSelectionForAlpha(os.alphas[j] , H , L)
        updateEk(os , j)
        if (abs(os.alphas[j] - alphaJold) < 0.00001):
            print("j not moving enough")
            return 0
        # alpha_i moves oppositely so that sum(alpha * y) stays constant.
        os.alphas[i] += os.labels[j] * os.labels[i] * (alphaJold - os.alphas[j])
        updateEk(os , i)
        b1 = os.b - Ei - os.labels[i] * (os.alphas[i] - alphaIold) * \
            os.K[i , i] - os.labels[j] * \
            (os.alphas[j] - alphaJold) * os.K[i , j]
        b2 = os.b - Ej - os.labels[i] * (os.alphas[i] - alphaIold) * \
            os.K[i , j] - os.labels[j] * \
            (os.alphas[j] - alphaJold) * os.K[j , j]
        # b is exact when the corresponding alpha is strictly inside (0, C).
        if (0 < os.alphas[i]) and (os.C > os.alphas[i]):
            os.b = b1
        elif (0 < os.alphas[j]) and (os.C > os.alphas[j]):
            os.b = b2
        else:
            os.b = (b1 + b2) / 2.0
        return 1
    else:
        return 0
def calculateEi(os , k):
    """Prediction error E_k = f(x_k) - y_k for sample k.

    f(x_k) is evaluated through the precomputed kernel column os.K[:, k]:
    f(x_k) = sum_i alpha_i * y_i * K(x_i, x_k) + b.
    """
    prediction = float(np.multiply(os.alphas, os.labels).T * os.K[:, k] + os.b)
    return prediction - float(os.labels[k])
def updateEk(os,k):
    """Refresh the cached error for sample *k* after its alpha changed.

    eCache rows are [valid_flag, Ek]; the flag 1 marks the entry as usable
    by the second-alpha selection heuristic.
    """
    os.eCache[k] = [1, calculateEi(os, k)]