簡單介紹
本文主要分為兩個内容.
一.使用Logistic回歸完成多分類任務.值得注意的是,完成的過程中并未使用SoftMax,所以各類的機率相加并不為1,可以把它看作使用Logistic完成多個互相獨立的二分類任務(one-vs-all).
二.使用多層神經網絡完成多分類任務.本文并未自行訓練,而是直接使用已經訓練好的權重矩陣對網絡進行初始化後做預測(嚴格來說,這隻是載入預訓練權重,并沒有繼續訓練,所以不算真正的fine-tune).
此次多分類任務為手寫數字識别,使用兩種方法均可以達到不錯的效果,第一種的準确率可以達到94%,第二種達到了97.52%(兩者均在訓練集上測得).
源代碼以及題目等檔案見部落格
1.Logistic回歸
導入必要的包并對資料預處理
# load package and load data
import torch
import torch.nn as nn
import torch.utils.data as Data
import matplotlib.pyplot as plt
import numpy as np
from scipy.io import loadmat
# define the root path
root_path = '../machine-learning-ex3/ex3/'
# define the path of dataset
train_path = root_path + 'ex3data1.mat'
file_train_data = loadmat(train_path)
train_features = torch.tensor(file_train_data['X'], dtype=torch.float)
# the labels stored under 'y' run from 1 to 10, but class indices must start from zero
train_labels = torch.tensor(file_train_data['y'], dtype=torch.long) - 1
# Number of classes, computed with a single tensor reduction and converted to a
# plain int. (Python's builtin max() would iterate the tensor row by row and hand
# a tensor, not an int, to torch.zeros.)
num_classes = int(train_labels.max()) + 1
# one-hot encode the labels: scatter_ writes value=1 into column train_labels[i]
# of row i (assumes 'y' is an (N, 1) column so it can serve as the scatter index)
train_labels = torch.zeros(train_labels.shape[0], num_classes, dtype=torch.float).scatter_(
    dim=1, index=train_labels, value=1)
顯示部分資料集
此步驟用來顯示我們要完成的任務,可以省略
# show the partial training data
# Set the figure resolution BEFORE the first subplot call creates the figure;
# rcParams only affect figures created after they are changed.
plt.rcParams['figure.dpi'] = 40
# draw 25 random row indices from the actual dataset size instead of a hard-coded 5000
sample_indices = np.random.randint(0, train_features.shape[0], size=25)
for img_id, img_index in enumerate(sample_indices, start=1):
    # each row is a flattened image that is displayed as a 20x20 grid
    img = train_features[int(img_index)].view(20, 20)
    plt.subplot(5, 5, img_id)
    # hide the axes
    plt.axis('off')
    plt.imshow(img, cmap=plt.cm.gray)
結果如下圖:
![](https://img.laitimes.com/img/9ZDMuAjOiMmIsIjOiQnIsICM38FdsYkRGZkRG9lcvx2bjxiNx8VZ6l2cs0TPR5ENRpWT6VEROBDOsJGcohVYsR2MMBjVtJWd0ckW65UbM5WOHJWa5kHT20ESjBjUIF2X0hXZ0xCMx81dvRWYoNHLrdEZwZ1Rh5WNXp1bwNjW1ZUba9VZwlHdssmch1mclRXY39CXldWYtlWPzNXZj9mcw1ycz9WL49zZuBnLwEzN5IDOzEjM3ITMxkTMwIzLc52YucWbp5GZzNmLn9Gbi1yZtl2Lc9CX6MHc0RHaiojIsJye.png)
定義模型
# define the model
class Logistic(nn.Module):
    """One linear layer followed by an element-wise sigmoid.

    Every output unit is squashed independently, so the outputs of one sample
    are separate per-class scores and are not normalised to sum to 1.
    """

    def __init__(self, input_channels, output_channels):
        """Create the linear layer (input_channels -> output_channels) and the sigmoid."""
        super().__init__()
        self.linear = nn.Linear(in_features=input_channels, out_features=output_channels)
        self.sigmoid = nn.Sigmoid()

    def forward(self, X):
        """Return sigmoid(linear(X))."""
        return self.sigmoid(self.linear(X))
# add regular term
def add_regular_item(net, m, lm):
    """Add the gradient of the L2 regularisation term to every Linear weight gradient.

    For each ``nn.Linear`` found in ``net.modules()`` the weight gradient is
    updated in place with ``lm / m * weight``; biases are left untouched.

    Args:
        net: module whose ``nn.Linear`` sub-modules are regularised.
        m: number of samples in the current batch (the divisor of the term).
        lm: regularisation strength (lambda).

    Linear layers whose weight has no gradient yet (``weight.grad is None``,
    e.g. before the first backward pass) are skipped instead of raising.
    """
    # go through all the modules and pick the ones whose gradient must be modified
    for module in net.modules():
        if not isinstance(module, nn.Linear):
            continue
        grad = module.weight.grad
        if grad is None:
            continue
        # gradient of the regular term: lambda * theta / m  (lm = lambda, weight = theta);
        # detach() keeps this in-place update out of the autograd graph
        grad.add_(lm / m * module.weight.detach())
實作訓練方法和評估方法
def train(net, num_epochs, train_iter, loss, optim, lm=0, is_print=True):
    """Train ``net`` for ``num_epochs`` passes over ``train_iter``.

    Args:
        net: model called as ``net(X)`` on every batch.
        num_epochs: number of passes over ``train_iter``.
        train_iter: iterable yielding ``(X, y)`` batches; ``y`` must be 2-D.
        loss: callable ``loss(y_hat, y)`` returning a tensor; it is summed
            before ``backward()``.
        optim: optimizer providing ``zero_grad()`` and ``step()``.
        lm: regularisation strength passed to ``add_regular_item``; when it is
            0 the regularisation step is skipped because the added term
            would be zero.
        is_print: when truthy, print the average loss and the training
            accuracy every 20 epochs.

    Returns:
        None. ``net`` is updated in place through ``optim``.
    """
    for epoch in range(num_epochs):
        n_batch, sum_loss = 0, 0.0
        for X, y in train_iter:
            optim.zero_grad()
            # m: the number of samples in the current batch
            m = y.shape[0]
            y_hat = net(X)
            l = loss(y_hat, y.view(-1, y.shape[1])).sum()
            l.backward()
            if lm:
                add_regular_item(net, m, lm)
            optim.step()
            # accumulate a plain float so the running total carries no autograd history
            sum_loss += l.item()
            n_batch += 1
        # report only when at least one batch was processed (avoids dividing by zero)
        if is_print and n_batch and (epoch + 1) % 20 == 0:
            print("epoch: %d, average loss: %f" % (epoch+1, sum_loss / n_batch))
            print("epoch: %d, train accuracy: %0.2f%%\n" % (epoch+1, evaluate(net, train_iter)))
def evaluate(net, test_iter):
    """Return the classification accuracy of ``net`` over ``test_iter`` in percent.

    Args:
        net: model called as ``net(X)``; it must return one score per class.
        test_iter: iterable yielding ``(X, y)`` batches where ``y`` is a 2-D
            one-hot (or per-class score) tensor.

    Returns:
        A 0-dim float tensor with the accuracy in the range 0-100.

    Raises:
        ValueError: if ``test_iter`` yields no samples, because the accuracy is
            undefined in that case.
    """
    sum_accurate, num_data = 0, 0
    # inference only: no autograd graph is needed
    with torch.no_grad():
        for X, y in test_iter:
            y = y.view(-1, y.shape[1])
            num_data += y.size()[0]
            y_hat = net(X)
            # a sample is correct when the highest-scoring class matches the label's class
            sum_accurate = sum_accurate + (y_hat.argmax(1) == y.argmax(1)).sum()
    if num_data == 0:
        raise ValueError("evaluate() received no samples: test_iter is empty")
    return 100 * sum_accurate.float() / num_data
定義必要的變量并訓練網絡
# wrap the features and one-hot labels in a shuffled mini-batch loader
batch_size = 250
train_data = Data.TensorDataset(train_features, train_labels)
train_iter = Data.DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
# model: 400 inputs, 10 sigmoid outputs
net = Logistic(400, 10)
# hyper-parameters: number of epochs, learning rate, regularisation strength
num_epochs = 2000
lr = 0.5
lm = 0
# binary cross-entropy loss, optimised with plain SGD
loss = nn.BCELoss()
optim = torch.optim.SGD(params=net.parameters(), lr=lr)
train(net=net, num_epochs=num_epochs, train_iter=train_iter, loss=loss, optim=optim, lm=lm)
2.多層神經網絡
結構
結構大緻如上圖,其中input layer含有400個神經元(圖檔大小為20×20),Hidden layer隻有一層,含有25個神經元,Output layer含有10個神經元(分類數為10)
代碼
# build the network (NeuralNetwork is defined outside this snippet)
net = NeuralNetwork()
# Initialise the layers from the stored matrices instead of training.
# Theta1 is 25x401 and Theta2 is 10x26: column 0 of each matrix is the bias and
# the remaining columns are the weights (linear1.weight: 25x400, linear2.weight: 10x25).
weight_path = root_path + 'ex3weights.mat'
weights_data = loadmat(weight_path)
for layer, key in ((net.linear1, 'Theta1'), (net.linear2, 'Theta2')):
    theta = weights_data[key]
    layer.weight.data = torch.tensor(theta[:, 1:], dtype=torch.float)
    layer.bias.data = torch.tensor(theta[:, 0], dtype=torch.float)
# report the accuracy; note that it is measured on train_iter, i.e. the training data
print("test accuracy: %0.2f%%\n" % evaluate(net, train_iter))