简单介绍
本文主要分为两个内容.
一.使用Logistic回归完成多分类任务,值得注意的是,完成的过程中并未使用SoftMax,所以各类的可能性相加并不为1,可以把它看作使用Logistic回归同时完成多个相互独立的二分类任务(即one-vs-all).
二.使用多层神经网络完成多分类任务,本文并未自行训练,而是直接加载已经训练好的权重矩阵来初始化网络并进行预测(严格来说这只是加载预训练权重,之后并没有继续训练,因此并不是真正意义上的fine-tune).
此次多分类任务为手写识别,使用两种方法均可以达到不错的效果(以下准确率均在训练集上测得),第一种的准确率可以达到94%,第二种达到了97.52%.
源代码以及题目等文件见博客
1.Logistic回归
导入必要的包并对数据预处理
# load package and load data
import torch
import torch.nn as nn
import torch.utils.data as Data
import matplotlib.pyplot as plt
import numpy as np
from scipy.io import loadmat
# define the root path
root_path = '../machine-learning-ex3/ex3/'
# define the path of dataset
train_path = root_path + 'ex3data1.mat'
file_train_data = loadmat(train_path)
# one row of X per training sample
train_features = torch.tensor(file_train_data['X'], dtype=torch.float)
# the labels stored in the file run from 1 to 10, but we need them to start from zero
train_labels = torch.tensor(file_train_data['y'], dtype=torch.long) - 1
# number of classes = largest label + 1; use the tensor's own max() and convert
# it to a plain int instead of iterating over the tensor with Python's builtin max
num_classes = int(train_labels.max()) + 1
# one-hot encode train_labels: scatter_ writes value=1 into the column selected
# by each row's label and leaves every other entry at zero
train_labels = torch.zeros(train_labels.shape[0], num_classes, dtype=torch.float).scatter_(
    dim=1, index=train_labels, value=1)
显示部分数据集
此步骤用来显示我们要完成的任务,可以省略
# show the partial training data
# set the resolution once, before the figure is created, so that it applies to
# the figure drawn below (it changes the rendered figure size)
plt.rcParams['figure.dpi'] = 40
# take the number of samples from the data instead of hard-coding it
num_samples = train_features.shape[0]
# pick 25 random samples and draw them on a 5x5 grid
for img_id, img_index in enumerate(np.random.randint(0, num_samples, size=25), start=1):
    # each sample is a flat vector of 400 values; reshape it to a 20x20 image
    img = train_features[int(img_index)].view(20, 20)
    plt.subplot(5, 5, img_id)
    # hide the axes
    plt.axis('off')
    plt.imshow(img, cmap=plt.cm.gray)
结果如下图:
![](https://img.laitimes.com/img/9ZDMuAjOiMmIsIjOiQnIsICM38FdsYkRGZkRG9lcvx2bjxiNx8VZ6l2cs0TPR5ENRpWT6VEROBDOsJGcohVYsR2MMBjVtJWd0ckW65UbM5WOHJWa5kHT20ESjBjUIF2X0hXZ0xCMx81dvRWYoNHLrdEZwZ1Rh5WNXp1bwNjW1ZUba9VZwlHdssmch1mclRXY39CXldWYtlWPzNXZj9mcw1ycz9WL49zZuBnLwEzN5IDOzEjM3ITMxkTMwIzLc52YucWbp5GZzNmLn9Gbi1yZtl2Lc9CX6MHc0RHaiojIsJye.png)
定义模型
# define the model
class Logistic(nn.Module):
    """A single linear layer followed by an element-wise sigmoid.

    Every output unit is squashed independently, so the outputs of one sample
    are not normalised to sum to 1.
    """

    def __init__(self, input_channels, output_channels):
        """Create the layer.

        Args:
            input_channels: number of input features per sample.
            output_channels: number of output units.
        """
        super(Logistic, self).__init__()
        self.linear = nn.Linear(input_channels, output_channels)
        self.sigmoid = nn.Sigmoid()

    def forward(self, X):
        """Return sigmoid(linear(X)); every value lies in (0, 1)."""
        X = self.linear(X)
        out = self.sigmoid(X)
        return out
# add regular term
def add_regular_item(net, m, lm):
    """Add the gradient of the L2 regular term to every nn.Linear weight gradient.

    Must be called after backward() and before the optimizer step. Only the
    weights are regularised; biases are left untouched.

    Args:
        net: module whose nn.Linear sub-modules are regularised.
        m: number of samples in the current batch.
        lm: regularisation strength (lambda).
    """
    # nothing to add when regularisation is switched off
    if lm == 0:
        return
    # the gradients are edited in place; this must not be tracked by autograd
    with torch.no_grad():
        # go through all the modules and find the ones whose gradient needs to be modified
        for module in net.modules():
            if not isinstance(module, nn.Linear):
                continue
            grad = module.weight.grad
            # no backward pass has populated this gradient yet: nothing to modify
            if grad is None:
                continue
            # the gradient of the regular term: lambda * theta / m
            # (lm = lambda, module.weight = theta)
            grad.add_(module.weight, alpha=lm / m)
实现训练方法和评估方法
def train(net, num_epochs, train_iter, loss, optim, lm=0, is_print=True):
    """Train net with mini-batch gradient descent.

    Args:
        net: the model to train; called as net(X).
        num_epochs: number of passes over train_iter.
        train_iter: iterable yielding (X, y) batches.
        loss: callable loss(y_hat, y) returning a tensor.
        optim: optimizer that updates the parameters of net.
        lm: regularisation strength (lambda); 0 disables the regular term.
        is_print: when true, report the average loss and the accuracy on
            train_iter every 20 epochs.
    """
    for epoch in range(num_epochs):
        n_batch, sum_loss = 0, 0.0
        for X, y in train_iter:
            optim.zero_grad()
            # m: the number of samples in this batch
            m = y.shape[0]
            y_hat = net(X)
            l = loss(y_hat, y).sum()
            l.backward()
            # the regular term only changes the gradients when lambda is non-zero
            if lm:
                add_regular_item(net, m, lm)
            optim.step()
            # accumulate a plain float so the autograd graph of every batch
            # can be released instead of being kept alive by the running sum
            sum_loss += l.item()
            n_batch += 1
        # report every 20 epochs; skip the report when no batch was seen,
        # because the average loss would be a division by zero
        if is_print and (epoch + 1) % 20 == 0 and n_batch > 0:
            print("epoch: %d, average loss: %f" % (epoch+1, sum_loss / n_batch))
            # no gradients are needed while measuring accuracy
            with torch.no_grad():
                accuracy = evaluate(net, train_iter)
            print("epoch: %d, train accuracy: %0.2f%%\n" % (epoch+1, accuracy))
def evaluate(net, test_iter):
    """Return the classification accuracy of net on test_iter, in percent.

    Args:
        net: callable mapping a batch X to one row of class scores per sample.
        test_iter: iterable yielding (X, y) batches; the true class of a
            sample is taken as the argmax of its row in y.

    Returns:
        A 0-dim float tensor holding the accuracy in percent. It is 0.0 when
        test_iter yields no samples.
    """
    num_correct, num_data = 0, 0
    # evaluation does not need gradients
    with torch.no_grad():
        for X, y in test_iter:
            num_data += y.shape[0]
            y_hat = net(X)
            # the predicted class is the output unit with the largest activation
            num_correct += (y_hat.argmax(1) == y.argmax(1)).sum().item()
    # avoid dividing by zero when there is nothing to evaluate
    if num_data == 0:
        return torch.tensor(0.0)
    return torch.tensor(100.0 * num_correct / num_data)
定义必要的变量并训练网络
# wrap the features and the one-hot labels in a dataset and iterate over it
# in shuffled mini-batches
batch_size = 250
train_data = Data.TensorDataset(train_features, train_labels)
train_iter = Data.DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
# model: 400 input features, one sigmoid output per class (10 classes)
net = Logistic(400, 10)
# hyper-parameters: number of epochs, learning rate, regularisation strength (lm = lambda)
num_epochs = 2000
lr = 0.5
lm = 0
# binary cross-entropy, applied to every output unit independently
loss = nn.BCELoss()
# plain stochastic gradient descent over all parameters of the model
optim = torch.optim.SGD(net.parameters(), lr=lr)
# run the training loop
train(net, num_epochs, train_iter, loss, optim, lm=lm)
2.多层神经网络
结构
结构大致如上图,其中input layer含有400个神经元(图片大小为20×20),Hidden layer只有一层,含有25个神经元,Output layer含有10个神经元(分类数为10)
代码
# define model
# NOTE(review): NeuralNetwork is not defined anywhere in the visible source, so
# `net = NeuralNetwork()` raised NameError. The class below is reconstructed from
# the layer sizes given in the text (400 inputs, 25 hidden units, 10 outputs) and
# from the attribute names linear1/linear2 used further down. The sigmoid
# activations are an assumption about how the pretrained weights were
# produced -- confirm against the original exercise.
class NeuralNetwork(nn.Module):
    """Feed-forward network with one hidden layer and sigmoid activations."""

    def __init__(self, input_channels=400, hidden_channels=25, output_channels=10):
        super(NeuralNetwork, self).__init__()
        self.linear1 = nn.Linear(input_channels, hidden_channels)
        self.linear2 = nn.Linear(hidden_channels, output_channels)
        self.sigmoid = nn.Sigmoid()

    def forward(self, X):
        hidden = self.sigmoid(self.linear1(X))
        return self.sigmoid(self.linear2(hidden))


net = NeuralNetwork()
# load the pretrained weights; no further training is performed afterwards
# the shape of Theta1: 25x401, the shape of Theta2: 10x26
# the shape of linear1.weight: 25x400, the shape of linear2.weight: 10x25
weight_path = root_path + 'ex3weights.mat'
weights_data = loadmat(weight_path)
# column 0 of each Theta matrix is the bias, the remaining columns are the weights;
# copy_ checks that the shapes match the layers instead of silently replacing them
with torch.no_grad():
    net.linear1.weight.copy_(torch.tensor(weights_data['Theta1'][:, 1:], dtype=torch.float))
    net.linear1.bias.copy_(torch.tensor(weights_data['Theta1'][:, 0], dtype=torch.float))
    net.linear2.weight.copy_(torch.tensor(weights_data['Theta2'][:, 1:], dtype=torch.float))
    net.linear2.bias.copy_(torch.tensor(weights_data['Theta2'][:, 0], dtype=torch.float))
# evaluate the accuracy (measured on the same data set that train_iter iterates over)
print("test accuracy: %0.2f%%\n" % evaluate(net, train_iter))