Introduction
Image classification is one of the most fundamental tasks in computer vision, and handwritten digit recognition is practically the "Hello World" of CV. This post walks through that demo in two ways: first with a plain fully connected neural network, then with LeNet. Rather than implementing everything from scratch, we use the ready-made TensorFlow + Keras APIs.
Environment
- tensorflow: 2.3.1
- keras: 2.3.1
- numpy: 1.20.1
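To confirm the versions on your own machine before running the code, a quick check like the following works (it only reads standard version attributes):

import tensorflow as tf
import numpy as np

print(tf.__version__)        # TensorFlow version
print(tf.keras.__version__)  # Keras version bundled with TensorFlow
print(np.__version__)        # NumPy version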
NN
Code
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import models,layers,regularizers
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.datasets import mnist
import matplotlib.pyplot as plt
# Load the MNIST dataset
(train_image,train_label),(test_images,test_labels)=mnist.load_data()
origin_test_data=test_images[:5]
# Flatten each 28x28 image into a 784-dimensional vector
train_image=train_image.reshape((60000,28*28)).astype('float')
test_images=test_images.reshape((10000,28*28)).astype('float')
# One-hot encode the labels
train_label=to_categorical(train_label)
test_labels=to_categorical(test_labels)
# Hidden layer sizes
hider1=100
hider2=32
# Build the network
network=models.Sequential()
network.add(layers.Dense(units=hider1, activation='relu', input_shape=(28*28, ),kernel_regularizer=regularizers.l1(0.0001)))
# Add dropout to reduce overfitting
network.add(layers.Dropout(0.001))
network.add(layers.Dense(units=hider2,activation="relu",kernel_regularizer=regularizers.l1(0.0001)))
network.add(layers.Dropout(0.002))
network.add(layers.Dense(units=10, activation='softmax'))
# Compile the model
network.compile(optimizer=RMSprop(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
# Train with fit: epochs = number of passes over the training set, batch_size = samples per gradient update
network.fit(train_image, train_label, epochs=20, batch_size=128, verbose=2)
# Check the model on the first five test images
y_pre = network.predict(test_images[:5])
y_test=test_labels[:5]
for i in range(len(y_pre)):
    # Find the index of the largest probability: that index is the predicted digit
    aMax = -1.0
    aIndex = 0
    for j in range(len(y_pre[i])):
        if y_pre[i][j] > aMax:
            aMax = y_pre[i][j]
            aIndex = j
    print("Predicted: " + str(aIndex))
    plt.imshow(origin_test_data[i])
    plt.show()
# print(y_pre, test_labels[:5])
# test_loss, test_accuracy = network.evaluate(test_images, test_labels)
# print("test_loss:", test_loss, " test_accuracy:", test_accuracy)
Results
Predictions: the script prints the predicted digit for each of the first five test images and shows the corresponding image with matplotlib.
Accuracy: about 97% on the test set.
LeNet
Code
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import models, layers
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.datasets import mnist
# Load the MNIST dataset
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
# Build the LeNet-style network
def LeNet():
    network = models.Sequential()
    network.add(layers.Conv2D(filters=6, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)))
    network.add(layers.AveragePooling2D((2, 2)))
    network.add(layers.Conv2D(filters=16, kernel_size=(3, 3), activation='relu'))
    network.add(layers.AveragePooling2D((2, 2)))
    network.add(layers.Conv2D(filters=120, kernel_size=(3, 3), activation='relu'))
    network.add(layers.Flatten())
    network.add(layers.Dense(84, activation='relu'))
    network.add(layers.Dense(10, activation='softmax'))
    return network
network = LeNet()
network.compile(optimizer=RMSprop(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
train_images = train_images.reshape((60000, 28, 28, 1)).astype('float') / 255
test_images = test_images.reshape((10000, 28, 28, 1)).astype('float') / 255
train_labels = to_categorical(train_labels)
test_labels = to_categorical(test_labels)
# Train with fit: epochs = number of passes over the training set, batch_size = samples per gradient update
network.fit(train_images, train_labels, epochs=10, batch_size=128, verbose=2)
test_loss, test_accuracy = network.evaluate(test_images, test_labels)
print("test_loss:", test_loss, " test_accuracy:", test_accuracy)