課程來源:人工智能實踐:Tensorflow筆記2
文章目錄
- 前言
- 1、檔案一覽
- 2、将load_data()函數替換掉
- 3、調用generateds函數
- 4、效果
- 總結
前言
本講目标:自制資料集,解決本領域應用
将我們手中的圖檔和标簽資訊制作為可以直接導入的npy檔案。
1、檔案一覽
首先看看我們的檔案長什麼樣:
路徑:D:\python code\AI\class4\MNIST_FC\mnist_image_label\mnist_test_jpg_10000
圖檔檔案:(黑底白字的灰階圖,大小:28x28,每個像素點都是0~255之間的整數)
![](https://img.laitimes.com/img/9ZDMuAjOiMmIsIjOiQnIsIiclRnblN2XjlGcjAzNfRHLGZkRGZkRfJ3bs92YsYTMfVmepNHLyMGVNBTQq5UeRpHW4Z0MMBjVtJWd0ckW65UbM5WOHJWa5kHT20ESjBjUIF2X0hXZ0xCMx81dvRWYoNHLrdEZwZ1Rh5WNXp1bwNjW1ZUba9VZwlHdssmch1mclRXY39CXldWYtlWPzNXZj9mcw1ycz9WL49zZuBnL4cTM4EDMxUTM2ATOwAjMwIzLc52YucWbp5GZzNmLn9Gbi1yZtl2Lc9CX6MHc0RHaiojIsJye.png)
标簽檔案:(圖檔名和對應的标簽,中間用空格隔開)
2、将load_data()函數替換掉
之前我們導入資料集的方式是(以mnist資料集為例):
mnist = tf.keras.datasets.mnist
(x_train, y_train),(x_test, y_test) = mnist.load_data()
導入後變量的資料類型和形狀:
x_train.shape | (60000,28,28) ,3維數組,60000個28行28列的圖檔灰階值 |
---|---|
y_train.shape | (60000,) ,60000張圖檔對應的标簽,是1維數組 |
x_test.shape | (10000,28,28) ,3維數組,10000個28行28列的圖檔灰階值 |
y_test.shape | (10000,) ,10000張圖檔對應的标簽,是1維數組 |
我們需要自己寫個函數generateds(圖檔路徑,标簽檔案):
觀察資料集:
我們需要做的:把圖檔灰階值資料拼接到圖檔清單,把标簽資料拼接到标簽清單。
函數代碼如下:
def generateds(path, txt):
    """Load the images and labels listed in a text file into numpy arrays.

    Args:
        path: Directory holding the image files; must end with a path
            separator, because the filename is appended by plain string
            concatenation.
        txt: Label file where each line is "<image filename> <label>",
            separated by whitespace.

    Returns:
        x: float ndarray of shape (N, H, W), grayscale pixels scaled to [0, 1].
        y_: int64 ndarray of shape (N,), the class labels.
    """
    # Context manager guarantees the file is closed even if reading fails
    # (the original open/close pair would leak the handle on an exception).
    with open(txt, 'r') as f:
        contents = f.readlines()  # one "<filename> <label>" entry per line
    x, y_ = [], []  # feature list and label list
    for content in contents:
        value = content.split()  # value[0] = image filename, value[1] = label
        img_path = path + value[0]  # directory + filename -> full image path
        img = Image.open(img_path)
        img = np.array(img.convert('L'))  # force 8-bit grayscale
        x.append(img / 255.)  # normalize pixel values to [0, 1]
        y_.append(value[1])
        print('loading : ' + content)  # progress indicator
    x = np.array(x)
    # Labels were read as strings; cast to int64 for use as class indices.
    y_ = np.array(y_).astype(np.int64)
    return x, y_
3、調用generateds函數
使用函數代碼:
'''Paths added below:
training-set image directory
training-set label file
training-set input-feature cache file
training-set label cache file
test-set image directory
test-set label file
test-set input-feature cache file
test-set label cache file'''
train_path = 'D:/python code/AI/class4/FASHION_FC/fashion_image_label/fashion_train_jpg_60000/'
train_txt = 'D:/python code/AI/class4/FASHION_FC/fashion_image_label/fashion_train_jpg_60000.txt'
x_train_savepath = 'D:/python code/AI/class4/FASHION_FC/fashion_image_label/fashion_x_train.npy'
# NOTE(review): "fahion" looks like a typo for "fashion"; harmless here
# because both save and load go through this same constant.
y_train_savepath = 'D:/python code/AI/class4/FASHION_FC/fashion_image_label/fahion_y_train.npy'
test_path = 'D:/python code/AI/class4/FASHION_FC/fashion_image_label/fashion_test_jpg_10000/'
test_txt = 'D:/python code/AI/class4/FASHION_FC/fashion_image_label/fashion_test_jpg_10000.txt'
x_test_savepath = 'D:/python code/AI/class4/FASHION_FC/fashion_image_label/fashion_x_test.npy'
y_test_savepath = 'D:/python code/AI/class4/FASHION_FC/fashion_image_label/fashion_y_test.npy'
# If the cached .npy files already exist, load them directly; otherwise call
# the generateds (generate datasets) function to build them from raw images.
if os.path.exists(x_train_savepath) and os.path.exists(y_train_savepath) and os.path.exists(
        x_test_savepath) and os.path.exists(y_test_savepath):
    print('-------------Load Datasets-----------------')
    x_train_save = np.load(x_train_savepath)
    y_train = np.load(y_train_savepath)
    x_test_save = np.load(x_test_savepath)
    y_test = np.load(y_test_savepath)
    # Cached features are stored flattened; restore the 28x28 image shape.
    x_train = np.reshape(x_train_save, (len(x_train_save), 28, 28))
    x_test = np.reshape(x_test_save, (len(x_test_save), 28, 28))
else:
    print('-------------Generate Datasets-----------------')
    x_train, y_train = generateds(train_path, train_txt)
    x_test, y_test = generateds(test_path, test_txt)
    print('-------------Save Datasets-----------------')
    # Flatten each image to one row so it can be saved as a 2-D array.
    x_train_save = np.reshape(x_train, (len(x_train), -1))
    x_test_save = np.reshape(x_test, (len(x_test), -1))
    np.save(x_train_savepath, x_train_save)
    np.save(y_train_savepath, y_train)
    np.save(x_test_savepath, x_test_save)
    np.save(y_test_savepath, y_test)
# Simple fully-connected classifier: flatten -> 128 ReLU units -> 10-way softmax.
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])
# from_logits=False because the last layer already applies softmax.
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['sparse_categorical_accuracy'])
model.fit(x_train, y_train, batch_size=32, epochs=5, validation_data=(x_test, y_test), validation_freq=1)
model.summary()
4、效果
制作完資料集之後開始用神經網絡訓練:
可以發現原本的檔案夾中出現了你所需要的npy檔案。
完整代碼:
import tensorflow as tf
from PIL import Image
import numpy as np
import os
# Locations of the raw fashion-MNIST style images/labels and of the
# .npy cache files that this script writes/reads.
train_path = 'D:/python code/AI/class4/FASHION_FC/fashion_image_label/fashion_train_jpg_60000/'
train_txt = 'D:/python code/AI/class4/FASHION_FC/fashion_image_label/fashion_train_jpg_60000.txt'
x_train_savepath = 'D:/python code/AI/class4/FASHION_FC/fashion_image_label/fashion_x_train.npy'
# NOTE(review): "fahion" looks like a typo for "fashion"; harmless here
# because both save and load go through this same constant.
y_train_savepath = 'D:/python code/AI/class4/FASHION_FC/fashion_image_label/fahion_y_train.npy'
test_path = 'D:/python code/AI/class4/FASHION_FC/fashion_image_label/fashion_test_jpg_10000/'
test_txt = 'D:/python code/AI/class4/FASHION_FC/fashion_image_label/fashion_test_jpg_10000.txt'
x_test_savepath = 'D:/python code/AI/class4/FASHION_FC/fashion_image_label/fashion_x_test.npy'
y_test_savepath = 'D:/python code/AI/class4/FASHION_FC/fashion_image_label/fashion_y_test.npy'
def generateds(path, txt):
    """Load the images and labels listed in a text file into numpy arrays.

    Args:
        path: Directory holding the image files; must end with a path
            separator, because the filename is appended by plain string
            concatenation.
        txt: Label file where each line is "<image filename> <label>",
            separated by whitespace.

    Returns:
        x: float ndarray of shape (N, H, W), grayscale pixels scaled to [0, 1].
        y_: int64 ndarray of shape (N,), the class labels.
    """
    # Context manager guarantees the file is closed even if reading fails
    # (the original open/close pair would leak the handle on an exception).
    with open(txt, 'r') as f:
        contents = f.readlines()  # one "<filename> <label>" entry per line
    x, y_ = [], []  # feature list and label list
    for content in contents:
        value = content.split()  # value[0] = image filename, value[1] = label
        img_path = path + value[0]  # directory + filename -> full image path
        img = Image.open(img_path)
        img = np.array(img.convert('L'))  # force 8-bit grayscale
        x.append(img / 255.)  # normalize pixel values to [0, 1]
        y_.append(value[1])
        print('loading : ' + content)  # progress indicator
    x = np.array(x)
    # Labels were read as strings; cast to int64 for use as class indices.
    y_ = np.array(y_).astype(np.int64)
    return x, y_
# If the cached .npy files already exist, load them directly; otherwise call
# the generateds (generate datasets) function to build them from raw images.
if os.path.exists(x_train_savepath) and os.path.exists(y_train_savepath) and os.path.exists(
        x_test_savepath) and os.path.exists(y_test_savepath):
    print('-------------Load Datasets-----------------')
    x_train_save = np.load(x_train_savepath)
    y_train = np.load(y_train_savepath)
    x_test_save = np.load(x_test_savepath)
    y_test = np.load(y_test_savepath)
    # Cached features are stored flattened; restore the 28x28 image shape.
    x_train = np.reshape(x_train_save, (len(x_train_save), 28, 28))
    x_test = np.reshape(x_test_save, (len(x_test_save), 28, 28))
else:
    print('-------------Generate Datasets-----------------')
    x_train, y_train = generateds(train_path, train_txt)
    x_test, y_test = generateds(test_path, test_txt)
    print('-------------Save Datasets-----------------')
    # Flatten each image to one row so it can be saved as a 2-D array.
    x_train_save = np.reshape(x_train, (len(x_train), -1))
    x_test_save = np.reshape(x_test, (len(x_test), -1))
    np.save(x_train_savepath, x_train_save)
    np.save(y_train_savepath, y_train)
    np.save(x_test_savepath, x_test_save)
    np.save(y_test_savepath, y_test)
# Simple fully-connected classifier: flatten -> 128 ReLU units -> 10-way softmax.
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])
# from_logits=False because the last layer already applies softmax.
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['sparse_categorical_accuracy'])
model.fit(x_train, y_train, batch_size=32, epochs=5, validation_data=(x_test, y_test), validation_freq=1)
model.summary()
總結
課程連結:MOOC人工智能實踐:TensorFlow筆記2