Epoch:所有训练样本都完成一次前向传播和反向传播,称为一个Epoch
Batch-Size:每次前向/反向传播(即一次参数更新)所用的样本数量
Iteration:迭代,每用Batch-Size个样本训练一次算一次迭代;一个Epoch内的迭代次数 = 样本总数 / Batch-Size(向上取整)
Dataset是一个抽象类,不能实例化,只能继承
DataLoader:可以实例化
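在Windows下,多进程使用的库和Linux不同,用spawn代替了fork;如果在模块顶层直接迭代num_workers>0的DataLoader,会导致如下问题(通常是RuntimeError,提示在当前进程完成引导之前试图启动新进程):
改写成:把训练循环放到 if __name__ == '__main__': 之下(见下方代码)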
查看N(即xy.shape[0],数据的行数),知道数据集有多少个样本
import torch
import numpy as np
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
# prepare dataset
"""
数据集不大 直接读加载到内存
无结构数据
"""
class DiabetesDataset(Dataset):
    """In-memory dataset backed by a comma-delimited numeric text file.

    The whole file is parsed eagerly in ``__init__``, so this is only
    appropriate for data that fits in memory (e.g. not for large image
    datasets). Every column except the last is treated as a feature; the
    last column is the target.
    """

    def __init__(self, filepath):
        """Load ``filepath`` and split it into feature and target tensors.

        Args:
            filepath: Anything ``numpy.loadtxt`` accepts as its source,
                typically the path of a comma-delimited text file.
        """
        # ndmin=2 keeps a single-row file as shape (1, n_columns). Without
        # it loadtxt squeezes the result to 1-D and the column slicing below
        # raises IndexError.
        xy = np.loadtxt(filepath, delimiter=',', dtype=np.float32, ndmin=2)
        # shape is (rows, columns); the row count is the number of samples.
        self.len = xy.shape[0]
        self.x_data = torch.from_numpy(xy[:, :-1])
        # Indexing with the list [-1] keeps the target as a column of shape
        # (rows, 1) rather than a flat vector.
        self.y_data = torch.from_numpy(xy[:, [-1]])

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]

    def __len__(self) -> int:
        """Return the number of samples (rows) that were loaded."""
        return self.len
# Build the dataset object from the compressed CSV file.
dataset = DiabetesDataset('data/diabetes.csv.gz')

# Mini-batch loader: shuffle=True reshuffles the samples before batching,
# and num_workers=2 asks for two worker processes to load the data.
train_loader = DataLoader(
    dataset=dataset,
    batch_size=32,
    shuffle=True,
    num_workers=2,
)
# design model using class
class Model(torch.nn.Module):
    """Three-layer fully connected network with sigmoid activations.

    With the default argument the layer widths are 8 -> 6 -> 4 -> 1. A
    sigmoid is applied after every linear layer, including the last one, so
    each output value lies in the range [0, 1].

    Args:
        in_features: Number of input features per sample. Defaults to 8.
    """

    def __init__(self, in_features: int = 8) -> None:
        super().__init__()
        # Layers are created in a fixed order so that parameter names and
        # random initialisation order stay the same for the default model.
        self.linear1 = torch.nn.Linear(in_features, 6)
        self.linear2 = torch.nn.Linear(6, 4)
        self.linear3 = torch.nn.Linear(4, 1)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Run ``x`` through the three linear layers, each followed by sigmoid.

        Args:
            x: Input tensor whose last dimension has ``in_features`` entries.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size 1.
        """
        for layer in (self.linear1, self.linear2, self.linear3):
            x = self.sigmoid(layer(x))
        return x
# Instantiate the network.
model = Model()

# Loss and optimizer: binary cross-entropy averaged over the batch
# (reduction='mean'), minimised with SGD at learning rate 0.01.
criterion = torch.nn.BCELoss(reduction='mean')
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01)
# training cycle forward, backward, update
if __name__ == '__main__':
    # Training runs only when the file is executed as a script, so it is not
    # re-run when the module is imported.
    for epoch in range(100):
        # train_loader shuffles first and then splits into mini-batches;
        # each batch is a pair of tensors (inputs, labels). Batch indices
        # start at 0.
        for i, (inputs, labels) in enumerate(train_loader):
            # Forward
            y_pred = model(inputs)
            loss = criterion(y_pred, labels)
            print(epoch, i, loss.item())

            # Backward: clear the old gradients, then backpropagate.
            optimizer.zero_grad()
            loss.backward()

            # Update
            optimizer.step()