Predicting VO2max with PyTorch

For now, the model trained with the neural network is not particularly accurate, but it converges much faster than the linear regression model.

The classes:

class HumanDataset(Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels

    def __len__(self):
        return len(self.features)

    def __getitem__(self, index):
        item_feature = self.features[index]
        item_label = self.labels[index]
        return item_feature, item_label

# ANN (multi-layer perceptron)
class MLPNet(torch.nn.Module):
    def __init__(self, n_feature, n_hidden1, n_hidden2, n_label):
        super(MLPNet, self).__init__()
        self.hidden1 = torch.nn.Linear(n_feature, n_hidden1)
        self.hidden2 = torch.nn.Linear(n_hidden1, n_hidden2)
        self.output = torch.nn.Linear(n_hidden2, n_label)

    def forward(self, x):
        x = torch.nn.functional.relu(self.hidden1(x))
        x = torch.nn.functional.relu(self.hidden2(x))
        # Regression, not classification: output the raw value directly
        # x = torch.nn.functional.softmax(self.output(x))
        x = self.output(x)
        return x

# Linear regression model
class LinearNet(torch.nn.Module):
    def __init__(self, n_feature):
        super(LinearNet, self).__init__()
        self.linear = torch.nn.Linear(n_feature, 1)

    def forward(self, x):
        x = self.linear(x)
        return x
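
As a quick sanity check (not from the original post), the two classes can be wired together with a few made-up rows to confirm the tensor shapes: the MLP returns a [batch, 1] tensor while the labels come out as [batch], which is why the training code further down has to squeeze or transpose before comparing them.

import torch
from torch.utils.data import DataLoader

# four made-up samples with 12 features each, plus four VO2max-style targets
dummy_features = torch.rand(4, 12)
dummy_labels = torch.tensor([35.0, 42.5, 28.3, 50.1])

dataset = HumanDataset(dummy_features, dummy_labels)
loader = DataLoader(dataset, batch_size=2, shuffle=False)

net = MLPNet(n_feature=12, n_hidden1=64, n_hidden2=32, n_label=1)
for features, labels in loader:
    print(net(features).shape, labels.shape)  # torch.Size([2, 1]) torch.Size([2])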

Data processing:

# Convert the .csv file into a training set and a test set
def get_data():
    tic = time.time()
    print('load .csv')
    csv_path = '/Users/wonderhoi/Downloads/mipace_mlproject.csv'
    df = pd.read_csv(csv_path)

    human_features = np.zeros((540, 12), dtype=float)
    human_labels = []

    for item in df.itertuples():
        human_features[item[0], :] = [item[1], item[2], 0 if item[3] == 'Male' else 1, item[4], item[5], item[6],
                                      item[7], item[8], item[9], item[10], item[11], item[12]]
        human_labels.append(item[13])

    # Min-max scaling
    # scaler = MinMaxScaler(feature_range=(0, 1))
    # human_features = scaler.fit_transform(human_features)

    # Standardization
    # scaler = StandardScaler()
    # human_features = scaler.fit_transform(human_features)

    # Normalization (each sample is scaled to unit norm)
    scaler = Normalizer()
    human_features = scaler.fit_transform(human_features)

    # Standardization by hand
    # human_features = (human_features - np.mean(human_features)) / np.std(human_features)

    # mean = torch.mean(X, dim=0)
    # std = torch.std(X, dim=0)
    # X = (X - mean) / std

    human_features = torch.FloatTensor(human_features)
    human_labels = torch.FloatTensor(human_labels)

    # Split the data into training and test sets with an 8:2 ratio
    X_train, X_test, Y_train, Y_test = train_test_split(human_features, human_labels, test_size=0.2)

    train = HumanDataset(X_train, Y_train)

    toc = time.time()
    print('Loading Time: ' + str(1000 * (toc - tic)) + 'ms')
    print('')
    return train, X_test, Y_test
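
A note on the preprocessing options toggled by comments above: sklearn's Normalizer rescales each sample (row) to unit norm, while StandardScaler and MinMaxScaler rescale each feature (column) across the dataset. For tabular features such as age and height, column-wise scaling is the more common choice. The small sketch below (made-up numbers, not from the original script) shows the difference.

import numpy as np
from sklearn.preprocessing import Normalizer, StandardScaler

X = np.array([[25.0, 180.0, 70.0],
              [60.0, 165.0, 90.0]])   # made-up rows: age, height, weight

print(Normalizer().fit_transform(X))       # each row divided by its own L2 norm
print(StandardScaler().fit_transform(X))   # each column shifted/scaled to mean 0, std 1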

Training the model:

def train_model():
    tic = time.time()

    model = MLPNet(n_feature=12, n_hidden1=64, n_hidden2=32, n_label=1)  # ANN
    # model = LinearNet(12)  # linear regression

    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

    # loss_func = torch.nn.CrossEntropyLoss()  # classification loss, not applicable here
    # loss_func = torch.nn.MSELoss()  # loss explodes
    loss_func = torch.nn.SmoothL1Loss()

    dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)

    count_loss = 0    # x axis for the loss plot
    count_recall = 0  # x axis for the recall plot

    for epoch in range(epochs):
        for step, (features, labels) in enumerate(dataloader):
            output = model(features)
            # squeeze [batch, 1] -> [batch] so the loss is not broadcast against the labels
            loss = loss_func(output.squeeze(-1), labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print('Epoch: ', epoch + 1, '| Step: ', step + 1, '| Loss: ', loss.item())
            Y_loss.append(loss.item())
            count_loss = count_loss + 1
            X_loss.append(count_loss)

        count_recall = count_recall + 1
        with torch.no_grad():
            predictions = model(test_features)
        errors = torch.abs(test_labels - predictions.t())  # transpose predictions: size [68, 1] -> [1, 68]

        errors = errors / test_labels
        correct_predictions = (errors <= threshold / 100).float().sum().item()

        # fraction of test predictions whose relative error is within the threshold
        recall = correct_predictions / len(test_labels)
        Y_recall.append(recall)
        X_recall.append(count_recall)

    toc = time.time()
    print('Training finished')
    print('Train time:' + str(1000 * (toc - tic)) + 'ms')
    print('')
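
The per-epoch evaluation above counts how many test predictions fall within threshold percent of the true VO2max. Pulled out into a standalone helper (a sketch, not part of the original script), the metric looks like this:

import torch

def within_threshold_accuracy(model, features, labels, threshold_pct=5):
    """Fraction of predictions whose relative error is at most threshold_pct percent."""
    with torch.no_grad():
        preds = model(features).squeeze(-1)            # [N, 1] -> [N]
    rel_error = torch.abs(labels - preds) / labels     # element-wise relative error
    return (rel_error <= threshold_pct / 100).float().mean().item()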

Plotting:

# Plot the loss curve
def plot_loss():
    fig = plt.figure(figsize=(36, 10))
    ax = fig.add_subplot()
    ax.scatter(X_loss, Y_loss, c='red', marker='1', label='loss')
    ax.set_xlabel('time')
    ax.set_ylabel('loss')
    ax.legend(loc=1)

# Plot the recall curve
def plot_recall():
    fig = plt.figure(figsize=(36, 10))
    ax = fig.add_subplot()
    ax.scatter(X_recall, Y_recall, c='blue', marker='1', label='recall')
    ax.set_xlabel('time')
    ax.set_ylabel('recall')
    ax.legend(loc=1)

Main:

import time
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, Normalizer, MinMaxScaler
import torch
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt

if __name__ == '__main__':

    '''
    epochs      number of training epochs
    batch_size  samples per batch; with 432 training samples, a batch size of 16 means 432/16 = 27 steps per epoch
    threshold   accepted error margin, 5 = 5%
    '''
    epochs = 200
    batch_size = 432
    threshold = 5

    Y_loss = []
    Y_recall = []
    X_loss = []
    X_recall = []
    train_dataset, test_features, test_labels = get_data()
    train_model()

    plot_recall()
    plot_loss()
    plt.show()
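
One practical caveat (not mentioned in the original post): train_test_split reshuffles the data on every run and the network weights are re-initialised randomly, so the loss and recall curves differ between runs. If reproducible numbers are wanted, the seeds can be fixed inside the main block before calling get_data(), for example:

torch.manual_seed(0)   # fixes the weight initialisation and DataLoader shuffling
np.random.seed(0)      # fixes the train/test split drawn inside get_data()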
