Predicting VO2max with PyTorch

For now, the model trained with the neural network is not particularly accurate, but it converges much faster than the linear regression model.

The classes:

class HumanDataset(Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels

    def __len__(self):
        return len(self.features)

    def __getitem__(self, index):
        item_feature = self.features[index]
        item_label = self.labels[index]
        return item_feature, item_label

# ANN (multi-layer perceptron)
class MLPNet(torch.nn.Module):
    def __init__(self, n_feature, n_hidden1, n_hidden2, n_label):
        super(MLPNet, self).__init__()
        self.hidden1 = torch.nn.Linear(n_feature, n_hidden1)
        self.hidden2 = torch.nn.Linear(n_hidden1, n_hidden2)
        self.output = torch.nn.Linear(n_hidden2, n_label)

    def forward(self, x):
        x = torch.nn.functional.relu(self.hidden1(x))
        x = torch.nn.functional.relu(self.hidden2(x))
        # Regression, not classification: output the raw value directly
        # x = torch.nn.functional.softmax(self.output(x))
        x = self.output(x)
        return x

# Linear regression model
class LinearNet(torch.nn.Module):
    def __init__(self, n_feature):
        super(LinearNet, self).__init__()
        self.linear = torch.nn.Linear(n_feature, 1)

    def forward(self, x):
        x = self.linear(x)
        return x
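
As a quick sanity check (not from the original post), the two classes can be wired together with a few made-up rows to confirm the tensor shapes: the MLP returns a [batch, 1] tensor while the labels come out as [batch], which is why the training code further down has to squeeze or transpose before comparing them.

import torch
from torch.utils.data import DataLoader

# four made-up samples with 12 features each, plus four VO2max-style targets
dummy_features = torch.rand(4, 12)
dummy_labels = torch.tensor([35.0, 42.5, 28.3, 50.1])

dataset = HumanDataset(dummy_features, dummy_labels)
loader = DataLoader(dataset, batch_size=2, shuffle=False)

net = MLPNet(n_feature=12, n_hidden1=64, n_hidden2=32, n_label=1)
for features, labels in loader:
    print(net(features).shape, labels.shape)  # torch.Size([2, 1]) torch.Size([2])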

Data processing:

# Convert the .csv file into a training set and a test set
def get_data():
    tic = time.time()
    print('load .csv')
    csv_path = '/Users/wonderhoi/Downloads/mipace_mlproject.csv'
    df = pd.read_csv(csv_path)

    human_features = np.zeros((540, 12), dtype=float)
    human_labels = []

    for item in df.itertuples():
        human_features[item[0], :] = [item[1], item[2], 0 if item[3] == 'Male' else 1, item[4], item[5], item[6],
                                      item[7], item[8], item[9], item[10], item[11], item[12]]
        human_labels.append(item[13])

    # Min-max scaling
    # scaler = MinMaxScaler(feature_range=(0, 1))
    # human_features = scaler.fit_transform(human_features)

    # Standardization
    # scaler = StandardScaler()
    # human_features = scaler.fit_transform(human_features)

    # Normalization (each sample is scaled to unit norm)
    scaler = Normalizer()
    human_features = scaler.fit_transform(human_features)

    # Standardization by hand
    # human_features = (human_features - np.mean(human_features)) / np.std(human_features)

    # mean = torch.mean(X, dim=0)
    # std = torch.std(X, dim=0)
    # X = (X - mean) / std

    human_features = torch.FloatTensor(human_features)
    human_labels = torch.FloatTensor(human_labels)

    # Split the data into training and test sets with an 8:2 ratio
    X_train, X_test, Y_train, Y_test = train_test_split(human_features, human_labels, test_size=0.2)

    train = HumanDataset(X_train, Y_train)

    toc = time.time()
    print('Loading Time: ' + str(1000 * (toc - tic)) + 'ms')
    print('')
    return train, X_test, Y_test
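
A note on the preprocessing options toggled by comments above: sklearn's Normalizer rescales each sample (row) to unit norm, while StandardScaler and MinMaxScaler rescale each feature (column) across the dataset. For tabular features such as age and height, column-wise scaling is the more common choice. The small sketch below (made-up numbers, not from the original script) shows the difference.

import numpy as np
from sklearn.preprocessing import Normalizer, StandardScaler

X = np.array([[25.0, 180.0, 70.0],
              [60.0, 165.0, 90.0]])   # made-up rows: age, height, weight

print(Normalizer().fit_transform(X))       # each row divided by its own L2 norm
print(StandardScaler().fit_transform(X))   # each column shifted/scaled to mean 0, std 1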

Training the model:

def train_model():
    tic = time.time()

    model = MLPNet(n_feature=12, n_hidden1=64, n_hidden2=32, n_label=1)  # ANN
    # model = LinearNet(12)  # linear regression

    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

    # loss_func = torch.nn.CrossEntropyLoss()  # classification loss, not applicable here
    # loss_func = torch.nn.MSELoss()  # loss explodes
    loss_func = torch.nn.SmoothL1Loss()

    dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)

    count_loss = 0    # x axis for the loss plot
    count_recall = 0  # x axis for the recall plot

    for epoch in range(epochs):
        for step, (features, labels) in enumerate(dataloader):
            output = model(features)
            # squeeze [batch, 1] -> [batch] so the loss is not broadcast against the labels
            loss = loss_func(output.squeeze(-1), labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print('Epoch: ', epoch + 1, '| Step: ', step + 1, '| Loss: ', loss.item())
            Y_loss.append(loss.item())
            count_loss = count_loss + 1
            X_loss.append(count_loss)

        count_recall = count_recall + 1
        with torch.no_grad():
            predictions = model(test_features)
        errors = torch.abs(test_labels - predictions.t())  # transpose predictions: size [68, 1] -> [1, 68]

        errors = errors / test_labels
        correct_predictions = (errors <= threshold / 100).float().sum().item()

        # fraction of test predictions whose relative error is within the threshold
        recall = correct_predictions / len(test_labels)
        Y_recall.append(recall)
        X_recall.append(count_recall)

    toc = time.time()
    print('Training finished')
    print('Train time:' + str(1000 * (toc - tic)) + 'ms')
    print('')
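
The per-epoch evaluation above counts how many test predictions fall within threshold percent of the true VO2max. Pulled out into a standalone helper (a sketch, not part of the original script), the metric looks like this:

import torch

def within_threshold_accuracy(model, features, labels, threshold_pct=5):
    """Fraction of predictions whose relative error is at most threshold_pct percent."""
    with torch.no_grad():
        preds = model(features).squeeze(-1)            # [N, 1] -> [N]
    rel_error = torch.abs(labels - preds) / labels     # element-wise relative error
    return (rel_error <= threshold_pct / 100).float().mean().item()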

Plotting:

# Plot the loss curve
def plot_loss():
    fig = plt.figure(figsize=(36, 10))
    ax = fig.add_subplot()
    ax.scatter(X_loss, Y_loss, c='red', marker='1', label='loss')
    ax.set_xlabel('time')
    ax.set_ylabel('loss')
    ax.legend(loc=1)

# Plot the recall curve
def plot_recall():
    fig = plt.figure(figsize=(36, 10))
    ax = fig.add_subplot()
    ax.scatter(X_recall, Y_recall, c='blue', marker='1', label='recall')
    ax.set_xlabel('time')
    ax.set_ylabel('recall')
    ax.legend(loc=1)

Main:

import time
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, Normalizer, MinMaxScaler
import torch
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt

if __name__ == '__main__':

    '''
    epochs      number of training epochs
    batch_size  samples per batch; with 432 training samples, a batch size of 16 means 432/16 = 27 steps per epoch
    threshold   accepted error margin, 5 = 5%
    '''
    epochs = 200
    batch_size = 432
    threshold = 5

    Y_loss = []
    Y_recall = []
    X_loss = []
    X_recall = []
    train_dataset, test_features, test_labels = get_data()
    train_model()

    plot_recall()
    plot_loss()
    plt.show()
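
One practical caveat (not mentioned in the original post): train_test_split reshuffles the data on every run and the network weights are re-initialised randomly, so the loss and recall curves differ between runs. If reproducible numbers are wanted, the seeds can be fixed inside the main block before calling get_data(), for example:

torch.manual_seed(0)   # fixes the weight initialisation and DataLoader shuffling
np.random.seed(0)      # fixes the train/test split drawn inside get_data()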
