from sklearn.model_selection import train_test_split
from sklearn.preprocessing import (
    Binarizer,
    MinMaxScaler,
    Normalizer,
    StandardScaler,
)

# Feature preprocessing followed by a train/test split.
# `features` and `labels` are expected to be defined before this point.
#
# NOTE(review): each step below overwrites `features`, so as written the four
# transforms are applied cumulatively. They look like alternative examples
# rather than a pipeline -- confirm which one is actually wanted. In
# particular, after Normalizer() every value has magnitude <= 1, so
# Binarizer(threshold=3) would map the whole matrix to 0.
#
# NOTE(review): the scalers are fitted on the full data set before the split
# below, so test-set statistics influence the transform -- confirm that this
# is acceptable for the experiment.

# Rescale every feature (column) into the range [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
features = scaler.fit_transform(features)

# Standardize every feature (column) to zero mean and unit variance.
scaler = StandardScaler()
features = scaler.fit_transform(features)

# Rescale every sample (row) to unit norm (L2 by default).
scaler = Normalizer()
features = scaler.fit_transform(features)

# Map values greater than the threshold to 1 and all other values to 0.
scaler = Binarizer(threshold=3)
features = scaler.fit_transform(features)

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
#
# train_test_split parameter notes:
# - arrays: the sample arrays, i.e. the feature vectors and the labels
# - test_size: float -> fraction of the samples used for the test set,
#   int -> absolute number of test samples
# - train_size: same convention as test_size, for the training set
# - random_state: int random seed (a fixed seed makes the experiment
#   reproducible)
# - shuffle: whether to shuffle the data before splitting
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, train_size=0.8, random_state=0
)