# K-means clustering of the Iris dataset: fit on a training split, plot the clusters, score on the test split.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score, adjusted_rand_score
from sklearn.model_selection import train_test_split
# Location of the Iris CSV. The loader below assumes the first column is a
# row id, the last column is the species name, and the four columns between
# them are sepal length, sepal width, petal length and petal width.
csv_path = '/Users/bakako/Downloads/archive/Iris.csv'

# Integer encoding of the species names used when scoring.
SPECIES_TO_ID = {
    'Iris-setosa': 0,
    'Iris-versicolor': 1,
    'Iris-virginica': 2,
}

# (colour, marker, legend label) for each K-means cluster index.
CLUSTER_STYLES = (
    ('red', 'o', 'label0'),
    ('green', '*', 'label1'),
    ('blue', '+', 'label2'),
)
N_CLUSTERS = len(CLUSTER_STYLES)

# (x column, y column, x-axis label, y-axis label, colour of the raw plot).
FEATURE_PAIRS = (
    (0, 1, 'sepal length', 'sepal width', 'red'),
    (2, 3, 'petal length', 'petal width', 'green'),
)


def encode_species(names):
    """Convert species names to the integer ids in ``SPECIES_TO_ID``.

    Args:
        names: Array-like of species names; any shape, it is flattened.

    Returns:
        1-D integer ``numpy`` array with one id per input name.

    Raises:
        ValueError: If a name is not a key of ``SPECIES_TO_ID``. Skipping such
            rows would silently misalign the labels with the predictions.
    """
    flat = np.asarray(names, dtype=object).ravel()
    unknown = sorted({str(name) for name in flat if name not in SPECIES_TO_ID})
    if unknown:
        raise ValueError(f"unknown species label(s): {unknown}")
    return np.array([SPECIES_TO_ID[name] for name in flat], dtype=int)


def cluster_to_class_map(cluster_ids, class_ids, n_clusters):
    """Map every cluster index to the most common class among its members.

    K-means numbers its clusters arbitrarily, so cluster ``0`` is not
    necessarily class ``0``. This lookup table translates cluster indices
    into class ids so they can be compared with the true labels.

    Args:
        cluster_ids: Cluster index of each sample (ints in ``[0, n_clusters)``).
        class_ids: True class id of each sample (non-negative ints).
        n_clusters: Number of clusters, i.e. the length of the returned table.

    Returns:
        Integer array ``m`` of length ``n_clusters`` where ``m[c]`` is the
        majority class of cluster ``c`` (ties go to the smallest class id),
        or ``-1`` if no sample belongs to cluster ``c``.

    Raises:
        ValueError: If the two inputs do not have the same shape.
    """
    cluster_ids = np.asarray(cluster_ids, dtype=int).ravel()
    class_ids = np.asarray(class_ids, dtype=int).ravel()
    if cluster_ids.shape != class_ids.shape:
        raise ValueError("cluster_ids and class_ids must have the same length")

    mapping = np.full(n_clusters, -1, dtype=int)
    for cluster in range(n_clusters):
        members = class_ids[cluster_ids == cluster]
        if members.size:
            mapping[cluster] = np.bincount(members).argmax()
    return mapping


def load_iris(path):
    """Read the CSV at *path* and return ``(features, species)``.

    ``features`` is a float array built from every column except the first
    and the last; ``species`` is the last column as a 1-D array.
    """
    df = pd.read_csv(path)
    features = df.iloc[:, 1:-1].to_numpy(dtype=float)
    species = df.iloc[:, -1].to_numpy()
    return features, species


def plot_clusters(X, cluster_ids):
    """Draw raw and per-cluster scatter plots of *X* on a 1x4 figure.

    Panels 0-1 show all samples (sepal pair, petal pair); panels 2-3 show
    the same feature pairs with one colour/marker per cluster.

    Returns:
        The created matplotlib figure.
    """
    fig, axes = plt.subplots(1, 4, figsize=(32, 6))
    for k, (col_x, col_y, xlabel, ylabel, raw_color) in enumerate(FEATURE_PAIRS):
        raw_ax = axes[k]
        clustered_ax = axes[k + len(FEATURE_PAIRS)]

        raw_ax.scatter(X[:, col_x], X[:, col_y],
                       c=raw_color, marker='o', label='see')
        for cluster, (color, marker, name) in enumerate(CLUSTER_STYLES):
            members = X[cluster_ids == cluster]
            clustered_ax.scatter(members[:, col_x], members[:, col_y],
                                 c=color, marker=marker, label=name)

        for ax in (raw_ax, clustered_ax):
            ax.set_xlabel(xlabel)
            ax.set_ylabel(ylabel)
            ax.legend(loc=2)
    return fig


def main(path=csv_path, random_state=None):
    """Cluster the Iris data with K-means, plot it and print two scores.

    Args:
        path: CSV file to load.
        random_state: Optional seed forwarded to both the train/test split
            and K-means. ``None`` (the default) keeps runs randomised.

    Prints the test-set accuracy (after translating clusters to species)
    followed by the adjusted Rand index, then shows the figure.
    """
    features, species = load_iris(path)
    train_features, test_features, train_labels, test_labels = train_test_split(
        features, species, test_size=0.2, random_state=random_state)

    model = KMeans(n_clusters=N_CLUSTERS, random_state=random_state)
    model.fit(train_features)

    plot_clusters(train_features, model.labels_)

    prediction = model.predict(test_features)
    test_ids = encode_species(test_labels)

    # Cluster indices are arbitrary, so translate them to species ids using
    # the majority species of each cluster on the training split before
    # computing accuracy.
    mapping = cluster_to_class_map(
        model.labels_, encode_species(train_labels), N_CLUSTERS)
    print(accuracy_score(test_ids, mapping[prediction]))

    # The adjusted Rand index is invariant to how clusters are numbered, so
    # it is computed on the raw cluster indices.
    print(adjusted_rand_score(test_ids, prediction))

    # Without this a plain script run builds the figure but never displays it.
    plt.show()


if __name__ == "__main__":
    main()