# k-nearest-neighbours classification of the Iris training CSV, implemented with NumPy.
| import tensorflow as tf import numpy as np import pandas as pd
# Remote location of the Iris training CSV.
TRAIN_URL = 'http://download.tensorflow.org/data/iris_training.csv'

# Fetch the file via Keras, naming the local copy after the last path
# component of the URL; the returned value is printed as the download location.
train_path = tf.keras.utils.get_file(
    fname=TRAIN_URL.rsplit('/', 1)[-1],
    origin=TRAIN_URL,
)
print('Download location', train_path)
class KNN:
    """k-nearest-neighbours classifier using Euclidean distance and majority vote.

    Labels may be of any type that ``np.unique`` can sort (ints, floats,
    strings, ...). When several classes receive the same number of votes,
    the smallest label (in ``np.unique`` order) wins.
    """

    def __init__(self, k):
        """Store the number of neighbours consulted for each prediction.

        Args:
            k: Number of nearest training samples that vote; must be >= 1.

        Raises:
            ValueError: If ``k`` is smaller than 1.
        """
        # A non-positive k would either leave no voters (k == 0) or silently
        # slice "all but the last |k|" neighbours (k < 0), so reject it early.
        if k < 1:
            raise ValueError('k must be a positive integer, got {!r}'.format(k))
        self.k = k

    def fit(self, X, y):
        """Memorise the training samples and their labels.

        Args:
            X: Array-like of training samples, one row per sample.
            y: 1-D array-like of labels, one per row of ``X``.

        Raises:
            ValueError: If ``X`` and ``y`` do not have the same length.
        """
        self.X = np.asarray(X)
        self.y = np.asarray(y)
        if len(self.X) != len(self.y):
            raise ValueError(
                'X and y must have the same length, got {} and {}'.format(
                    len(self.X), len(self.y)
                )
            )
        # Encode labels as 0..n_classes-1 once, so voting can use np.bincount
        # regardless of the label dtype (bincount itself only accepts
        # non-negative integers).
        self._classes, self._codes = np.unique(self.y, return_inverse=True)

    def predict(self, X):
        """Predict a label for every sample in ``X``.

        Args:
            X: Array-like of query samples with the same number of features
                as the training data.

        Returns:
            A NumPy array holding one predicted label per query sample.
        """
        X = np.asarray(X)
        n_classes = len(self._classes)
        result = []
        for x in X:
            # Euclidean distance from this query to every training sample.
            dis = np.sqrt(np.sum((x - self.X) ** 2, axis=1))
            # argsort yields the indices that would sort the distances;
            # keep the k closest training samples.
            nearest = dis.argsort()[:self.k]
            # Per-class vote counter: votes[i] is how many of the neighbours
            # carry class code i, e.g. np.bincount([1, 2, 1, 2, 3, 4]) gives
            # [0, 2, 2, 1, 1].
            votes = np.bincount(self._codes[nearest], minlength=n_classes)
            result.append(self._classes[votes.argmax()])
        return np.asarray(result)
# The first line of the CSV file is not a regular data row, so start reading
# from the second line and assign column names explicitly.
data = pd.read_csv(
    train_path,
    skiprows=1,
    names=['a', 'b', 'c', 'd', 'label'],
)
print(data, type(data))

# Shuffle the rows.
data = data.sample(frac=1)
print(data)

# The first 90 shuffled rows are used for training, the rest for testing.
# The last column holds the label; every column before it is a feature.
train_X, train_y = data.iloc[:90, :-1], data.iloc[:90, -1]
test_X, test_y = data.iloc[90:, :-1], data.iloc[90:, -1]
print(train_X, train_y)

knn = KNN(k=10)
knn.fit(train_X, train_y)
result = knn.predict(test_X)

# Accuracy: fraction of test rows whose predicted label equals the true label.
print('accuracy', np.sum(result == test_y) / len(result))
|