Diff of /knn.py [000000] .. [63ed18]

Switch to unified view

a b/knn.py
1
# import all necessary libraries
2
import pandas
3
import sklearn
4
from sklearn.model_selection import cross_validate,cross_val_score,train_test_split
5
from sklearn.metrics import matthews_corrcoef
6
from sklearn.metrics import classification_report
7
from sklearn.metrics import confusion_matrix
8
from sklearn.neighbors import KNeighborsClassifier
9
from sklearn.preprocessing import MinMaxScaler
10
from sklearn.metrics import accuracy_score
11
12
# Path of the CSV file to load (relative to the current working directory).
url = "data.csv"
# Column labels assigned to the CSV, in file order: 22 feature columns
# followed by the "status" column that is later used as the class label.
features = [
    "MDVP:Fo(Hz)",
    "MDVP:Fhi(Hz)",
    "MDVP:Flo(Hz)",
    "MDVP:Jitter(%)",
    "MDVP:Jitter(Abs)",
    "MDVP:RAP",
    "MDVP:PPQ",
    "Jitter:DDP",
    "MDVP:Shimmer",
    "MDVP:Shimmer(dB)",
    "Shimmer:APQ3",
    "Shimmer:APQ5",
    "MDVP:APQ",
    "Shimmer:DDA",
    "NHR",
    "HNR",
    "RPDE",
    "DFA",
    "spread1",
    "spread2",
    "D2",
    "PPE",
    "status",
]
# NOTE(review): because names= is supplied, pandas treats the first line of
# the file as data, not as a header -- confirm data.csv has no header row.
dataset = pandas.read_csv(url, names=features)
17
18
# store the dataset as an array for easier processing
array = dataset.values
# X stores the feature values (columns 0..21)
X = array[:, 0:22]
# Y stores the "answers": the class label from the "status" column
# (index 22). Labels are left unscaled; only the features are normalised.
Y = array[:, 22]
# fraction of the rows held out for validation
validation_size = 0.25
# fixed seed so the random train/validation split is reproducible
seed = 7
# split dataset into training set (75%) and validation set (25%)
X_train, X_validation, Y_train, Y_validation = train_test_split(
    X, Y, test_size=validation_size, random_state=seed
)
# Fit the scaler on the training features only and reuse its min/max for the
# validation features. Fitting on the full dataset before splitting would let
# validation-set statistics leak into training and bias the reported scores.
# Validation values may therefore fall slightly outside [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
X_train = scaler.fit_transform(X_train)
X_validation = scaler.transform(X_validation)
print(X_train)
32
# 10-fold cross validation to estimate accuracy: the training data is split
# into 10 parts; each part is used once for testing while the other 9 train.
num_folds = 10
num_instances = len(X_train)
seed = 7
# use the 'accuracy' metric to evaluate models (correct / total)
scoring = 'accuracy'

results = []
clf = KNeighborsClassifier()
# n_splits must be the number of folds (not the number of samples, which
# would silently turn this into leave-one-out). shuffle=True is required for
# random_state to have any effect; recent scikit-learn versions raise a
# ValueError when random_state is set while shuffle is False.
kfold = sklearn.model_selection.KFold(
    n_splits=num_folds, shuffle=True, random_state=seed
)
# cv_results holds one accuracy score per fold
cv_results = cross_val_score(clf, X_train, Y_train, cv=kfold, scoring=scoring)
43
# Train the classifier on the training split and predict labels for the
# held-out validation split.
clf.fit(X_train, Y_train)
predictions = clf.predict(X_validation)

# Report, in order: a header, accuracy as a percentage, the Matthews
# correlation coefficient, and the per-class classification report.
print("KNN")
print(accuracy_score(Y_validation, predictions) * 100)
for metric in (matthews_corrcoef, classification_report):
    print(metric(Y_validation, predictions))