|
a |
|
b/benchmark.py |
|
|
1 |
# import all necessary libraries
import pandas

try:
    # pandas >= 0.20 exposes the plotting helpers under pandas.plotting
    from pandas.plotting import scatter_matrix
except ImportError:
    from pandas.tools.plotting import scatter_matrix

try:
    # scikit-learn >= 0.18 (sklearn.cross_validation was removed in 0.20);
    # aliased so existing cross_validation.* calls keep working unchanged
    from sklearn import model_selection as cross_validation
except ImportError:
    from sklearn import cross_validation

from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
|
|
9 |
|
|
|
10 |
# location of the dataset (a local CSV file, despite the variable name)
url = "data.csv"
# column names assigned to the CSV, in file order; the last one is the label
features = [
    "MDVP:Fo(Hz)",
    "MDVP:Fhi(Hz)",
    "MDVP:Flo(Hz)",
    "MDVP:Jitter(%)",
    "MDVP:Jitter(Abs)",
    "MDVP:RAP",
    "MDVP:PPQ",
    "Jitter:DDP",
    "MDVP:Shimmer",
    "MDVP:Shimmer(dB)",
    "Shimmer:APQ3",
    "Shimmer:APQ5",
    "MDVP:APQ",
    "Shimmer:DDA",
    "NHR",
    "HNR",
    "RPDE",
    "DFA",
    "spread1",
    "spread2",
    "D2",
    "PPE",
    "status",
]
# read the CSV, labelling its columns with the names above
dataset = pandas.read_csv(url, names=features)
|
|
15 |
|
|
|
16 |
# work on the raw array of values rather than on the DataFrame
array = dataset.values
# X holds the feature values: every column except the last one
X = array[:, :-1]
# Y holds the "answers": the class label in the last column ("status")
Y = array[:, -1]
# fraction of the rows held out for validation
validation_size = 0.3
# fixed seed so the shuffled split is the same on every run
seed = 7
# split the dataset into a training set (70%) and a validation set (30%)
X_train, X_validation, Y_train, Y_validation = cross_validation.train_test_split(
    X, Y, test_size=validation_size, random_state=seed
)
|
|
27 |
|
|
|
28 |
# settings for 10-fold cross validation (9 parts to train on, 1 part to test on)
# NOTE(review): nothing in the visible part of the script uses these yet
num_folds = 10
num_instances = len(X_train)
# random seed for the folds
seed = 7
# evaluate models with the 'accuracy' metric (correct / total)
scoring = "accuracy"
|
|
34 |
|
|
|
35 |
# Constant baseline: predict class 1 for every validation instance.
# Presumably this is the reference score other models are benchmarked against.
predictions = [1] * len(X_validation)

# accuracy of the baseline, as a percentage
print(accuracy_score(Y_validation, predictions)*100)
# Matthews correlation coefficient of the baseline
print(matthews_corrcoef(Y_validation, predictions))