Diff of /benchmark.py [000000] .. [63ed18]

Switch to unified view

a b/benchmark.py
1
# import all necessary libraries
2
import pandas
3
from pandas.tools.plotting import scatter_matrix
4
from sklearn import cross_validation
5
from sklearn.metrics import matthews_corrcoef
6
from sklearn.metrics import classification_report
7
from sklearn.metrics import confusion_matrix
8
from sklearn.metrics import accuracy_score
9
10
# Location of the input CSV (a local file path, despite the variable name).
url = "data.csv"
# Column labels applied to the CSV in file order; "status" is the final one.
features = [
    "MDVP:Fo(Hz)",
    "MDVP:Fhi(Hz)",
    "MDVP:Flo(Hz)",
    "MDVP:Jitter(%)",
    "MDVP:Jitter(Abs)",
    "MDVP:RAP",
    "MDVP:PPQ",
    "Jitter:DDP",
    "MDVP:Shimmer",
    "MDVP:Shimmer(dB)",
    "Shimmer:APQ3",
    "Shimmer:APQ5",
    "MDVP:APQ",
    "Shimmer:DDA",
    "NHR",
    "HNR",
    "RPDE",
    "DFA",
    "spread1",
    "spread2",
    "D2",
    "PPE",
    "status",
]
# NOTE(review): names= is passed without header=, so pandas reads the first
# line of the file as data -- confirm data.csv has no header row.
dataset = pandas.read_csv(url, names=features)
15
16
# Work on the raw values as an array so columns can be sliced by position.
array = dataset.values
# X holds the input columns: every column except the last one.
X = array[:, :-1]
# Y holds the labels: the last column, named "status" in the feature list.
Y = array[:, -1]
# Fraction of rows held out for validation (30%); the remaining 70% is used
# for training.
validation_size = 0.3
# Fixed seed so the random train/validation split is reproducible.
seed = 7
# Split the dataset into a training set (70%) and a validation set (30%).
# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20;
# on current versions train_test_split lives in sklearn.model_selection.
X_train, X_validation, Y_train, Y_validation = cross_validation.train_test_split(
    X, Y, test_size=validation_size, random_state=seed
)
27
28
# Settings for estimating accuracy with k-fold cross-validation.
# Same seed value as the one used for the train/validation split.
seed = 7
# Models are scored by accuracy: correct predictions / total predictions.
scoring = 'accuracy'
# 10 folds: the data is cut into 10 parts, 9 used to train and 1 to test.
num_folds = 10
# Number of rows in the training split.
num_instances = len(X_train)
34
35
# Constant baseline: predict class 1 for every validation sample.
# NOTE(review): presumably the reference score real models are compared
# against -- confirm.
predictions = [1] * len(X_validation)

# Accuracy of the constant baseline, printed as a percentage.
print(accuracy_score(Y_validation, predictions) * 100)
# Matthews correlation coefficient of the constant baseline.
# NOTE(review): with only one predicted class this is expected to be 0 --
# confirm against the scikit-learn documentation.
print(matthews_corrcoef(Y_validation, predictions))