Diff of /Models/svm.py [000000] .. [efbc2d]

Switch to unified view

a b/Models/svm.py
1
# Importing the libraries
2
import pandas as pd
3
import numpy as np
4
from sklearn.model_selection import train_test_split, cross_val_score
5
from sklearn.model_selection import GridSearchCV
6
from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve
7
from sklearn.svm import SVC
8
import matplotlib.pyplot as plt
9
from sklearn.metrics import classification_report
10
11
# Importing the dataset
# Every column except the last is used as a feature; the last one is the target.
dataset = pd.read_csv('../Dataset/diabetes.csv')
X = dataset.iloc[:, :-1].values
# Pick the target relative to the end of the frame (not by the fixed position
# 8) so it is always exactly the column that X leaves out.
y = dataset.iloc[:, -1].values
15
16
# Splitting the dataset into the Training set and Test set
# 25% of the rows are held out for testing; random_state is fixed so the same
# partition is produced on every run.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)
20
21
# Feature Scaling
# The scaler is fitted on the training split only; the same fitted scaler is
# then applied to both the training and the test features.
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)
26
27
#svm with grid search
# Candidate hyper-parameters, one dict per kernel:
#   * gamma is searched only for the RBF kernel -- the linear kernel does not
#     use it, so crossing it with 'linear' just refits identical models.
#   * decision_function_shape is left at its default: it changes the layout of
#     decision_function() output, not predict(), so it cannot move the
#     precision/recall scores used for model selection below.
parameters = [
    {'kernel': ['linear'],
     'C': [1, 0.25, 0.5, 0.75],
     'shrinking': [True, False]},
    {'kernel': ['rbf'],
     'C': [1, 0.25, 0.5, 0.75],
     'gamma': [1, 2, 3, 'auto'],
     'shrinking': [True, False]},
]

scores = ['precision', 'recall']

# One independent 5-fold grid search per metric (macro-averaged scorer).
for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()

    svm = GridSearchCV(SVC(), parameters, cv=5,
                       scoring='%s_macro' % score)
    svm.fit(X_train, y_train)

    print("Best parameters set found on development set:")
    print()
    print(svm.best_params_)
    print()
    print("Grid scores on development set:")
    print()
    # Mean and spread of the cross-validated score for every candidate.
    means = svm.cv_results_['mean_test_score']
    stds = svm.cv_results_['std_test_score']
    for mean, std, params in zip(means, stds, svm.cv_results_['params']):
        # The spread is reported as two standard deviations.
        print("%0.3f (+/-%0.03f) for %r"
              % (mean, std * 2, params))
    print()

    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print()
    # predict() on the fitted search uses the best candidate found above.
    y_true, y_pred = y_test, svm.predict(X_test)
    print(classification_report(y_true, y_pred))
    print()
64
65
# Fit one RBF-kernel SVM with fixed hyper-parameters and report its accuracy
# on the held-out test split.
# NOTE(review): C and gamma are hard-coded here and are not taken from the
# grid-search results -- confirm this is intended.
svm_model = SVC(kernel='rbf', C=100, gamma=0.0001, random_state=42)
svm_model.fit(X_train, y_train)
spred = svm_model.predict(X_test)
# accuracy_score is documented as (y_true, y_pred); pass them in that order.
print('Accuracy with SVM {0}'.format(accuracy_score(y_test, spred) * 100))
69
70
71
# Making the Confusion Matrix
from sklearn.metrics import classification_report, confusion_matrix

# Score the predictions of svm_model (spred). y_pred is a leftover from the
# grid-search loop and belongs to a different estimator, so using it here
# would report figures for a model other than the one whose accuracy is
# printed just before this section.
cm = confusion_matrix(y_test, spred)

# Rows of cm are true labels and columns are predicted labels, so for two
# classes the cells are [[TN, FP], [FN, TP]].
tn, fp = cm[0, 0], cm[0, 1]
fn, tp = cm[1, 0], cm[1, 1]
total = np.sum(cm)

print('TN - True Negative {}'.format(tn))
print('FP - False Positive {}'.format(fp))
print('FN - False Negative {}'.format(fn))
print('TP - True Positive {}'.format(tp))
print('Accuracy Rate: {}'.format(np.divide(np.sum([tn, tp]), total)))
print('Misclassification Rate: {}'.format(np.divide(np.sum([fp, fn]), total)))

# Print the ROC AUC: as a bare expression the value is computed and then
# discarded when this file is run as a script.
# NOTE(review): this scores hard class labels; svm_model.decision_function(X_test)
# would give a ranking-based AUC -- confirm which one is wanted.
print('ROC AUC: {}'.format(round(roc_auc_score(y_test, spred), 5)))