# Models/svm.py
|
|
1 |
# Importing the libraries |
|
|
2 |
import pandas as pd |
|
|
3 |
import numpy as np |
|
|
4 |
from sklearn.model_selection import train_test_split, cross_val_score |
|
|
5 |
from sklearn.model_selection import GridSearchCV |
|
|
6 |
from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve |
|
|
7 |
from sklearn.svm import SVC |
|
|
8 |
import matplotlib.pyplot as plt |
|
|
9 |
from sklearn.metrics import classification_report |
|
|
10 |
|
|
|
# Importing the dataset: Pima Indians diabetes data.
dataset = pd.read_csv('../Dataset/diabetes.csv')
X = dataset.iloc[:, :-1].values  # features: every column except the last
y = dataset.iloc[:, 8].values    # target: column index 8 (the last column)

# Splitting the dataset into the Training set and Test set.
# (train_test_split is already imported at the top of the file;
# the redundant duplicate import was removed.)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25,
                                                    random_state=42)

# Feature Scaling: fit the scaler on the training data only, then apply
# the same transform to the test data to avoid information leakage.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
|
|
26 |
|
|
|
# SVM hyper-parameter tuning with grid search.
# (The dead `svm = SVC(random_state=42)` assignment was removed: it was
# overwritten by the GridSearchCV assignment inside the loop before use.)
parameters = {'kernel': ('linear', 'rbf'), 'C': (1, 0.25, 0.5, 0.75),
              'gamma': (1, 2, 3, 'auto'),
              'decision_function_shape': ('ovo', 'ovr'),
              'shrinking': (True, False)}

# Tune once for each metric of interest.
scores = ['precision', 'recall']

for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()

    # 5-fold cross-validated grid search, optimizing the macro-averaged
    # version of the current metric.
    svm = GridSearchCV(SVC(), parameters, cv=5,
                       scoring='%s_macro' % score)
    svm.fit(X_train, y_train)

    print("Best parameters set found on development set:")
    print()
    print(svm.best_params_)
    print()
    print("Grid scores on development set:")
    print()
    means = svm.cv_results_['mean_test_score']
    stds = svm.cv_results_['std_test_score']
    for mean, std, params in zip(means, stds, svm.cv_results_['params']):
        # Report mean CV score +/- two standard deviations per setting.
        print("%0.3f (+/-%0.03f) for %r"
              % (mean, std * 2, params))
    print()

    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print()
    # Evaluate the refit best estimator on the held-out test set.
    y_true, y_pred = y_test, svm.predict(X_test)
    print(classification_report(y_true, y_pred))
    print()
|
|
64 |
|
|
|
# Final model: RBF-kernel SVC with manually chosen C and gamma.
svm_model = SVC(kernel='rbf', C=100, gamma=0.0001, random_state=42)
svm_model.fit(X_train, y_train)
spred = svm_model.predict(X_test)
# accuracy_score signature is (y_true, y_pred); argument order fixed
# (accuracy is symmetric, so the printed value is unchanged).
print('Accuracy with SVM {0}'.format(accuracy_score(y_test, spred) * 100))


# Making the Confusion Matrix
from sklearn.metrics import classification_report, confusion_matrix
# Bug fix: evaluate the final model's predictions (spred), not y_pred,
# which is the leftover prediction of the last grid-search model above.
cm = confusion_matrix(y_test, spred)

# Label fix: cm[0,0] is the True Negative count (was mislabeled 'TP').
print('TN - True Negative {}'.format(cm[0, 0]))
print('FP - False Positive {}'.format(cm[0, 1]))
print('FN - False Negative {}'.format(cm[1, 0]))
print('TP - True Positive {}'.format(cm[1, 1]))
print('Accuracy Rate: {}'.format(np.divide(np.sum([cm[0, 0], cm[1, 1]]), np.sum(cm))))
print('Misclassification Rate: {}'.format(np.divide(np.sum([cm[0, 1], cm[1, 0]]), np.sum(cm))))

# Bug fix: the rounded AUC was computed but discarded — print it, and
# score the final model's predictions for consistency with the metrics above.
print('ROC AUC: {}'.format(round(roc_auc_score(y_test, spred), 5)))