Diff of /Models/svm.py [000000] .. [efbc2d]

--- a
+++ b/Models/svm.py
@@ -0,0 +1,82 @@
+# Importing the libraries
+import pandas as pd
+import numpy as np
+from sklearn.model_selection import train_test_split, GridSearchCV
+from sklearn.preprocessing import StandardScaler
+from sklearn.svm import SVC
+from sklearn.metrics import (accuracy_score, roc_auc_score, roc_curve,
+                             classification_report, confusion_matrix)
+import matplotlib.pyplot as plt
+
+# Importing the dataset
+dataset = pd.read_csv('../Dataset/diabetes.csv')
+X = dataset.iloc[:, :-1].values
+y = dataset.iloc[:, 8].values
+
+# Splitting the dataset into the Training set and Test set
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25,
+                                                    random_state=42)
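+# (If preserving the class ratio across splits matters, train_test_split
+# also accepts stratify=y; plain random splitting is kept here.)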
+
+# Feature Scaling
+sc = StandardScaler()
+X_train = sc.fit_transform(X_train)
+X_test = sc.transform(X_test)
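+# The scaler is fit on the training split only and merely applied to the
+# test split, which keeps test-set information from leaking into training.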
+
+# SVM with grid search over kernel, regularisation, and related settings
+parameters = {'kernel': ('linear', 'rbf'), 'C': (0.25, 0.5, 0.75, 1),
+              'gamma': (1, 2, 3, 'auto'),
+              'decision_function_shape': ('ovo', 'ovr'),
+              'shrinking': (True, False)}
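+# Note: decision_function_shape is ignored by SVC for binary problems such
+# as this one, so tuning it only enlarges the grid without changing results.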
+
+scores = ['precision', 'recall']
+
+for score in scores:
+    print("# Tuning hyper-parameters for %s" % score)
+    print()
+
+    svm = GridSearchCV(SVC(), parameters, cv=5,
+                       scoring='%s_macro' % score)
+    svm.fit(X_train, y_train)
+
+    print("Best parameters set found on development set:")
+    print()
+    print(svm.best_params_)
+    print()
+    print("Grid scores on development set:")
+    print()
+    means = svm.cv_results_['mean_test_score']
+    stds = svm.cv_results_['std_test_score']
+    for mean, std, params in zip(means, stds, svm.cv_results_['params']):
+        print("%0.3f (+/-%0.03f) for %r"
+              % (mean, std * 2, params))
+    print()
+
+    print("Detailed classification report:")
+    print()
+    print("The model is trained on the full development set.")
+    print("The scores are computed on the full evaluation set.")
+    print()
+    y_true, y_pred = y_test, svm.predict(X_test)
+    print(classification_report(y_true, y_pred))
+    print()
+
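+# GridSearchCV refits the best parameter set on the whole training split by
+# default (refit=True), so the last loop's tuned model can be reused
+# directly; a minimal illustration:
+best_svm = svm.best_estimator_
+print('Tuned (recall-scored) test accuracy: {:.3f}'.format(best_svm.score(X_test, y_test)))
+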
+# Final model with manually chosen hyper-parameters (note that C=100 and
+# gamma=0.0001 lie outside the grid searched above)
+svm_model = SVC(kernel='rbf', C=100, gamma=0.0001, random_state=42)
+svm_model.fit(X_train, y_train)
+spred = svm_model.predict(X_test)
+print('Accuracy with SVM: {0:.2f}'.format(accuracy_score(y_test, spred) * 100))
+
+
+# Making the Confusion Matrix for the final model
+cm = confusion_matrix(y_test, spred)
+
+print('TN - True Negative {}'.format(cm[0,0]))
+print('FP - False Positive {}'.format(cm[0,1]))
+print('FN - False Negative {}'.format(cm[1,0]))
+print('TP - True Positive {}'.format(cm[1,1]))
+print('Accuracy Rate: {}'.format((cm[0,0] + cm[1,1]) / np.sum(cm)))
+print('Misclassification Rate: {}'.format((cm[0,1] + cm[1,0]) / np.sum(cm)))
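+
+# Illustrative extras: two rates worth reporting on a medical dataset,
+# derived from the same confusion matrix
+sensitivity = cm[1,1] / (cm[1,0] + cm[1,1])  # TP / (TP + FN), recall on positives
+specificity = cm[0,0] / (cm[0,0] + cm[0,1])  # TN / (TN + FP), recall on negatives
+print('Sensitivity: {:.3f}'.format(sensitivity))
+print('Specificity: {:.3f}'.format(specificity))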
+
+# ROC AUC from the final model's hard 0/1 predictions
+print('ROC AUC: {}'.format(round(roc_auc_score(y_test, spred), 5)))
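+
+# Optional sketch: plot the ROC curve using the imports already present
+# above. decision_function gives continuous scores, which trace a more
+# informative curve (and a better AUC estimate) than hard 0/1 predictions.
+test_scores = svm_model.decision_function(X_test)
+fpr, tpr, _ = roc_curve(y_test, test_scores)
+plt.plot(fpr, tpr, label='SVM (AUC = {:.3f})'.format(roc_auc_score(y_test, test_scores)))
+plt.plot([0, 1], [0, 1], linestyle='--', label='Chance')
+plt.xlabel('False Positive Rate')
+plt.ylabel('True Positive Rate')
+plt.title('ROC curve for the final SVM model')
+plt.legend()
+plt.show()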