Diabetes_Prediction / Git / Diff of /Models/logreg.py

Models:
RaymondKing/
Diabetes_Prediction
Downloads: 1
Diff of /Models/logreg.py [000000] .. [efbc2d]
Switch to side-by-side view

--- a
+++ b/Models/logreg.py
@@ -0,0 +1,55 @@
+# Importing the libraries
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+from sklearn.model_selection import train_test_split, cross_val_score
+from sklearn.model_selection import GridSearchCV
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve
+
+# Importing the dataset
+dataset = pd.read_csv('../Dataset/diabetes.csv')
+X = dataset.iloc[:, :-1].values
+y = dataset.iloc[:, 8].values
+
+# Splitting the dataset into the Training set and Test set
+from sklearn.model_selection import train_test_split
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, 
+                                                    random_state = 42)
+
+# Feature Scaling
+from sklearn.preprocessing import StandardScaler
+sc = StandardScaler()
+X_train = sc.fit_transform(X_train)
+X_test = sc.transform(X_test)
+
+# Parameter evaluation
+logclf = LogisticRegression(random_state=42)
+parameters={'C': [1, 4, 10], 'penalty': ['l1', 'l2']}
+gridsearch=GridSearchCV(logclf, parameters, cv=100, scoring='roc_auc')
+gridsearch.fit(X, y)
+print(gridsearch.best_params_)
+print(gridsearch.best_score_)
+
+# Adjusting development threshold
+logreg_classifier = LogisticRegression(C = 1, penalty = 'l1')
+X_train,X_test,y_train, y_test = train_test_split(X, y, random_state=42)
+logreg_classifier.fit(X_train, y_train)
+print("Training set score: {:.3f}".format(logreg_classifier.score(X_train, y_train)))
+print("Test set score: {:.3f}".format(logreg_classifier.score(X_test, y_test)))
+
+# Predicting the Test set results
+y_pred = logreg_classifier.predict(X_test)
+
+# Making the Confusion Matrix
+from sklearn.metrics import classification_report, confusion_matrix
+cm = confusion_matrix(y_test, y_pred)
+
+print('TP - True Negative {}'.format(cm[0,0]))
+print('FP - False Positive {}'.format(cm[0,1]))
+print('FN - False Negative {}'.format(cm[1,0]))
+print('TP - True Positive {}'.format(cm[1,1]))
+print('Accuracy Rate: {}'.format(np.divide(np.sum([cm[0,0],cm[1,1]]),np.sum(cm))))
+print('Misclassification Rate: {}'.format(np.divide(np.sum([cm[0,1],cm[1,0]]),np.sum(cm))))
+
+round(roc_auc_score(y_test,y_pred),5)
\ No newline at end of file