a b/Models/gradientboosting.py
1
# Importing the libraries
2
import numpy as np
3
import matplotlib.pyplot as plt
4
import pandas as pd
5
from sklearn.ensemble import GradientBoostingClassifier
6
from sklearn.model_selection import train_test_split, cross_val_score
7
from sklearn.model_selection import GridSearchCV
8
from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve
9
10
11
# Importing the dataset
# The feature matrix is every column except the last one, and the target is
# the last column. Both slices are expressed relative to the end of the frame
# so they can never overlap or drift apart if the column count changes.
dataset = pd.read_csv('../Dataset/diabetes.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values
15
16
# Splitting the dataset into the Training set and Test set
# A quarter of the rows is held out for testing; the fixed seed keeps the
# partition reproducible from run to run.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.25)
20
21
# Feature Scaling
# The scaler learns its statistics from the training rows only and is then
# applied unchanged to both partitions, so nothing about the test rows leaks
# into the preprocessing.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)
26
27
# Parameter evaluation with GSC validation
# Hyper-parameters are tuned by cross-validation on the training split only.
# Searching over the full data set would let the held-out test rows influence
# model selection and bias the test metrics reported later in the script.
gbe = GradientBoostingClassifier(random_state=42)
parameters = {
    'learning_rate': [0.05, 0.1, 0.5],
    'max_features': [0.5, 1],
    'max_depth': [3, 4, 5],
}
# NOTE(review): cv=100 requests 100 folds. On a small data set each validation
# fold holds only a handful of rows, which makes the per-fold ROC AUC very
# coarse and the search slow -- consider 5 or 10 (confirm against data size).
gridsearch = GridSearchCV(gbe, parameters, cv=100, scoring='roc_auc')
gridsearch.fit(X_train, y_train)
# Best parameter combination and its mean cross-validated ROC AUC.
print(gridsearch.best_params_)
print(gridsearch.best_score_)
37
38
# Fitting the final model and reporting its accuracy
# The hyper-parameters are fixed by hand here (presumably copied from an
# earlier grid-search run -- verify against the printed best_params_).
gbi = GradientBoostingClassifier(learning_rate=0.05, max_depth=3,
                                 max_features=0.5,
                                 random_state=42)
# Train on the train/test partition already prepared earlier in the script.
# Splitting the raw X, y again at this point would silently overwrite the
# scaled X_train / X_test and turn the feature-scaling step into dead code.
gbi.fit(X_train, y_train)
# score() reports mean accuracy; comparing both partitions exposes overfitting.
print("Accuracy on training set: {:.3f}".format(gbi.score(X_train, y_train)))
print("Accuracy on test set: {:.3f}".format(gbi.score(X_test, y_test)))
46
47
# Storing the prediction
# predict_proba yields one column per class; only the column at index 1
# (the second entry of gbi.classes_) is kept as the prediction score.
class_probabilities = gbi.predict_proba(X_test)
y_pred = class_probabilities[:, 1]
49
50
# Making the Confusion Matrix
from sklearn.metrics import classification_report, confusion_matrix
# The predicted probabilities are rounded to hard 0/1 labels, i.e. a 0.5
# decision threshold, before being compared with the true labels.
cm = confusion_matrix(y_test, y_pred.round())

# For a binary problem scikit-learn lays the matrix out as
# [[TN, FP], [FN, TP]] (rows = actual class, columns = predicted class),
# so flattening it yields the four counts in exactly that order.
tn, fp, fn, tp = cm.ravel()
total = cm.sum()

print('TN - True Negative {}'.format(tn))
print('FP - False Positive {}'.format(fp))
print('FN - False Negative {}'.format(fn))
print('TP - True Positive {}'.format(tp))
# Share of correct (diagonal) and incorrect (off-diagonal) predictions.
print('Accuracy Rate: {}'.format((tn + tp) / total))
print('Misclassification Rate: {}'.format((fp + fn) / total))
60
61
62
# Plotting the predictions
# Histogram of the predicted probabilities in ten bins; the x-axis is pinned
# to [0, 1] so the whole probability range is always visible.
plt.hist(y_pred, bins=10)
plt.xlim(0, 1)
plt.xlabel("Predicted Probabilities")
plt.ylabel("Frequency")
67
68
# Reporting the ROC AUC
# A bare expression is only echoed in an interactive session; print the value
# so the score is also visible when the file is run as a script.
print('ROC AUC: {}'.format(round(roc_auc_score(y_test, y_pred), 5)))