Diff of /Models/decisiontrees.py [000000] .. [efbc2d]

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve

# Importing the dataset
dataset = pd.read_csv('../Dataset/diabetes.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 8].values

# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 42)

# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
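# Note (added remark, not in the original script): decision trees are
# insensitive to monotonic feature scaling, so the StandardScaler step is
# harmless here but not strictly required for this model.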

# Parameter evaluation
treeclf = DecisionTreeClassifier(random_state=42)
parameters = {'max_depth': [6, 7, 8, 9],
              'min_samples_split': [2, 3, 4, 5],
              'max_features': [1, 2, 3, 4]
}
gridsearch = GridSearchCV(treeclf, parameters, cv=100, scoring='roc_auc')
gridsearch.fit(X, y)
print(gridsearch.best_params_)
print(gridsearch.best_score_)
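
# Sketch (added, not part of the original script): the refit model found by
# GridSearchCV can also be pulled out directly instead of re-typing the best
# hyperparameters by hand; this assumes the grid search above has been fit.
best_tree = gridsearch.best_estimator_
print('Best estimator: {}'.format(best_tree))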

# Training a tree with the tuned hyperparameters on a fresh train/test split
# (note: this split is drawn from the unscaled X, which is fine for a tree model)
tree = DecisionTreeClassifier(max_depth = 6, max_features = 4,
                              min_samples_split = 5,
                              random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
tree.fit(X_train, y_train)
print("Accuracy on training set: {:.3f}".format(tree.score(X_train, y_train)))
print("Accuracy on test set: {:.3f}".format(tree.score(X_test, y_test)))

# Predicting the Test set results
y_pred = tree.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import classification_report, confusion_matrix
cm = confusion_matrix(y_test, y_pred)

print('TN - True Negative {}'.format(cm[0,0]))
print('FP - False Positive {}'.format(cm[0,1]))
print('FN - False Negative {}'.format(cm[1,0]))
print('TP - True Positive {}'.format(cm[1,1]))
print('Accuracy Rate: {}'.format(np.divide(np.sum([cm[0,0],cm[1,1]]),np.sum(cm))))
print('Misclassification Rate: {}'.format(np.divide(np.sum([cm[0,1],cm[1,0]]),np.sum(cm))))
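
# Sketch (added, not in the original script): sensitivity and specificity can
# be read off the same confusion matrix, and the classification_report import
# above can be put to use for a per-class summary.
print('Sensitivity (recall): {}'.format(np.divide(cm[1,1], np.sum([cm[1,0], cm[1,1]]))))
print('Specificity: {}'.format(np.divide(cm[0,0], np.sum([cm[0,0], cm[0,1]]))))
print(classification_report(y_test, y_pred))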

# ROC AUC on the test set (printed so the value is visible when run as a script)
print('ROC AUC Score: {}'.format(round(roc_auc_score(y_test, y_pred), 5)))
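
# Sketch (added, not in the original script): the roc_curve import at the top
# is unused above; one way to use it is to plot the ROC curve from predicted
# probabilities rather than hard class labels.
y_proba = tree.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_proba)
plt.plot(fpr, tpr, label='Decision tree (AUC = {:.3f})'.format(roc_auc_score(y_test, y_proba)))
plt.plot([0, 1], [0, 1], linestyle='--', label='Chance')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve - Decision Tree')
plt.legend()
plt.show()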