scripts/model/testing.py
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, classification_report
import warnings
import pandas as pd
import joblib
import os

warnings.filterwarnings('ignore')

# Load the trained model and the holdout test set
try:
    model = joblib.load(os.path.join(os.path.dirname(__file__), '../../models/gradient_boosting.joblib'))
    data = pd.read_csv(os.path.join(os.path.dirname(__file__), '../../data/input/test.csv'))
except FileNotFoundError as err:
    # Nothing below can run without the model and data, so exit with a clear message
    raise SystemExit(f'An error occurred: {err}')

# Predict labels for the holdout test set (target column dropped from the features)
def _predict(data):
    return model.predict(data.drop('LUNG_CANCER', axis='columns'))

# Compute a score metric (accuracy, precision, recall, F1, ...) on the holdout set
def score_metrics(data, metric):
    return metric(data['LUNG_CANCER'], _predict(data))

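# Note on score_metrics: if LUNG_CANCER holds string labels (e.g. 'YES'/'NO'), precision_score,
# recall_score and f1_score need an explicit positive label, e.g. by passing
# functools.partial(precision_score, pos_label='YES') as the metric.
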
# Summarise the confusion matrix; for binary labels sklearn returns [[TN, FP], [FN, TP]]
def matrix(data, matrix_func):
    classes = matrix_func(data['LUNG_CANCER'], _predict(data))

    return f'TP: {classes[1][1]} - TN: {classes[0][0]} - FP: {classes[0][1]} - FN: {classes[1][0]}'

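# Equivalent idiom for a 2x2 matrix: tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
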
# Compute the ROC AUC (area under the TPR-vs-FPR curve) from positive-class probabilities
def auc(data):
    probs = model.predict_proba(data.drop('LUNG_CANCER', axis='columns'))[:, 1]

    return roc_auc_score(data['LUNG_CANCER'], probs)

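# Note on auc: predict_proba columns are ordered like model.classes_, so [:, 1] is the
# probability of the second class in classes_ (the positive class for 0/1 targets).
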
# Build the per-class classification report
def report(data, class_metric):
    return class_metric(data['LUNG_CANCER'], _predict(data))

if __name__ == '__main__':
    print(f'Accuracy: {score_metrics(data, accuracy_score)}')
    print(f'Precision: {score_metrics(data, precision_score)}')
    print(f'Recall: {score_metrics(data, recall_score)}')
    print(f'F1 score: {score_metrics(data, f1_score)}')
    print(f'\n{matrix(data, confusion_matrix)}')
    print(f'\nAUC: {auc(data)}')
    print(f'\n{report(data, classification_report)}')
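
# Example invocation (paths are resolved relative to this file, so the working directory
# does not matter), e.g. from the repository root:
#     python scripts/model/testing.py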