--- /dev/null
+++ b/scripts/model/testing.py
@@ -0,0 +1,54 @@
+import os
+import warnings
+
+import joblib
+import pandas as pd
+from sklearn.metrics import (accuracy_score, precision_score, recall_score,
+                             f1_score, roc_auc_score, confusion_matrix,
+                             classification_report)
+
+# Silence library warnings so they do not clutter the metric report
+warnings.filterwarnings('ignore')
+
+BASE_DIR = os.path.dirname(__file__)
+TARGET = 'LUNG_CANCER'
+
+# Load trained model & holdout test set; exit early if either artifact is missing,
+# since every function below depends on them
+try:
+    model = joblib.load(os.path.join(BASE_DIR, '../../models/gradient_boosting.joblib'))
+    data = pd.read_csv(os.path.join(BASE_DIR, '../../data/input/test.csv'))
+except FileNotFoundError as err:
+    raise SystemExit(f'An error occurred: {err}')
+
+# Predict labels for the holdout test set
+def _predict(data):
+    return model.predict(data.drop(TARGET, axis='columns'))
+
+# Compute a single score metric (accuracy, precision, recall, F1, ...)
+def score_metrics(data, metric):
+    return metric(data[TARGET], _predict(data))
+
+# Summarise the confusion matrix as TP/TN/FP/FN counts
+def matrix(data, matrix_fn):
+    tn, fp, fn, tp = matrix_fn(data[TARGET], _predict(data)).ravel()
+
+    return f'TP: {tp} - TN: {tn} - FP: {fp} - FN: {fn}'
+
+# Compute ROC AUC from the predicted probabilities of the positive class
+def auc(data):
+    probs = model.predict_proba(data.drop(TARGET, axis='columns'))[:, 1]
+
+    return roc_auc_score(data[TARGET], probs)
+
+# Build the per-class classification report
+def report(data, class_metric):
+    return class_metric(data[TARGET], _predict(data))
+
+if __name__ == '__main__':
+    print(f'Accuracy: {score_metrics(data, accuracy_score)}')
+    print(f'Precision: {score_metrics(data, precision_score)}')
+    print(f'Recall: {score_metrics(data, recall_score)}')
+    print(f'F1 score: {score_metrics(data, f1_score)}')
+    print(f'\n{matrix(data, confusion_matrix)}')
+    print(f'\nAUC: {auc(data)}')
+    print(f'\n{report(data, classification_report)}')
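
Note for reviewers: the script assumes two artifacts produced elsewhere in the repo, a fitted model at models/gradient_boosting.joblib and a holdout split at data/input/test.csv. That training code is not part of this diff; purely as a hedged sketch (the source CSV name, the model's default hyperparameters, and the split parameters below are assumptions, not the repo's actual pipeline), the artifacts could be generated along these lines:

    # Hedged sketch only: the paths and target column come from this diff;
    # everything else (source file, split size, random_state) is hypothetical.
    import joblib
    import pandas as pd
    from sklearn.ensemble import GradientBoostingClassifier
    from sklearn.model_selection import train_test_split

    df = pd.read_csv('data/input/survey.csv')  # hypothetical source dataset
    train, test = train_test_split(df, test_size=0.2,
                                   stratify=df['LUNG_CANCER'], random_state=42)
    test.to_csv('data/input/test.csv', index=False)  # holdout set read by testing.py

    X, y = train.drop('LUNG_CANCER', axis='columns'), train['LUNG_CANCER']
    joblib.dump(GradientBoostingClassifier().fit(X, y),
                'models/gradient_boosting.joblib')

With those artifacts in place, running python scripts/model/testing.py prints accuracy, precision, recall, F1, the TP/TN/FP/FN counts, ROC AUC, and the per-class classification report.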