--- /dev/null
+++ b/Classifier/Classes/utils.py
@@ -0,0 +1,159 @@
+import csv
+import os
+import numpy as np
+import pandas as pd
+import torch
+from sklearn.metrics import confusion_matrix
+from sklearn import metrics
+import seaborn as sns
+import matplotlib.pyplot as plt
+
+
+def image_to_csv(file_path, save_path, mode=""):
+    """
+    Create a CSV index from the Acute Lymphoblastic Leukemia dataset.
+
+    Args:
+        file_path: Acute Lymphoblastic Leukemia data path
+        save_path: path to save the created CSV
+        mode: "train" or "test"; selects the output CSV filename
+    """
+    columns = ['data', 'label']
+    csv_data = ""
+    imagefiles = list()  # list to store image names (without extensions)
+    if mode == "train":
+        csv_data = "train.csv"
+    if mode == "test":
+        csv_data = "test.csv"
+
+    with open(os.path.join(save_path, csv_data), 'w', newline='') as csvfile:
+        for root, dirs, files in os.walk(file_path):  # scan through the data path
+            for file in files:  # loop through all files
+                if file.endswith(('.tif', '.jpg', '.png')):  # keep only *.tif, *.jpg and *.png files
+                    imagefiles.append(os.path.splitext(file)[0])  # store the file name without its extension
+
+        writer = csv.writer(csvfile, dialect='excel')  # create a writer from the csv module
+        writer.writerow(columns)  # write the header row
+        for image in imagefiles:  # loop through all collected image names
+            # the dataset encodes the class as a _0/_1 suffix in the file name
+            label = os.path.basename(image)
+            if "_0" in label:
+                label = 0
+            elif "_1" in label:
+                label = 1
+
+            writer.writerow([image, label])
+    print("done")
+
+
+def print_accuracy_and_classification_report(labels, prediction):
+    """Print model accuracy and classification report.
+
+    Args:
+        labels (numpy.array): Ground-truth labels
+        prediction (numpy.array): Model predictions
+    """
+    print('Cross validation accuracy:')
+    print('\t', metrics.accuracy_score(labels, prediction))
+    print('\nCross validation classification report\n')
+    print(metrics.classification_report(labels, prediction))
+
+
+def f1_loss(y_true: torch.Tensor, y_pred: torch.Tensor, is_training=False) -> torch.Tensor:
+    '''Calculate F1 score. Can work with GPU tensors.
+
+    The original implementation was written by Michal Haltuf on Kaggle.
+
+    Returns
+    -------
+    torch.Tensor
+        `ndim` == 1. 0 <= val <= 1
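+
+    Example
+    -------
+    A quick sanity check on dummy 1-D label tensors (illustrative only):
+
+    >>> f1_loss(torch.tensor([1, 0, 1]), torch.tensor([1, 0, 0]))
+    tensor(0.6667)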
+
+    Reference
+    ---------
+    - https://www.kaggle.com/rejpalcz/best-loss-function-for-f1-score-metric
+    - https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html#sklearn.metrics.f1_score
+    - https://discuss.pytorch.org/t/calculating-precision-recall-and-f1-score-in-case-of-multi-label-classification/28265/6
+    '''
+    assert y_true.ndim == 1
+    assert y_pred.ndim == 1 or y_pred.ndim == 2
+
+    if y_pred.ndim == 2:
+        y_pred = y_pred.argmax(dim=1)
+
+    tp = (y_true * y_pred).sum().to(torch.float32)
+    tn = ((1 - y_true) * (1 - y_pred)).sum().to(torch.float32)
+    fp = ((1 - y_true) * y_pred).sum().to(torch.float32)
+    fn = (y_true * (1 - y_pred)).sum().to(torch.float32)
+
+    epsilon = 1e-7
+
+    precision = tp / (tp + fp + epsilon)
+    recall = tp / (tp + fn + epsilon)
+
+    f1 = 2 * (precision * recall) / (precision + recall + epsilon)
+    f1.requires_grad = is_training
+    return f1
+
+
+def sigmoid(x):
+    return 1.0 / (1.0 + np.exp(-x))
+
+
+def accuracy_mini_batch(predicted, true, i, acc, tpr, tnr):
+    predicted = predicted.cpu()
+    true = true.cpu()
+
+    predicted = (sigmoid(predicted.data.numpy()) > 0.5)
+    true = true.data.numpy()
+
+    accuracy = np.sum(predicted == true) / true.shape[0]
+    true_positive_rate = np.sum((predicted == 1) * (true == 1)) / np.sum(true == 1)
+    true_negative_rate = np.sum((predicted == 0) * (true == 0)) / np.sum(true == 0)
+    # update the running averages over the first i + 1 mini-batches
+    acc = acc * i / (i + 1) + accuracy / (i + 1)
+    tpr = tpr * i / (i + 1) + true_positive_rate / (i + 1)
+    tnr = tnr * i / (i + 1) + true_negative_rate / (i + 1)
+
+    return acc, tpr, tnr
+
+
+def accuracy(predicted, true):
+    predicted = predicted.cpu()
+    true = true.cpu()
+
+    predicted = (sigmoid(predicted.data.numpy()) > 0.5)
+    true = true.data.numpy()
+
+    accuracy = np.sum(predicted == true) / true.shape[0]
+    true_positive_rate = np.sum((predicted == 1) * (true == 1)) / np.sum(true == 1)
+    true_negative_rate = np.sum((predicted == 0) * (true == 0)) / np.sum(true == 0)
+
+    return accuracy, true_positive_rate, true_negative_rate
+
+
+def model_confusion_matrix(y_true, y_pred, classes=None):
+    cm = confusion_matrix(y_true=y_true, y_pred=y_pred)
+    df_cm = pd.DataFrame(cm, index=classes, columns=classes)
+    hmap = sns.heatmap(df_cm, annot=True, fmt='d')
+    hmap.yaxis.set_ticklabels(hmap.yaxis.get_ticklabels(), rotation=0, ha='right')
+    hmap.xaxis.set_ticklabels(hmap.xaxis.get_ticklabels(), rotation=30, ha='right')
+    plt.ylabel('True Label')
+    plt.xlabel('Predicted Label')
+    plt.show()
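+
+
+if __name__ == "__main__":
+    # Minimal usage sketch, not part of the original module: the paths below are
+    # placeholder assumptions; point them at your own dataset layout.
+    image_to_csv("path/to/ALL_dataset", "path/to/output", mode="train")
+
+    # model_confusion_matrix works on integer class labels; dummy data for illustration:
+    model_confusion_matrix([0, 1, 1, 0], [0, 1, 0, 0], classes=["normal", "ALL"])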