--- /dev/null
+++ b/Classifier/Classes/utils.py
@@ -0,0 +1,159 @@
+import csv
+import os
+import numpy as np
+import pandas as pd
+import torch
+from sklearn.metrics import confusion_matrix
+from sklearn import metrics
+import seaborn as sns
+import matplotlib.pyplot as plt
+
+
+def image_to_csv(file_path, save_path, mode=""):
+    """
+    Create a CSV index from the Acute Lymphoblastic Leukemia dataset.
+
+    Args:
+        file_path: Acute Lymphoblastic Leukemia data path
+        save_path: path to save the created CSV
+        mode: "train" or "test"; selects the output CSV filename
+    """
+    columns = ['data', 'label']
+    csv_data = ""
+    imagefiles = list()  # list to store image names (without extensions)
+    if mode == "train":
+        csv_data = "train.csv"
+    if mode == "test":
+        csv_data = "test.csv"
+
+    with open(os.path.join(save_path, csv_data), 'w', newline='') as csvfile:
+        for root, dirs, files in os.walk(file_path):  # scan through the data path
+            for file in files:  # loop through all files
+                if file.endswith(('.tif', '.jpg', '.png')):  # keep only *.tif, *.jpg and *.png files
+                    imagefiles.append(os.path.splitext(file)[0])  # store the file name without its extension
+
+        writer = csv.writer(csvfile, dialect='excel')  # create a writer from the csv module
+        writer.writerow(columns)  # write the header row
+        for image in imagefiles:  # loop through all collected image names
+            # the dataset encodes the class as a _0/_1 suffix in the file name
+            label = os.path.basename(image)
+            if "_0" in label:
+                label = 0
+            elif "_1" in label:
+                label = 1
+
+            writer.writerow([image, label])
+    print("done")
+
+
+def print_accuracy_and_classification_report(labels, prediction):
+    """Print model accuracy and classification report.
+
+    Args:
+        labels (numpy.array): Ground-truth labels
+        prediction (numpy.array): Model predictions
+    """
+    print('Cross validation accuracy:')
+    print('\t', metrics.accuracy_score(labels, prediction))
+    print('\nCross validation classification report\n')
+    print(metrics.classification_report(labels, prediction))
+
+
+def f1_loss(y_true: torch.Tensor, y_pred: torch.Tensor, is_training=False) -> torch.Tensor:
+    '''Calculate F1 score. Can work with GPU tensors.
+
+    The original implementation was written by Michal Haltuf on Kaggle.
+
+    Returns
+    -------
+    torch.Tensor
+        `ndim` == 1. 0 <= val <= 1
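+
+    Example
+    -------
+    A quick sanity check on dummy 1-D label tensors (illustrative only):
+
+    >>> f1_loss(torch.tensor([1, 0, 1]), torch.tensor([1, 0, 0]))
+    tensor(0.6667)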
+
+    Reference
+    ---------
+    - https://www.kaggle.com/rejpalcz/best-loss-function-for-f1-score-metric
+    - https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html#sklearn.metrics.f1_score
+    - https://discuss.pytorch.org/t/calculating-precision-recall-and-f1-score-in-case-of-multi-label-classification/28265/6
+    '''
+    assert y_true.ndim == 1
+    assert y_pred.ndim == 1 or y_pred.ndim == 2
+
+    if y_pred.ndim == 2:
+        y_pred = y_pred.argmax(dim=1)
+
+    tp = (y_true * y_pred).sum().to(torch.float32)
+    tn = ((1 - y_true) * (1 - y_pred)).sum().to(torch.float32)
+    fp = ((1 - y_true) * y_pred).sum().to(torch.float32)
+    fn = (y_true * (1 - y_pred)).sum().to(torch.float32)
+
+    epsilon = 1e-7
+
+    precision = tp / (tp + fp + epsilon)
+    recall = tp / (tp + fn + epsilon)
+
+    f1 = 2 * (precision * recall) / (precision + recall + epsilon)
+    f1.requires_grad = is_training
+    return f1
+
+
+def sigmoid(x):
+    return 1.0 / (1.0 + np.exp(-x))
+
+
+def accuracy_mini_batch(predicted, true, i, acc, tpr, tnr):
+    predicted = predicted.cpu()
+    true = true.cpu()
+
+    predicted = (sigmoid(predicted.data.numpy()) > 0.5)
+    true = true.data.numpy()
+
+    accuracy = np.sum(predicted == true) / true.shape[0]
+    true_positive_rate = np.sum((predicted == 1) * (true == 1)) / np.sum(true == 1)
+    true_negative_rate = np.sum((predicted == 0) * (true == 0)) / np.sum(true == 0)
+    # update the running averages over the first i + 1 mini-batches
+    acc = acc * i / (i + 1) + accuracy / (i + 1)
+    tpr = tpr * i / (i + 1) + true_positive_rate / (i + 1)
+    tnr = tnr * i / (i + 1) + true_negative_rate / (i + 1)
+
+    return acc, tpr, tnr
+
+
+def accuracy(predicted, true):
+    predicted = predicted.cpu()
+    true = true.cpu()
+
+    predicted = (sigmoid(predicted.data.numpy()) > 0.5)
+    true = true.data.numpy()
+
+    accuracy = np.sum(predicted == true) / true.shape[0]
+    true_positive_rate = np.sum((predicted == 1) * (true == 1)) / np.sum(true == 1)
+    true_negative_rate = np.sum((predicted == 0) * (true == 0)) / np.sum(true == 0)
+
+    return accuracy, true_positive_rate, true_negative_rate
+
+
+def model_confusion_matrix(y_true, y_pred, classes=None):
+    cm = confusion_matrix(y_true=y_true, y_pred=y_pred)
+    df_cm = pd.DataFrame(cm, index=classes, columns=classes)
+    hmap = sns.heatmap(df_cm, annot=True, fmt='d')
+    hmap.yaxis.set_ticklabels(hmap.yaxis.get_ticklabels(), rotation=0, ha='right')
+    hmap.xaxis.set_ticklabels(hmap.xaxis.get_ticklabels(), rotation=30, ha='right')
+    plt.ylabel('True Label')
+    plt.xlabel('Predicted Label')
+    plt.show()
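+
+
+if __name__ == "__main__":
+    # Minimal usage sketch, not part of the original module: the paths below are
+    # placeholder assumptions; point them at your own dataset layout.
+    image_to_csv("path/to/ALL_dataset", "path/to/output", mode="train")
+
+    # model_confusion_matrix works on integer class labels; dummy data for illustration:
+    model_confusion_matrix([0, 1, 1, 0], [0, 1, 0, 0], classes=["normal", "ALL"])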