
Classifier/Classes/utils.py

import csv
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
from sklearn import metrics
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score


def image_to_csv(file_path, save_path, mode=""):
    """Create a CSV index of the Acute Lymphoblastic Leukemia dataset.

    Args:
        file_path: path to the Acute Lymphoblastic Leukemia image data
        save_path: directory in which to save the created CSV
        mode: "train" or "test"; selects the output CSV filename
    """

    columns = ['data', 'label']
    csv_data = ""
    imagefiles = list()  # list to collect image file names (without extensions)
    if mode == "train":
        csv_data = "train.csv"
    if mode == "test":
        csv_data = "test.csv"

    with open(os.path.join(save_path, csv_data), 'w', newline='') as csvfile:
        for root, dirs, files in os.walk(file_path):  # scan through the file path
            for file in files:  # loop through all files
                if file.lower().endswith(('.tif', '.jpg', '.png')):  # keep only *.tif, *.jpg or *.png files
                    imagefiles.append(os.path.splitext(file)[0])  # record the file name without its extension

        writer = csv.writer(csvfile, dialect='excel')  # create a writer from the csv module
        writer.writerow(columns)  # write the header row
        for image in imagefiles:  # loop through all collected image names
            label = os.path.basename(image)
            if "_0" in label:
                label = 0
            elif "_1" in label:
                label = 1

            writer.writerow([image, label])
    print("done")
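
# A minimal usage sketch (the dataset and output paths below are hypothetical
# placeholders, not part of this repository); labels are parsed from the
# "_0"/"_1" suffixes in the file names:
#
#   image_to_csv("data/ALL_IDB2/img", "data", mode="train")
#   # -> writes data/train.csv with a "data" and a "label" column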


def print_accuracy_and_classification_report(labels, prediction):
    """Print model accuracy and a classification report.

    Args:
        labels (numpy.ndarray): ground-truth labels
        prediction (numpy.ndarray): model predictions
    """
    print('Cross validation accuracy:')
    print('\t', metrics.accuracy_score(labels, prediction))
    print('\nCross validation classification report\n')
    print(metrics.classification_report(labels, prediction))

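# For example (illustrative arrays, not project data):
#
#   print_accuracy_and_classification_report(np.array([0, 1, 1, 0]), np.array([0, 1, 0, 0]))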


def f1_loss(y_true: torch.Tensor, y_pred: torch.Tensor, is_training=False) -> torch.Tensor:
    '''Calculate F1 score. Can work with GPU tensors.

    The original implementation was written by Michal Haltuf on Kaggle.

    Returns
    -------
    torch.Tensor
        A scalar tensor with 0 <= val <= 1.

    Reference
    ---------
    - https://www.kaggle.com/rejpalcz/best-loss-function-for-f1-score-metric
    - https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html#sklearn.metrics.f1_score
    - https://discuss.pytorch.org/t/calculating-precision-recall-and-f1-score-in-case-of-multi-label-classification/28265/6
    '''
    assert y_true.ndim == 1
    assert y_pred.ndim == 1 or y_pred.ndim == 2

    if y_pred.ndim == 2:
        y_pred = y_pred.argmax(dim=1)

    tp = (y_true * y_pred).sum().to(torch.float32)
    tn = ((1 - y_true) * (1 - y_pred)).sum().to(torch.float32)
    fp = ((1 - y_true) * y_pred).sum().to(torch.float32)
    fn = (y_true * (1 - y_pred)).sum().to(torch.float32)

    epsilon = 1e-7

    precision = tp / (tp + fp + epsilon)
    recall = tp / (tp + fn + epsilon)

    f1 = 2 * (precision * recall) / (precision + recall + epsilon)
    f1.requires_grad = is_training
    return f1
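
# Worked example (illustrative tensors, not project data):
#
#   y_true = torch.tensor([0, 1, 1, 0])
#   y_pred = torch.tensor([0, 1, 0, 0])
#   f1_loss(y_true, y_pred)  # tp=1, fp=0, fn=1 -> F1 ≈ 0.6667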


def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))


def accuracy_mini_batch(predicted, true, i, acc, tpr, tnr):
    """Update running accuracy, TPR and TNR with the i-th mini-batch."""
    predicted = predicted.cpu()
    true = true.cpu()

    predicted = (sigmoid(predicted.data.numpy()) > 0.5)  # threshold sigmoid outputs at 0.5
    true = true.data.numpy()

    accuracy = np.sum(predicted == true) / true.shape[0]
    true_positive_rate = np.sum((predicted == 1) * (true == 1)) / np.sum(true == 1)
    true_negative_rate = np.sum((predicted == 0) * (true == 0)) / np.sum(true == 0)
    # incremental mean over batches: new = old * i / (i + 1) + batch_value / (i + 1)
    acc = acc * i / (i + 1) + accuracy / (i + 1)
    tpr = tpr * i / (i + 1) + true_positive_rate / (i + 1)
    tnr = tnr * i / (i + 1) + true_negative_rate / (i + 1)

    return acc, tpr, tnr
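
# Typical training-loop usage (a sketch; `model` and `loader` are assumed to
# exist elsewhere in the project):
#
#   acc = tpr = tnr = 0.0
#   for i, (inputs, targets) in enumerate(loader):
#       logits = model(inputs)
#       acc, tpr, tnr = accuracy_mini_batch(logits, targets, i, acc, tpr, tnr)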


def accuracy(predicted, true):
    """Compute accuracy, TPR and TNR over a full set of predictions."""
    predicted = predicted.cpu()
    true = true.cpu()

    predicted = (sigmoid(predicted.data.numpy()) > 0.5)  # threshold sigmoid outputs at 0.5
    true = true.data.numpy()

    accuracy = np.sum(predicted == true) / true.shape[0]
    true_positive_rate = np.sum((predicted == 1) * (true == 1)) / np.sum(true == 1)
    true_negative_rate = np.sum((predicted == 0) * (true == 0)) / np.sum(true == 0)

    return accuracy, true_positive_rate, true_negative_rate


def model_confusion_matrix(y_true, y_pred, classes=None):
    """Plot a labelled confusion-matrix heatmap."""
    cm = confusion_matrix(y_true=y_true, y_pred=y_pred)
    if classes is None:  # fall back to integer class indices
        classes = list(range(cm.shape[0]))
    df_cm = pd.DataFrame(cm, index=classes, columns=classes)
    hmap = sns.heatmap(df_cm, annot=True, fmt='d')
    hmap.yaxis.set_ticklabels(hmap.yaxis.get_ticklabels(), rotation=0, ha='right')
    hmap.xaxis.set_ticklabels(hmap.xaxis.get_ticklabels(), rotation=30, ha='right')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.show()
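
# Example (toy labels; the class names are illustrative only):
#
#   model_confusion_matrix([0, 1, 1, 0], [0, 1, 0, 0], classes=["normal", "ALL"])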