[f757a9]: / Classifier / Classes / utils.py


import csv
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
from sklearn import metrics
from sklearn.metrics import confusion_matrix


def image_to_csv(file_path, save_path, mode=""):
    """
    Create a CSV index from the Acute Lymphoblastic Leukemia dataset.

    Args:
        file_path: Acute Lymphoblastic Leukemia data path
        save_path: path to save the created CSV
        mode: "train" or "test"; selects the output file name
    """
    columns = ['data', 'label']
    csv_data = ""
    imagefiles = []  # list to store image names (without extensions)
    if mode == "train":
        csv_data = "train.csv"
    if mode == "test":
        csv_data = "test.csv"
    with open(os.path.join(save_path, csv_data), 'w', newline='') as csvfile:
        for root, dirs, files in os.walk(file_path):  # scan through the file path
            for file in files:  # loop through all files
                if file.endswith(('.tif', '.jpg', '.png')):  # keep only *.tif, *.jpg and *.png images
                    imagefiles.append(os.path.splitext(file)[0])  # strip the extension and store the name
        writer = csv.writer(csvfile, dialect='excel')  # create a writer from the csv module
        writer.writerow(columns)  # write the header row
        for image in imagefiles:  # loop through all collected image names
            label = os.path.basename(image)
            if "_0" in label:  # "_0" in the file name marks a negative sample
                label = 0
            elif "_1" in label:  # "_1" marks a positive sample
                label = 1
            writer.writerow([image, label])
    print("done")
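
# Example usage (a sketch with hypothetical paths; assumes image file names
# carry a "_0"/"_1" class suffix, e.g. "Im001_1.tif"):
#   image_to_csv("data/ALL_IDB2/train", "data/csv", mode="train")
#   -> writes data/csv/train.csv with columns [data, label]
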
def print_accuracy_and_classification_report(labels, prediction):
    """Print model accuracy and classification report.

    Args:
        labels (numpy.array): Ground-truth labels
        prediction (numpy.array): Model predictions
    """
    print('Cross validation accuracy:')
    print('\t', metrics.accuracy_score(labels, prediction))
    print('\nCross validation classification report\n')
    print(metrics.classification_report(labels, prediction))
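
# Example (hypothetical arrays):
#   print_accuracy_and_classification_report(np.array([0, 1, 1]), np.array([0, 1, 0]))
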
def f1_loss(y_true: torch.Tensor, y_pred: torch.Tensor, is_training=False) -> torch.Tensor:
    '''Calculate the F1 score. Works with GPU tensors.

    The original implementation was written by Michal Haltuf on Kaggle.

    Returns
    -------
    torch.Tensor
        A scalar tensor with 0 <= val <= 1.

    Reference
    ---------
    - https://www.kaggle.com/rejpalcz/best-loss-function-for-f1-score-metric
    - https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html#sklearn.metrics.f1_score
    - https://discuss.pytorch.org/t/calculating-precision-recall-and-f1-score-in-case-of-multi-label-classification/28265/6
    '''
    assert y_true.ndim == 1
    assert y_pred.ndim == 1 or y_pred.ndim == 2

    if y_pred.ndim == 2:
        y_pred = y_pred.argmax(dim=1)  # reduce per-class scores to hard predictions

    tp = (y_true * y_pred).sum().to(torch.float32)               # true positives
    tn = ((1 - y_true) * (1 - y_pred)).sum().to(torch.float32)   # true negatives
    fp = ((1 - y_true) * y_pred).sum().to(torch.float32)         # false positives
    fn = (y_true * (1 - y_pred)).sum().to(torch.float32)         # false negatives

    epsilon = 1e-7  # avoids division by zero
    precision = tp / (tp + fp + epsilon)
    recall = tp / (tp + fn + epsilon)
    f1 = 2 * (precision * recall) / (precision + recall + epsilon)
    f1.requires_grad = is_training  # only valid on leaf tensors, i.e. when the inputs do not require grad
    return f1
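
# Example (hypothetical tensors): with y_true = [1, 0, 1] and y_pred = [1, 1, 1],
# tp = 2, fp = 1, fn = 0, so precision ~ 2/3, recall ~ 1 and f1 ~ 0.8:
#   f1_loss(torch.tensor([1, 0, 1]), torch.tensor([1, 1, 1]))
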
def sigmoid(x):
    """Numpy logistic sigmoid."""
    return 1.0 / (1.0 + np.exp(-x))


def accuracy_mini_batch(predicted, true, i, acc, tpr, tnr):
    """Update running accuracy, TPR and TNR after mini-batch `i` (0-indexed)."""
    predicted = predicted.cpu()
    true = true.cpu()
    predicted = (sigmoid(predicted.data.numpy()) > 0.5)  # threshold logits at 0.5
    true = true.data.numpy()
    accuracy = np.sum(predicted == true) / true.shape[0]
    true_positive_rate = np.sum((predicted == 1) * (true == 1)) / np.sum(true == 1)
    true_negative_rate = np.sum((predicted == 0) * (true == 0)) / np.sum(true == 0)
    # Running mean over batches: new_mean = old_mean * i / (i + 1) + value / (i + 1)
    acc = acc * i / (i + 1) + accuracy / (i + 1)
    tpr = tpr * i / (i + 1) + true_positive_rate / (i + 1)
    tnr = tnr * i / (i + 1) + true_negative_rate / (i + 1)
    return acc, tpr, tnr
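
# Example (a sketch with a hypothetical `model` and `loader`): fold the running
# metrics over an epoch:
#   acc = tpr = tnr = 0.0
#   for i, (inputs, targets) in enumerate(loader):
#       outputs = model(inputs)
#       acc, tpr, tnr = accuracy_mini_batch(outputs, targets, i, acc, tpr, tnr)
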
def accuracy(predicted, true):
    """Compute accuracy, TPR and TNR for a full batch of logits."""
    predicted = predicted.cpu()
    true = true.cpu()
    predicted = (sigmoid(predicted.data.numpy()) > 0.5)  # threshold logits at 0.5
    true = true.data.numpy()
    accuracy = np.sum(predicted == true) / true.shape[0]
    true_positive_rate = np.sum((predicted == 1) * (true == 1)) / np.sum(true == 1)
    true_negative_rate = np.sum((predicted == 0) * (true == 0)) / np.sum(true == 0)
    return accuracy, true_positive_rate, true_negative_rate
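
# Example (hypothetical tensors): logits for three samples against binary targets;
# sigmoid thresholds them to [1, 0, 1]:
#   accuracy(torch.tensor([2.0, -1.0, 0.5]), torch.tensor([1.0, 0.0, 0.0]))
#   -> (0.666..., 1.0, 0.5)
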
def model_confusion_matrix(y_true, y_pred, classes=[]):
    """Plot a labelled confusion-matrix heatmap."""
    cm = confusion_matrix(y_true=y_true, y_pred=y_pred)
    df_cm = pd.DataFrame(cm, index=classes, columns=classes)
    hmap = sns.heatmap(df_cm, annot=True, fmt='d')  # annotate each cell with its count
    hmap.yaxis.set_ticklabels(hmap.yaxis.get_ticklabels(), rotation=0, ha='right')
    hmap.xaxis.set_ticklabels(hmap.xaxis.get_ticklabels(), rotation=30, ha='right')
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()
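
# Example usage (hypothetical label lists):
#   model_confusion_matrix([0, 1, 1, 0], [0, 1, 0, 0], classes=['negative', 'positive'])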