|
a |
|
b/utils/eval_utils_mtl_concat.py |
|
|
1 |
import numpy as np |
|
|
2 |
|
|
|
3 |
import torch |
|
|
4 |
import torch.nn as nn |
|
|
5 |
import torch.nn.functional as F |
|
|
6 |
from models.model_toad import TOAD_fc_mtl_concat |
|
|
7 |
import pdb |
|
|
8 |
import os |
|
|
9 |
import pandas as pd |
|
|
10 |
from utils.utils import * |
|
|
11 |
from utils.core_utils_mtl_concat import EarlyStopping, Accuracy_Logger |
|
|
12 |
from utils.file_utils import save_pkl, load_pkl |
|
|
13 |
from sklearn.metrics import roc_auc_score, roc_curve, auc |
|
|
14 |
import h5py |
|
|
15 |
from models.resnet_custom import resnet50_baseline |
|
|
16 |
import math |
|
|
17 |
from sklearn.preprocessing import label_binarize |
|
|
18 |
|
|
|
19 |
def initiate_model(args, ckpt_path=None):
    """Build a ``TOAD_fc_mtl_concat`` model and optionally load checkpoint weights.

    Args:
        args: Namespace-like object. ``args.drop_out`` and ``args.n_classes``
            are forwarded to the model constructor as ``dropout`` and
            ``n_classes``.
        ckpt_path: Optional path to a checkpoint readable by ``torch.load``.
            When ``None`` the model keeps its freshly initialised weights.

    Returns:
        The constructed model, switched to evaluation mode.
    """
    print('Init Model')
    model_dict = {"dropout": args.drop_out, 'n_classes': args.n_classes}
    model = TOAD_fc_mtl_concat(**model_dict)

    # relocate() is provided by the model class; presumably it moves the
    # model onto the compute device -- confirm against the model definition.
    model.relocate()
    print_network(model)

    if ckpt_path is not None:
        # Deserialize onto the CPU so a checkpoint written during a GPU run can
        # still be read on a CPU-only host. load_state_dict() copies the values
        # into the model's existing parameters, whichever device they are on.
        ckpt = torch.load(ckpt_path, map_location='cpu')
        # strict=False: key mismatches between checkpoint and model are
        # tolerated instead of raising.
        model.load_state_dict(ckpt, strict=False)

    model.eval()
    return model
|
|
33 |
|
|
|
34 |
def eval(dataset, args, ckpt_path):
    """Evaluate a checkpoint on ``dataset`` and print the headline metrics.

    Args:
        dataset: Dataset handed to ``get_simple_loader`` to build the loader.
        args: Configuration object passed through to ``initiate_model`` and
            ``summary``.
        ckpt_path: Checkpoint path passed through to ``initiate_model``.

    Returns:
        A ``(model, results_dict)`` tuple, where ``results_dict`` is whatever
        ``summary`` returned.
    """
    model = initiate_model(args, ckpt_path)

    print('Init Loaders')
    results_dict = summary(model, get_simple_loader(dataset), args)

    # Report classification metrics first, then site metrics.
    for metric in ('cls_test_error', 'cls_auc', 'site_test_error', 'site_auc'):
        print(metric + ': ', results_dict[metric])

    return model, results_dict
|
|
47 |
|
|
|
48 |
# Code taken from pytorch/examples for evaluating topk classification on ImageNet |
|
|
49 |
def accuracy(output, target, topk=(1,)):
    """Compute the top-k accuracy for each requested value of k.

    Args:
        output: 2-D tensor of per-class scores, one row per sample
            (the top-k search runs along dimension 1).
        target: 1-D tensor holding the true class index of each sample.
        topk: Iterable of k values to evaluate.

    Returns:
        A list with one single-element tensor per entry of ``topk``, each
        holding the fraction (0..1) of samples whose true class is among the
        k highest-scoring predictions.
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # Indices of the maxk best classes per sample, transposed so that
        # row r holds every sample's r-th ranked prediction.
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # `correct` derives from a transposed tensor and is generally not
            # contiguous, so .view(-1) can raise; .reshape(-1) copies only
            # when it has to.
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(1.0 / batch_size))
        return res
|
|
64 |
|
|
|
65 |
def summary(model, loader, args):
    """Run ``model`` over ``loader`` and gather classification and site metrics.

    The loop stores one row of results per loader iteration and calls
    ``.item()`` on the label, site and sex tensors, so it assumes each batch
    holds exactly one sample, yielded in the same order as
    ``loader.dataset.slide_data['slide_id']``.

    Args:
        model: Callable as ``model(data, sex)`` returning a dict containing at
            least ``'Y_prob'``, ``'Y_hat'``, ``'site_prob'`` and ``'site_hat'``.
        loader: Iterable yielding ``(data, label, site, sex)`` tuples; must
            support ``len()`` and expose ``dataset.slide_data['slide_id']``.
        args: Configuration object providing ``n_classes`` and, for tasks with
            more than two classes, ``micro_average``.

    Returns:
        A dict with keys ``'patient_results'``, ``'cls_test_error'``,
        ``'cls_auc'``, ``'cls_aucs'``, ``'site_test_error'``, ``'site_auc'``,
        ``'loggers'`` (``(cls_logger, site_logger)``) and ``'df'`` (per-slide
        DataFrame). For tasks with more than two classes it also holds one
        ``'top{k}_acc'`` entry per evaluated k. An AUC of ``-1`` means only a
        single label value was present, so no AUC could be computed.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    cls_logger = Accuracy_Logger(n_classes=args.n_classes)
    # The site task is treated as binary throughout this function.
    site_logger = Accuracy_Logger(n_classes=2)
    model.eval()
    cls_test_error = 0.
    site_test_error = 0.

    n_samples = len(loader)
    all_cls_probs = np.zeros((n_samples, args.n_classes))
    all_cls_labels = np.zeros(n_samples)
    all_site_probs = np.zeros((n_samples, 2))
    all_site_labels = np.zeros(n_samples)
    all_sexes = np.zeros(n_samples)

    slide_ids = loader.dataset.slide_data['slide_id']
    patient_results = {}

    for batch_idx, (data, label, site, sex) in enumerate(loader):
        data = data.to(device)
        label = label.to(device)
        site = site.to(device)
        sex = sex.float().to(device)
        slide_id = slide_ids.iloc[batch_idx]
        with torch.no_grad():
            model_results_dict = model(data, sex)

        Y_prob, Y_hat = model_results_dict['Y_prob'], model_results_dict['Y_hat']
        site_prob, site_hat = model_results_dict['site_prob'], model_results_dict['site_hat']
        del model_results_dict

        cls_logger.log(Y_hat, label)
        site_logger.log(site_hat, site)

        cls_probs = Y_prob.cpu().numpy()
        all_cls_probs[batch_idx] = cls_probs
        all_cls_labels[batch_idx] = label.item()

        all_sexes[batch_idx] = sex.item()

        site_probs = site_prob.cpu().numpy()
        all_site_probs[batch_idx] = site_probs
        all_site_labels[batch_idx] = site.item()

        patient_results.update({slide_id: {'slide_id': np.array(slide_id), 'cls_prob': cls_probs, 'cls_label': label.item(),
                                           'site_prob': site_probs, 'site_label': site.item()}})
        cls_test_error += calculate_error(Y_hat, label)
        site_test_error += calculate_error(site_hat, site)

    cls_test_error /= len(loader)
    site_test_error /= len(loader)

    all_cls_preds = np.argmax(all_cls_probs, axis=1)
    all_site_preds = np.argmax(all_site_probs, axis=1)

    # Start empty so the top-k bookkeeping at the end is a no-op for binary
    # tasks instead of reading names that were never bound.
    topk = ()
    topk_accs = []
    if args.n_classes > 2:
        topk = (1, 3, 5) if args.n_classes > 5 else (1, 3)
        topk_accs = accuracy(torch.from_numpy(all_cls_probs), torch.from_numpy(all_cls_labels), topk=topk)
        for k, acc in zip(topk, topk_accs):
            print('top{} acc: {:.3f}'.format(k, acc.item()))

    cls_aucs = []
    if len(np.unique(all_cls_labels)) == 1:
        # AUC is undefined when only one label value is present.
        cls_auc = -1
    elif args.n_classes == 2:
        cls_auc = roc_auc_score(all_cls_labels, all_cls_probs[:, 1])
    else:
        # One-vs-rest AUC per class; classes absent from the labels get NaN.
        binary_labels = label_binarize(all_cls_labels, classes=list(range(args.n_classes)))
        for class_idx in range(args.n_classes):
            if class_idx in all_cls_labels:
                fpr, tpr, _ = roc_curve(binary_labels[:, class_idx], all_cls_probs[:, class_idx])
                cls_aucs.append(auc(fpr, tpr))
            else:
                cls_aucs.append(float('nan'))

        if args.micro_average:
            # Micro-average: pool the decisions of every class that actually
            # occurs in the labels into a single ROC curve.
            valid_classes = np.where(np.any(binary_labels, axis=0))[0]
            valid_labels = binary_labels[:, valid_classes]
            valid_cls_probs = all_cls_probs[:, valid_classes]
            fpr, tpr, _ = roc_curve(valid_labels.ravel(), valid_cls_probs.ravel())
            cls_auc = auc(fpr, tpr)
        else:
            # Macro-average over the classes that are present (NaNs skipped).
            cls_auc = np.nanmean(np.array(cls_aucs))

    if len(np.unique(all_site_labels)) == 1:
        site_auc = -1
    else:
        site_auc = roc_auc_score(all_site_labels, all_site_probs[:, 1])

    results_dict = {'slide_id': slide_ids, 'sex': all_sexes, 'Y': all_cls_labels, 'Y_hat': all_cls_preds,
                    'site': all_site_labels, 'site_hat': all_site_preds}
    for c in range(args.n_classes):
        results_dict.update({'p_{}'.format(c): all_cls_probs[:, c]})

    results_dict.update({'site_p': all_site_probs[:, 1]})

    df = pd.DataFrame(results_dict)
    inference_results = {'patient_results': patient_results, 'cls_test_error': cls_test_error,
                         'cls_auc': cls_auc, 'cls_aucs': cls_aucs,
                         'site_test_error': site_test_error, 'site_auc': site_auc,
                         'loggers': (cls_logger, site_logger), 'df': df}

    for k, acc in zip(topk, topk_accs):
        inference_results.update({'top{}_acc'.format(k): acc.item()})

    return inference_results