|
a |
|
b/utils/eval_utils.py |
|
|
1 |
import numpy as np |
|
|
2 |
|
|
|
3 |
import torch |
|
|
4 |
import torch.nn as nn |
|
|
5 |
import torch.nn.functional as F |
|
|
6 |
from models.model_mil import MIL_fc, MIL_fc_mc |
|
|
7 |
from models.model_clam import CLAM_SB, CLAM_MB |
|
|
8 |
import pdb |
|
|
9 |
import os |
|
|
10 |
import pandas as pd |
|
|
11 |
from utils.utils import * |
|
|
12 |
from utils.core_utils import Accuracy_Logger |
|
|
13 |
from sklearn.metrics import roc_auc_score, roc_curve, auc |
|
|
14 |
from sklearn.preprocessing import label_binarize |
|
|
15 |
import matplotlib.pyplot as plt |
|
|
16 |
|
|
|
17 |
def initiate_model(args, ckpt_path):
    """Build the model selected by ``args`` and load its weights from a checkpoint.

    Args:
        args: Namespace-like object. This function reads ``drop_out``,
            ``n_classes``, ``model_size`` and ``model_type``.
        ckpt_path: Checkpoint location passed to ``torch.load``; expected to
            contain a state dict (a mapping of parameter name -> tensor).

    Returns:
        The constructed model with the checkpoint weights loaded, after
        ``relocate()`` has been called and eval mode has been enabled.
    """
    print('Init Model')
    model_dict = {"dropout": args.drop_out, 'n_classes': args.n_classes}

    # Only the CLAM variants receive a size argument.
    if args.model_size is not None and args.model_type in ['clam_sb', 'clam_mb']:
        model_dict.update({"size_arg": args.model_size})

    if args.model_type == 'clam_sb':
        model = CLAM_SB(**model_dict)
    elif args.model_type == 'clam_mb':
        model = CLAM_MB(**model_dict)
    else:  # args.model_type == 'mil'
        if args.n_classes > 2:
            model = MIL_fc_mc(**model_dict)
        else:
            model = MIL_fc(**model_dict)

    print_network(model)

    # Deserialize onto the CPU so that a checkpoint saved on a GPU can still be
    # read on a machine without that device; load_state_dict copies the values
    # into the model's own parameters and relocate() is called afterwards.
    ckpt = torch.load(ckpt_path, map_location='cpu')

    # Skip entries belonging to the instance loss function and strip the
    # '.module' fragment from the remaining key names before the strict load.
    ckpt_clean = {
        key.replace('.module', ''): value
        for key, value in ckpt.items()
        if 'instance_loss_fn' not in key
    }
    model.load_state_dict(ckpt_clean, strict=True)

    # NOTE(review): relocate() is defined on the model classes, presumably
    # moving the sub-modules to the target device -- confirm in models/.
    model.relocate()
    model.eval()
    return model
|
|
47 |
|
|
|
48 |
def eval(dataset, args, ckpt_path):
    """Evaluate the checkpoint at ``ckpt_path`` on ``dataset``.

    The model is created through ``initiate_model``, a loader is obtained from
    ``get_simple_loader`` and the metrics are produced by ``summary``. The test
    error and the AUC are printed before returning.

    Returns:
        Tuple ``(model, patient_results, test_error, auc, df)``; the accuracy
        logger that ``summary`` also returns is discarded.
    """
    net = initiate_model(args, ckpt_path)

    print('Init Loaders')
    data_loader = get_simple_loader(dataset)

    # summary() yields five values; the trailing accuracy logger is not needed here.
    per_slide, err, auc_value, results_df, _ = summary(net, data_loader, args)

    print('test_error: ', err)
    print('auc: ', auc_value)
    return net, per_slide, err, auc_value, results_df
|
|
57 |
|
|
|
58 |
def summary(model, loader, args):
    """Run ``model`` over ``loader`` and collect per-slide results and metrics.

    Args:
        model: Callable returning a 5-tuple whose 2nd and 3rd elements are the
            class probabilities and the predicted label.
        loader: Iterable of ``(data, label)`` pairs that supports ``len()`` and
            exposes ``loader.dataset.slide_data['slide_id']``.
            NOTE(review): results are stored per iteration index and the
            slide id is looked up with that same index, so this assumes one
            slide per batch in dataset order -- confirm against the loader.
        args: Namespace-like object; ``n_classes`` is always read and
            ``micro_average`` is read only when ``n_classes`` is not 2 and
            more than one label value is present.

    Returns:
        Tuple ``(patient_results, test_error, auc_score, df, acc_logger)``:
        a dict keyed by slide id, the mean error over the loader, the AUC
        (``-1`` when only a single label value is present), a DataFrame with
        columns ``slide_id``, ``Y``, ``Y_hat`` and one ``p_<c>`` per class,
        and the ``Accuracy_Logger`` filled during the loop.
    """
    acc_logger = Accuracy_Logger(n_classes=args.n_classes)
    model.eval()
    test_error = 0.

    n_items = len(loader)
    all_probs = np.zeros((n_items, args.n_classes))
    all_labels = np.zeros(n_items)
    all_preds = np.zeros(n_items)

    slide_ids = loader.dataset.slide_data['slide_id']
    patient_results = {}
    for batch_idx, (data, label) in enumerate(loader):
        # `device` is a module-level name that is not defined in this block.
        data, label = data.to(device), label.to(device)
        slide_id = slide_ids.iloc[batch_idx]
        with torch.no_grad():
            # Only the probabilities and the predicted label are used here.
            _, Y_prob, Y_hat, _, _ = model(data)

        acc_logger.log(Y_hat, label)

        probs = Y_prob.cpu().numpy()

        all_probs[batch_idx] = probs
        all_labels[batch_idx] = label.item()
        all_preds[batch_idx] = Y_hat.item()

        patient_results.update({
            slide_id: {
                'slide_id': np.array(slide_id),
                'prob': probs,
                'label': label.item(),
            }
        })

        test_error += calculate_error(Y_hat, label)

        del data
    test_error /= len(loader)

    if len(np.unique(all_labels)) == 1:
        # With a single label value present no ROC curve is computed; -1 is
        # returned as a sentinel instead.
        auc_score = -1
    elif args.n_classes == 2:
        auc_score = roc_auc_score(all_labels, all_probs[:, 1])
    else:
        # One-vs-rest indicator matrix, shared by both averaging modes.
        binary_labels = label_binarize(all_labels, classes=list(range(args.n_classes)))
        if args.micro_average:
            # Pool every (sample, class) decision into one ROC curve.
            fpr, tpr, _ = roc_curve(binary_labels.ravel(), all_probs.ravel())
            auc_score = auc(fpr, tpr)
        else:
            aucs = []
            for class_idx in range(args.n_classes):
                if class_idx in all_labels:
                    fpr, tpr, _ = roc_curve(binary_labels[:, class_idx], all_probs[:, class_idx])
                    aucs.append(auc(fpr, tpr))
                else:
                    # Classes absent from the labels contribute NaN, which
                    # nanmean ignores.
                    aucs.append(float('nan'))
            auc_score = np.nanmean(np.array(aucs))

    results_dict = {'slide_id': slide_ids, 'Y': all_labels, 'Y_hat': all_preds}
    for c in range(args.n_classes):
        results_dict.update({'p_{}'.format(c): all_probs[:, c]})
    df = pd.DataFrame(results_dict)
    return patient_results, test_error, auc_score, df, acc_logger