Diff of /utils/eval_utils.py [000000] .. [4cd6c8]

Switch to unified view

a b/utils/eval_utils.py
1
import numpy as np
2
3
import torch
4
import torch.nn as nn
5
import torch.nn.functional as F
6
from models.model_mil import MIL_fc, MIL_fc_mc
7
from models.model_clam import CLAM, CLAM_Simple
8
from models.model_attention_mil import MIL_Attention_fc
9
from models.model_histogram import MIL_fc_Histogram
10
import pdb
11
import os
12
import pandas as pd
13
from utils.utils import *
14
from utils.core_utils import EarlyStopping,  Accuracy_Logger
15
from utils.file_utils import save_pkl, load_pkl
16
from sklearn.metrics import roc_auc_score, roc_curve, auc
17
import h5py
18
from models.resnet_custom import resnet50_baseline
19
import math
20
from sklearn.preprocessing import label_binarize
21
22
def initiate_model(args, ckpt_path=None):
    """Build the model selected by ``args`` and optionally load a checkpoint.

    Args:
        args: namespace providing ``drop_out``, ``n_classes``, ``model_size``
            and ``model_type``. Any ``model_type`` other than ``'clam'``,
            ``'clam_simple'``, ``'attention_mil'`` or ``'histogram_mil'`` is
            treated as plain MIL.
        ckpt_path: optional path to a saved state dict. When ``None`` the
            model keeps its freshly initialised weights.

    Returns:
        The model, after ``model.relocate()`` and ``model.eval()`` were called.
    """
    print('Init Model')
    model_dict = {"dropout": args.drop_out, 'n_classes': args.n_classes}

    if args.model_size is not None and args.model_type in ['clam', 'attention_mil', 'clam_simple']:
        model_dict.update({"size_arg": args.model_size})

    if args.model_type == 'clam':
        model = CLAM(**model_dict)
    elif args.model_type == 'clam_simple':
        model = CLAM_Simple(**model_dict)
    elif args.model_type == 'attention_mil':
        model = MIL_Attention_fc(**model_dict)
    elif args.model_type == 'histogram_mil':
        model = MIL_fc_Histogram(**model_dict)
    else:  # args.model_type == 'mil'
        if args.n_classes > 2:
            model = MIL_fc_mc(**model_dict)
        else:
            model = MIL_fc(**model_dict)

    print_network(model)

    if ckpt_path is not None:
        # Load onto CPU first so a checkpoint saved on a GPU can be restored on
        # a CPU-only host (or one with different device indices); the model is
        # moved to its target device by relocate() below.
        ckpt = torch.load(ckpt_path, map_location='cpu')
        # Skip the instance loss function's entries and strip the '.module'
        # fragment from the remaining keys before loading.
        ckpt_clean = {
            key.replace('.module', ''): value
            for key, value in ckpt.items()
            if 'instance_loss_fn' not in key
        }
        model.load_state_dict(ckpt_clean, strict=True)

    model.relocate()
    model.eval()
    return model
57
58
59
#%------------
60
#    if ckpt_path is not None:
61
#        ckpt = torch.load(ckpt_path)
62
#        ckpt_clean = {}
63
#        for key in ckpt.keys():
64
#            if 'instance_loss_fn' in key:
65
#                continue
66
#            ckpt_clean.update({key.replace('.module', ''):ckpt[key]})
67
#        model.load_state_dict(ckpt_clean, strict=True)
68
#    model.relocate()
69
#    model.eval()
70
#    return model
71
#
72
#%----------
73
74
75
def eval(dataset, args, ckpt_path):
    """Evaluate a checkpoint on ``dataset`` and print the headline metrics.

    Builds the model via ``initiate_model``, wraps ``dataset`` with
    ``get_simple_loader`` and delegates the evaluation to ``summary``.

    Returns:
        Tuple ``(model, patient_results, test_error, auc, aucs, df)``.
    """
    model = initiate_model(args, ckpt_path)

    print('Init Loaders')
    eval_loader = get_simple_loader(dataset)
    # The sixth element returned by summary (the accuracy logger) is not used here.
    patient_results, test_error, auc_score, class_aucs, df, _acc_logger = summary(
        model, eval_loader, args)

    print('test_error: ', test_error)
    print('auc: ', auc_score)
    for cls_idx, cls_auc in enumerate(class_aucs):
        print('class {} auc: {}'.format(cls_idx, cls_auc))

    return model, patient_results, test_error, auc_score, class_aucs, df
86
87
def infer(dataset, args, ckpt_path, class_labels):
    """Load a checkpointed model and run ``infer_dataset`` on ``dataset``.

    Returns:
        Tuple ``(model, df)`` where ``df`` is the frame produced by
        ``infer_dataset``.
    """
    model = initiate_model(args, ckpt_path)
    predictions_df = infer_dataset(model, dataset, args, class_labels)
    return model, predictions_df
91
92
# Code taken from pytorch/examples for evaluating topk classification on ImageNet
93
def accuracy(output, target, topk=(1,)):
    """Compute the accuracy over the k top predictions for each k in ``topk``.

    Args:
        output: 2-D tensor of per-class scores, one row per sample.
        target: 1-D tensor holding the true class index of each sample.
        topk: iterable of k values to evaluate.

    Returns:
        A list with one single-element tensor per entry of ``topk``, each
        holding the fraction of samples whose target is among the k
        highest-scoring classes.
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # Indices of the maxk best classes per sample, transposed so that
        # row r holds every sample's rank-r prediction.
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # reshape instead of view: `correct` derives from a transposed
            # tensor, so the slice may be non-contiguous and view(-1) raises.
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(1.0 / batch_size))
        return res
108
109
def summary(model, loader, args):
    """Run ``model`` over ``loader`` and collect predictions and metrics.

    Args:
        model: network whose forward pass returns the 5-tuple
            ``(logits, Y_prob, Y_hat, _, results_dict)``.
        loader: iterable of ``(data, label)`` pairs holding one sample per
            batch. ``loader.dataset.slide_data`` must expose a ``'slide_id'``
            column, or ``'case_id'`` when ``args.patient_level`` is truthy.
        args: namespace providing ``n_classes``, ``patient_level`` and, when
            ``n_classes`` is not 2, ``micro_average``.

    Returns:
        Tuple ``(patient_results, test_error, auc_score, aucs, df, acc_logger)``:
        ``patient_results`` maps each id to its probabilities and label;
        ``test_error`` is the mean of ``calculate_error`` over the loader;
        ``auc_score`` is -1 when only one label value is present; ``aucs``
        holds the per-class AUCs (empty for the two-class and single-label
        cases); ``df`` lists id, ``Y``, ``Y_hat`` and one ``p_<c>`` column per
        class, de-duplicated on ``case_id`` at patient level.

    Raises:
        ValueError: if ``loader`` is empty.
    """
    n_samples = len(loader)
    if n_samples == 0:
        # Fail early with a clear message instead of an UnboundLocalError /
        # ZeroDivisionError further down.
        raise ValueError('summary() requires a non-empty loader')

    acc_logger = Accuracy_Logger(n_classes=args.n_classes)
    model.eval()
    test_error = 0.

    all_probs = np.zeros((n_samples, args.n_classes))
    all_labels = np.zeros(n_samples)
    all_preds = np.zeros(n_samples)

    # Slide-level and patient-level evaluation differ only in which id column
    # keys the results.
    id_key = 'case_id' if args.patient_level else 'slide_id'
    sample_ids = loader.dataset.slide_data[id_key]

    patient_results = {}
    for batch_idx, (data, label) in enumerate(loader):
        data, label = data.to(device), label.to(device)
        sample_id = sample_ids.iloc[batch_idx]
        with torch.no_grad():
            logits, Y_prob, Y_hat, _, _ = model(data)

        acc_logger.log(Y_hat, label)
        probs = Y_prob.cpu().numpy()

        all_probs[batch_idx] = probs
        all_labels[batch_idx] = label.item()
        all_preds[batch_idx] = Y_hat.item()

        # A later sample with the same id overwrites the earlier entry.
        patient_results[sample_id] = {
            id_key: np.array(sample_id),
            'prob': probs,
            'label': label.item(),
        }

        test_error += calculate_error(Y_hat, label)

    # Drop the reference to the last batch before computing the metrics.
    del data
    test_error /= n_samples

    if args.n_classes > 2:
        acc1, acc3 = accuracy(torch.from_numpy(all_probs), torch.from_numpy(all_labels), topk=(1, 3))
        print('top1 acc: {:.3f}, top3 acc: {:.3f}'.format(acc1.item(), acc3.item()))

    aucs = []
    if len(np.unique(all_labels)) == 1:
        # ROC AUC is undefined when only one class is present.
        auc_score = -1
    elif args.n_classes == 2:
        auc_score = roc_auc_score(all_labels, all_probs[:, 1])
    else:
        binary_labels = label_binarize(all_labels, classes=list(range(args.n_classes)))
        for class_idx in range(args.n_classes):
            if class_idx in all_labels:
                fpr, tpr, _ = roc_curve(binary_labels[:, class_idx], all_probs[:, class_idx])
                aucs.append(auc(fpr, tpr))
            else:
                # Class absent from the labels: no one-vs-rest AUC for it.
                aucs.append(float('nan'))
        if args.micro_average:
            fpr, tpr, _ = roc_curve(binary_labels.ravel(), all_probs.ravel())
            auc_score = auc(fpr, tpr)
        else:
            auc_score = np.nanmean(np.array(aucs))

    results_dict = {id_key: sample_ids, 'Y': all_labels, 'Y_hat': all_preds}
    for c in range(args.n_classes):
        results_dict.update({'p_{}'.format(c): all_probs[:, c]})

    df = pd.DataFrame(results_dict)

    if args.patient_level:
        df = df.drop_duplicates(subset=['case_id'])

    return patient_results, test_error, auc_score, aucs, df, acc_logger
205
206
def infer_dataset(model, dataset, args, class_labels, k=3):
    """Predict the ``k`` most probable classes for every item of ``dataset``.

    Args:
        model: network whose forward pass returns a 5-tuple with the class
            probabilities in second position.
        dataset: iterable of input tensors that also exposes ``slide_data``.
        args: unused here; kept for a uniform signature.
        class_labels: sequence mapping a class index to its display name.
        k: number of top predictions to keep per item.

    Returns:
        A DataFrame with a ``slide_id`` column followed by ``Pred_<i>`` /
        ``p_<i>`` column pairs for ``i`` in ``range(k)``.
    """
    model.eval()
    n_items = len(dataset)
    all_probs = np.zeros((n_items, k))
    all_preds = np.zeros((n_items, k))
    all_preds_str = np.full((n_items, k), ' ', dtype=object)
    slide_ids = dataset.slide_data

    for batch_idx, data in enumerate(dataset):
        data = data.to(device)
        with torch.no_grad():
            _, Y_prob, _, _, _ = model(data)

        topk_probs, topk_ids = torch.topk(Y_prob, k)
        topk_ids = topk_ids.cpu().numpy()
        all_probs[batch_idx] = topk_probs.cpu().numpy()
        all_preds[batch_idx] = topk_ids
        # Translate class indices into their names.
        all_preds_str[batch_idx] = np.array(class_labels)[topk_ids]
    del data

    columns = {'slide_id': slide_ids}
    for rank in range(k):
        columns['Pred_{}'.format(rank)] = all_preds_str[:, rank]
        columns['p_{}'.format(rank)] = all_probs[:, rank]
    return pd.DataFrame(columns)
230
231
# def infer_dataset(model, dataset, args, class_labels, k=3):
232
#     model.eval()
233
234
#     all_probs = np.zeros((len(dataset), args.n_classes))
235
#     all_preds = np.zeros(len(dataset))
236
#     all_str_preds = np.full(len(dataset), ' ', dtype=object)
237
238
#     slide_ids = dataset.slide_data
239
#     for batch_idx, data in enumerate(dataset):
240
#         data = data.to(device)
241
#         with torch.no_grad():
242
#             logits, Y_prob, Y_hat, _, results_dict = model(data)
243
        
244
#         probs = Y_prob.cpu().numpy()
245
#         all_probs[batch_idx] = probs
246
#         all_preds[batch_idx] = Y_hat.item()
247
#         all_str_preds[batch_idx] = class_labels[Y_hat.item()]
248
#     del data
249
250
#     results_dict = {'slide_id': slide_ids, 'Prediction': all_str_preds, 'Y_hat': all_preds}
251
#     for c in range(args.n_classes):
252
#         results_dict.update({'p_{}_{}'.format(c, class_labels[c]): all_probs[:,c]})
253
#     df = pd.DataFrame(results_dict)
254
#     return df
255
256
def compute_features(dataset, args, ckpt_path, save_dir, model=None, feature_dim=512):
    """Write one feature row per dataset item to ``<save_dir>/features.h5``.

    A model is built from ``ckpt_path`` via ``initiate_model`` unless one is
    passed in. The HDF5 file is created up front by
    ``initialize_features_hdf5_file`` and then filled item by item through
    ``save_features``.
    """
    if model is None:
        model = initiate_model(args, ckpt_path)

    n_items = len(dataset)
    names = dataset.get_list(np.arange(n_items)).values
    file_path = os.path.join(save_dir, 'features.h5')

    initialize_features_hdf5_file(file_path, n_items, feature_dim=feature_dim, names=names)
    for idx in range(n_items):
        print("Progress: {}/{}".format(idx, n_items))
        save_features(dataset, idx, model, args, file_path)
267
268
def save_features(dataset, idx, model, args, save_file_path):
    """Compute the features of item ``idx`` and store them in the HDF5 file.

    Writes row ``idx`` of the ``features``, ``label``, ``Y_hat`` and ``Y_prob``
    datasets of the existing file at ``save_file_path``; ``Y_prob`` receives
    the probability of the predicted class only.
    """
    name = dataset.get_list(idx)
    print(name)

    features, label = dataset[idx]
    features = features.to(device)
    with torch.no_grad():
        if type(model) is not CLAM:
            _, Y_prob, Y_hat, _, results_dict = model(features, return_features=True)
            bag_feat = results_dict['features']
        else:
            # For an exact CLAM instance the returned features are indexed by
            # class; keep the entry of the predicted class.
            _, Y_prob, Y_hat, _, results_dict = model(features, instance_eval=False, return_features=True)
            bag_feat = results_dict['features'][Y_hat.item()]
    del features

    predicted_class = Y_hat.item()
    class_probs = Y_prob.view(-1).cpu().numpy()
    bag_row = bag_feat.view(1, -1).cpu().numpy()

    with h5py.File(save_file_path, 'r+') as h5_file:
        print('label', label)
        h5_file['features'][idx, :] = bag_row
        h5_file['label'][idx] = label
        h5_file['Y_hat'][idx] = predicted_class
        h5_file['Y_prob'][idx] = class_probs[predicted_class]
291
292
def initialize_features_hdf5_file(file_path, length, feature_dim=512, names = None):
    """Create (or overwrite) the HDF5 file that stores per-item features.

    Args:
        file_path: destination path; the file is opened in ``"w"`` mode.
        length: number of rows allocated in every dataset.
        feature_dim: width of each row of the ``features`` dataset.
        names: optional sequence of ``length`` strings; when given it is
            stored in a ``names`` dataset.

    Returns:
        ``file_path``, unchanged.
    """
    # The context manager closes the file even if a create_dataset call fails.
    with h5py.File(file_path, "w") as h5_file:
        h5_file.create_dataset('features',
                               shape=(length, feature_dim),
                               chunks=(1, feature_dim),
                               dtype=np.float32)

        if names is not None:
            names_dset = h5_file.create_dataset('names',
                                                shape=(length, ),
                                                chunks=(1, ),
                                                dtype=h5py.string_dtype())
            names_dset[:] = names

        # One scalar per item: true label, predicted class, and its probability.
        for dset_name, dset_dtype in (('label', np.int32),
                                      ('Y_hat', np.int32),
                                      ('Y_prob', np.float32)):
            h5_file.create_dataset(dset_name,
                                   shape=(length, ),
                                   chunks=(1, ),
                                   dtype=dset_dtype)

    return file_path
319