a b/utils/eval_utils.py
1
import numpy as np
2
3
import torch
4
import torch.nn as nn
5
import torch.nn.functional as F
6
from models.model_mil import MIL_fc, MIL_fc_mc
7
from models.model_clam import CLAM
8
from models.model_attention_mil import MIL_Attention_fc
9
import pdb
10
import os
11
import pandas as pd
12
from utils.utils import *
13
from utils.core_utils import EarlyStopping,  Accuracy_Logger
14
from utils.file_utils import save_pkl, load_pkl
15
from sklearn.metrics import roc_auc_score, roc_curve, auc
16
import h5py
17
from models.resnet_custom import resnet50_baseline
18
import math
19
from sklearn.preprocessing import label_binarize
20
21
def initiate_model(args, ckpt_path=None):
    """Build the MIL model selected by ``args`` and optionally load weights.

    Args:
        args: Options object. This function reads ``drop_out``, ``n_classes``,
            ``model_size`` and ``model_type``.
        ckpt_path: Optional path to a checkpoint containing a state dict.
            When ``None`` the model keeps its freshly initialised weights.

    Returns:
        The constructed model after ``model.relocate()`` has been called and
        the model has been switched to evaluation mode.
    """
    print('Init Model')
    model_dict = {"dropout": args.drop_out, 'n_classes': args.n_classes}

    # Only the attention-based variants receive the size argument.
    if args.model_size is not None and args.model_type in ['clam', 'attention_mil', 'clam_new']:
        model_dict.update({"size_arg": args.model_size})

    if args.model_type == 'clam':
        model = CLAM(**model_dict)
    elif args.model_type == 'attention_mil':
        model = MIL_Attention_fc(**model_dict)
    else:
        # Every other model_type (e.g. 'mil') falls back to the plain MIL
        # models; the multi-class variant is used for more than two classes.
        if args.n_classes > 2:
            model = MIL_fc_mc(**model_dict)
        else:
            model = MIL_fc(**model_dict)

    model.relocate()
    #print_network(model)

    if ckpt_path is not None:
        # Deserialize onto the CPU so that checkpoints written from a GPU can
        # also be loaded on CPU-only hosts; load_state_dict then copies the
        # values into the (already relocated) parameters.
        ckpt = torch.load(ckpt_path, map_location='cpu')
        # strict=False: keys that do not match the model are ignored.
        model.load_state_dict(ckpt, strict=False)

    model.eval()
    return model
47
48
def eval(dataset, args, ckpt_path):
    """Evaluate a checkpoint on ``dataset`` and print the resulting metrics.

    The model is created with ``initiate_model``, the dataset is wrapped with
    ``get_simple_loader`` and the metrics come from ``summary``.

    Returns:
        ``(model, patient_results, test_error, auc, aucs, df)`` where the last
        five items are forwarded unchanged from ``summary``.
    """
    model = initiate_model(args, ckpt_path)

    print('Init Loaders')
    loader = get_simple_loader(dataset)

    # summary() also returns an accuracy logger as its sixth item; it is not
    # needed here.
    patient_results, test_error, auc_value, class_aucs, df, _logger = summary(model, loader, args)

    print('test_error: ', test_error)
    print('auc: ', auc_value)
    for cls_idx, cls_auc in enumerate(class_aucs):
        print('class {} auc: {}'.format(cls_idx, cls_auc))

    return model, patient_results, test_error, auc_value, class_aucs, df
59
60
def infer(dataset, args, ckpt_path, class_labels):
    """Load a checkpointed model and run ``infer_dataset`` on ``dataset``.

    Returns:
        ``(model, df)`` where ``df`` is whatever ``infer_dataset`` returns.
    """
    model = initiate_model(args, ckpt_path)
    predictions = infer_dataset(model, dataset, args, class_labels)
    return model, predictions
64
65
# Code taken from pytorch/examples for evaluating top-k classification on ImageNet
66
def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k

    Args:
        output: 2-D tensor of per-class scores, one row per sample.
        target: Tensor with one true class index per sample.
        topk: Iterable of ``k`` values to evaluate.

    Returns:
        A list with one single-element tensor per entry of ``topk``: the
        fraction of samples whose target is among the ``k`` highest-scoring
        classes.
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # Indices of the maxk best classes per sample, transposed so that
        # row r holds the rank-r prediction of every sample.
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # reshape instead of view: `correct` stems from a transposed
            # tensor, so its first k rows are generally not contiguous and
            # view(-1) raises a RuntimeError for k > 1.
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(1.0 / batch_size))
        return res
81
82
def summary(model, loader, args):
    """Run ``model`` over ``loader`` and collect predictions, error and AUC.

    Args:
        model: Callable returning ``(logits, Y_prob, Y_hat, _, results_dict)``
            for one batch.
        loader: Iterable of ``(data, label)`` batches. Each batch must carry a
            single label (``label.item()`` is used) and batch ``i`` is matched
            with row ``i`` of ``loader.dataset.slide_data['slide_id']``.
        args: Options object; ``n_classes`` is always read and
            ``micro_average`` is read when ``n_classes`` is not 2.

    Returns:
        ``(patient_results, test_error, auc_score, aucs, df, acc_logger)``:
        ``patient_results`` maps slide id to its probabilities and label,
        ``test_error`` is the mean of ``calculate_error`` over all batches,
        ``auc_score`` is ``-1`` when only one class is present in the labels,
        ``aucs`` holds the per-class AUCs (empty for the binary and the
        single-class case) and ``df`` is a per-slide table of labels,
        predictions and class probabilities.
    """
    acc_logger = Accuracy_Logger(n_classes=args.n_classes)
    model.eval()
    test_error = 0.

    n_batches = len(loader)
    all_probs = np.zeros((n_batches, args.n_classes))
    all_labels = np.zeros(n_batches)
    all_preds = np.zeros(n_batches)

    slide_ids = loader.dataset.slide_data['slide_id']
    patient_results = {}
    for batch_idx, (data, label) in enumerate(loader):
        data, label = data.to(device), label.to(device)
        slide_id = slide_ids.iloc[batch_idx]
        with torch.no_grad():
            logits, Y_prob, Y_hat, _, results_dict = model(data)

        acc_logger.log(Y_hat, label)

        probs = Y_prob.cpu().numpy()

        all_probs[batch_idx] = probs
        all_labels[batch_idx] = label.item()
        all_preds[batch_idx] = Y_hat.item()

        patient_results.update({slide_id: {'slide_id': np.array(slide_id), 'prob': probs, 'label': label.item()}})

        error = calculate_error(Y_hat, label)
        test_error += error

    del data
    test_error /= len(loader)
    if args.n_classes > 2:
        acc1, acc3 = accuracy(torch.from_numpy(all_probs), torch.from_numpy(all_labels), topk=(1, 3))
        print('top1 acc: {:.3f}, top3 acc: {:.3f}'.format(acc1.item(), acc3.item()))

    # Bound up front so the return statement is valid on every path; it was
    # previously left undefined when only a single class was present.
    aucs = []
    if len(np.unique(all_labels)) == 1:
        # AUC is undefined with a single class; -1 is the sentinel callers get.
        auc_score = -1
    elif args.n_classes == 2:
        auc_score = roc_auc_score(all_labels, all_probs[:, 1])
    else:
        binary_labels = label_binarize(all_labels, classes=[i for i in range(args.n_classes)])
        for class_idx in range(args.n_classes):
            if class_idx in all_labels:
                fpr, tpr, _ = roc_curve(binary_labels[:, class_idx], all_probs[:, class_idx])
                aucs.append(auc(fpr, tpr))
            else:
                # Class absent from the labels: keep the slot, mark as NaN.
                aucs.append(float('nan'))
        if args.micro_average:
            # Pool every (sample, class) pair into one ROC curve.
            fpr, tpr, _ = roc_curve(binary_labels.ravel(), all_probs.ravel())
            auc_score = auc(fpr, tpr)
        else:
            # Macro average that ignores the NaN entries of absent classes.
            auc_score = np.nanmean(np.array(aucs))

    results_dict = {'slide_id': slide_ids, 'Y': all_labels, 'Y_hat': all_preds}
    for c in range(args.n_classes):
        results_dict.update({'p_{}'.format(c): all_probs[:,c]})
    df = pd.DataFrame(results_dict)
    return patient_results, test_error, auc_score, aucs, df, acc_logger
146
147
def infer_dataset(model, dataset, args, class_labels, k=3):
    """Collect the top-``k`` predicted classes and probabilities per item.

    Args:
        model: Callable returning ``(logits, Y_prob, Y_hat, _, results_dict)``
            for one item of ``dataset``.
        dataset: Iterable of tensors that also exposes ``slide_data``.
        args: Not used by this function.
        class_labels: Sequence of class names, indexed by predicted class id.
        k: Number of highest-probability classes to record per item.

    Returns:
        A ``pandas.DataFrame`` with a ``slide_id`` column plus ``Pred_<r>`` and
        ``p_<r>`` columns for every rank ``r`` in ``range(k)``.
    """
    model.eval()

    n_items = len(dataset)
    topk_probs = np.zeros((n_items, k))
    topk_ids = np.zeros((n_items, k))
    topk_names = np.full((n_items, k), ' ', dtype=object)
    label_lookup = np.array(class_labels)

    # NOTE(review): the whole `slide_data` object is used as the 'slide_id'
    # column - confirm that this is the intended content.
    slide_ids = dataset.slide_data

    for row, data in enumerate(dataset):
        data = data.to(device)
        with torch.no_grad():
            _, Y_prob, _, _, _ = model(data)

        best_probs, best_ids = torch.topk(Y_prob, k)
        best_ids = best_ids.cpu().numpy()
        topk_probs[row] = best_probs.cpu().numpy()
        topk_ids[row] = best_ids
        topk_names[row] = label_lookup[best_ids]
    del data

    columns = {'slide_id': slide_ids}
    for rank in range(k):
        columns['Pred_{}'.format(rank)] = topk_names[:, rank]
        columns['p_{}'.format(rank)] = topk_probs[:, rank]
    return pd.DataFrame(columns)
171
172
# def infer_dataset(model, dataset, args, class_labels, k=3):
173
#     model.eval()
174
175
#     all_probs = np.zeros((len(dataset), args.n_classes))
176
#     all_preds = np.zeros(len(dataset))
177
#     all_str_preds = np.full(len(dataset), ' ', dtype=object)
178
179
#     slide_ids = dataset.slide_data
180
#     for batch_idx, data in enumerate(dataset):
181
#         data = data.to(device)
182
#         with torch.no_grad():
183
#             logits, Y_prob, Y_hat, _, results_dict = model(data)
184
        
185
#         probs = Y_prob.cpu().numpy()
186
#         all_probs[batch_idx] = probs
187
#         all_preds[batch_idx] = Y_hat.item()
188
#         all_str_preds[batch_idx] = class_labels[Y_hat.item()]
189
#     del data
190
191
#     results_dict = {'slide_id': slide_ids, 'Prediction': all_str_preds, 'Y_hat': all_preds}
192
#     for c in range(args.n_classes):
193
#         results_dict.update({'p_{}_{}'.format(c, class_labels[c]): all_probs[:,c]})
194
#     df = pd.DataFrame(results_dict)
195
#     return df
196
197
def compute_features(dataset, args, ckpt_path, save_dir, model=None, feature_dim=512):
    """Write one feature row per dataset item to ``<save_dir>/features.h5``.

    Args:
        dataset: Indexable dataset exposing ``get_list``.
        args: Options object forwarded to ``initiate_model`` and
            ``save_features``.
        ckpt_path: Checkpoint used to build the model when ``model`` is None.
        save_dir: Directory that receives ``features.h5``.
        model: Optional ready-made model; built from ``args`` when omitted.
        feature_dim: Width of the ``features`` dataset in the output file.
    """
    if model is None:
        model = initiate_model(args, ckpt_path)

    n_items = len(dataset)
    names = dataset.get_list(np.arange(n_items)).values
    file_path = os.path.join(save_dir, 'features.h5')

    # Allocate the output file first, then fill it one item at a time.
    initialize_features_hdf5_file(file_path, n_items, feature_dim=feature_dim, names=names)
    for item_idx in range(n_items):
        print("Progress: {}/{}".format(item_idx, n_items))
        save_features(dataset, item_idx, model, args, file_path)
208
209
def save_features(dataset, idx, model, args, save_file_path):
    """Compute the bag feature of item ``idx`` and store it in the HDF5 file.

    Writes row ``idx`` of the ``features``, ``label``, ``Y_hat`` and
    ``Y_prob`` datasets of the existing file at ``save_file_path``; ``Y_prob``
    receives the probability of the predicted class only.

    Args:
        dataset: Indexable dataset yielding ``(features, label)`` and
            exposing ``get_list``.
        idx: Index of the item to process.
        model: Model called with ``return_features=True``.
        args: Not used by this function.
        save_file_path: Path of the HDF5 file, opened in ``'r+'`` mode.
    """
    # The name is only printed here; it is not written to the file.
    name = dataset.get_list(idx)
    print(name)

    features, label = dataset[idx]
    features = features.to(device)

    is_clam = type(model) == CLAM
    # CLAM is additionally called with instance-level evaluation disabled.
    extra_kwargs = {'instance_eval': False} if is_clam else {}
    with torch.no_grad():
        _, Y_prob, Y_hat, _, results_dict = model(features, return_features=True, **extra_kwargs)
        bag_feat = results_dict['features']
        if is_clam:
            # For CLAM the features are indexed by class; keep the entry of
            # the predicted class.
            bag_feat = bag_feat[Y_hat.item()]
    del features

    Y_hat = Y_hat.item()
    Y_prob = Y_prob.view(-1).cpu().numpy()
    bag_feat = bag_feat.view(1, -1).cpu().numpy()

    with h5py.File(save_file_path, 'r+') as h5_file:
        print('label', label)
        h5_file['features'][idx, :] = bag_feat
        h5_file['label'][idx] = label
        h5_file['Y_hat'][idx] = Y_hat
        h5_file['Y_prob'][idx] = Y_prob[Y_hat]
232
233
def initialize_features_hdf5_file(file_path, length, feature_dim=512, names = None):
    """Create (or overwrite) an HDF5 file with empty per-sample datasets.

    The file receives a ``(length, feature_dim)`` float32 ``features``
    dataset and 1-D ``label`` (int32), ``Y_hat`` (int32) and ``Y_prob``
    (float32) datasets of size ``length``. A variable-length string ``names``
    dataset is allocated when ``names`` is given.

    Args:
        file_path: Destination path; opened in ``"w"`` mode.
        length: Number of rows of every dataset.
        feature_dim: Number of columns of the ``features`` dataset.
        names: When not ``None``, triggers allocation of the ``names``
            dataset.

    Returns:
        ``file_path``, unchanged.
    """
    # The context manager guarantees the handle is closed even if one of the
    # create_dataset calls raises.
    with h5py.File(file_path, "w") as file:
        file.create_dataset('features',
                            shape=(length, feature_dim), chunks=(1, feature_dim), dtype=np.float32)

        if names is not None:
            # NOTE(review): the dataset is only allocated; the values of
            # `names` are not written here - confirm whether that is intended.
            file.create_dataset('names',
                                shape=(length, ), chunks=(1, ), dtype=h5py.string_dtype())

        file.create_dataset('label',
                            shape=(length, ), chunks=(1, ), dtype=np.int32)

        file.create_dataset('Y_hat',
                            shape=(length, ), chunks=(1, ), dtype=np.int32)

        file.create_dataset('Y_prob',
                            shape=(length, ), chunks=(1, ), dtype=np.float32)

    return file_path
259
260
261
262
def eval2(datasets: tuple, cur: int, args: Namespace):
    """Evaluate the saved checkpoint of a single fold.

    Rebuilds the model described by ``args``, loads
    ``<args.results_dir>/s_<cur>_checkpoint.pt`` into it and, for survival
    tasks, reports the validation and test c-index.

    Args:
        datasets: ``(train_split, val_split, test_split)``.
        cur: Fold index; used for the results sub-directory, the split CSV,
            the checkpoint file name and the VAE checkpoint path.
        args: Options object (task type, model type, loss names, loader
            options, ...).

    Returns:
        For ``args.task_type == 'survival'``:
        ``(results_val_dict, results_test_dict, val_c_index, test_c_index)``.
        The classification branch visible here performs no work.

    Raises:
        NotImplementedError: For unsupported loss names or unsupported
            model/task combinations.
    """
    # NOTE(review): the message says "Training" although this function only
    # evaluates; the runtime text is left unchanged.
    print('\nTraining Fold {}!'.format(cur))
    writer_dir = os.path.join(args.results_dir, str(cur))
    if not os.path.isdir(writer_dir):
        os.mkdir(writer_dir)

    if args.log_data:
        from tensorboardX import SummaryWriter
        writer = SummaryWriter(writer_dir, flush_secs=15)

    else:
        writer = None

    if args.pretrain_VAE:
        print("Initializing VAE")
        VAE = GenomicVAE(input_dim=args.omic_input_dim, hidden=[1024, 256, 128])
        # Load onto the CPU first so GPU-saved checkpoints also work on
        # CPU-only hosts; relocate() below moves the module afterwards.
        ckpt = torch.load('./VAE/logs/tcga_base/000-all/%d/%d/%d_best.ckpt' % (cur, cur, cur),
                          map_location='cpu')
        state_dict = ckpt['state_dict']
        # Drop the first six characters of every key (a name prefix).
        state_dict = OrderedDict((k[6:], v) for k, v in state_dict.items())
        VAE.load_state_dict(state_dict)
        # From here on the omic input dimension is overridden to 128.
        args.omic_input_dim = 128
        VAE.relocate()
        dfs_freeze(VAE)
        VAE.eval()
    else:
        VAE = None

    print('\nInit train/val/test splits...', end=' ')
    train_split, val_split, test_split = datasets
    save_splits(datasets, ['train', 'val', 'test'], os.path.join(args.results_dir, 'splits_{}.csv'.format(cur)))
    print('Done!')
    print("Training on {} samples".format(len(train_split)))
    print("Validating on {} samples".format(len(val_split)))
    print("Testing on {} samples".format(len(test_split)))

    print('\nInit loss function...', end=' ')
    if args.task_type == 'survival':
        if args.bag_loss == 'ce_surv':
            loss_fn = CrossEntropySurvLoss(alpha=args.alpha_surv)
        elif args.bag_loss == 'nll_surv':
            loss_fn = NLLSurvLoss(alpha=args.alpha_surv)
        elif args.bag_loss == 'cox_surv':
            loss_fn = CoxSurvLoss()
        else:
            raise NotImplementedError
    else:
        if args.bag_loss == 'svm':
            from topk import SmoothTop1SVM
            loss_fn = SmoothTop1SVM(n_classes = args.n_classes)
            if device.type == 'cuda':
                loss_fn = loss_fn.cuda()
        elif args.bag_loss == 'ce':
            loss_fn = nn.CrossEntropyLoss()
        else:
            raise NotImplementedError

    if args.reg_type == 'omic':
        reg_fn = l1_reg_all
    elif args.reg_type == 'pathomic':
        reg_fn = l1_reg_modules
    else:
        reg_fn = None

    print('Done!')

    print('\nInit Model...', end=' ')
    model_dict = {"dropout": args.drop_out, 'n_classes': args.n_classes}
    if args.model_type in ['clam', 'clam_simple'] and args.subtyping:
        model_dict.update({'subtyping': True})

        # NOTE(review): size_arg is only set inside the subtyping branch -
        # confirm that this nesting is intended.
        if args.model_size is not None:
            model_dict.update({"size_arg": args.model_size})

    if args.model_type in ['clam', 'clam_simple']:
        if args.task_type == 'survival':
            raise NotImplementedError
        else:
            if args.inst_loss == 'svm':
                from topk import SmoothTop1SVM
                instance_loss_fn = SmoothTop1SVM(n_classes = 2)
                if device.type == 'cuda':
                    instance_loss_fn = instance_loss_fn.cuda()
            else:
                instance_loss_fn = nn.CrossEntropyLoss()

            if args.model_type =='clam':
                model = CLAM(**model_dict, instance_loss_fn=instance_loss_fn)
            else:
                model = CLAM_Simple(**model_dict, instance_loss_fn=instance_loss_fn)

    elif args.model_type =='attention_mil':
        if args.task_type == 'survival':
            model = MIL_Attention_fc_surv(**model_dict)
            # model.alpha.requires_grad = False
        else:
            model = MIL_Attention_fc(**model_dict)

    elif args.model_type =='mm_attention_mil':
        model_dict.update({'input_dim': args.omic_input_dim, 'meta_dim': args.meta_dim,
            'fusion': args.fusion, 'model_size_wsi':args.model_size_wsi, 'model_size_omic':args.model_size_omic,
            'gate_path': args.gate_path, 'gate_omic': args.gate_omic, 'n_classes': args.n_classes,
            'pretrain': args.pretrain, 'tcga_proj': '_'.join(args.task.split('_')[:2]), 'split_idx': cur})

        if args.task_type == 'survival':
            model = MM_MIL_Attention_fc_surv(**model_dict)
            # model.alpha.requires_grad = False
        else:
            model = MM_MIL_Attention_fc(**model_dict)

    elif args.model_type =='max_net':
        # max_net uses its own argument set; the generic model_dict is replaced.
        model_dict = {'input_dim': args.omic_input_dim, 'meta_dim': args.meta_dim, 'model_size_omic': args.model_size_omic, 'n_classes': args.n_classes}
        if args.task_type == 'survival':
            model = MaxNet(**model_dict)
            # model.alpha.requires_grad = False
        else:
            raise NotImplementedError

    else: # args.model_type == 'mil'
        if args.task_type == 'survival':
            raise NotImplementedError
        else:
            if args.n_classes > 2:
                model = MIL_fc_mc(**model_dict)
            else:
                model = MIL_fc(**model_dict)

    model.relocate()
    print('Done!')
    print_network(model)
    # CPU deserialization keeps GPU-saved checkpoints loadable everywhere;
    # load_state_dict copies the values into the relocated parameters.
    ckpt = torch.load(os.path.join(args.results_dir, "s_{}_checkpoint.pt".format(cur)),
                      map_location='cpu')
    model.load_state_dict(ckpt, strict=False)
    model.eval()

    print('\nInit Loaders...', end=' ')
    train_loader = get_split_loader(train_split, training=True, testing = args.testing,
                                    weighted = args.weighted_sample, task_type=args.task_type, batch_size=args.batch_size)
    val_loader = get_split_loader(val_split,  testing = args.testing, task_type=args.task_type, batch_size=args.batch_size)
    test_loader = get_split_loader(test_split, testing = args.testing, task_type=args.task_type, batch_size=args.batch_size)
    print('Done!')

    if args.task_type == 'survival':
        results_val_dict, val_c_index = summary_survival(model, val_loader, args.n_classes, VAE)
        print('Val c-index: {:.4f}'.format(val_c_index))

        results_test_dict, test_c_index = summary_survival(model, test_loader, args.n_classes, VAE)
        print('Test c-index: {:.4f}'.format(test_c_index))

        # writer is None when args.log_data is false, so both the logging and
        # the close() call must be guarded.
        if writer:
            writer.add_scalar('final/val_c_index', val_c_index, 0)
            writer.add_scalar('final/test_c_index', test_c_index, 0)
            writer.close()

        return results_val_dict, results_test_dict, val_c_index, test_c_index

    elif args.task_type == 'classification':
        pass