Diff of /train_ssasnet.py [000000] .. [903821]

Switch to unified view

a b/train_ssasnet.py
1
import os
2
import sys
3
from tqdm import tqdm
4
from tensorboardX import SummaryWriter
5
import argparse
6
import logging
7
import time
8
import random
9
import numpy as np
10
11
import torch
12
import torch.optim as optim
13
from torchvision import transforms
14
import torch.nn.functional as F
15
import torch.backends.cudnn as cudnn
16
import torch.nn as nn
17
from torch.nn import BCEWithLogitsLoss, MSELoss
18
from torch.utils.data import DataLoader
19
20
from networks.vnet_sdf import VNet
21
from networks.discriminator import FC3DDiscriminator
22
23
from utils import ramps, losses
24
from dataloaders.la_heart import *
25
from dataloaders.utils import compute_sdf
26
27
# Command-line configuration for the training run.
parser = argparse.ArgumentParser()
parser.add_argument('--dataset_name', type=str, default='LA', help='dataset_name')
parser.add_argument('--root_path', type=str, default='/data/omnisky/postgraduate/Yb/data_set/LASet/data',
                    help='root directory of the dataset')
# --exp and --model are only used to build the snapshot directory / checkpoint names.
parser.add_argument('--exp', type=str, default='vnet', help='experiment name (used in the snapshot path)')
parser.add_argument('--model', type=str, default='SASSNet', help='model_name')
parser.add_argument('--max_iterations', type=int, default=6000, help='maximum number of training iterations')
parser.add_argument('--batch_size', type=int, default=4, help='batch_size per gpu')
parser.add_argument('--labeled_bs', type=int, default=2, help='labeled_batch_size per gpu')
parser.add_argument('--base_lr', type=float, default=0.01, help='base learning rate of the segmentation network')
parser.add_argument('--D_lr', type=float, default=1e-4, help='learning rate of the discriminator')
parser.add_argument('--deterministic', type=int, default=1, help='whether use deterministic training')
# Defaults: 25 labeled, 98 unlabeled (123 samples in total).
parser.add_argument('--labelnum', type=int, default=25, help='number of labeled training samples')
parser.add_argument('--max_samples', type=int, default=123, help='total number of training samples')
parser.add_argument('--seed', type=int, default=1337, help='random seed')
parser.add_argument('--gpu', type=str, default='2', help='GPU to use')
parser.add_argument('--beta', type=float, default=0.3, help='balance factor to control regional and sdm loss')
parser.add_argument('--gamma', type=float, default=0.5, help='balance factor to control supervised and consistency loss')
### costs
parser.add_argument('--ema_decay', type=float, default=0.99, help='ema_decay')
parser.add_argument('--consistency', type=float, default=0.01, help='consistency')
parser.add_argument('--consistency_rampup', type=float, default=40.0, help='consistency_rampup')
args = parser.parse_args()
49
50
# Fixed problem settings shared by the model constructors and the croppers.
num_classes = 2
patch_size = (112, 112, 80)

# Directory that receives the text log, TensorBoard events and checkpoints.
snapshot_path = "/".join([
    "model",
    "{}_{}_{}_labeled".format(args.dataset_name, args.exp, args.labelnum),
    args.model,
])

# Expose only the requested device id(s) to CUDA.
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

# --batch_size is per GPU: multiply by the number of comma-separated ids in --gpu.
batch_size = args.batch_size * (args.gpu.count(',') + 1)
labeled_bs = args.labeled_bs
base_lr = args.base_lr
max_iterations = args.max_iterations
59
60
# cuDNN benchmark mode auto-tunes kernels and may select different algorithms
# between runs, so it is only enabled when determinism is not requested.
if args.deterministic:
    cudnn.benchmark = False
    cudnn.deterministic = True
else:
    cudnn.benchmark = True
    cudnn.deterministic = False

# Seed every random number generator the script relies on.
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
# --gpu may list several devices; seed all of them, not just the current one.
torch.cuda.manual_seed_all(args.seed)
70
71
def cal_dice(output, target, eps=1e-3):
    """Compute the Dice coefficient between thresholded logits and a binary mask.

    Args:
        output: Tensor of raw logits. It is passed through a sigmoid and
            binarised at 0.5.
        target: Tensor holding the reference mask (values 0/1).
        eps: Smoothing term added to the intersection (and twice to the
            denominator) so that two empty masks yield a Dice of 1 instead of
            a division by zero.

    Returns:
        A 0-dim tensor holding ``2 * (|P * T| + eps) / (|P| + |T| + 2 * eps)``.
    """
    pred = (torch.sigmoid(output) > 0.5).float()
    if pred.numel() == target.numel():
        # Align the prediction with the target explicitly.  A bare squeeze()
        # also removes singleton *spatial* axes, after which ``pred * target``
        # broadcasts into an outer product and inflates the intersection.
        pred = pred.reshape(target.shape)
    else:
        # Element counts differ: keep the legacy squeeze-and-broadcast path.
        pred = torch.squeeze(pred)
    inter = torch.sum(pred * target) + eps
    union = torch.sum(pred) + torch.sum(target) + eps * 2
    return 2 * inter / union
79
80
def get_current_consistency_weight(epoch):
    """Return the weight applied to the unsupervised (adversarial) loss term.

    The weight is ``args.consistency`` scaled by
    ``ramps.sigmoid_rampup(epoch, args.consistency_rampup)``.

    Args:
        epoch: Current ramp-up step; the training loop passes
            ``iter_num // 150``.

    Returns:
        The scaled weight.
    """
    # Consistency ramp-up from https://arxiv.org/abs/1610.02242
    # NOTE(review): presumably the ramp rises from ~0 to 1 over
    # ``consistency_rampup`` steps -- confirm against utils.ramps.
    return args.consistency * ramps.sigmoid_rampup(epoch, args.consistency_rampup)
83
84
if __name__ == "__main__":
    # Script entry point: alternately trains the segmentation network (the
    # "generator") and the discriminator on mixed labeled/unlabeled batches,
    # periodically evaluating on the test split and checkpointing the best model.

    # make logger file
    os.makedirs(snapshot_path, exist_ok=True)

    logging.basicConfig(filename=snapshot_path + "/log.txt", level=logging.INFO,
                        format='[%(asctime)s.%(msecs)03d] %(message)s', datefmt='%H:%M:%S')
    # Mirror every log record to stdout as well.
    logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))
    logging.info(str(args))

    # The segmentation network returns two tensors per forward pass; below they
    # are used as (tanh output regressed against the SDF, segmentation logits).
    model = VNet(n_channels=1, n_classes=num_classes-1, normalization='batchnorm', has_dropout=True).cuda()
    D = FC3DDiscriminator(num_classes=num_classes - 1).cuda()

    db_train = LAHeart(base_dir=args.root_path,
                       split='train',
                       transform=transforms.Compose([
                           RandomRotFlip(),
                           RandomCrop(patch_size),
                           ToTensor(),
                       ]))
    db_test = LAHeart(base_dir=args.root_path,
                      split='test',
                      transform=transforms.Compose([
                          CenterCrop(patch_size),
                          ToTensor()
                      ]))

    # The first `labelnum` training samples are treated as labeled, the rest
    # (up to `max_samples`) as unlabeled.
    labeled_idxs = list(range(args.labelnum))
    unlabeled_idxs = list(range(args.labelnum, args.max_samples))
    batch_sampler = TwoStreamBatchSampler(labeled_idxs, unlabeled_idxs, batch_size, batch_size - labeled_bs)

    def worker_init_fn(worker_id):
        """Give each DataLoader worker its own `random` seed."""
        # NOTE(review): only the stdlib `random` module is seeded here; if the
        # dataset transforms draw from numpy, workers may need seeding there too.
        random.seed(args.seed+worker_id)

    trainloader = DataLoader(db_train, batch_sampler=batch_sampler, num_workers=4, pin_memory=True,
                             worker_init_fn=worker_init_fn)
    test_loader = DataLoader(db_test, batch_size=1, shuffle=False, num_workers=4, pin_memory=True)

    model.train()

    Dopt = optim.Adam(D.parameters(), lr=args.D_lr, betas=(0.9, 0.99))
    optimizer = optim.SGD(model.parameters(), lr=base_lr, momentum=0.9, weight_decay=0.0001)
    ce_loss = BCEWithLogitsLoss()
    mse_loss = MSELoss()

    writer = SummaryWriter(snapshot_path+'/log')
    logging.info("{} itertations per epoch".format(len(trainloader)))

    iter_num = 0
    best_dice = 0
    max_epoch = max_iterations//len(trainloader)+1
    # The learning rate is never changed in this script; it is logged as-is.
    lr_ = base_lr

    iterator = tqdm(range(max_epoch), ncols=70)
    for _ in iterator:
        for sampled_batch in trainloader:
            volume_batch, label_batch = sampled_batch['image'], sampled_batch['label']
            volume_batch, label_batch = volume_batch.cuda(), label_batch.cuda()

            # Discriminator targets follow the batch layout used throughout this
            # loop: the first `labeled_bs` samples are labeled (class 1), the
            # remainder unlabeled (class 0).  Built from the real batch so that
            # non-default --batch_size / --labeled_bs values work.
            num_unlabeled = volume_batch.shape[0] - labeled_bs
            Dtarget = torch.cat([
                torch.ones(labeled_bs, dtype=torch.long),
                torch.zeros(num_unlabeled, dtype=torch.long),
            ]).cuda()

            # ---- Train the segmentation network (discriminator frozen) ----
            model.train()
            D.eval()

            outputs_tanh, outputs = model(volume_batch)
            outputs_soft = torch.sigmoid(outputs)

            ## calculate the loss
            with torch.no_grad():
                # NOTE(review): the whole label batch is handed over together
                # with the shape of the labeled slice; presumably compute_sdf
                # only processes the first `labeled_bs` volumes -- confirm.
                gt_dis = compute_sdf(label_batch[:].cpu().numpy(), outputs[:labeled_bs, 0, ...].shape)
                gt_dis = torch.from_numpy(gt_dis).float().cuda()
            loss_sdf = mse_loss(outputs_tanh[:labeled_bs, 0, ...], gt_dis)
            # loss_seg is logged for monitoring only; it is not part of the
            # optimised objective below.
            loss_seg = ce_loss(outputs[:labeled_bs, 0, ...], label_batch[:labeled_bs].float())
            loss_seg_dice = losses.dice_loss(outputs_soft[:labeled_bs, 0, :, :, :], label_batch[:labeled_bs] == 1)

            supervised_loss = loss_seg_dice + args.beta * loss_sdf

            consistency_weight = get_current_consistency_weight(iter_num//150)

            Doutputs = D(outputs_tanh[labeled_bs:], volume_batch[labeled_bs:])
            # G wants D to misclassify unlabeled data as labeled data, so every
            # unlabeled sample gets target class 1.
            loss_adv = F.cross_entropy(Doutputs, Dtarget.new_ones(num_unlabeled))

            loss = supervised_loss + consistency_weight*loss_adv

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # ---- Train D (segmentation network frozen) ----
            model.eval()
            D.train()
            with torch.no_grad():
                outputs_tanh, outputs = model(volume_batch)

            Doutputs = D(outputs_tanh, volume_batch)
            # D wants to tell labeled and unlabeled data apart correctly.
            D_loss = F.cross_entropy(Doutputs, Dtarget)

            # Dtp and Dfn are unreliable because the number of samples per
            # batch is small.
            Dpred = torch.argmax(Doutputs, dim=1)
            labeled_mask = Dtarget == 1
            # Overall accuracy, and the fraction of labeled samples classified
            # as labeled (true positives) / as unlabeled (false negatives).
            Dacc = torch.mean((Dpred == Dtarget).float())
            Dtp = torch.mean((Dpred[labeled_mask] == 1).float())
            Dfn = torch.mean((Dpred[labeled_mask] == 0).float())
            Dopt.zero_grad()
            D_loss.backward()
            Dopt.step()

            iter_num = iter_num + 1
            writer.add_scalar('lr', lr_, iter_num)
            writer.add_scalar('loss/loss', loss, iter_num)
            writer.add_scalar('loss/loss_seg', loss_seg, iter_num)
            writer.add_scalar('loss/loss_dice', loss_seg_dice, iter_num)
            writer.add_scalar('loss/loss_hausdorff', loss_sdf, iter_num)
            writer.add_scalar('train/consistency_weight', consistency_weight, iter_num)
            writer.add_scalar('loss/loss_adv', consistency_weight*loss_adv, iter_num)
            writer.add_scalar('GAN/loss_adv', loss_adv, iter_num)
            writer.add_scalar('GAN/D_loss', D_loss, iter_num)
            writer.add_scalar('GAN/Dacc', Dacc, iter_num)
            writer.add_scalar('GAN/Dtp', Dtp, iter_num)
            writer.add_scalar('GAN/Dfn', Dfn, iter_num)

            logging.info(
                'iteration %d : loss : %f, loss_weight: %f, loss_haus: %f, loss_seg: %f, loss_dice: %f',
                iter_num, loss.item(), consistency_weight, loss_sdf.item(),
                loss_seg.item(), loss_seg_dice.item())

            # Periodic evaluation on the centre-cropped test volumes.
            if iter_num >= 800 and iter_num % 200 == 0:
                model.eval()
                with torch.no_grad():
                    dice_sample = 0.0
                    for test_batch in test_loader:
                        img, lbl = test_batch['image'].cuda(), test_batch['label'].cuda()
                        _, test_logits = model(img)
                        # Convert to a Python float so that best_dice and the
                        # checkpoint file name hold a plain number rather than
                        # the repr of a CUDA tensor.
                        dice_once = cal_dice(test_logits, lbl).item()
                        print(dice_once)
                        dice_sample += dice_once
                    dice_sample = dice_sample / len(test_loader)
                    print('Average center dice:{:.3f}'.format(dice_sample))

                    if dice_sample > best_dice:
                        best_dice = dice_sample
                        save_mode_path = os.path.join(snapshot_path, 'iter_{}_dice_{}.pth'.format(iter_num, best_dice))
                        save_best_path = os.path.join(snapshot_path, '{}_best_model.pth'.format(args.model))
                        torch.save(model.state_dict(), save_mode_path)
                        torch.save(model.state_dict(), save_best_path)
                        logging.info("save best model to {}".format(save_mode_path))
                    writer.add_scalar('Var_dice/Dice', dice_sample, iter_num)
                    writer.add_scalar('Var_dice/Best_dice', best_dice, iter_num)
                    model.train()

            if iter_num >= max_iterations:
                break
        if iter_num >= max_iterations:
            iterator.close()
            break
    writer.close()