#main.py

import timeit

from run_experiment import DukeCTModel
from models import custom_models_ctnet, custom_models_alternative, custom_models_ablation
from load_dataset import custom_datasets

#Note that here NUM_EPOCHS is set to 2 for the purposes of quickly demonstrating
#the code on the fake data. In all of the experiments reported in the paper,
#NUM_EPOCHS was set to 100. No model actually trained all the way to 100 epochs
#due to use of early stopping.
NUM_EPOCHS = 2

if __name__=='__main__':
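    #Arguments shared by every experiment below: num_epochs is the maximum
    #number of training epochs and patience is the patience (in epochs) for
    #the early stopping described above. device = 'all' together with
    #data_parallel = True presumably spreads each model over all available
    #GPUs, and pixel_bounds appears to define the Hounsfield unit window
    #[-1000, 200] that the CT volumes are clipped to.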
    ####################################
    # CTNet-83 Model on Whole Data Set #----------------------------------------
    ####################################
    tot0 = timeit.default_timer()
    DukeCTModel(descriptor = 'CTNet83',
                custom_net = custom_models_ctnet.CTNetModel,
                custom_net_args = {'n_outputs':83},
                loss = 'bce', loss_args = {},
                num_epochs = NUM_EPOCHS, patience = 15,
                batch_size = 2, device = 'all', data_parallel = True,
                use_test_set = False, task = 'train_eval',
                old_params_dir = '',
                dataset_class = custom_datasets.CTDataset_2019_10,
                dataset_args = {'label_type_ld':'disease_new',
                                    'label_meanings':'all',
                                    'num_channels':3,
                                    'pixel_bounds':[-1000,200],
                                    'data_augment':True,
                                    'crop_type':'single',
                                    'selected_note_acc_files':{'train':'','valid':''}})
    tot1 = timeit.default_timer()
    print('Total Time', round((tot1 - tot0)/60.0, 2), 'minutes')

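    #CTNet-9 uses the same CTNetModel architecture as CTNet-83 but with
    #n_outputs = 9, trained on only the nine abnormality labels listed in
    #label_meanings instead of all 83 labels.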
    ###################################
    # CTNet-9 Model on Whole Data Set #-----------------------------------------
    ###################################
    tot0 = timeit.default_timer()
    DukeCTModel(descriptor = 'CTNet9',
                custom_net = custom_models_ctnet.CTNetModel,
                custom_net_args = {'n_outputs':9},
                loss = 'bce', loss_args = {},
                num_epochs = NUM_EPOCHS, patience = 15,
                batch_size = 2, device = 'all', data_parallel = True,
                use_test_set = False, task = 'train_eval',
                old_params_dir = '',
                dataset_class = custom_datasets.CTDataset_2019_10,
                dataset_args = {'label_type_ld':'disease_new',
                                    'label_meanings':['nodule','opacity','atelectasis',
                                                      'pleural_effusion','consolidation','mass',
                                                      'pericardial_effusion','cardiomegaly','pneumothorax'],
                                    'num_channels':3,
                                    'pixel_bounds':[-1000,200],
                                    'data_augment':True,
                                    'crop_type':'single',
                                    'selected_note_acc_files':{'train':'','valid':''}})
    tot1 = timeit.default_timer()
    print('Total Time', round((tot1 - tot0)/60.0, 2), 'minutes')

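    #The remaining experiments pass CSV files via selected_note_acc_files to
    #restrict training to a random subset of 2000 scans and validation to a
    #random subset of 1000 scans; the empty strings used above appear to
    #select the whole data set instead.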
    ####################################################
    # CTNet-83 Model on 2000 Train and 1000 Val Subset #------------------------
    ####################################################
    tot0 = timeit.default_timer()
    DukeCTModel(descriptor = 'CTNet83_SmallData',
                custom_net = custom_models_ctnet.CTNetModel,
                custom_net_args = {'n_outputs':83},
                loss = 'bce', loss_args = {},
                num_epochs = NUM_EPOCHS, patience = 15,
                batch_size = 2, device = 'all', data_parallel = True,
                use_test_set = False, task = 'train_eval',
                old_params_dir = '',
                dataset_class = custom_datasets.CTDataset_2019_10,
                dataset_args = {'label_type_ld':'disease_new',
                                    'label_meanings':'all',
                                    'num_channels':3,
                                    'pixel_bounds':[-1000,200],
                                    'data_augment':True,
                                    'crop_type':'single',
                                    'selected_note_acc_files':{'train':'/load_dataset/fakedata/predefined_subsets/2020-01-10-imgtrain_random2000.csv',
                                        'valid':'/load_dataset/fakedata/predefined_subsets/2020-01-10-imgvalid_a_random1000.csv'}})
    tot1 = timeit.default_timer()
    print('Total Time', round((tot1 - tot0)/60.0, 2), 'minutes')

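    #BodyConv and ThreeDConv (below) are the alternative architectures from
    #custom_models_alternative that are compared against CTNet-83 on the same
    #2000 train / 1000 validation subset.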
    ######################################################################
    # Alternative Arch: BodyConv Model on 2000 Train and 1000 Val Subset #------
    ######################################################################
    tot0 = timeit.default_timer()
    DukeCTModel(descriptor = 'BodyConv_SmallData',
                custom_net = custom_models_alternative.BodyConv,
                custom_net_args = {'n_outputs':83},
                loss = 'bce', loss_args = {},
                num_epochs = NUM_EPOCHS, patience = 15,
                batch_size = 2, device = 'all', data_parallel = True,
                use_test_set = False, task = 'train_eval',
                old_params_dir = '',
                dataset_class = custom_datasets.CTDataset_2019_10,
                dataset_args = {'label_type_ld':'disease_new',
                                    'label_meanings':'all',
                                    'num_channels':3,
                                    'pixel_bounds':[-1000,200],
                                    'data_augment':True,
                                    'crop_type':'single',
                                    'selected_note_acc_files':{'train':'/load_dataset/fakedata/predefined_subsets/2020-01-10-imgtrain_random2000.csv',
                                        'valid':'/load_dataset/fakedata/predefined_subsets/2020-01-10-imgvalid_a_random1000.csv'}})
    tot1 = timeit.default_timer()
    print('Total Time', round((tot1 - tot0)/60.0, 2), 'minutes')

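    #Note that ThreeDConv uses num_channels = 1 and batch_size = 4, unlike the
    #other experiments which use num_channels = 3 and batch_size = 2;
    #presumably the 3D convolutional model consumes the volume as a single
    #channel rather than as 3-channel slice groupings.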
    ####################################################################
    # Alternative Arch: 3DConv Model on 2000 Train and 1000 Val Subset #--------
    ####################################################################
    tot0 = timeit.default_timer()
    DukeCTModel(descriptor = 'ThreeDConv_SmallData',
                custom_net = custom_models_alternative.ThreeDConv,
                custom_net_args = {'n_outputs':83},
                loss = 'bce', loss_args = {},
                num_epochs = NUM_EPOCHS, patience = 15,
                batch_size = 4, device = 'all', data_parallel = True,
                use_test_set = False, task = 'train_eval',
                old_params_dir = '',
                dataset_class = custom_datasets.CTDataset_2019_10,
                dataset_args = {'label_type_ld':'disease_new',
                                    'label_meanings':'all',
                                    'num_channels':1,
                                    'pixel_bounds':[-1000,200],
                                    'data_augment':True,
                                    'crop_type':'single',
                                    'selected_note_acc_files':{'train':'/load_dataset/fakedata/predefined_subsets/2020-01-10-imgtrain_random2000.csv',
                                        'valid':'/load_dataset/fakedata/predefined_subsets/2020-01-10-imgvalid_a_random1000.csv'}})
    tot1 = timeit.default_timer()
    print('Total Time', round((tot1 - tot0)/60.0, 2), 'minutes')

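    #Ablation studies on the same subset. Judging by the class names,
    #CTNetModel_Ablate_PoolInsteadOf3D replaces the 3D convolution stage of
    #CTNet with pooling, and CTNetModel_Ablate_RandomInitResNet (next section)
    #uses a randomly initialized rather than pretrained ResNet feature
    #extractor.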
    #####################################################################
    # Ablation Study: CTNet-83 (Pool) on 2000 Train and 1000 Val Subset #-------
    #####################################################################
    tot0 = timeit.default_timer()
    DukeCTModel(descriptor = 'CTNet83AblatePool_SmallData',
                custom_net = custom_models_ablation.CTNetModel_Ablate_PoolInsteadOf3D,
                custom_net_args = {'n_outputs':83},
                loss = 'bce', loss_args = {},
                num_epochs = NUM_EPOCHS, patience = 15,
                batch_size = 2, device = 'all', data_parallel = True,
                use_test_set = False, task = 'train_eval',
                old_params_dir = '',
                dataset_class = custom_datasets.CTDataset_2019_10,
                dataset_args = {'label_type_ld':'disease_new',
                                    'label_meanings':'all',
                                    'num_channels':3,
                                    'pixel_bounds':[-1000,200],
                                    'data_augment':True,
                                    'crop_type':'single',
                                    'selected_note_acc_files':{'train':'/load_dataset/fakedata/predefined_subsets/2020-01-10-imgtrain_random2000.csv',
                                        'valid':'/load_dataset/fakedata/predefined_subsets/2020-01-10-imgvalid_a_random1000.csv'}})
    tot1 = timeit.default_timer()
    print('Total Time', round((tot1 - tot0)/60.0, 2), 'minutes')

    #####################################################################
    # Ablation Study: CTNet-83 (Rand) on 2000 Train and 1000 Val Subset #-------
    #####################################################################
    tot0 = timeit.default_timer()
    DukeCTModel(descriptor = 'CTNet83AblateRand_SmallData',
                custom_net = custom_models_ablation.CTNetModel_Ablate_RandomInitResNet,
                custom_net_args = {'n_outputs':83},
                loss = 'bce', loss_args = {},
                num_epochs = NUM_EPOCHS, patience = 15,
                batch_size = 2, device = 'all', data_parallel = True,
                use_test_set = False, task = 'train_eval',
                old_params_dir = '',
                dataset_class = custom_datasets.CTDataset_2019_10,
                dataset_args = {'label_type_ld':'disease_new',
                                    'label_meanings':'all',
                                    'num_channels':3,
                                    'pixel_bounds':[-1000,200],
                                    'data_augment':True,
                                    'crop_type':'single',
                                    'selected_note_acc_files':{'train':'/load_dataset/fakedata/predefined_subsets/2020-01-10-imgtrain_random2000.csv',
                                        'valid':'/load_dataset/fakedata/predefined_subsets/2020-01-10-imgvalid_a_random1000.csv'}})
    tot1 = timeit.default_timer()
    print('Total Time', round((tot1 - tot0)/60.0, 2), 'minutes')

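    #CTNet-1: train a separate single-output CTNet model (n_outputs = 1) for
    #each of the nine abnormalities, again on the 2000 train / 1000 validation
    #subset.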
    ###################################################
    # CTNet-1 Model on 2000 Train and 1000 Val Subset #-------------------------
    ###################################################
    for abnormality in ['nodule', 'opacity', 'atelectasis', 'pleural_effusion',
                        'consolidation', 'mass', 'pericardial_effusion',
                        'cardiomegaly', 'pneumothorax']:
        print('\n\n\n\n********** Working on', abnormality, '**********')
        tot0 = timeit.default_timer()
        DukeCTModel(descriptor = 'CTNet-'+abnormality,
                    custom_net = custom_models_ctnet.CTNetModel,
                    custom_net_args = {'n_outputs':1},
                    loss = 'bce', loss_args = {},
                    num_epochs = NUM_EPOCHS, patience = 15,
                    batch_size = 2, device = 'all', data_parallel = True,
                    use_test_set = False, task = 'train_eval',
                    old_params_dir = '',
                    dataset_class = custom_datasets.CTDataset_2019_10,
                    dataset_args = {'label_type_ld':'disease_new',
                                        'label_meanings':[abnormality], #can be 'all' or a list of strings
                                        'num_channels':3,
                                        'pixel_bounds':[-1000,200],
                                        'data_augment':True,
                                        'crop_type':'single',
                                        'selected_note_acc_files':{'train':'/load_dataset/fakedata/predefined_subsets/2020-01-10-imgtrain_random2000.csv',
                                            'valid':'/load_dataset/fakedata/predefined_subsets/2020-01-10-imgvalid_a_random1000.csv'}}
                    )
        tot1 = timeit.default_timer()
        print('Total Time', round((tot1 - tot0)/60.0, 2), 'minutes')