Diff of /dc.py [000000] .. [03245f]

# python modules
import argparse, os, pickle
import logging
import errno
from pprint import pprint
import pandas as pd
import json

# os modifications
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# os.environ["CUDA_VISIBLE_DEVICES"]="1"

# tensorflow imports
import tensorflow
from tensorflow.keras.models import Model
physical_devices = tensorflow.config.list_physical_devices('GPU')
USE_GPU = 1 if len(physical_devices) > 0 else 0
for device in physical_devices:
    tensorflow.config.experimental.set_memory_growth(device, True)
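# NOTE: memory growth makes TensorFlow allocate GPU memory on demand instead of
# reserving all of it up front, so this script can share the GPU with other jobs.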

# import utils and models
from utils.metrics import compute_scores
from models import *
from modules.image_encoder import load_encoded_vecs
from utils import *
from utils.dataset import Dataset, IuXrayDataset, ImageCLEFDataset

# import nltk
import nltk
nltk.download('punkt', quiet=True)

# store dataset as well as results path
DATASET_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
RESULTS_PATH = os.path.join(DATASET_PATH, 'results')

def make_dir(str_path:str) -> None:
    """ Create a directory, ignoring the error if it already exists

    Args:
        str_path (str): The path of the directory to create
    """
    try:
        os.mkdir(str_path)
    except OSError as exc:
        if exc.errno != errno.EEXIST:
            raise
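# NOTE: os.makedirs(str_path, exist_ok=True) would achieve the same effect
# (and also create any missing parent directories).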
# make results directory
make_dir(RESULTS_PATH)
# configure logging
logging.basicConfig(format='%(asctime)s - %(message)s', datefmt='%d-%b-%y %H:%M:%S', level=logging.INFO)


class DiagnosticCaptioning:
    def __init__(self):
        """ Main application implementing my developed systems
        """
        # fetch user cmd selections
        self.parser = argparse.ArgumentParser()
        self.parse_args()

    def parse_args(self) -> None:
        """ Parse all arguments the user selected at execution
        """

        # Data loader settings
        self.parser.add_argument("--dataset", type=str, default="iu_xray", choices=["iu_xray", "imageclef"], help="the dataset to be used.")

        # Employed model
        self.parser.add_argument("--model_choice", type=str, default="cnn_rnn", choices=["cnn_rnn", "knn"], help="which model to employ for testing.")
        self.parser.add_argument("--k", type=int, default=5, help="k for k-NN")

        # Captions settings
        self.parser.add_argument("--max_length", type=int, default=40, help="the maximum sequence length of the reports.")
        self.parser.add_argument("--threshold", type=int, default=3, help="the cut-off frequency for the words.")

        # Model settings (for layers)
        self.parser.add_argument("--image_encoder", type=str, default="densenet121", help="the visual encoder to be used.")
        self.parser.add_argument("--embedding_dim", type=int, default=100, help="the embedding dimension for Embedding layers.")
        self.parser.add_argument("--ling_model", type=str, default="gru", choices=["gru", "lstm", "bigru"], help="the linguistic model (RNN) for the decoder module as well as the text encoder.")

        # Model settings
        # argparse's type=bool would treat any non-empty string as True, so a store_true flag is used instead
        self.parser.add_argument("--multi_modal", action="store_true", help="whether to use the multi-modal variant of the model (CNN-RNN only).")
        self.parser.add_argument("--dropout", type=float, default=0.2, help="the dropout rate of our model.")

        # Text generation approach
        self.parser.add_argument("--sample_method", type=str, default="greedy", choices=["greedy", "beam_3", "beam_5", "beam_7"], help="the sampling method used to generate a report.")

        # Trainer settings
        self.parser.add_argument("--batch_size", type=int, default=8, help="the number of samples in a batch.")
        self.parser.add_argument("--n_gpu", type=int, default=USE_GPU, help="the number of GPUs to be used.")
        self.parser.add_argument("--epochs", type=int, default=100, help="the number of training epochs.")
        self.parser.add_argument("--save_dir", type=str, default="cnn_rnn", help="the path to save the models.")
        self.parser.add_argument("--early_stop", type=int, default=10, help="the patience of training.")

    def __init_device(self) -> tuple[bool, bool, bool]:
        """ Private method to initialize GPU usage if available, falling back to CPU

        Returns:
            tuple[bool, bool, bool]: Flags indicating whether to use the CPU, a single GPU, or multiple GPUs
        """
        use_CPU, use_GPU, use_multiGPU = False, False, False

        n_gpus = self.parser.parse_args().n_gpu

        # case GPU available
        if n_gpus > 0:
            if n_gpus == 1:
                use_GPU = True
            else:
                use_multiGPU = True
        else:
            # case only the CPU is available
            use_CPU = True

        return use_CPU, use_GPU, use_multiGPU

    def __load_iuxray_data(self) -> tuple[dict, dict, dict]:
        """ Loads the IU X-Ray dataset from its directory

        Returns:
            tuple[dict, dict, dict]: Image vectors, captions and tags as dictionaries keyed by image ID.
        """
        # get the dataset paths
        iu_xray_data_path = os.path.join(DATASET_PATH, 'iu_xray')
        iu_xray_images_data_path = os.path.join(iu_xray_data_path, 'two_images.json')
        iu_xray_captions_data_path = os.path.join(iu_xray_data_path, 'two_captions.json')
        iu_xray_tags_data_path = os.path.join(iu_xray_data_path, 'two_tags.json')

        # fetch images, captions, tags
        with open(iu_xray_images_data_path) as json_file:
            images = json.load(json_file)

        with open(iu_xray_captions_data_path) as json_file:
            captions = json.load(json_file)

        with open(iu_xray_tags_data_path) as json_file:
            tags = json.load(json_file)

        encoder = self.parser.parse_args().image_encoder

        image_encoded_vectors_path = os.path.join(iu_xray_data_path, f"{encoder}.pkl")
        # load the image embeddings for the employed encoder
        image_vecs = load_encoded_vecs(image_encoded_vectors_path)
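        # NOTE: load_encoded_vecs presumably returns a pickled dict mapping each image ID to its
        # precomputed feature vector(s) from the chosen encoder (e.g. densenet121.pkl), per the docstring above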
        return image_vecs, captions, tags

    def __load_imageclef_data(self) -> tuple[dict, dict]:
        """ Loads the ImageCLEF dataset from its directory

        Returns:
            tuple[dict, dict]: Image vectors and captions as dictionaries keyed by image ID.
        """
        # get the dataset path
        imageclef_data_path = os.path.join(DATASET_PATH, 'imageCLEF')
        # fetch images, captions
        imageclef_image_captions_pairs = os.path.join(imageclef_data_path, 'Imageclef2022_dataset_all.csv')
        clef_df = pd.read_csv(imageclef_image_captions_pairs, sep='\t')
        captions = dict(zip(clef_df.ID.to_list(), clef_df.caption.to_list()))

        encoder = self.parser.parse_args().image_encoder

        image_encoded_vectors_path = os.path.join(imageclef_data_path, f"{encoder}.pkl")
        # load the image embeddings for the employed encoder
        image_vecs = load_encoded_vecs(image_encoded_vectors_path)
        return image_vecs, captions

    def __create_iu_xray_dataset(self, images:dict, captions:dict, tags:dict) -> IuXrayDataset:
        """ Builds the IU X-Ray dataset using the IuXrayDataset loader class

        Args:
            images (dict): Dictionary keyed by image ID with the image embeddings as values.
            captions (dict): Dictionary keyed by image ID with the captions as values.
            tags (dict): Dictionary keyed by image ID with the tag embeddings as values.

        Returns:
            IuXrayDataset: the employed IuXrayDataset object
        """
        iu_xray_dataset = IuXrayDataset(image_vectors=images, captions_data=captions, tags_data=tags)
        logging.info('IU-XRay dataset created.')
        logging.info(iu_xray_dataset)
        return iu_xray_dataset

    def __create_imageCLEF_dataset(self, images:dict, captions:dict) -> ImageCLEFDataset:
        """ Builds the ImageCLEF dataset using the ImageCLEFDataset loader class

        Args:
            images (dict): Dictionary keyed by image ID with the image embeddings as values.
            captions (dict): Dictionary keyed by image ID with the captions as values.

        Returns:
            ImageCLEFDataset: the employed ImageCLEFDataset object
        """
        imageCLEF_dataset = ImageCLEFDataset(image_vectors=images, captions_data=captions)
        logging.info('ImageCLEF dataset created.')
        logging.info(imageCLEF_dataset)
        return imageCLEF_dataset

    def train_cnn_rnn(self, dataset:Dataset) -> tuple[CNN_RNN, Model]:
        """ Runs the training process for the implemented CNN-RNN model.
        More details are provided in my Thesis.

        Args:
            dataset (Dataset): The employed dataset, i.e. IU X-Ray or ImageCLEF

        Returns:
            tuple[CNN_RNN, Model]: The created CNN-RNN object and the trained model
        """
        # fetch important args (parsing once instead of once per argument)
        args = self.parser.parse_args()
        which_dataset = args.dataset
        epochs = args.epochs
        encoder = args.image_encoder
        max_length = args.max_length
        embedding_dim = args.embedding_dim
        ling_model = args.ling_model
        multi_modal = args.multi_modal
        logging.info(multi_modal)
        batch_size = args.batch_size

        # create the save directory for the model
        saved_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), args.save_dir)
        make_dir(saved_dir)
        # get the created vocabulary for our CNN-RNN
        _, tokenizer, word2idx, idx2word = dataset.get_tokenizer_utils()
        # build the model name according to the arguments
        model_name = f'{which_dataset}_enc{encoder}_epochs{epochs}_maxlen{max_length}_embed{embedding_dim}_lingmodel{ling_model}_multimodal{multi_modal}'
        saved_model_name = os.path.join(saved_dir, model_name)
        logging.info(f'CNN-RNN model will be saved at: {saved_model_name}.h5')
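        # with the default arguments, model_name evaluates to e.g.:
        #   iu_xray_encdensenet121_epochs100_maxlen40_embed100_lingmodelgru_multimodalFalse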

        # build the CNN-RNN model
        SnT = CNN_RNN(tokenizer=tokenizer, word_to_idx=word2idx,
                        idx_to_word=idx2word, max_length=max_length,
                        embedding_dim=embedding_dim, ling_model=ling_model,
                        multi_modal=multi_modal, loss="categorical_crossentropy")
        logging.info(f'Utilized vocabulary contains {SnT.vocab_size} words!')

        # get dataset splits
        train, dev, test = dataset.get_splits_sets()

        # case IU X-Ray
        if which_dataset == 'iu_xray':
            # fetch all tags
            all_tags = dict(train[2], **dev[2])
            all_tags = dict(all_tags, **test[2])
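            # dict(a, **b) merges the two dicts, with b's entries winning on duplicate keys
            # (on Python 3.9+, a | b is the equivalent)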
            print('TAGS:', len(all_tags))
            # build the tags encoder (used by the multi-modal variant of the network)
            tags_patient_pair = SnT.build_multimodal_encoder(all_tags)
            train_tags = {
                    key:value for key,value in tags_patient_pair.items() if key in train[1].keys()
            }
            # store the training data we want to utilise
            # 1st index --> image vectors
            # 2nd index --> captions
            # 3rd index --> tags
            train_data = [train[0], train[1], train_tags]
        else:
            # case ImageCLEF
            # store the training data we want to utilise
            # 1st index --> image vectors
            # 2nd index --> captions
            train_data = [train[0], train[1]]
        # we use Adam as the optimizer for our training procedure
        optimizer = tensorflow.keras.optimizers.Adam()

        # case IU X-Ray
        if which_dataset == 'iu_xray':
            # get the image embedding input shape; every patient in IU X-Ray has 2 medical images, so we read the shape from the first one
            image_input_shape = list(train[0].values())[0][0].shape[1]
            # start training
            trained_model = SnT.train_iuxray_model(train_data=train_data,
                                                    input_shape=(image_input_shape,),
                                                    optimizer=optimizer,
                                                    model_name=saved_model_name,
                                                    n_epochs=epochs,
                                                    batch_size=batch_size)
        else:
            # case ImageCLEF
            # get the image embedding input shape
            image_input_shape = list(train[0].values())[0].shape[1]
            # start training
            trained_model = SnT.train_imageclef_model(train_data=train_data,
                                                    input_shape=(image_input_shape,),
                                                    optimizer=optimizer,
                                                    model_name=saved_model_name,
                                                    n_epochs=epochs,
                                                    batch_size=batch_size)
        return SnT, trained_model

    def eval_cnn_rnn(self, cnn_rnn:CNN_RNN, model_to_eval:Model, dataset:Dataset) -> None:
        """ Runs the evaluation process for the trained model on the given dataset

        Args:
            cnn_rnn (CNN_RNN): The created CNN-RNN object whose evaluation method we employ
            model_to_eval (Model): The trained model to be assessed
            dataset (Dataset): The employed dataset (IU X-Ray, ImageCLEF)
        """
        # fetch the generation algorithm (greedy or beam search)
        generate_choice = self.parser.parse_args().sample_method
        which_dataset = self.parser.parse_args().dataset

        # fetch the dev and test sets
        _, dev, test = dataset.get_splits_sets()

        # first evaluate our model on the validation set
        if which_dataset == 'iu_xray':
            gold, predicted = cnn_rnn.evaluate_model(model=model_to_eval,
                                                            test_captions=dev[1],
                                                            test_images=dev[0],
                                                            test_tags=dev[2],
                                                            evaluator_choice=generate_choice)
        else:
            gold, predicted = cnn_rnn.evaluate_model(model=model_to_eval,
                                                            test_captions=dev[1],
                                                            test_images=dev[0],
                                                            test_tags=None,
                                                            evaluator_choice=generate_choice)
        # paths for the results dataframes
        dev_gold_path = os.path.join(RESULTS_PATH, 'dev_gold.csv')
        dev_pred_path = os.path.join(RESULTS_PATH, 'dev_pred.csv')

        # save the ground-truth captions
        df_gold = pd.DataFrame.from_dict(gold, orient="index")
        df_gold.to_csv(dev_gold_path, sep='|', header=False)
        # save the predicted captions
        df_pred = pd.DataFrame.from_dict(predicted, orient="index")
        df_pred.to_csv(dev_pred_path, sep='|', header=False)
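        # each CSV gets one "image_id|caption" row per example, which is presumably
        # the format compute_scores expects for its gts/res file arguments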
        # score
        scores = compute_scores(gts=dev_gold_path, res=dev_pred_path, scores_filename='dev_set_cnn_rnn_scores', save_scores=True)
        print('CNN_RNN scores in Validation set')
        pprint(scores)

        # now evaluate our model on the test set
        if which_dataset == 'iu_xray':
            gold, predicted = cnn_rnn.evaluate_model(model=model_to_eval,
                                                            test_captions=test[1],
                                                            test_images=test[0],
                                                            test_tags=test[2],
                                                            eval_dataset=which_dataset,
                                                            evaluator_choice=generate_choice)
        else:
            gold, predicted = cnn_rnn.evaluate_model(model=model_to_eval,
                                                            test_captions=test[1],
                                                            test_images=test[0],
                                                            test_tags=None,
                                                            eval_dataset=which_dataset,
                                                            evaluator_choice=generate_choice)
        # paths for the results dataframes
        test_gold_path = os.path.join(RESULTS_PATH, 'test_gold.csv')
        test_pred_path = os.path.join(RESULTS_PATH, 'test_pred.csv')
        # save the ground-truth captions
        df_gold = pd.DataFrame.from_dict(gold, orient="index")
        df_gold.to_csv(test_gold_path, sep='|', header=False)
        # save the predicted captions
        df_pred = pd.DataFrame.from_dict(predicted, orient="index")
        df_pred.to_csv(test_pred_path, sep='|', header=False)
        # score
        scores = compute_scores(gts=test_gold_path, res=test_pred_path, scores_filename='test_set_cnn_rnn_scores', save_scores=True)
        print('CNN_RNN scores in Test set')
        pprint(scores)

    def run_process(self) -> None:
        """ Runs the whole process according to the user settings.
        It employs the selected dataset with the selected model.
        For the latter we have CNN-RNN and k-NN. More details on each of these models are provided in my Thesis.
        """
        which_dataset = self.parser.parse_args().dataset
        employed_model = self.parser.parse_args().model_choice

        # case IU X-Ray
        if which_dataset == "iu_xray":
            image_vecs, captions, tags = self.__load_iuxray_data()
            iu_xray_dataset = self.__create_iu_xray_dataset(image_vecs, captions, tags)

            # case CNN-RNN
            if employed_model == 'cnn_rnn':

                # train the CNN-RNN model
                cnn_rnn, trained_model = self.train_cnn_rnn(dataset=iu_xray_dataset)

                # evaluate the model on the validation and test sets
                self.eval_cnn_rnn(cnn_rnn=cnn_rnn, model_to_eval=trained_model, dataset=iu_xray_dataset)
            else:
                # case k-NN
                k = self.parser.parse_args().k
                multi_modal = self.parser.parse_args().multi_modal
                kNN = KNN(dataset=iu_xray_dataset, k=k, similarity_function='cosine', text_model='clinical_bert')
                # init the results path (an f-string, so that k is interpolated)
                results_path = os.path.join(RESULTS_PATH, f'iuxray_{k}-NN_test_captions.csv')
                # and execute the k-NN algorithm
                kNN.run_algo(multi_modal=multi_modal, results_dir_path=results_path)
        else:
            # case ImageCLEF
            image_vecs, captions = self.__load_imageclef_data()
            imageCLEF_dataset = self.__create_imageCLEF_dataset(image_vecs, captions)

            # case CNN-RNN
            if employed_model == 'cnn_rnn':

                # train the CNN-RNN model
                cnn_rnn, trained_model = self.train_cnn_rnn(dataset=imageCLEF_dataset)

                # evaluate the model on the validation and test sets
                self.eval_cnn_rnn(cnn_rnn=cnn_rnn, model_to_eval=trained_model, dataset=imageCLEF_dataset)
            else:
                # case k-NN
                k = self.parser.parse_args().k
                kNN = KNN(dataset=imageCLEF_dataset, k=k, similarity_function='cosine', text_model='clinical_bert')
                # init the results path (an f-string, so that k is interpolated)
                results_path = os.path.join(RESULTS_PATH, f'imageclef_{k}-NN_test_captions.csv')
                # and execute the k-NN algorithm
                kNN.run_algo(results_dir_path=results_path)


    def main(self) -> None:
        """ Starts the process for this application
        """
        # flags for GPU and CPU usage
        use_CPU, use_GPU, _ = self.__init_device()

        if use_CPU:
            logging.info('Using CPU')
            with tensorflow.device("/device:CPU:0"):
                self.run_process()
        elif use_GPU:
            logging.info('Using single GPU')
            with tensorflow.device("/device:GPU:0"):
                self.run_process()
        else:
            logging.info('Using multi GPU')
            tensorflow.debugging.set_log_device_placement(True)
            gpus = tensorflow.config.list_logical_devices("GPU")
            strategy = tensorflow.distribute.MirroredStrategy(gpus)
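            # NOTE: MirroredStrategy performs synchronous data-parallel training,
            # replicating the model on each listed GPU and aggregating gradients across replicas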
            with strategy.scope():
                self.run_process()


if __name__ == '__main__':
    logging.info(DATASET_PATH)
    dc = DiagnosticCaptioning()
    dc.main()