--- a
+++ b/dc.py
@@ -0,0 +1,451 @@
+# python modules
+import argparse, os, pickle
+import logging
+import errno
+from pprint import pprint
+import pandas as pd
+import json
+
+# Environment settings, applied before the tensorflow import below (presumably so that TensorFlow/CUDA pick them up when loading -- confirm)
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
+# os.environ["CUDA_VISIBLE_DEVICES"]="1"  (kept disabled: would restrict which CUDA devices this process may see)
+
+# tensorflow imports
+import tensorflow
+from tensorflow.keras.models import Model
+# GPUs that TensorFlow can see on this machine
+physical_devices = tensorflow.config.list_physical_devices('GPU')
+# USE_GPU is 1 when at least one GPU was found, otherwise 0 (it is the default of the --n_gpu option)
+USE_GPU = 1 if len(physical_devices) > 0 else 0
+# enable memory growth for every detected GPU
+for device in physical_devices:
+    tensorflow.config.experimental.set_memory_growth(device, True)
+
+# import utils and models    
+from utils.metrics import compute_scores
+from models import *
+from modules.image_encoder import load_encoded_vecs
+from utils import *
+from utils.dataset import Dataset, IuXrayDataset, ImageCLEFDataset
+
+# nltk: the 'punkt' resource is downloaded (with quiet=True) every time this module is loaded
+import nltk
+nltk.download('punkt', quiet=True)
+
+# dataset root is the 'data' folder next to this file; the results are kept in its 'results' sub-folder
+DATASET_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
+RESULTS_PATH = os.path.join(DATASET_PATH, 'results')
+
+
+def make_dir(str_path:str) -> None:
+    """ Create the directory at str_path, together with any missing parent directories.
+    Nothing happens when the directory already exists.
+
+    Args:
+        str_path (str): The path of the directory to create
+
+    Raises:
+        OSError: if the directory cannot be created, e.g. when str_path exists but is not a directory
+    """
+    # makedirs (unlike mkdir) also builds missing parents, such as the 'data' folder of RESULTS_PATH
+    os.makedirs(str_path, exist_ok=True)
+# make sure the results directory exists as soon as this module is loaded
+make_dir(RESULTS_PATH)
+# configure the root logger: INFO level, every message is prefixed with a day-month-year time stamp
+logging.basicConfig(format='%(asctime)s - %(message)s', datefmt='%d-%b-%y %H:%M:%S', level=logging.INFO)
+
+
+class DiagnosticCaptioning:
+    def __init__(self):
+        """ Main application: creates the command line parser and registers all its options
+        """
+        arg_parser = argparse.ArgumentParser()
+        self.parser = arg_parser
+        self.parse_agrs()
+        
+    def parse_agrs(self) -> None:
+        """ Register every command line option of the application on self.parser.
+        --multi_modal takes true/false style values, e.g. "--multi_modal False"; other values are rejected.
+        """
+        def str_to_bool(value:str) -> bool:
+            # type=bool would turn ANY non-empty string (even "False") into True, so the text is parsed explicitly
+            text = str(value).strip().lower()
+            if text not in ("true", "t", "yes", "y", "1", "false", "f", "no", "n", "0"):
+                raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")
+            return text in ("true", "t", "yes", "y", "1")
+        # Data loader settings
+        self.parser.add_argument("--dataset", type=str, default="iu_xray", choices=["iu_xray", "imageclef"], help="the dataset to be used.")
+        # Employing model
+        self.parser.add_argument("--model_choice", type=str, default="cnn_rnn", choices=["cnn_rnn", "knn"], help="Which model to employ for testing.")
+        self.parser.add_argument("--k", type=int, default=5, help="k for K-NN")
+        # Captions settings
+        self.parser.add_argument("--max_length", type=int, default=40, help="the maximum sequence length of the reports.")
+        self.parser.add_argument("--threshold", type=int, default=3, help="the cut off frequency for the words.")
+        # Model settings (for layers)
+        self.parser.add_argument("--image_encoder", type=str, default="densenet121", help="the visual encoder to be used.")
+        self.parser.add_argument("--embedding_dim", type=int, default=100, help="the embedding dimension for Embedding Layers.")
+        self.parser.add_argument("--ling_model", type=str, default="gru", choices=["gru", "lstm", "bigru"], help="the Linguistig Model (RNN) for Decoder module as well as Text encoder.")
+        # Model settings
+        self.parser.add_argument("--multi_modal", type=str_to_bool, default=False, help="if to use multi_modal as our model for CNN-RNN only.")
+        self.parser.add_argument("--dropout", type=float, default=0.2, help="the dropout rate of our model.")
+        # Generate text approach related
+        self.parser.add_argument("--sample_method", type=str, default="greedy", choices=["greedy", "beam_3", "beam_5", "beam_7"], help="the sample methods to sample a report.")
+        # Trainer settings (the --n_gpu default is the module level USE_GPU flag)
+        self.parser.add_argument("--batch_size", type=int, default=8, help="the number of samples for a batch",)
+        self.parser.add_argument("--n_gpu", type=int, default=USE_GPU, help="the number of gpus to be used.")
+        self.parser.add_argument("--epochs", type=int, default=100, help="the number of training epochs.")
+        self.parser.add_argument("--save_dir",type=str, default="cnn_rnn",help="the path to save the models.")
+        self.parser.add_argument("--early_stop", type=int, default=10, help="the patience of training.")
+        
+    def __init_device(self) -> tuple[bool, bool, bool]: 
+        """ Private method that turns the requested number of GPUs (--n_gpu) into device flags
+
+        Exactly one of the returned flags is True.
+
+        Returns:
+            tuple[bool, bool, bool]: (use_CPU, use_GPU, use_multiGPU), i.e. whether to run on the CPU, on a single GPU or on multiple GPUs
+        """
+        # --n_gpu defaults to USE_GPU, i.e. 1 when a GPU was detected while loading this module and 0 otherwise
+        requested_gpus = self.parser.parse_args().n_gpu
+
+        # more than one GPU requested --> multi GPU run
+        if requested_gpus > 1:
+            return False, False, True
+
+        # exactly one GPU requested --> single GPU run
+        if requested_gpus == 1:
+            return False, True, False
+
+        # zero (or a negative number of) GPUs requested --> CPU run
+        return True, False, False
+        
+    
+    def __load_iuxray_data(self) -> tuple[dict, dict, dict]:
+        """ Loads IU X-Ray dataset from directory
+
+        The captions and the tags are read from 'two_captions.json' and 'two_tags.json', while the image
+        vectors are loaded from the '.pkl' file that is named after the selected --image_encoder.
+
+        Returns:
+            tuple[dict, dict, dict]: Image vectors, captions and tags in dictionary format, with keys to be the Image IDs.
+        """
+        # get dataset path
+        iu_xray_data_path = os.path.join(DATASET_PATH, 'iu_xray')
+        iu_xray_captions_data_path = os.path.join(iu_xray_data_path, 'two_captions.json')
+        iu_xray_tags_data_path = os.path.join(iu_xray_data_path, 'two_tags.json')
+
+        # fetch captions and tags. 'two_images.json' is not parsed anymore: its content was never used,
+        # because the image information comes from the encoder vectors that are loaded below
+        with open(iu_xray_captions_data_path, encoding='utf-8') as json_file:
+            captions = json.load(json_file)
+
+        with open(iu_xray_tags_data_path, encoding='utf-8') as json_file:
+            tags = json.load(json_file)
+
+        # load image embeddings for the employed encoder
+        # NOTE(review): the '.pkl' suffix suggests load_encoded_vecs unpickles the file -- load only trusted files
+        encoder = self.parser.parse_args().image_encoder
+        image_encoded_vectors_path = os.path.join(iu_xray_data_path, f"{encoder}.pkl")
+        image_vecs = load_encoded_vecs(image_encoded_vectors_path)
+        return image_vecs, captions, tags
+    
+    def __load_imageclef_data(self) -> tuple[dict, dict]:
+        """ Reads the captions and the encoded image vectors of ImageCLEF from the dataset directory
+
+        Returns:
+            tuple[dict, dict]: Image vectors, captions in dictionary format, with keys to be the Image IDs.
+        """
+        # every ImageCLEF file lives in the 'imageCLEF' folder of DATASET_PATH
+        imageclef_dir = os.path.join(DATASET_PATH, 'imageCLEF')
+
+        # the tab separated csv pairs each image ID ('ID' column) with its caption ('caption' column)
+        pairs_csv_path = os.path.join(imageclef_dir, 'Imageclef2022_dataset_all.csv')
+        pairs_df = pd.read_csv(pairs_csv_path, sep='\t')
+        image_ids = pairs_df.ID.to_list()
+        image_captions = pairs_df.caption.to_list()
+        captions = dict(zip(image_ids, image_captions))
+
+        # the image embeddings of the employed encoder are stored in a '.pkl' file named after it
+        encoder_name = self.parser.parse_args().image_encoder
+        vectors_path = os.path.join(imageclef_dir, f"{encoder_name}.pkl")
+        return load_encoded_vecs(vectors_path), captions
+    
+    def __create_iu_xray_dataset(self, images:dict, captions:dict, tags:dict) -> IuXrayDataset:
+        """ Wraps the loaded IU X-Ray data in an IuXrayDataset loader object and logs the created object
+
+        Args:
+            images (dict): Dictionary with keys to be the ImageIDs and values the image embeddings.
+            captions (dict): Dictionary with keys to be the ImageIDs and values the captions.
+            tags (dict): Dictionary with keys to be the ImageIDs and values the tags embeddings.
+
+        Returns:
+            IuXrayDataset: the employed IuXrayDataset object
+        """
+        dataset = IuXrayDataset(image_vectors=images, captions_data=captions, tags_data=tags)
+        logging.info('IU-XRay dataset created.')
+        logging.info(dataset)
+        return dataset
+    
+    def __create_imageCLEF_dataset(self, images:dict, captions:dict) -> ImageCLEFDataset:
+        """ Wraps the loaded ImageCLEF data in an ImageCLEFDataset loader object and logs the created object
+
+        Args:
+            images (dict): Dictionary with keys to be the ImageIDs and values the image embeddings.
+            captions (dict): Dictionary with keys to be the ImageIDs and values the captions.
+
+        Returns:
+            ImageCLEFDataset: the employed ImageCLEFDataset object
+        """
+        dataset = ImageCLEFDataset(image_vectors=images, captions_data=captions)
+        logging.info('ImageCLEF dataset created.')
+        logging.info(dataset)
+        return dataset
+    
+    def train_cnn_rnn(self, dataset:Dataset) -> tuple[CNN_RNN, Model]:
+        """ Builds a CNN-RNN from the user settings and trains it on the training split of the given dataset.
+        More details are provided in my Thesis
+
+        The model path lies inside the --save_dir directory, encodes the dataset, the image encoder and the
+        main hyper-parameters in its name and is handed to the training routine of the CNN-RNN.
+
+        Args:
+            dataset (Dataset): The employed dataset, i.e. IU X-Ray or ImageCLEF
+
+        Returns:
+            CNN_RNN, Model: The created CNN-RNN and the trained model
+        """
+        # parse the command line once and read every needed setting from the result
+        args = self.parser.parse_args()
+        is_iu_xray = args.dataset == 'iu_xray'
+        logging.info(args.multi_modal)
+
+        # create the save directory for the model (save_dir is resolved against the folder of this file)
+        saved_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), args.save_dir)
+        make_dir(saved_dir)
+        # get the created vocabulary for our CNN-RNN
+        _, tokenizer, word2idx, idx2word = dataset.get_tokenizer_utils()
+        # make the model name according to arguments
+        model_name = (
+            f'{args.dataset}_enc{args.image_encoder}_epochs{args.epochs}_maxlen{args.max_length}'
+            f'_embed{args.embedding_dim}_lingmodel{args.ling_model}_multimodal{args.multi_modal}'
+        )
+        saved_model_name = os.path.join(saved_dir, model_name)
+        logging.info(f'CNN-RNN model will be saved at: {saved_model_name}.h5')
+
+        # build the CNN-RNN model
+        SnT = CNN_RNN(
+            tokenizer=tokenizer,
+            word_to_idx=word2idx,
+            idx_to_word=idx2word,
+            max_length=args.max_length,
+            embedding_dim=args.embedding_dim,
+            ling_model=args.ling_model,
+            multi_modal=args.multi_modal,
+            loss="categorical_crossentropy",
+        )
+        logging.info(f'Utilized vocabulary contains {SnT.vocab_size} words!')
+
+        # get dataset splits; index 0 of a split holds the image vectors and index 1 the captions
+        train, dev, test = dataset.get_splits_sets()
+        train_images, train_captions = train[0], train[1]
+
+        if is_iu_xray:
+            # case IU X-Ray: index 2 of every split holds its tags, so merge them to fetch all tags
+            all_tags = dict(dict(train[2], **dev[2]), **test[2])
+            print('TAGS:', len(all_tags))
+            # hand all tags to the multi-modal encoder builder and keep only the returned training entries
+            tags_patient_pair = SnT.build_multimodal_encoder(all_tags)
+            training_ids = train_captions.keys()
+            train_tags = {
+                key: value for key, value in tags_patient_pair.items() if key in training_ids
+            }
+            # training data we want to utilise: [image vectors, captions, tags]
+            train_data = [train_images, train_captions, train_tags]
+        else:
+            # case ImageCLEF, training data we want to utilise: [image vectors, captions]
+            train_data = [train_images, train_captions]
+
+        # we use Adam as our optimizer for our training procedure
+        optimizer = tensorflow.keras.optimizers.Adam()
+
+        # get the image embedding input shape from the first training entry
+        first_entry = list(train_images.values())[0]
+        if is_iu_xray:
+            # Every patient in IU X-Ray has 2 medical images. Thus, we read the shape from the first one.
+            image_input_shape = first_entry[0].shape[1]
+            train_method = SnT.train_iuxray_model
+        else:
+            # case ImageCLEF: the entry itself is the image embedding
+            image_input_shape = first_entry.shape[1]
+            train_method = SnT.train_imageclef_model
+
+        # start train
+        trained_model = train_method(
+            train_data=train_data,
+            input_shape=(image_input_shape,),
+            optimizer=optimizer,
+            model_name=saved_model_name,
+            n_epochs=args.epochs,
+            batch_size=args.batch_size,
+        )
+
+        return SnT, trained_model
+    
+    def eval_cnn_rnn(self, cnn_rnn:CNN_RNN, model_to_eval:Model, dataset:Dataset) -> None:
+        """ Assesses the trained model in the validation set and afterwards in the test set of the given dataset.
+        For each of the two sets, the gold and the generated captions are written in '|' separated csv files
+        inside RESULTS_PATH, the scores are computed from these files and then they are printed.
+        The validation files use the 'dev' prefix in their names and the test files the 'test' one.
+
+        Args:
+            cnn_rnn (CNN_RNN): The created CNN-RNN object that we will employ to apply our evaluation method
+            model_to_eval (Model): The trained model that will be assessed
+            dataset (Dataset): The employed dataset (IU X-Ray, ImageCLEF)
+        """
+        args = self.parser.parse_args()
+        # the generation algorithm (Greedy or Beam Search)
+        generate_choice = args.sample_method
+        which_dataset = args.dataset
+        # tags are passed to the evaluation only for IU X-Ray
+        with_tags = which_dataset == 'iu_xray'
+
+        # fetch dev, test set
+        _, dev, test = dataset.get_splits_sets()
+
+        # The evaluation plan: first the validation set and then the test set.
+        # NOTE(review): eval_dataset is given to evaluate_model only for the test set; it is unclear from
+        # this file whether the validation call omits it on purpose, so this difference is kept untouched.
+        evaluation_plan = (
+            {
+                'split': dev,
+                'gold_file': 'dev_gold.csv',
+                'pred_file': 'dev_pred.csv',
+                'scores_filename': 'dev_set_cnn_rnn_scores',
+                'title': 'CNN_RNN scores in Validation set',
+                'extra_arguments': {},
+            },
+            {
+                'split': test,
+                'gold_file': 'test_gold.csv',
+                'pred_file': 'test_pred.csv',
+                'scores_filename': 'test_set_cnn_rnn_scores',
+                'title': 'CNN_RNN scores in Test set',
+                'extra_arguments': {'eval_dataset': which_dataset},
+            },
+        )
+
+        # run the very same steps for each planned set
+        for step in evaluation_plan:
+            split = step['split']
+            # generate the captions of the set; index 0 --> image vectors, 1 --> captions, 2 --> tags
+            gold, predicted = cnn_rnn.evaluate_model(
+                model=model_to_eval,
+                test_captions=split[1],
+                test_images=split[0],
+                test_tags=split[2] if with_tags else None,
+                evaluator_choice=generate_choice,
+                **step['extra_arguments'],
+            )
+
+            # get the results path for our results dataframe
+            gold_path = os.path.join(RESULTS_PATH, step['gold_file'])
+            pred_path = os.path.join(RESULTS_PATH, step['pred_file'])
+
+            # save gold truth captions
+            pd.DataFrame.from_dict(gold, orient="index").to_csv(gold_path, sep='|', header=False)
+            # save predicted captions
+            pd.DataFrame.from_dict(predicted, orient="index").to_csv(pred_path, sep='|', header=False)
+
+            # score
+            scores = compute_scores(
+                gts=gold_path, res=pred_path, scores_filename=step['scores_filename'], save_scores=True
+            )
+            print(step['title'])
+            pprint(scores)
+        
+    
+    def run_process(self) -> None:
+        """ Begins the whole process according to the user settings.
+        It employs the selected dataset in the selected model.
+        For the latter we have CNN-RNN and kNN. More details for each of these models are provided in my Thesis.
+
+        The k-NN results path lies in RESULTS_PATH and its file name contains the dataset and the selected k.
+        """
+        args = self.parser.parse_args()
+        which_dataset, employed_model = args.dataset, args.model_choice
+        
+        # case IU X-Ray
+        if which_dataset == "iu_xray":
+            image_vecs, captions, tags = self.__load_iuxray_data()
+            iu_xray_dataset = self.__create_iu_xray_dataset(image_vecs, captions, tags)
+            
+            # case CNN-RNN
+            if employed_model == 'cnn_rnn':
+                # Train CNN-RNN model
+                cnn_rnn, trained_model = self.train_cnn_rnn(dataset=iu_xray_dataset)
+                
+                # Evaluate the model in Validation and Test set
+                self.eval_cnn_rnn(cnn_rnn=cnn_rnn, model_to_eval=trained_model, dataset=iu_xray_dataset)
+            else:
+                # case k-NN
+                k = args.k
+                multi_modal = args.multi_modal
+                kNN = KNN(dataset=iu_xray_dataset, k=k, similarity_function='cosine', text_model='clinical_bert')
+                # init the results path; the f-string puts the actual k in the file name (it used to hold a literal '{k}')
+                results_path = os.path.join(RESULTS_PATH, f'iuxray_{k}-NN_test_captions.csv')
+                # and execute the k-NN algorithm
+                kNN.run_algo(multi_modal = multi_modal, results_dir_path=results_path)
+        else:
+            # case ImageCLEF
+            image_vecs, captions = self.__load_imageclef_data()
+            imageCLEF_dataset = self.__create_imageCLEF_dataset(image_vecs, captions)
+            
+            # case CNN-RNN
+            if employed_model == 'cnn_rnn':
+                # Train CNN-RNN model
+                cnn_rnn, trained_model = self.train_cnn_rnn(dataset=imageCLEF_dataset)
+                
+                # Evaluate the model in Validation and Test set
+                self.eval_cnn_rnn(cnn_rnn=cnn_rnn, model_to_eval=trained_model, dataset=imageCLEF_dataset)
+            else:
+                # case k-NN (the multi_modal setting is not passed to the algorithm for this dataset)
+                k = args.k
+                kNN = KNN(dataset=imageCLEF_dataset, k=k, similarity_function='cosine', text_model='clinical_bert')
+                # init the results path; the f-string puts the actual k in the file name (it used to hold a literal '{k}')
+                results_path = os.path.join(RESULTS_PATH, f'imageclef_{k}-NN_test_captions.csv')
+                # and execute the k-NN algorithm
+                kNN.run_algo(results_dir_path=results_path)
+                
+
+    def main(self) -> None:
+        """ Runs the application on the device(s) selected through --n_gpu: the CPU, a single GPU or multiple GPUs (MirroredStrategy)
+        """
+        # flags for GPU and CPU usage; exactly one of the three flags of __init_device is set
+        use_CPU, use_GPU, _ = self.__init_device()
+
+        if use_CPU:
+            logging.info('Using CPU')
+            # pin the run to the CPU device; a GPU device string was used here before, contradicting the CPU choice
+            with tensorflow.device("/device:CPU:0"):
+                self.run_process()
+        elif use_GPU:
+            logging.info('Using single GPU')
+            with tensorflow.device("/device:GPU:0"):
+                self.run_process()
+        else:
+            logging.info('Using multi GPU')
+            tensorflow.debugging.set_log_device_placement(True)
+            gpus = tensorflow.config.list_logical_devices("GPU")
+            strategy = tensorflow.distribute.MirroredStrategy(gpus)
+            with strategy.scope():
+                self.run_process()
+        
+
+if __name__ == '__main__':
+    # script entry point: log where the data is expected, then build and start the application
+    logging.info(DATASET_PATH)
+    DiagnosticCaptioning().main()
+    
+    
+    
\ No newline at end of file