--- a/dc.py
+++ b/dc.py
@@ -0,0 +1,451 @@
+# python modules
+import argparse, os, pickle
+import logging
+import errno
+from pprint import pprint
+import pandas as pd
+import json
+
+# os modifications
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
+# os.environ["CUDA_VISIBLE_DEVICES"]="1"
+
+# tensorflow imports
+import tensorflow
+from tensorflow.keras.models import Model
+physical_devices = tensorflow.config.list_physical_devices('GPU')
+# use the number of visible GPUs as the default for --n_gpu (0 means CPU-only)
+USE_GPU = len(physical_devices)
+for device in physical_devices:
+    tensorflow.config.experimental.set_memory_growth(device, True)
+
+# import utils and models
+from utils.metrics import compute_scores
+from models import *
+from modules.image_encoder import load_encoded_vecs
+from utils import *
+from utils.dataset import Dataset, IuXrayDataset, ImageCLEFDataset
+
+# import nltk
+import nltk
+nltk.download('punkt', quiet=True)
+
+# store the dataset path as well as the results path
+DATASET_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
+RESULTS_PATH = os.path.join(DATASET_PATH, 'results')
+
+
+def make_dir(str_path:str) -> None:
+    """ Create a directory, ignoring the error if it already exists
+
+    Args:
+        str_path (str): The path of the directory to create
+    """
+    try:
+        os.mkdir(str_path)
+    except OSError as exc:
+        if exc.errno != errno.EEXIST:
+            raise
+# make results directory
+make_dir(RESULTS_PATH)
+# begin logging
+logging.basicConfig(format='%(asctime)s - %(message)s', datefmt='%d-%b-%y %H:%M:%S', level=logging.INFO)
+
+
+class DiagnosticCaptioning:
+    def __init__(self):
+        """ Main application implementing the systems I developed
+        """
+        # fetch user cmd selections
+        self.parser = argparse.ArgumentParser()
+        self.parse_args()
+
+    def parse_args(self) -> None:
+        """ Parse all arguments selected by the user at execution time
+        """
+
+        # Data loader settings
+        self.parser.add_argument("--dataset", type=str, default="iu_xray", choices=["iu_xray", "imageclef"], help="the dataset to be used.")
+
+        # Employed model
+        self.parser.add_argument("--model_choice", type=str, default="cnn_rnn", choices=["cnn_rnn", "knn"], help="which model to employ for testing.")
+        self.parser.add_argument("--k", type=int, default=5, help="the k for the k-NN model.")
+
+        # Caption settings
+        self.parser.add_argument("--max_length", type=int, default=40, help="the maximum sequence length of the reports.")
+        self.parser.add_argument("--threshold", type=int, default=3, help="the cut-off frequency for the words.")
+
+        # Model settings (for layers)
+        self.parser.add_argument("--image_encoder", type=str, default="densenet121", help="the visual encoder to be used.")
+        self.parser.add_argument("--embedding_dim", type=int, default=100, help="the embedding dimension for Embedding layers.")
+        self.parser.add_argument("--ling_model", type=str, default="gru", choices=["gru", "lstm", "bigru"], help="the linguistic model (RNN) for the decoder module as well as the text encoder.")
+
+        # Model settings
+        # NOTE: argparse's type=bool treats any non-empty string as True, so a
+        # store_true flag is used instead of type=bool with a False default.
+        self.parser.add_argument("--multi_modal", action="store_true", help="whether to use the multi-modal version of the CNN-RNN model.")
+        self.parser.add_argument("--dropout", type=float, default=0.2, help="the dropout rate of our model.")
+
+        # Text generation approach
+        self.parser.add_argument("--sample_method", type=str, default="greedy", choices=["greedy", "beam_3", "beam_5", "beam_7"], help="the sampling method used to generate a report.")
+
+        # Trainer settings
+        self.parser.add_argument("--batch_size", type=int, default=8, help="the number of samples in a batch")
+        self.parser.add_argument("--n_gpu", type=int, default=USE_GPU, help="the number of gpus to be used.")
+        self.parser.add_argument("--epochs", type=int, default=100, help="the number of training epochs.")
+        self.parser.add_argument("--save_dir", type=str, default="cnn_rnn", help="the path to save the models.")
+        self.parser.add_argument("--early_stop", type=int, default=10, help="the patience of training.")
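+
+    # Example invocation (illustrative only; every flag used here is defined
+    # above, and all values shown are defaults or documented choices):
+    #   python dc.py --dataset iu_xray --model_choice cnn_rnn \
+    #       --image_encoder densenet121 --ling_model gru --sample_method beam_5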
batch",) + self.parser.add_argument("--n_gpu", type=int, default=USE_GPU, help="the number of gpus to be used.") + self.parser.add_argument("--epochs", type=int, default=100, help="the number of training epochs.") + self.parser.add_argument("--save_dir",type=str, default="cnn_rnn",help="the path to save the models.") + self.parser.add_argument("--early_stop", type=int, default=10, help="the patience of training.") + + def __init_device(self) -> tuple[bool, bool, bool]: + """ Private method to initialize the GPU usage if available else CPU + + Returns: + tuple[bool, bool, bool]: Bool variables whether to use sinlge or multiple GPUs if available else CPU + """ + use_CPU, use_GPU, use_multiGPU = False, False, False + + n_gpus = self.parser.parse_args().n_gpu + + # case GPU available + if n_gpus > 0: + if n_gpus == 1: + use_GPU = True + else: + use_multiGPU = True + else: + # case CPU available + use_CPU = True + + return use_CPU, use_GPU, use_multiGPU + + + def __load_iuxray_data(self) -> tuple[dict, dict, dict]: + """ Loads IU X-Ray dataset from directory + + Returns: + tuple[dict, dict, dict]: Image vectors, captions and tags in dictionary format, with keys to be the Image IDs. + """ + # get dataset path + iu_xray_data_path = os.path.join(DATASET_PATH, 'iu_xray') + iu_xray_images_data_path = os.path.join(iu_xray_data_path, 'two_images.json') + iu_xray_captions_data_path = os.path.join(iu_xray_data_path, 'two_captions.json') + iu_xray_tags_data_path = os.path.join(iu_xray_data_path, 'two_tags.json') + + # fetch images, captions, tags + with open(iu_xray_images_data_path) as json_file: + images = json.load(json_file) + + with open(iu_xray_captions_data_path) as json_file: + captions = json.load(json_file) + + with open(iu_xray_tags_data_path) as json_file: + tags = json.load(json_file) + + encoder = self.parser.parse_args().image_encoder + + image_encoded_vectors_path = os.path.join(iu_xray_data_path, f"{encoder}.pkl") + # load image embeddings for the employed encoder + image_vecs = load_encoded_vecs(image_encoded_vectors_path) + return image_vecs, captions, tags + + def __load_imageclef_data(self) -> tuple[dict, dict]: + """ Loads ImageCLEF dataset from directory + + Returns: + tuple[dict, dict]: Image vectors, captions in dictionary format, with keys to be the Image IDs. + """ + # get dataset path + imageclef_data_path = os.path.join(DATASET_PATH, 'imageCLEF') + # fetch images, captions + imageclef_image_captions_pairs = os.path.join(imageclef_data_path, 'Imageclef2022_dataset_all.csv') + clef_df = pd.read_csv(imageclef_image_captions_pairs, sep='\t') + captions = dict( zip( clef_df.ID.to_list(), clef_df.caption.to_list() ) ) + + + encoder = self.parser.parse_args().image_encoder + + image_encoded_vectors_path = os.path.join(imageclef_data_path, f"{encoder}.pkl") + # load image embeddings for the employed encoder + image_vecs = load_encoded_vecs(image_encoded_vectors_path) + return image_vecs, captions + + def __create_iu_xray_dataset(self, images:dict, captions:dict, tags:dict) -> IuXrayDataset: + """ Builds the IU X-Ray dataset using the IuXrayDataset loader class + + Args: + images (dict): Dictionary with keys to be the ImageIDs and values the image embeddings. + captions (dict): Dictionary with keys to be the ImageIDs and values the captions. + tags (dict): Dictionary with keys to be the ImageIDs and values the tags embeddings. 
+
+    def __create_iu_xray_dataset(self, images:dict, captions:dict, tags:dict) -> IuXrayDataset:
+        """ Builds the IU X-Ray dataset using the IuXrayDataset loader class
+
+        Args:
+            images (dict): Dictionary mapping image IDs to image embeddings.
+            captions (dict): Dictionary mapping image IDs to captions.
+            tags (dict): Dictionary mapping image IDs to tag embeddings.
+
+        Returns:
+            IuXrayDataset: the employed IuXrayDataset object
+        """
+        iu_xray_dataset = IuXrayDataset(image_vectors=images, captions_data=captions, tags_data=tags)
+        logging.info('IU-XRay dataset created.')
+        logging.info(iu_xray_dataset)
+        return iu_xray_dataset
+
+    def __create_imageCLEF_dataset(self, images:dict, captions:dict) -> ImageCLEFDataset:
+        """ Builds the ImageCLEF dataset using the ImageCLEFDataset loader class
+
+        Args:
+            images (dict): Dictionary mapping image IDs to image embeddings.
+            captions (dict): Dictionary mapping image IDs to captions.
+
+        Returns:
+            ImageCLEFDataset: the employed ImageCLEFDataset object
+        """
+        imageCLEF_dataset = ImageCLEFDataset(image_vectors=images, captions_data=captions)
+        logging.info('ImageCLEF dataset created.')
+        logging.info(imageCLEF_dataset)
+        return imageCLEF_dataset
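+
+    # Data contract assumed below (as reflected in the index comments inside
+    # train_cnn_rnn): every split returned by dataset.get_splits_sets() is
+    # indexed as split[0] -> image-vector dict, split[1] -> captions dict and,
+    # for IU X-Ray only, split[2] -> tags dict.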
+
+    def train_cnn_rnn(self, dataset:Dataset) -> tuple[CNN_RNN, Model]:
+        """ Begins the training process for the implemented CNN-RNN model
+        More details are provided in my Thesis
+
+        Args:
+            dataset (Dataset): The employed dataset, i.e. IU X-Ray or ImageCLEF
+
+        Returns:
+            tuple[CNN_RNN, Model]: The created CNN-RNN object and the trained model
+        """
+        # fetch important args (parse once and reuse)
+        args = self.parser.parse_args()
+        which_dataset = args.dataset
+        epochs = args.epochs
+        encoder = args.image_encoder
+        max_length = args.max_length
+        embedding_dim = args.embedding_dim
+        ling_model = args.ling_model
+        multi_modal = args.multi_modal
+        logging.info(f'Multi-modal: {multi_modal}')
+        batch_size = args.batch_size
+
+        # create the save directory for the model
+        saved_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), args.save_dir)
+        make_dir(saved_dir)
+        # get the created vocabulary for our CNN-RNN
+        _, tokenizer, word2idx, idx2word = dataset.get_tokenizer_utils()
+        # make the model name according to the arguments
+        model_name = f'{which_dataset}_enc{encoder}_epochs{epochs}_maxlen{max_length}_embed{embedding_dim}_lingmodel{ling_model}_multimodal{multi_modal}'
+        saved_model_name = os.path.join(saved_dir, model_name)
+        logging.info(f'CNN-RNN model will be saved at: {saved_model_name}.h5')
+
+        # build the CNN-RNN model
+        SnT = CNN_RNN(tokenizer=tokenizer, word_to_idx=word2idx,
+                      idx_to_word=idx2word, max_length=max_length,
+                      embedding_dim=embedding_dim, ling_model=ling_model,
+                      multi_modal=multi_modal, loss="categorical_crossentropy")
+        logging.info(f'Utilized vocabulary contains {SnT.vocab_size} words!')
+
+        # get dataset splits
+        train, dev, test = dataset.get_splits_sets()
+
+        # case IU X-Ray
+        if which_dataset == 'iu_xray':
+            # fetch all tags
+            all_tags = dict(train[2], **dev[2])
+            all_tags = dict(all_tags, **test[2])
+            logging.info(f'TAGS: {len(all_tags)}')
+            # encode the tags (used by the multi-modal version of the network)
+            tags_patient_pair = SnT.build_multimodal_encoder(all_tags)
+            train_tags = {
+                key: value for key, value in tags_patient_pair.items() if key in train[1].keys()
+            }
+            # store the training data we want to utilise
+            # 1st index --> image vectors
+            # 2nd index --> captions
+            # 3rd index --> tags
+            train_data = [train[0], train[1], train_tags]
+        else:
+            # case ImageCLEF
+            # store the training data we want to utilise
+            # 1st index --> image vectors
+            # 2nd index --> captions
+            train_data = [train[0], train[1]]
+        # we use Adam as the optimizer for the training procedure
+        optimizer = tensorflow.keras.optimizers.Adam()
+
+        # case IU X-Ray
+        if which_dataset == 'iu_xray':
+            # get the image embedding input shape. Every patient in IU X-Ray has 2 medical images, so we read the shape from the first one.
+            image_input_shape = list(train[0].values())[0][0].shape[1]
+            # start training
+            trained_model = SnT.train_iuxray_model(train_data=train_data,
+                                                   input_shape=(image_input_shape,),
+                                                   optimizer=optimizer,
+                                                   model_name=saved_model_name,
+                                                   n_epochs=epochs,
+                                                   batch_size=batch_size)
+        else:
+            # case ImageCLEF
+            # get the image embedding input shape.
+            image_input_shape = list(train[0].values())[0].shape[1]
+            # start training
+            trained_model = SnT.train_imageclef_model(train_data=train_data,
+                                                      input_shape=(image_input_shape,),
+                                                      optimizer=optimizer,
+                                                      model_name=saved_model_name,
+                                                      n_epochs=epochs,
+                                                      batch_size=batch_size)
+        return SnT, trained_model
+
+    def eval_cnn_rnn(self, cnn_rnn:CNN_RNN, model_to_eval:Model, dataset:Dataset) -> None:
+        """ Begins the evaluation process for the trained model on the given dataset
+
+        Args:
+            cnn_rnn (CNN_RNN): The created CNN-RNN object we employ for our evaluation method
+            model_to_eval (Model): The trained model to be assessed
+            dataset (Dataset): The employed dataset (IU X-Ray, ImageCLEF)
+        """
+        # fetch the generation algorithm (Greedy or Beam Search)
+        args = self.parser.parse_args()
+        generate_choice = args.sample_method
+        which_dataset = args.dataset
+
+        # fetch dev, test set
+        _, dev, test = dataset.get_splits_sets()
+
+        # first evaluate our model on the validation set
+        if which_dataset == 'iu_xray':
+            gold, predicted = cnn_rnn.evaluate_model(model=model_to_eval,
+                                                     test_captions=dev[1],
+                                                     test_images=dev[0],
+                                                     test_tags=dev[2],
+                                                     evaluator_choice=generate_choice)
+        else:
+            gold, predicted = cnn_rnn.evaluate_model(model=model_to_eval,
+                                                     test_captions=dev[1],
+                                                     test_images=dev[0],
+                                                     test_tags=None,
+                                                     evaluator_choice=generate_choice)
+        # get the results paths for our results dataframes
+        dev_gold_path = os.path.join(RESULTS_PATH, 'dev_gold.csv')
+        dev_pred_path = os.path.join(RESULTS_PATH, 'dev_pred.csv')
+
+        # save gold (ground-truth) captions
+        df_gold = pd.DataFrame.from_dict(gold, orient="index")
+        df_gold.to_csv(dev_gold_path, sep='|', header=False)
+        # save predicted captions
+        df_pred = pd.DataFrame.from_dict(predicted, orient="index")
+        df_pred.to_csv(dev_pred_path, sep='|', header=False)
+        # score
+        scores = compute_scores(gts=dev_gold_path, res=dev_pred_path, scores_filename='dev_set_cnn_rnn_scores', save_scores=True)
+        print('CNN_RNN scores on the validation set')
+        pprint(scores)
+
+        # now evaluate our model on the test set
+        if which_dataset == 'iu_xray':
+            gold, predicted = cnn_rnn.evaluate_model(model=model_to_eval,
+                                                     test_captions=test[1],
+                                                     test_images=test[0],
+                                                     test_tags=test[2],
+                                                     eval_dataset=which_dataset,
+                                                     evaluator_choice=generate_choice)
+        else:
+            gold, predicted = cnn_rnn.evaluate_model(model=model_to_eval,
+                                                     test_captions=test[1],
+                                                     test_images=test[0],
+                                                     test_tags=None,
+                                                     eval_dataset=which_dataset,
+                                                     evaluator_choice=generate_choice)
+        # get the results paths for our results dataframes
+        test_gold_path = os.path.join(RESULTS_PATH, 'test_gold.csv')
+        test_pred_path = os.path.join(RESULTS_PATH, 'test_pred.csv')
+        # save gold (ground-truth) captions
+        df_gold = pd.DataFrame.from_dict(gold, orient="index")
+        df_gold.to_csv(test_gold_path, sep='|', header=False)
+        # save predicted captions
+        df_pred = pd.DataFrame.from_dict(predicted, orient="index")
+        df_pred.to_csv(test_pred_path, sep='|', header=False)
+        # score
+        scores = compute_scores(gts=test_gold_path, res=test_pred_path, scores_filename='test_set_cnn_rnn_scores', save_scores=True)
+        print('CNN_RNN scores on the test set')
+        pprint(scores)
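+
+    # A minimal sketch of greedy sampling (--sample_method greedy), included
+    # for illustration only and never called: the real decoding lives inside
+    # CNN_RNN.evaluate_model. It assumes the model maps an (image vector,
+    # padded token sequence) pair to a next-word distribution, that image_vec
+    # already carries a batch dimension, and that 'startseq'/'endseq' delimit
+    # a report; all of these are assumptions, not verified internals.
+    @staticmethod
+    def _greedy_decode_sketch(model:Model, word2idx:dict, idx2word:dict, image_vec, max_length:int) -> str:
+        import numpy as np
+        caption = 'startseq'
+        for _ in range(max_length):
+            # encode the partial caption and pad it to the model's input length
+            seq = [word2idx[w] for w in caption.split() if w in word2idx]
+            seq = tensorflow.keras.preprocessing.sequence.pad_sequences([seq], maxlen=max_length)
+            # greedy step: pick the most probable next word
+            y_hat = model.predict([image_vec, seq], verbose=0)
+            word = idx2word.get(int(np.argmax(y_hat)))
+            if word is None or word == 'endseq':
+                break
+            caption += ' ' + word
+        return caption.replace('startseq', '').strip()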
+
+
+    def run_process(self) -> None:
+        """ Begins the whole process according to the user settings.
+        It runs the selected model (CNN-RNN or k-NN) on the selected dataset.
+        More details for each of these models are provided in my Thesis.
+        """
+        args = self.parser.parse_args()
+        which_dataset = args.dataset
+        employed_model = args.model_choice
+
+        # case IU X-Ray
+        if which_dataset == "iu_xray":
+            image_vecs, captions, tags = self.__load_iuxray_data()
+            iu_xray_dataset = self.__create_iu_xray_dataset(image_vecs, captions, tags)
+
+            # case CNN-RNN
+            if employed_model == 'cnn_rnn':
+
+                # train the CNN-RNN model
+                cnn_rnn, trained_model = self.train_cnn_rnn(dataset=iu_xray_dataset)
+
+                # evaluate the model on the validation and test sets
+                self.eval_cnn_rnn(cnn_rnn=cnn_rnn, model_to_eval=trained_model, dataset=iu_xray_dataset)
+            else:
+                # case k-NN
+                k = args.k
+                multi_modal = args.multi_modal
+                kNN = KNN(dataset=iu_xray_dataset, k=k, similarity_function='cosine', text_model='clinical_bert')
+                # init the results path
+                results_path = os.path.join(RESULTS_PATH, f'iuxray_{k}-NN_test_captions.csv')
+                # and execute the k-NN algorithm
+                kNN.run_algo(multi_modal=multi_modal, results_dir_path=results_path)
+        else:
+            # case ImageCLEF
+            image_vecs, captions = self.__load_imageclef_data()
+            imageCLEF_dataset = self.__create_imageCLEF_dataset(image_vecs, captions)
+
+            # case CNN-RNN
+            if employed_model == 'cnn_rnn':
+
+                # train the CNN-RNN model
+                cnn_rnn, trained_model = self.train_cnn_rnn(dataset=imageCLEF_dataset)
+
+                # evaluate the model on the validation and test sets
+                self.eval_cnn_rnn(cnn_rnn=cnn_rnn, model_to_eval=trained_model, dataset=imageCLEF_dataset)
+            else:
+                # case k-NN
+                k = args.k
+                kNN = KNN(dataset=imageCLEF_dataset, k=k, similarity_function='cosine', text_model='clinical_bert')
+                # init the results path
+                results_path = os.path.join(RESULTS_PATH, f'imageclef_{k}-NN_test_captions.csv')
+                # and execute the k-NN algorithm
+                kNN.run_algo(results_dir_path=results_path)
+
+
+    def main(self) -> None:
+        """ Begins the process for this application
+        """
+        # flags for GPU and CPU usage
+        use_CPU, use_GPU, _ = self.__init_device()
+
+        if use_CPU:
+            logging.info('Using CPU')
+            # pin execution to the CPU device
+            with tensorflow.device("/device:CPU:0"):
+                self.run_process()
+        elif use_GPU:
+            logging.info('Using single GPU')
+            with tensorflow.device("/device:GPU:0"):
+                self.run_process()
+        else:
+            logging.info('Using multi GPU')
+            tensorflow.debugging.set_log_device_placement(True)
+            gpus = tensorflow.config.list_logical_devices("GPU")
+            # mirror the model across all logical GPUs
+            strategy = tensorflow.distribute.MirroredStrategy(gpus)
+            with strategy.scope():
+                self.run_process()
+
+
+if __name__ == '__main__':
+    logging.info(DATASET_PATH)
+    dc = DiagnosticCaptioning()
+    dc.main()
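+
+# Illustrative note (an assumption about KNN's internals, which live in
+# models): with similarity_function='cosine', the k-NN model is expected to
+# embed each test image, rank the training images by cosine similarity, and
+# copy the captions of the k nearest neighbours, roughly:
+#     sims = train_matrix @ q / (np.linalg.norm(train_matrix, axis=1) * np.linalg.norm(q))
+#     neighbours = np.argsort(sims)[::-1][:k]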