# modules/image_encoder.py

# os imports
import os
# numpy, progress bar and serialization imports
import numpy as np
from tqdm import tqdm
import pickle
# tensorflow imports
import tensorflow
# CNN image encoders
from tensorflow.keras.applications.densenet import DenseNet201 as dn201
from tensorflow.keras.applications.densenet import DenseNet121 as dn121
from tensorflow.keras.applications.densenet import DenseNet169 as dn169
from tensorflow.keras.applications.efficientnet import EfficientNetB0
from tensorflow.keras.applications.efficientnet import EfficientNetB5 as enb5
from tensorflow.keras.applications.efficientnet import EfficientNetB7 as enb7
from tensorflow.keras.applications.resnet_v2 import ResNet50V2 as rn50v2
from tensorflow.keras.applications.inception_resnet_v2 import InceptionResNetV2 as incres
# preprocessing functions
from tensorflow.keras.applications.densenet import preprocess_input as dense_preprocess
from tensorflow.keras.applications.efficientnet import preprocess_input as efficient_preprocess
from tensorflow.keras.applications.resnet_v2 import preprocess_input as resnet_preprocess
from tensorflow.keras.applications.inception_resnet_v2 import preprocess_input as inceptionresnet_preprocess
# layers imports
from tensorflow.keras.layers import Flatten, Dropout, Dense, Conv2D, MaxPooling2D, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image as img
# cotnet import (uncomment to enable the 'cotnet' encoder)
# from keras_cv_attention_models import cotnet


def load_encoded_vecs(filename: str) -> dict:
    """ Loads the image embeddings (one vector per image id) that we extracted offline.

    Args:
        filename (str): the full path of the pickle file

    Returns:
        dict: the encoded vectors loaded from filename
    """
    with open(filename, 'rb') as f:
        print("Image Encoded Vectors loaded from directory path:", filename)
        return pickle.load(f)


def save_encoded_vecs(image_vecs: dict, output_path: str, filename: str) -> None:
    """ Saves the encoded image vectors into a pickle file.

    Args:
        image_vecs (dict): the encoded image vectors extracted with the encode_images method
        output_path (str): the output directory in which to store the image embeddings
        filename (str): the name to use for the pickle file (ex. densenet201_image_vecs). The '.pkl' extension is appended automatically.
    """
    path = output_path + filename + '.pkl'
    with open(path, 'wb') as f:
        pickle.dump(image_vecs, f, pickle.HIGHEST_PROTOCOL)
    print("Image Encoded Vectors stored in:", path)


class ImageEncoder:
    def __init__(self, encoder: str, images_dir_path: str, weights: str = 'imagenet'):
        """ This class extracts image embeddings with different pretrained Keras CNNs.

        Args:
            encoder (str): the encoder name to use (ex. densenet201 for DenseNet201)
            images_dir_path (str): the directory containing the images we want to encode
            weights (str, optional): the pretrained weights to load into the model. Defaults to 'imagenet'.
        """
        self.encoder_weights = weights
        self.image_dir_path = images_dir_path
        # for each encoder we keep the output of its last average pooling layer
        if encoder == 'densenet201':
            self.image_shape = 224
            self.preprocess = 'densenet'
            model = dn201(include_top=True, weights=self.encoder_weights,
                          input_shape=(self.image_shape, self.image_shape, 3))
            self.image_encoder = Model(
                inputs=model.input, outputs=model.get_layer('avg_pool').output)
        elif encoder == 'densenet121':
            self.image_shape = 224
            self.preprocess = 'densenet'
            model = dn121(include_top=True, weights=self.encoder_weights,
                          input_shape=(self.image_shape, self.image_shape, 3))
            self.image_encoder = Model(
                inputs=model.input, outputs=model.get_layer('avg_pool').output)
        elif encoder == 'densenet169':
            self.image_shape = 224
            self.preprocess = 'densenet'
            model = dn169(include_top=True, weights=self.encoder_weights,
                          input_shape=(self.image_shape, self.image_shape, 3))
            self.image_encoder = Model(
                inputs=model.input, outputs=model.get_layer('avg_pool').output)
        elif encoder == 'efficientnet5':
            self.image_shape = 456
            self.preprocess = 'efficientnet'
            model = enb5(include_top=True, weights=self.encoder_weights,
                         input_shape=(self.image_shape, self.image_shape, 3))
            self.image_encoder = Model(
                inputs=model.input, outputs=model.get_layer('avg_pool').output)
        elif encoder == 'efficientnet0':
            self.image_shape = 224
            self.preprocess = 'efficientnet'
            model = EfficientNetB0(include_top=True, weights=self.encoder_weights,
                                   input_shape=(self.image_shape, self.image_shape, 3))
            self.image_encoder = Model(
                inputs=model.input, outputs=model.get_layer('avg_pool').output)
        elif encoder == 'resnet50v2':
            self.image_shape = 224
            self.preprocess = 'resnet'
            model = rn50v2(include_top=True, weights=self.encoder_weights,
                           input_shape=(self.image_shape, self.image_shape, 3))
            self.image_encoder = Model(
                inputs=model.input, outputs=model.get_layer('avg_pool').output)
        elif encoder == 'inceptionresnet':
            self.image_shape = 299
            self.preprocess = 'inceptionresnet'
            model = incres(include_top=True, weights=self.encoder_weights,
                           input_shape=(self.image_shape, self.image_shape, 3))
            self.image_encoder = Model(
                inputs=model.input, outputs=model.get_layer('avg_pool').output)
        elif encoder == 'cotnet':
            # requires the keras_cv_attention_models package (see the commented import above)
            self.image_shape = 224
            self.preprocess = 'cotnet'
            model = cotnet.CotNet50(pretrained="imagenet", num_classes=0)
            self.image_encoder = Model(
                inputs=model.input, outputs=model.output)
        else:
            raise ValueError(
                "You have to initialize a valid image encoder.\n"
                "Choices are: [densenet201, densenet121, densenet169, efficientnet0, "
                "efficientnet5, resnet50v2, inceptionresnet, cotnet]")

    def get_preprocessor(self) -> str:
        """ Gets the pre-processing function name

        Returns:
            str: The pre-processing name we initialized
        """
        return self.preprocess

    def get_image_shape(self) -> int:
        """ Gets the input shape

        Returns:
            int: The input shape for the employed encoder
        """
        return self.image_shape

    def get_image_encoder(self) -> Model:
        """ Gets the image encoder we built

        Returns:
            Model: The CNN encoder
        """
        return self.image_encoder

    def get_images_dirpath(self) -> str:
        """ Gets the directory path containing the images to encode

        Returns:
            str: The image directory path
        """
        return self.image_dir_path

    def encode(self, _image: str, verbose: int = 0) -> np.ndarray:
        """ Loads an image and passes it through the CNN encoder to extract its image embedding.

        Args:
            _image (str): The image id of the image we want to encode
            verbose (int, optional): Whether to display the extraction progress. Defaults to 0.

        Returns:
            np.ndarray: The encoded version of the given image
        """
        # case: CoTNet
        if self.get_preprocessor() == 'cotnet':
            image = img.load_img(self.image_dir_path + _image + '.jpg')
            image_array = img.img_to_array(image)
            imm = tensorflow.keras.applications.imagenet_utils.preprocess_input(image_array, mode='torch')
            image_encoded = self.image_encoder(
                tensorflow.expand_dims(tensorflow.image.resize(imm, self.image_encoder.input_shape[1:3]), 0)).numpy()
        else:
            # case: all other encoders
            # load the image and convert it to np.array
            image = img.load_img(self.image_dir_path + _image + '.jpg',
                                 target_size=(self.image_shape, self.image_shape))
            image_array = img.img_to_array(image)
            image_array = np.expand_dims(image_array, axis=0)
            # pre-process the array to match the employed encoder
            if self.get_preprocessor() == 'densenet':
                preprocessed_image_array = dense_preprocess(image_array)
            elif self.get_preprocessor() == 'efficientnet':
                preprocessed_image_array = efficient_preprocess(image_array)
            elif self.get_preprocessor() == 'resnet':
                preprocessed_image_array = resnet_preprocess(image_array)
            elif self.get_preprocessor() == 'inceptionresnet':
                preprocessed_image_array = inceptionresnet_preprocess(image_array)
            # extract the image embedding
            image_encoded = self.image_encoder.predict(preprocessed_image_array, verbose=verbose)
        return image_encoded

    def encode_images(self, images: list) -> dict:
        """ Takes a list of image ids and extracts their image embeddings.

        Args:
            images (list): Image IDs list

        Returns:
            dict: A dictionary mapping each image id to its encoded image vector
        """
        image_vecs = {_image: self.encode(_image) for _image in
                      tqdm(images, desc="Encoding images", position=0, leave=True)}
        return image_vecs
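

# Minimal usage sketch: assumes a directory of .jpg files named by image id
# (e.g. 'data/images/img_001.jpg'); the paths and ids below are placeholders,
# not part of this module's API.
if __name__ == "__main__":
    # build a DenseNet-201 encoder and embed two example images
    encoder = ImageEncoder(encoder='densenet201', images_dir_path='data/images/')
    image_vecs = encoder.encode_images(['img_001', 'img_002'])
    # persist the embeddings so they can be reloaded later with load_encoded_vecs
    save_encoded_vecs(image_vecs, 'data/embeddings/', 'densenet201_image_vecs')
    reloaded = load_encoded_vecs('data/embeddings/densenet201_image_vecs.pkl')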