# modules/image_encoder.py

# os imports
import os
# numpy, progress bar and serialization imports
import numpy as np
from tqdm import tqdm
import pickle
# tensorflow imports
import tensorflow
# CNN image encoders
from tensorflow.keras.applications.densenet import DenseNet201 as dn201
from tensorflow.keras.applications.densenet import DenseNet121 as dn121
from tensorflow.keras.applications.densenet import DenseNet169 as dn169
from tensorflow.keras.applications.efficientnet import EfficientNetB0
from tensorflow.keras.applications.efficientnet import EfficientNetB5 as enb5
from tensorflow.keras.applications.efficientnet import EfficientNetB7 as enb7
from tensorflow.keras.applications.resnet_v2 import ResNet50V2 as rn50v2
from tensorflow.keras.applications.inception_resnet_v2 import InceptionResNetV2 as incres
# preprocessing functions
from tensorflow.keras.applications.densenet import preprocess_input as dense_preprocess
from tensorflow.keras.applications.efficientnet import preprocess_input as efficient_preprocess
from tensorflow.keras.applications.resnet_v2 import preprocess_input as resnet_preprocess
from tensorflow.keras.applications.inception_resnet_v2 import preprocess_input as inceptionresnet_preprocess
# layers imports
from tensorflow.keras.layers import Flatten, Dropout, Dense, Conv2D, MaxPooling2D, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image as img
# cotnet import (uncomment to enable the 'cotnet' encoder)
# from keras_cv_attention_models import cotnet


def load_encoded_vecs(filename: str) -> dict:
    """ Loads the image embeddings (one vector per image id) that we extracted offline.

    Args:
        filename (str): the full path of the pickle file

    Returns:
        dict: the encoded vectors loaded from filename
    """
    with open(filename, 'rb') as f:
        print("Image Encoded Vectors loaded from directory path:", filename)
        return pickle.load(f)


def save_encoded_vecs(image_vecs: dict, output_path: str, filename: str) -> None:
    """ Saves the encoded image vectors into a pickle file.

    Args:
        image_vecs (dict): the encoded image vectors extracted with the encode_images method
        output_path (str): the output directory in which to store the image embeddings
        filename (str): the name to use for the pickle file (ex. densenet201_image_vecs). The '.pkl' extension is appended automatically.
    """
    path = output_path + filename + '.pkl'
    with open(path, 'wb') as f:
        pickle.dump(image_vecs, f, pickle.HIGHEST_PROTOCOL)
    print("Image Encoded Vectors stored in:", path)


class ImageEncoder:
    def __init__(self, encoder: str, images_dir_path: str, weights: str = 'imagenet'):
        """ This class extracts image embeddings with different pretrained Keras CNNs.

        Args:
            encoder (str): the encoder name to use (ex. densenet201 for DenseNet201)
            images_dir_path (str): the directory containing the images we want to encode
            weights (str, optional): the pretrained weights to load into the model. Defaults to 'imagenet'.
        """
        self.encoder_weights = weights
        self.image_dir_path = images_dir_path
        # for each encoder we keep the output of its last average pooling layer
        if encoder == 'densenet201':
            self.image_shape = 224
            self.preprocess = 'densenet'
            model = dn201(include_top=True, weights=self.encoder_weights,
                          input_shape=(self.image_shape, self.image_shape, 3))
            self.image_encoder = Model(
                inputs=model.input, outputs=model.get_layer('avg_pool').output)
        elif encoder == 'densenet121':
            self.image_shape = 224
            self.preprocess = 'densenet'
            model = dn121(include_top=True, weights=self.encoder_weights,
                          input_shape=(self.image_shape, self.image_shape, 3))
            self.image_encoder = Model(
                inputs=model.input, outputs=model.get_layer('avg_pool').output)
        elif encoder == 'densenet169':
            self.image_shape = 224
            self.preprocess = 'densenet'
            model = dn169(include_top=True, weights=self.encoder_weights,
                          input_shape=(self.image_shape, self.image_shape, 3))
            self.image_encoder = Model(
                inputs=model.input, outputs=model.get_layer('avg_pool').output)
        elif encoder == 'efficientnet5':
            self.image_shape = 456
            self.preprocess = 'efficientnet'
            model = enb5(include_top=True, weights=self.encoder_weights,
                         input_shape=(self.image_shape, self.image_shape, 3))
            self.image_encoder = Model(
                inputs=model.input, outputs=model.get_layer('avg_pool').output)
        elif encoder == 'efficientnet0':
            self.image_shape = 224
            self.preprocess = 'efficientnet'
            model = EfficientNetB0(include_top=True, weights=self.encoder_weights,
                                   input_shape=(self.image_shape, self.image_shape, 3))
            self.image_encoder = Model(
                inputs=model.input, outputs=model.get_layer('avg_pool').output)
        elif encoder == 'resnet50v2':
            self.image_shape = 224
            self.preprocess = 'resnet'
            model = rn50v2(include_top=True, weights=self.encoder_weights,
                           input_shape=(self.image_shape, self.image_shape, 3))
            self.image_encoder = Model(
                inputs=model.input, outputs=model.get_layer('avg_pool').output)
        elif encoder == 'inceptionresnet':
            self.image_shape = 299
            self.preprocess = 'inceptionresnet'
            model = incres(include_top=True, weights=self.encoder_weights,
                           input_shape=(self.image_shape, self.image_shape, 3))
            self.image_encoder = Model(
                inputs=model.input, outputs=model.get_layer('avg_pool').output)
        elif encoder == 'cotnet':
            # requires the keras_cv_attention_models package (see the commented import above)
            self.image_shape = 224
            self.preprocess = 'cotnet'
            model = cotnet.CotNet50(pretrained="imagenet", num_classes=0)
            self.image_encoder = Model(
                inputs=model.input, outputs=model.output)
        else:
            raise ValueError(
                "You have to initialize a valid image encoder.\n"
                "Choices are: [densenet201, densenet121, densenet169, efficientnet0, "
                "efficientnet5, resnet50v2, inceptionresnet, cotnet]")

    def get_preprocessor(self) -> str:
        """ Gets the pre-processing function name

        Returns:
            str: The pre-processing name we initialized
        """
        return self.preprocess

    def get_image_shape(self) -> int:
        """ Gets the input shape

        Returns:
            int: The input shape for the employed encoder
        """
        return self.image_shape

    def get_image_encoder(self) -> Model:
        """ Gets the image encoder we built

        Returns:
            Model: The CNN encoder
        """
        return self.image_encoder

    def get_images_dirpath(self) -> str:
        """ Gets the directory path containing the images to encode

        Returns:
            str: The image directory path
        """
        return self.image_dir_path

    def encode(self, _image: str, verbose: int = 0) -> np.ndarray:
        """ Loads an image and passes it through the CNN encoder to extract its image embedding.

        Args:
            _image (str): The image id of the image we want to encode
            verbose (int, optional): Whether to display the extraction progress. Defaults to 0.

        Returns:
            np.ndarray: The encoded version of the given image
        """
        # case: CoTNet
        if self.get_preprocessor() == 'cotnet':
            image = img.load_img(self.image_dir_path + _image + '.jpg')
            image_array = img.img_to_array(image)
            imm = tensorflow.keras.applications.imagenet_utils.preprocess_input(image_array, mode='torch')
            image_encoded = self.image_encoder(
                tensorflow.expand_dims(tensorflow.image.resize(imm, self.image_encoder.input_shape[1:3]), 0)).numpy()
        else:
            # case: all other encoders
            # load the image and convert it to np.array
            image = img.load_img(self.image_dir_path + _image + '.jpg',
                                 target_size=(self.image_shape, self.image_shape))
            image_array = img.img_to_array(image)
            image_array = np.expand_dims(image_array, axis=0)
            # pre-process the array to match the employed encoder
            if self.get_preprocessor() == 'densenet':
                preprocessed_image_array = dense_preprocess(image_array)
            elif self.get_preprocessor() == 'efficientnet':
                preprocessed_image_array = efficient_preprocess(image_array)
            elif self.get_preprocessor() == 'resnet':
                preprocessed_image_array = resnet_preprocess(image_array)
            elif self.get_preprocessor() == 'inceptionresnet':
                preprocessed_image_array = inceptionresnet_preprocess(image_array)
            # extract the image embedding
            image_encoded = self.image_encoder.predict(preprocessed_image_array, verbose=verbose)
        return image_encoded

    def encode_images(self, images: list) -> dict:
        """ Takes a list of image ids and extracts their image embeddings.

        Args:
            images (list): Image IDs list

        Returns:
            dict: A dictionary mapping each image id to its encoded image vector
        """
        image_vecs = {_image: self.encode(_image) for _image in
                      tqdm(images, desc="Encoding images", position=0, leave=True)}
        return image_vecs
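

# Minimal usage sketch: assumes a directory of .jpg files named by image id
# (e.g. 'data/images/img_001.jpg'); the paths and ids below are placeholders,
# not part of this module's API.
if __name__ == "__main__":
    # build a DenseNet-201 encoder and embed two example images
    encoder = ImageEncoder(encoder='densenet201', images_dir_path='data/images/')
    image_vecs = encoder.encode_images(['img_001', 'img_002'])
    # persist the embeddings so they can be reloaded later with load_encoded_vecs
    save_encoded_vecs(image_vecs, 'data/embeddings/', 'densenet201_image_vecs')
    reloaded = load_encoded_vecs('data/embeddings/densenet201_image_vecs.pkl')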