[0be6a8]: / bc-count / data.py

Download this file

477 lines (388 with data), 18.1 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
##############################################
# #
# Custom data generator #
# #
# Author: Amine Neggazi #
# Email: neggazimedlamine@gmail.com #
# Nick: nemo256 #
# #
# Please read bc-count/LICENSE #
# #
##############################################
import os
import json
import cv2
import numpy as np
import tensorflow as tf
from tensorflow import keras
# custom imports
from config import *
def load_image_list(img_files, gray=False):
    '''
    Read every file named in img_files with cv2.imread and collect
    the results in a list.
    :param img_files --> iterable of image file paths to read
    :param gray --> when True each file is read as grayscale and then
                    binarised (pixels above 127 become 255, the rest 0)
    :return imgs --> list with one entry per input path, in input order
    '''
    if not gray:
        return [cv2.imread(path) for path in img_files]

    binary_imgs = []
    for path in img_files:
        grey = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        # cv2.threshold returns (retval, image); only the image is kept
        _, binarised = cv2.threshold(grey, 127, 255, cv2.THRESH_BINARY)
        binary_imgs.append(binarised)
    return binary_imgs
def clahe_images(img_list):
    '''
    This is the clahe images function, which applies CLAHE (contrast
    limited adaptive histogram equalisation) to every image of the
    input list.
    Each image is converted BGR -> LAB, channel 0 of the LAB image is
    equalised, and the result is converted back to BGR.
    The list is modified in place.
    :param img_list --> the list of images to equalise (each one must be
                        accepted by cv2.cvtColor with COLOR_BGR2LAB)
    :return img_list --> the same list object, now holding the equalised images
    '''
    if len(img_list) == 0:
        return img_list

    # the operator does not depend on the image, so it is built once
    # instead of once per loop iteration
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    for i, img in enumerate(img_list):
        lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
        lab[..., 0] = clahe.apply(lab[..., 0])
        img_list[i] = cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)
    return img_list
def preprocess_image(imgs, padding=padding[1]):
    '''
    This is the preprocess image function, which surrounds every input
    image with a constant border (np.pad, mode='constant').
    The first two axes get `padding` entries on both sides, the third
    axis is left untouched, so every image must have three axes.
    :param imgs --> the input list of images.
    :param padding --> the border width added on each side of the first two axes.
    :return imgs --> a new list holding the padded images.
    '''
    border = (padding, padding)
    pad_width = (border, border, (0, 0))

    padded = []
    for img in imgs:
        padded.append(np.pad(img, pad_width, mode='constant'))
    return padded
def preprocess_data(imgs, mask, edge=None, padding=padding[1]):
    '''
    This is the preprocess data function, which surrounds the input
    images, masks and (when given) edges with a constant border
    (np.pad, mode='constant').
    Images are padded on their first two axes only (three axes expected),
    masks and edges are padded on both of their two axes.
    :param imgs --> the input list of images.
    :param mask --> the input list of masks.
    :param edge --> the input list of edges, or None when there are none.
    :param padding --> the border width added on each side.
    :return tuple --> (imgs, mask) when edge is None, otherwise (imgs, mask, edge),
                      each one a new list of padded arrays.
    '''
    border = (padding, padding)
    img_pad = (border, border, (0, 0))
    plane_pad = (border, border)

    padded_imgs = [np.pad(img, img_pad, mode='constant') for img in imgs]
    padded_mask = [np.pad(m, plane_pad, mode='constant') for m in mask]
    if edge is None:
        return padded_imgs, padded_mask

    padded_edge = [np.pad(e, plane_pad, mode='constant') for e in edge]
    return padded_imgs, padded_mask, padded_edge
def load_data(img_list, mask_list, edge_list=None, padding=padding[1]):
    '''
    This is the load data function, which reads the images, masks and
    (optionally) edges from disk and runs the preprocessing on them.
    Images go through load_image_list and clahe_images, masks and edges
    are loaded with gray=True, then everything is handed to preprocess_data.
    :param img_list --> list of input image files
    :param mask_list --> list of input mask files
    :param edge_list --> list of input edge files (None or empty means no edges)
    :param padding --> padding forwarded to preprocess_data
    :return tuple --> whatever preprocess_data returns: (imgs, mask) without
                      edges, (imgs, mask, edge) with edges.
    '''
    imgs = clahe_images(load_image_list(img_list))
    mask = load_image_list(mask_list, gray=True)
    edge = load_image_list(edge_list, gray=True) if edge_list else None
    return preprocess_data(imgs, mask, edge, padding=padding)
def load_image(img_list, padding=padding[1]):
    '''
    This is the load image function, which reads the given image files,
    equalises them with clahe_images and pads them with preprocess_image.
    :param img_list --> list of input image files
    :param padding --> padding forwarded to preprocess_image
    :return imgs --> the output preprocessed imgs.
    '''
    equalised = clahe_images(load_image_list(img_list))
    return preprocess_image(equalised, padding=padding)
def aug_lum(image, factor=None):
    '''
    This is the augment luminosity function, which multiplies the third
    HSV channel of the input image by a factor.
    The image is converted BGR -> HSV, channel 2 is scaled and capped
    at 255, then the image is converted back to BGR.
    :param image --> the input image we want to augment
    :param factor --> the multiplier applied to channel 2
                      (default: 0.5 + a uniform random number from np.random.uniform())
    :return image --> the output luminosity augmented image
    '''
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV).astype(np.float64)

    lum_offset = factor if factor is not None else 0.5 + np.random.uniform()

    # scale, then cap so the cast back to uint8 cannot overflow upwards
    hsv[..., 2] = np.minimum(hsv[..., 2] * lum_offset, 255)

    return cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2BGR)
def aug_img(image):
    '''
    This is the augment colors function, which multiplies each HSV
    channel of the given image by its own random gain.
    Gains: channel 0 in [0.8, 1.2), channels 1 and 2 in [0.5, 1.5).
    Every channel is capped at 255 before the cast back to uint8.
    :param image --> the input image we want to augment
    :return image --> the output colors augmented image
    '''
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV).astype(np.float64)

    # one gain per channel, drawn in channel order (hue, saturation, value)
    gains = (
        0.8 + 0.4*np.random.uniform(),
        0.5 + np.random.uniform(),
        0.5 + np.random.uniform(),
    )

    # NOTE(review): OpenCV documents the 8-bit hue channel as 0..179; the
    # 255 cap on channel 0 is kept as-is here -- confirm whether hue should
    # rather be capped or wrapped at 180.
    for channel, gain in enumerate(gains):
        hsv[..., channel] = np.minimum(hsv[..., channel] * gain, 255)

    return cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2BGR)
def train_generator(imgs, mask, edge=None,
                    scale_range=None,
                    padding=padding[1],
                    input_size=input_shape[0],
                    output_size=output_shape[0],
                    skip_empty=False):
    '''
    This is the train generator function, which endlessly yields randomly
    cropped and augmented training samples.
    For every sample: a random image and a random centre are picked, an
    input window (input_size * scale) and a label window (output_size * scale)
    are cut around that centre, resized back to input_size / output_size,
    rotated by a random multiple of 90 degrees, randomly flipped, colour
    augmented (aug_lum then aug_img) and finally rescaled with
    v * 2 / 255 - 1.
    :param imgs --> the input images (already padded)
    :param mask --> the input masks (same indexing as imgs)
    :param edge --> the input edges if there are any (red blood cells only)
    :param scale_range --> a single number s; the window scale is drawn
                           uniformly from [1 - s, 1 + s). None means scale 1.
    :param padding --> the padding that was applied to each image
    :param input_size --> side length of the yielded image chip
    :param output_size --> side length of the yielded mask / edge chip
    :param skip_empty --> when True, a candidate chip is rejected whenever
                          "its mask has more than 5 foreground pixels" equals a
                          coin flip drawn once per yielded sample
    :return chips --> yields (chip, mask) or, when edge is given,
                      (chip, (mask, edge)); mask and edge are 0/1 float arrays
                      with a trailing axis of length 1
    :raises ValueError --> (from np.random.randint) when an image is too small
                           to hold the requested window
    '''
    if scale_range is not None:
        scale_range = [1 - scale_range, 1 + scale_range]

    # minimum distance between a chip centre and the image border at scale 1
    center_offset = padding + (output_size / 2)

    while True:
        # coin flip used by the skip_empty filter; redrawn after every yield
        chip_type = bool(np.random.choice([True, False]))

        # keep drawing candidate chips until one is accepted
        while True:
            # pick random image
            i = np.random.randint(len(imgs))

            # scale the box randomly; the original code subtracted
            # scale_range[0] from itself, which pinned the scale to the
            # lower bound
            scale = 1
            if scale_range is not None:
                scale = scale_range[0] + ((scale_range[1] - scale_range[0]) * np.random.random())

            half_in = (input_size / 2) * scale
            half_out = (output_size / 2) * scale

            # pick random central location; the margin is an int (randint
            # expects integer bounds) and also covers an enlarged input
            # window so the crop can never leave the image
            margin = int(np.ceil(max(center_offset, half_in)))
            x = np.random.randint(margin, imgs[i].shape[0] - margin)
            y = np.random.randint(margin, imgs[i].shape[1] - margin)

            # take a slice of the image and mask accordingly
            temp_chip = imgs[i][int(x - half_in):int(x + half_in),
                                int(y - half_in):int(y + half_in)]
            temp_mask = mask[i][int(x - half_out):int(x + half_out),
                                int(y - half_out):int(y + half_out)]
            if edge is not None:
                temp_edge = edge[i][int(x - half_out):int(x + half_out),
                                    int(y - half_out):int(y + half_out)]

            if skip_empty:
                has_cells = bool((temp_mask > 0).sum() > 5)
                # compare by value, not by identity
                if has_cells == chip_type:
                    continue
            break

        # resize the image chip back to input_size and the labels to output_size
        temp_chip = cv2.resize(temp_chip,
                               (input_size, input_size),
                               interpolation=cv2.INTER_CUBIC)
        temp_mask = cv2.resize(temp_mask,
                               (output_size, output_size),
                               interpolation=cv2.INTER_NEAREST)
        if edge is not None:
            temp_edge = cv2.resize(temp_edge,
                                   (output_size, output_size),
                                   interpolation=cv2.INTER_NEAREST)

        # randomly rotate by a multiple of 90 degrees
        rot = np.random.randint(4)
        temp_chip = np.rot90(temp_chip, k=rot, axes=(0, 1))
        temp_mask = np.rot90(temp_mask, k=rot, axes=(0, 1))
        if edge is not None:
            temp_edge = np.rot90(temp_edge, k=rot, axes=(0, 1))

        # randomly flip along axis 1
        if np.random.random() > 0.5:
            temp_chip = np.flip(temp_chip, axis=1)
            temp_mask = np.flip(temp_mask, axis=1)
            if edge is not None:
                temp_edge = np.flip(temp_edge, axis=1)

        # randomly luminosity augment, then randomly augment all HSV channels
        temp_chip = aug_lum(temp_chip)
        temp_chip = aug_img(temp_chip)

        # rescale the image: v * 2 / 255 - 1
        temp_chip = temp_chip.astype(np.float32) * 2
        temp_chip /= 255
        temp_chip -= 1

        mask_chip = (temp_mask > 0).astype(float)[..., np.newaxis]
        if edge is not None:
            yield temp_chip, (mask_chip,
                              (temp_edge > 0).astype(float)[..., np.newaxis])
        else:
            yield temp_chip, mask_chip
def test_chips(imgs, mask,
               edge=None,
               padding=padding[1],
               input_size=input_shape[0],
               output_size=output_shape[0]):
    '''
    This is the test chips function, which generates the test dataset by
    sweeping a regular grid of chips over every image.
    Chip centres start at padding + output_size / 2 and advance by
    output_size along both axes while the input window still fits.
    Image chips are rescaled with v * 2 / 255 - 1; no augmentation is applied.
    Every grid position of every image is yielded (the same grid that the
    slice function collects); the stray `break` that used to follow the
    yield cut the sweep short and has been removed.
    :param imgs --> the input images (already padded)
    :param mask --> the input masks (same indexing as imgs)
    :param edge --> the input edges if there are any (red blood cells only)
    :param padding --> the padding that was applied to each image
    :param input_size --> side length of the yielded image chip
    :param output_size --> side length of the yielded mask / edge chip
    :return chips --> yields (chip, mask) or, when edge is given,
                      (chip, (mask, edge)); mask and edge are 0/1 float arrays
                      with a trailing axis of length 1
    '''
    half_in = input_size / 2
    half_out = output_size / 2
    center_offset = padding + half_out

    for i, img in enumerate(imgs):
        for x in np.arange(center_offset, img.shape[0] - half_in, output_size):
            for y in np.arange(center_offset, img.shape[1] - half_in, output_size):
                temp_chip = img[int(x - half_in):int(x + half_in),
                                int(y - half_in):int(y + half_in)]
                temp_mask = mask[i][int(x - half_out):int(x + half_out),
                                    int(y - half_out):int(y + half_out)]
                if edge is not None:
                    temp_edge = edge[i][int(x - half_out):int(x + half_out),
                                        int(y - half_out):int(y + half_out)]

                # rescale the image: v * 2 / 255 - 1
                temp_chip = temp_chip.astype(np.float32) * 2
                temp_chip /= 255
                temp_chip -= 1

                mask_chip = (temp_mask > 0).astype(float)[..., np.newaxis]
                if edge is not None:
                    yield temp_chip, (mask_chip,
                                      (temp_edge > 0).astype(float)[..., np.newaxis])
                else:
                    yield temp_chip, mask_chip
def slice_image(imgs,
                padding=padding[1],
                input_size=input_shape[0],
                output_size=output_shape[0]):
    '''
    This is the slice image function, which cuts each image into a
    regular grid of image chips.
    Chip centres start at padding + output_size / 2 and advance by
    output_size along both axes while the input window still fits.
    Every chip is rescaled with v * 2 / 255 - 1.
    :param imgs --> the input images (already padded)
    :param padding --> the padding that was applied to each image
    :param input_size --> side length of each image chip
    :param output_size --> grid step between neighbouring chip centres
    :return array --> np.array built from the list of all image chips
    '''
    half_in = input_size / 2
    center_offset = padding + (output_size / 2)

    img_chips = []
    for img in imgs:
        rows = np.arange(center_offset, img.shape[0] - half_in, output_size)
        cols = np.arange(center_offset, img.shape[1] - half_in, output_size)
        for x in rows:
            top, bottom = int(x - half_in), int(x + half_in)
            for y in cols:
                left, right = int(y - half_in), int(y + half_in)
                tile = img[top:bottom, left:right]
                img_chips.append(tile.astype(np.float32) * 2 / 255 - 1)
    return np.array(img_chips)
def slice(imgs, mask,
          edge=None,
          padding=padding[1],
          input_size=input_shape[0],
          output_size=output_shape[0]):
    '''
    This is the slice function, which cuts each image, mask and (when
    given) edge into a regular grid of chips.
    Chip centres start at padding + output_size / 2 and advance by
    output_size along both axes while the input window still fits.
    Image chips are rescaled with v * 2 / 255 - 1; mask and edge chips
    become 0/1 float arrays with a trailing axis of length 1.
    :param imgs --> the input images (already padded)
    :param mask --> the input masks (same indexing as imgs)
    :param edge --> the input edges if there are any (red blood cells only)
    :param padding --> the padding that was applied to each image
    :param input_size --> side length of each image chip
    :param output_size --> side length of each mask / edge chip and grid step
    :return tuple of arrays --> (img_chips, mask_chips) when edge is None,
                                otherwise (img_chips, mask_chips, edge_chips)
    '''
    with_edge = edge is not None
    half_in = input_size / 2
    half_out = output_size / 2
    center_offset = padding + half_out

    img_chips, mask_chips, edge_chips = [], [], []
    for i, img in enumerate(imgs):
        rows = np.arange(center_offset, img.shape[0] - half_in, output_size)
        cols = np.arange(center_offset, img.shape[1] - half_in, output_size)
        for x in rows:
            for y in cols:
                tile = img[int(x - half_in):int(x + half_in),
                           int(y - half_in):int(y + half_in)]
                img_chips.append(tile.astype(np.float32) * 2 / 255 - 1)

                label = mask[i][int(x - half_out):int(x + half_out),
                                int(y - half_out):int(y + half_out)]
                mask_chips.append((label > 0).astype(float)[..., np.newaxis])

                if with_edge:
                    border = edge[i][int(x - half_out):int(x + half_out),
                                     int(y - half_out):int(y + half_out)]
                    edge_chips.append((border > 0).astype(float)[..., np.newaxis])

    if with_edge:
        return np.array(img_chips), np.array(mask_chips), np.array(edge_chips)
    return np.array(img_chips), np.array(mask_chips)
def generator(img_list, mask_list, edge_list=None, type='train'):
    '''
    This is the generator function, which loads the images, masks and
    edges and wraps train_generator / test_chips in a tensorflow dataset.
    The module level cell_type decides the layout: 'rbc' uses images,
    masks and edges, 'wbc' and 'plt' use images and masks only.
    :param img_list --> the input list of image files
    :param mask_list --> the input list of mask files
    :param edge_list --> the input list of edge files (required for 'rbc')
    :param type --> 'train' (train_generator) or 'test' (test_chips)
    :return tensorflow dataset --> tf.data.Dataset.from_generator over the chosen generator
    :raises ValueError --> when type or cell_type is not one of the supported
                           values, or when cell_type is 'rbc' and no edge_list is given
    '''
    # validate up front: previously a bad type only failed once tensorflow
    # tried to iterate the None returned by gen()
    if type not in ('train', 'test'):
        raise ValueError(
            "type must be 'train' or 'test', got {!r}".format(type))

    if cell_type == 'rbc':
        if not edge_list:
            # load_data would return only two values and the unpacking
            # below would fail with an unrelated message
            raise ValueError("cell_type 'rbc' requires a non-empty edge_list")
        img, mask, edge = load_data(img_list, mask_list, edge_list)
    elif cell_type == 'wbc' or cell_type == 'plt':
        img, mask = load_data(img_list, mask_list)
        edge = None
    else:
        # previously this fell through and silently returned None
        raise ValueError(
            "cell_type must be 'rbc', 'wbc' or 'plt', got {!r}".format(cell_type))

    chip_source = train_generator if type == 'train' else test_chips

    def gen():
        return chip_source(img, mask, edge,
                           padding=padding[0],
                           input_size=input_shape[0],
                           output_size=output_shape[0])

    # load the dataset to tensorflow; the label is a (mask, edge) pair for
    # red blood cells and a single mask otherwise
    if cell_type == 'rbc':
        return tf.data.Dataset.from_generator(
            gen,
            (tf.float64, ((tf.float64), (tf.float64))),
            (input_shape, (output_shape, output_shape))
        )
    return tf.data.Dataset.from_generator(
        gen,
        (tf.float64, (tf.float64)),
        (input_shape, (output_shape))
    )