Diff of /bc-count/data.py [000000] .. [0be6a8]

Switch to unified view

a b/bc-count/data.py
1
##############################################
2
#                                            #
3
#           Custom data generator            #
4
#                                            #
5
# Author: Amine Neggazi                      #
6
# Email: neggazimedlamine@gmail/com          #
7
# Nick: nemo256                              #
8
#                                            #
9
# Please read bc-count/LICENSE               #
10
#                                            #
11
##############################################
12
13
import os
14
import json
15
16
import cv2
17
import numpy as np
18
import tensorflow as tf
19
from tensorflow import keras
20
21
# custom imports
22
from config import *
23
24
25
def load_image_list(img_files, gray=False):
    '''
    Read every file in img_files with OpenCV and collect the results.
    When gray is set, each file is read as a single channel image and
    binarised (pixels above 127 become 255, every other pixel becomes 0).
    :param img_files --> iterable of image file paths to read
    :param gray --> read as grayscale and threshold to a binary image

    :return imgs --> list holding one entry per file, in input order
    '''
    if not gray:
        # colour path: keep whatever cv2.imread hands back for each file
        return [cv2.imread(path) for path in img_files]

    binary_imgs = []
    for path in img_files:
        grey = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        # cv2.threshold returns (used threshold, image); only the image is kept
        _, binary = cv2.threshold(grey, 127, 255, cv2.THRESH_BINARY)
        binary_imgs.append(binary)
    return binary_imgs
44
45
46
def clahe_images(img_list):
    '''
    This is the clahe images function, which applies CLAHE (contrast
    limited adaptive histogram equalisation) to the lightness channel of
    every image of the input list. The list is modified in place: each
    entry is replaced by its equalised version.
    :param img_list --> the list of BGR images to equalise

    :return img_list --> the same list object, now holding the output images
    '''
    if len(img_list) == 0:
        # nothing to equalise, so no OpenCV object needs to be created
        return img_list

    # the operator does not depend on the image, so it is built only once
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))

    for i, img in enumerate(img_list):
        # only the first LAB channel (lightness) is equalised,
        # the two colour channels are converted back unchanged
        lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
        lab[..., 0] = clahe.apply(lab[..., 0])
        img_list[i] = cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)
    return img_list
61
62
63
def preprocess_image(imgs, padding=padding[1]):
    '''
    This is the preprocess image function, which surrounds every input
    image with a constant border of `padding` pixels on both sides of its
    first two axes. The third (channel) axis is left unpadded.
    :param imgs --> the input list of 3 dimensional images.
    :param padding --> the border width which is going to be applied.

    :return imgs --> new list of images with the padding added.
    '''
    # (before, after) widths for the row, column and channel axes
    pad_widths = ((padding, padding), (padding, padding), (0, 0))

    padded = []
    for img in imgs:
        padded.append(np.pad(img, pad_widths, mode='constant'))
    return padded
75
76
77
def preprocess_data(imgs, mask, edge=None, padding=padding[1]):
    '''
    This is the preprocess data function, which adds the same constant
    border to the input images, masks and (when given) edges.
    :param imgs --> the input list of 3 dimensional images.
    :param mask --> the input list of 2 dimensional masks.
    :param edge --> the input list of 2 dimensional edges (optional).
    :param padding --> the border width which is going to be applied.

    :return tuple --> (imgs, mask) or, when edge is given, (imgs, mask, edge), all padded.
    '''
    border = (padding, padding)
    # images carry a channel axis which must stay unpadded
    img_pad = (border, border, (0, 0))
    plane_pad = (border, border)

    padded_imgs = [np.pad(im, img_pad, mode='constant') for im in imgs]
    padded_mask = [np.pad(m, plane_pad, mode='constant') for m in mask]

    if edge is None:
        return padded_imgs, padded_mask

    padded_edge = [np.pad(e, plane_pad, mode='constant') for e in edge]
    return padded_imgs, padded_mask, padded_edge
100
101
102
def load_data(img_list, mask_list, edge_list=None, padding=padding[1]):
    '''
    This is the load data function, which reads the images, masks and
    optional edges from disk and hands them to preprocess_data.
    Images go through clahe_images; masks and edges are read as
    binarised grayscale.
    :param img_list --> list of input image files
    :param mask_list --> list of input mask files
    :param edge_list --> list of input edge files (skipped when empty or None)
    :param padding --> padding to be applied on preprocessing

    :return tuple --> the result of preprocess_data: (imgs, masks) or (imgs, masks, edges).
    '''
    imgs = clahe_images(load_image_list(img_list))
    mask = load_image_list(mask_list, gray=True)
    edge = load_image_list(edge_list, gray=True) if edge_list else None

    return preprocess_data(imgs, mask, edge, padding=padding)
122
123
124
def load_image(img_list, padding=padding[1]):
    '''
    This is the load image function, which reads the images from disk,
    runs them through clahe_images and pads them with preprocess_image.
    :param img_list --> list of input image files
    :param padding --> padding to be applied on preprocessing

    :return imgs --> the output preprocessed imgs.
    '''
    enhanced = clahe_images(load_image_list(img_list))
    return preprocess_image(enhanced, padding=padding)
135
136
137
def aug_lum(image, factor=None):
    '''
    This is the augment luminosity function, which multiplies the third
    HSV channel (value / brightness) of a BGR image by a factor.
    :param image --> the input BGR image we want to augment
    :param factor --> the brightness multiplier (default is 0.5 + a random
                      number drawn from [0, 1))

    :return image --> the output luminosity augmented BGR image
    '''
    # work in float so the multiplication cannot overflow before the cap
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV).astype(np.float64)

    gain = 0.5 + np.random.uniform() if factor is None else factor

    # scale the brightness and cap it at the 8 bit maximum
    hsv[..., 2] = np.minimum(hsv[..., 2] * gain, 255)

    return cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2BGR)
159
160
161
def aug_img(image):
    '''
    This is the augment colors function, which multiplies the hue,
    saturation and value channels of a BGR image by independent random
    factors (hue: 0.8 to 1.2, saturation and value: 0.5 to 1.5).
    :param image --> the input BGR image we want to augment

    :return image --> the output colors augmented BGR image
    '''
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV).astype(np.float64)

    # the three factors are drawn in hue, saturation, value order
    hue_gain = 0.8 + 0.4*np.random.uniform()
    sat_gain = 0.5 + np.random.uniform()
    val_gain = 0.5 + np.random.uniform()

    # NOTE(review): 8 bit OpenCV hue presumably spans 0-179, in which case
    # the 255 cap never trims the hue channel -- confirm whether a wrap
    # around (modulo 180) was intended for it.
    for channel, gain in enumerate((hue_gain, sat_gain, val_gain)):
        hsv[..., channel] = np.minimum(hsv[..., channel] * gain, 255)

    return cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2BGR)
187
188
189
def train_generator(imgs, mask, edge=None,
                    scale_range=None,
                    padding=padding[1],
                    input_size=input_shape[0],
                    output_size=output_shape[0],
                    skip_empty=False):
    '''
    This is the train generator function, which endlessly yields randomly
    cropped, rescaled, rotated, flipped and colour augmented training chips.
    :param imgs --> the input images
    :param mask --> the input masks
    :param edge --> the input edges if there are any (red blood cells only)
    :param scale_range --> half width r of the random zoom: every crop is taken
                           at a scale drawn from [1 - r, 1 + r) and resized back.
    :param padding --> the border width assumed around each image; chip centres
                       stay at least padding + output_size / 2 away from the border
    :param input_size --> the side length of the yielded image chips
    :param output_size --> the side length of the yielded mask and edge chips
    :param skip_empty --> when set, a crop is redrawn whenever "the mask chip has
                          more than 5 foreground pixels" equals a random coin flip

    :return chips --> yields (image chip, mask chip) or, when edge is given,
                      (image chip, (mask chip, edge chip)) on every iteration
    '''
    if scale_range is not None:
        scale_range = [1 - scale_range, 1 + scale_range]

    # chip centres stay this far away from every image border
    # (does not change between draws, so it is computed only once)
    center_offset = padding + (output_size / 2)

    while True:
        # coin flip used by skip_empty to decide which crops get redrawn
        chip_type = np.random.choice([True, False])

        while True:
            # pick random image
            i = np.random.randint(len(imgs))

            # pick random central location in the image
            x = np.random.randint(center_offset, imgs[i].shape[0] - center_offset)
            y = np.random.randint(center_offset, imgs[i].shape[1] - center_offset)

            # scale the box randomly between the two bounds of scale_range
            # (the previous formula subtracted the lower bound from itself,
            # which pinned the scale to the lower bound)
            # NOTE(review): with scale > 1 the crop reaches input_size / 2 * scale
            # away from the centre; confirm the configured padding leaves that
            # much room, otherwise the slice no longer covers the intended box.
            scale = 1
            if scale_range is not None:
                low, high = scale_range
                scale = low + ((high - low) * np.random.random())

            # find the edges of a box around the image chip and the mask chip
            chip_x_l = int(x - ((input_size / 2) * scale))
            chip_x_r = int(x + ((input_size / 2) * scale))
            chip_y_l = int(y - ((input_size / 2) * scale))
            chip_y_r = int(y + ((input_size / 2) * scale))

            mask_x_l = int(x - ((output_size / 2) * scale))
            mask_x_r = int(x + ((output_size / 2) * scale))
            mask_y_l = int(y - ((output_size / 2) * scale))
            mask_y_r = int(y + ((output_size / 2) * scale))

            # take a slice of the image and mask accordingly
            temp_chip = imgs[i][chip_x_l:chip_x_r, chip_y_l:chip_y_r]
            temp_mask = mask[i][mask_x_l:mask_x_r, mask_y_l:mask_y_r]
            if edge is not None:
                temp_edge = edge[i][mask_x_l:mask_x_r, mask_y_l:mask_y_r]

            if skip_empty:
                # compare by value: identity checks on numpy booleans only
                # work as long as numpy hands back its shared singletons
                has_cells = bool((temp_mask > 0).sum() > 5)
                if has_cells == bool(chip_type):
                    continue

            # resize the image chip back to input_size and the mask chip to output_size
            temp_chip = cv2.resize(temp_chip,
                                   (input_size, input_size),
                                   interpolation=cv2.INTER_CUBIC)
            # nearest neighbour keeps the masks binary
            temp_mask = cv2.resize(temp_mask,
                                   (output_size, output_size),
                                   interpolation=cv2.INTER_NEAREST)
            if edge is not None:
                temp_edge = cv2.resize(temp_edge,
                                       (output_size, output_size),
                                       interpolation=cv2.INTER_NEAREST)

            # randomly rotate by a multiple of 90 degrees
            rot = np.random.randint(4)
            temp_chip = np.rot90(temp_chip, k=rot, axes=(0, 1))
            temp_mask = np.rot90(temp_mask, k=rot, axes=(0, 1))
            if edge is not None:
                temp_edge = np.rot90(temp_edge, k=rot, axes=(0, 1))

            # randomly flip
            if np.random.random() > 0.5:
                temp_chip = np.flip(temp_chip, axis=1)
                temp_mask = np.flip(temp_mask, axis=1)
                if edge is not None:
                    temp_edge = np.flip(temp_edge, axis=1)

            # randomly luminosity augment
            temp_chip = aug_lum(temp_chip)

            # randomly augment chip
            temp_chip = aug_img(temp_chip)

            # rescale the image from [0, 255] to [-1, 1]
            temp_chip = temp_chip.astype(np.float32) * 2
            temp_chip /= 255
            temp_chip -= 1

            # targets are binary float maps with a trailing channel axis
            if edge is not None:
                yield temp_chip, ((temp_mask > 0).astype(float)[..., np.newaxis],
                                  (temp_edge > 0).astype(float)[..., np.newaxis])
            else:
                yield temp_chip, ((temp_mask > 0).astype(float)[..., np.newaxis])
            # one chip delivered: go back and flip a new coin
            break
293
294
295
def test_chips(imgs, mask,
               edge=None,
               padding=padding[1],
               input_size=input_shape[0],
               output_size=output_shape[0]):
    '''
    This is the test chips function, which generates the test dataset by
    walking a regular grid (step: output_size) over every image and
    yielding one chip per grid position.
    :param imgs --> the input images
    :param mask --> the input masks
    :param edge --> the input edges if there are any (red blood cells only)
    :param padding --> the border width assumed around each image; the first
                       chip centre sits at padding + output_size / 2
    :param input_size --> the side length of the yielded image chips
    :param output_size --> the side length of the yielded mask and edge chips

    :return chips --> yields (image chip, mask chip) or, when edge is given,
                      (image chip, (mask chip, edge chip)) for every grid position
    '''
    center_offset = padding + (output_size / 2)
    for i, _ in enumerate(imgs):
        for x in np.arange(center_offset, imgs[i].shape[0] - input_size / 2, output_size):
            for y in np.arange(center_offset, imgs[i].shape[1] - input_size / 2, output_size):
                chip_x_l = int(x - (input_size / 2))
                chip_x_r = int(x + (input_size / 2))
                chip_y_l = int(y - (input_size / 2))
                chip_y_r = int(y + (input_size / 2))

                mask_x_l = int(x - (output_size / 2))
                mask_x_r = int(x + (output_size / 2))
                mask_y_l = int(y - (output_size / 2))
                mask_y_r = int(y + (output_size / 2))

                temp_chip = imgs[i][chip_x_l:chip_x_r, chip_y_l:chip_y_r]
                temp_mask = mask[i][mask_x_l:mask_x_r, mask_y_l:mask_y_r]
                if edge is not None:
                    temp_edge = edge[i][mask_x_l:mask_x_r, mask_y_l:mask_y_r]

                # rescale the image from [0, 255] to [-1, 1]
                temp_chip = temp_chip.astype(np.float32) * 2
                temp_chip /= 255
                temp_chip -= 1

                # no break after the yield: leaving the loop here would drop
                # every chip of the row except the first one
                if edge is not None:
                    yield temp_chip, ((temp_mask > 0).astype(float)[..., np.newaxis],
                                      (temp_edge > 0).astype(float)[..., np.newaxis])
                else:
                    yield temp_chip, ((temp_mask > 0).astype(float)[..., np.newaxis])
340
341
342
def slice_image(imgs,
                padding=padding[1],
                input_size=input_shape[0],
                output_size=output_shape[0]):
    '''
    This is the slice image function, which cuts each image into square
    chips of side input_size whose centres lie on a regular grid
    (step: output_size), rescaled from [0, 255] to [-1, 1].
    :param imgs --> the input images
    :param padding --> the border width assumed around each image; the first
                       chip centre sits at padding + output_size / 2
    :param input_size --> the side length of the image chips
    :param output_size --> the distance between two neighbouring chip centres

    :return array --> numpy array stacking all image chips of all images
    '''
    half_in = input_size / 2
    first_center = padding + (output_size / 2)

    chips = []
    for img in imgs:
        row_centers = np.arange(first_center, img.shape[0] - half_in, output_size)
        col_centers = np.arange(first_center, img.shape[1] - half_in, output_size)
        for cx in row_centers:
            top, bottom = int(cx - half_in), int(cx + half_in)
            for cy in col_centers:
                left, right = int(cy - half_in), int(cy + half_in)
                window = img[top:bottom, left:right]
                # float32 chip mapped from [0, 255] to [-1, 1]
                chips.append(window.astype(np.float32) * 2 / 255 - 1)
    return np.array(chips)
374
375
376
def slice(imgs, mask,
          edge=None,
          padding=padding[1],
          input_size=input_shape[0],
          output_size=output_shape[0]):
    '''
    This is the slice function, which cuts each image, mask and (when
    given) edge into chips whose centres lie on a regular grid
    (step: output_size). Image chips have side input_size and are
    rescaled to [-1, 1]; mask and edge chips have side output_size and
    are turned into binary float maps with a trailing channel axis.
    :param imgs --> the input images
    :param mask --> the input masks
    :param edge --> the input edges if there are any (red blood cells only)
    :param padding --> the border width assumed around each image; the first
                       chip centre sits at padding + output_size / 2
    :param input_size --> the side length of the image chips
    :param output_size --> the side length of the mask and edge chips

    :return tuple --> (image chips, mask chips) or, when edge is given,
                      (image chips, mask chips, edge chips), each a numpy array
    '''
    with_edge = edge is not None
    half_in = input_size / 2
    half_out = output_size / 2
    first_center = padding + half_out

    img_chips, mask_chips, edge_chips = [], [], []
    for idx, img in enumerate(imgs):
        row_centers = np.arange(first_center, img.shape[0] - half_in, output_size)
        col_centers = np.arange(first_center, img.shape[1] - half_in, output_size)
        for cx in row_centers:
            in_top, in_bottom = int(cx - half_in), int(cx + half_in)
            out_top, out_bottom = int(cx - half_out), int(cx + half_out)
            for cy in col_centers:
                in_left, in_right = int(cy - half_in), int(cy + half_in)
                out_left, out_right = int(cy - half_out), int(cy + half_out)

                # image chip: float32 mapped from [0, 255] to [-1, 1]
                window = img[in_top:in_bottom, in_left:in_right]
                img_chips.append(window.astype(np.float32) * 2 / 255 - 1)

                # target chips: centred on the same point, only output_size wide
                mask_window = mask[idx][out_top:out_bottom, out_left:out_right]
                mask_chips.append((mask_window > 0).astype(float)[..., np.newaxis])
                if with_edge:
                    edge_window = edge[idx][out_top:out_bottom, out_left:out_right]
                    edge_chips.append((edge_window > 0).astype(float)[..., np.newaxis])

    if with_edge:
        return np.array(img_chips), np.array(mask_chips), np.array(edge_chips)

    return np.array(img_chips), np.array(mask_chips)
434
435
436
def generator(img_list, mask_list, edge_list=None, type='train'):
    '''
    This is the generator function, which loads the image, mask and edge
    files and wraps train_generator or test_chips in a tensorflow dataset.
    Which targets are produced depends on the global cell_type:
    'rbc' uses masks and edges, 'wbc' and 'plt' use masks only.
    :param img_list --> the input list of images
    :param mask_list --> the input list of masks
    :param edge_list --> the input list of edges if there are any
    :param type --> can be either train or test, used to determine which generator function is to be called

    :return tensorflow dataset --> the dataset built from the chosen generator
                                   (None when cell_type is none of the above)
    '''
    masks_only = cell_type in ('wbc', 'plt')

    # NOTE(review): load_data runs with its default padding (padding[1])
    # while the chip generators below receive padding[0] -- confirm that
    # this difference is intended.
    if cell_type == 'rbc':
        img, mask, edge = load_data(img_list, mask_list, edge_list)
    elif masks_only:
        img, mask = load_data(img_list, mask_list)
        edge = None

    def gen():
        # settings shared by both chip sources, read when the dataset starts iterating
        chip_args = dict(padding=padding[0],
                         input_size=input_shape[0],
                         output_size=output_shape[0])
        if type == 'train':
            return train_generator(img, mask, edge, **chip_args)
        if type == 'test':
            return test_chips(img, mask, edge, **chip_args)

    # describe what gen yields: (image chip, targets)
    if cell_type == 'rbc':
        out_types = (tf.float64, (tf.float64, tf.float64))
        out_shapes = (input_shape, (output_shape, output_shape))
    elif masks_only:
        out_types = (tf.float64, tf.float64)
        out_shapes = (input_shape, output_shape)
    else:
        return None

    # load the dataset to tensorflow
    return tf.data.Dataset.from_generator(gen, out_types, out_shapes)