Diff of /CellDetector.py [000000] .. [5021a4]

Switch to unified view

a b/CellDetector.py
1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
"""
4
Created on Thu Jul  9 08:30:11 2020
5
6
@author: Billy
7
"""
8
9
import time
10
import os
11
import glob
12
import math
13
import skimage.measure as measure
14
from skimage.segmentation import watershed
15
import pandas as pd
16
from PIL import Image
17
import numpy as np
18
import matplotlib.pyplot as plt
19
import cv2
20
import random
21
22
23
24
class CellDetector:
25
    
26
    #initialise the Cell Detector
27
    #This class converts AI outputs into Datasets
28
    #one must initialise this class with the folder of the AI outputs
29
    def __init__(self, SaveLoc,outputs = 11, save_prefix = 'predicted'):
30
        assert type(SaveLoc) == type('')
31
        
32
        if not os.path.exists(SaveLoc):
33
            raise ValueError("Save directory is illegitimate:", SaveLoc,". Please enter the full filepath of an existing directory containing CellSegmentor's outputs.\n")
34
        else:
35
            os.chdir(SaveLoc)
36
            self.net_images = glob.glob("*.png") + glob.glob("*.tif") 
37
            if len(self.net_images) == 0:
38
                raise ValueError("Directory", SaveLoc," exists, but there are no Neural Network outputs in this location.")
39
            self.net_images = [k for k in self.net_images if save_prefix in k]
40
            if len(self.net_images) == 0:
41
                raise ValueError("Directory", SaveLoc," exists, but there are no valid images in this directory. Ensure \'", save_prefix,"\' appears in the save name for all Neural Network outputs. This should occur by default; please don't change file names.\n")
42
        
43
        print("Found the following outputs:", self.net_images,"\n")
44
        self.outputs = outputs
45
        self.save_fold = SaveLoc
46
        
47
        
48
        
49
    #this function detects all of the cells within a layer of an AI output image and return a dataframe of those cells' properties
50
    #this function recievees the file location of an image as input.
51
    #
52
    #layers: 0 = basal, 1= prickle, 2 = superficial
53
    def layerDetection(self, imdir, layer = 1, cell_only=True, by_nucleus=False,
54
                       plot =[], subplot_shape=(0,0), param_open =1, 
55
                       param_close = 0, param_erosion=3):
56
        
57
        assert type(imdir) == type('')
58
        
59
        if subplot_shape == (0,0):
60
            subplot_shape = (int(len(plot)/2)+1, 2)
61
        elif not(subplot_shape[0]*subplot_shape[1] == len(plot) or subplot_shape[0]*subplot_shape[1] == len(plot)-1):
62
            print(subplot_shape)
63
            print(subplot_shape[0]*subplot_shape[1])
64
            print(len(plot))
65
            print(subplot_shape[0]*subplot_shape[1] != len(plot) or subplot_shape[0]*subplot_shape[1] != len(plot)-1)
66
            print("Input Subplot shape is invalid. Automatically being changed to:",(int(len(plot)/2), 2))
67
            subplot_shape = (int(len(plot)/2)+1, 2)
68
        
69
        if not os.path.exists(imdir):
70
            raise ValueError("Image directory is illegitimate:", imdir,"\n")
71
            
72
        dicter = {}
73
        dicter[0] = 'Basal'
74
        dicter[1] = 'Prickle'
75
        dicter[2] = 'Superficial'
76
        
77
        if not (layer in dicter):
78
            raise ValueError("'Layer' arg must be an int between 0-2, corresponding to the layers like such:",dicter)
79
            
80
        parent_dir, file = os.path.split(imdir)
81
        imdata = np.array(Image.open(imdir).convert('L')).astype('uint8')
82
        
83
        if layer == 0:
84
            image = np.where(np.logical_or(imdata==255,imdata==230),255,0).astype('uint8')
85
            nuclei = np.where(imdata==255,255,0).astype('uint8')
86
        elif layer ==1:
87
            image = np.where(np.logical_or(imdata==204,imdata==179),255,0).astype('uint8')
88
            nuclei = np.where(imdata==204,255,0).astype('uint8')
89
        else:
90
            image = np.where(np.logical_or(imdata==153,imdata==128),255,0).astype('uint8')
91
            nuclei = np.where(imdata==128,255,0).astype('uint8')
92
    
93
        kernel = np.ones((3,3),np.uint8)
94
        opening = cv2.morphologyEx(image,cv2.MORPH_CLOSE,kernel, iterations = param_open)
95
        erosion = cv2.erode(opening, kernel,iterations = param_erosion)
96
        closing = cv2.morphologyEx(erosion,cv2.MORPH_OPEN,kernel, iterations = param_close)
97
        
98
        if not by_nucleus:
99
            ret, markers = cv2.connectedComponents(closing)
100
        else:
101
            nuclei = cv2.morphologyEx(nuclei,cv2.MORPH_OPEN,kernel, iterations = 1)
102
            ret, markers = cv2.connectedComponents(nuclei)
103
104
        # Marker labelling
105
        markers = markers.astype(np.int32)
106
        
107
        labels = watershed(image, markers, mask=cv2.morphologyEx(image,cv2.MORPH_OPEN,kernel, iterations = 1))
108
        
109
        plot_dict = {'AI':imdata,'Watershed':labels,'Original':np.asarray(Image.open(os.path.join(parent_dir,file[10:]))), 'Markers':markers, 'Nuclei':nuclei, 'Binary':image}
110
        
111
        if len(plot)==0:
112
            pass
113
        elif len(plot) == 1:
114
            if plot[0] in plot_dict:
115
                plt.plot(plot_dict[plot[0]])
116
                plt.title(plot[0])
117
            else:
118
                print("Your desired plot",plot[0],"is not available. Chose from this list:",plot_dict.keys())
119
        else:
120
            
121
            index = 0
122
            while not (plot[index] in plot_dict.keys()):
123
                index+=1
124
                
125
            ax1 = plt.subplot(subplot_shape[0],subplot_shape[1], 1)
126
            ax1.title.set_text(plot[index])
127
            ax1.imshow(plot_dict[plot[index]])
128
                
129
            for i in range(len(plot)):
130
                i = i+index
131
                try:
132
                    plt.subplot(subplot_shape[0],subplot_shape[1],i+1, sharex=ax1, sharey=ax1)
133
                    plt.imshow(plot_dict[plot[i]])
134
                    plt.title(plot[i])
135
                except:
136
                    print(plot[i],"is not available to plot, so has been skipped. Only choose values from:",plot_dict.keys())
137
        
138
        table = measure.regionprops(labels)
139
        num_cells = len(table)
140
141
        if not cell_only:
142
            table_nuclei = measure.regionprops(watershed(markers, markers, mask=markers))
143
            table.extend(table_nuclei)
144
145
        lister =[]
146
        if layer == 0:
147
            layer='Basal'
148
        elif layer == 1:
149
            layer = 'Prickle'
150
        else:
151
            layer = 'Superficial'
152
        
153
        for i,cell in enumerate(table):
154
            entry ={}
155
            if i+1>num_cells:
156
                entry['Type'] = 'nucleus'
157
            else:
158
                entry['Type'] = 'cell'
159
            circ = (4*np.pi*cell['Area'])/(cell['Perimeter']**2)
160
            if circ >100:
161
                circ = np.nan
162
            area = (cell['convex_area']/2)+cell['perimeter']
163
            entry['Origin_Image'] = os.path.splitext(file)[0]
164
            entry['Layer'] = layer
165
            entry['Identifier'] = cell['label']
166
            entry['Area'] = area
167
            if circ == np.nan:
168
                entry['Perimeter'] = cell['perimeter']*1.1
169
            else:
170
                entry['Perimeter'] = math.sqrt((2*area)/circ)
171
            entry['Centroid_y'] = cell['centroid'][0]
172
            entry['Centroid_x'] = cell['centroid'][1]
173
            entry['Solidity'] = cell['solidity']
174
            entry['Major axis diameter'] = cell['major_axis_length']/2
175
            entry['Minor axis diameter'] = cell['minor_axis_length']/2
176
            entry['Circularity'] = circ
177
            lister.append(entry)
178
            
179
        return lister
180
    
181
    
182
    #extracts all of the cell data from each layer in an image and saves a dataframe of that data
183
    def predictOne(self, imdir = None,isRandom = False,cell_only=True, isOpt=True):
184
    
185
        if isOpt: optimise = [(0,3,0),(1,0,1),(1,2,1),(2,0,2),(1,3,2),(4,1,3),(1,3,0)]
186
        if imdir == None:
187
            if isRandom:
188
                img_choice = random.randint(0,len(self.net_images)-1)
189
            else:
190
                choice_dict = {}
191
                for i,file in enumerate(self.net_images):
192
                    choice_dict[i] = file
193
                ask_str = "Choose one of the following images:\n"+ str(choice_dict)+ " \n"
194
                img_choice = int(input(ask_str))
195
                
196
            imdir = os.path.join(self.save_fold, self.net_images[img_choice])
197
        
198
        data = []
199
        log = []
200
        if isOpt:
201
            for j in range(3):
202
                data_store = {}
203
                arr_store = {}
204
                if j ==0:
205
                    area_boundary = 100
206
                else:
207
                    area_boundary = 250
208
                for arangement in optimise:
209
                    opening,erosion,closing = arangement
210
                    datum = cD.layerDetection(imdir, layer=j, cell_only=cell_only, by_nucleus=False,
211
                                              param_open=opening, param_close = closing, param_erosion = erosion)
212
                    key = len(list(filter(lambda d: d['Area'] > area_boundary, datum)))
213
                    
214
                    while key in arr_store.keys():
215
                        key = key+0.01
216
                    data_store[key] =datum
217
                    arr_store[key] = str(arangement)+" False"
218
                    
219
                    if j ==0:
220
                        datum = cD.layerDetection(imdir, layer=j, cell_only=cell_only, by_nucleus=True,
221
                                                  param_open=opening, param_close = closing, param_erosion = erosion)
222
                        key = len(list(filter(lambda d: d['Area'] > area_boundary, datum)))
223
                        while key in arr_store.keys():
224
                            key = key+0.01
225
                        data_store[key] = datum
226
                        arr_store[key] = str(arangement)+" True"
227
                key = np.max(list(arr_store.keys()))
228
                log.append("Layer "+str(j)+", arrangment: "+arr_store[key])
229
                data.extend(data_store[key])
230
        else:
231
            for i in range(len(self.net_images)): 
232
                imdir = os.path.join(self.save_fold, self.net_images[i])
233
                for j in range(3):
234
                    data.extend(cD.layerDetection(imdir, layer=j, cell_only=cell_only, by_nucleus=False))
235
            
236
        df = pd.DataFrame(data)
237
        save = os.path.join(self.save_fold, "nc_measurements_"+os.path.splitext(os.path.split(imdir)[1])[0]+'.xlsx')
238
        df.to_excel(save)
239
        return df
240
    
241
    
242
    #extracts all of the cell data from each layer in each image in a folder and saves a dataframe of that data
243
    #this function recieves the file location of a folder of images as input
244
    def predictAll(self, save_loc=None,cell_only=True, isOpt=False):
245
        #The typical most-succesful configurations of image-tuning parameters
246
        if isOpt: optimise = [(0,3,0),(1,0,1),(1,2,1),(2,0,2),(1,3,2),(4,1,3),(1,3,0)]
247
            
248
        if save_loc == None:
249
            save_loc = self.save_fold
250
            
251
        if not os.path.exists(save_loc):
252
            raise ValueError("Save directory is illegitimate:", save_loc,". Please enter the full filepath of an existing directory containing CellSegmentor's outputs.\n")
253
        log = []
254
        if isOpt:
255
            for i in range(len(self.net_images)): 
256
                data = []
257
                imdir = os.path.join(self.save_fold, self.net_images[i])
258
                print("Scraping cell data from:",imdir)
259
                for j in range(3):
260
                    data_store = {}
261
                    arr_store = {}
262
                    if j ==0:
263
                        area_boundary = 70
264
                    else:
265
                        area_boundary = 250
266
                    for arangement in optimise:
267
                        opening,erosion,closing = arangement
268
                        datum = cD.layerDetection(imdir, layer=j, cell_only=cell_only, by_nucleus=False,
269
                                                  param_open=opening, param_close = closing, param_erosion = erosion)
270
                        key = len(list(filter(lambda d: d['Area'] > area_boundary, datum)))
271
                        
272
                        while key in arr_store.keys():
273
                            key = key+0.01
274
                        data_store[key] =datum
275
                        arr_store[key] = str(arangement)+" False"
276
                        
277
                        if j ==0:
278
                            datum = cD.layerDetection(imdir, layer=j, cell_only=cell_only, by_nucleus=True,
279
                                                      param_open=opening, param_close = closing, param_erosion = erosion)
280
                            key = len(list(filter(lambda d: d['Area'] > area_boundary, datum)))
281
                            while key in arr_store.keys():
282
                                key = key+0.01
283
                            data_store[key] = datum
284
                            arr_store[key] = str(arangement)+" True"
285
                    key = np.max(list(arr_store.keys()))
286
                    log.append("Layer "+str(j)+", arrangment: "+arr_store[key])
287
                    data.extend(data_store[key])
288
                    
289
                df = pd.DataFrame(data)
290
                save = os.path.join(save_loc, "nc_measurements_"+os.path.splitext(self.net_images[i])[0]+'.xlsx')
291
                df.to_excel(save)
292
                print("Successfully scraped data. Saving to... ",save)
293
        else:
294
            for i in range(len(self.net_images)): 
295
                data = []
296
                imdir = os.path.join(self.save_fold, self.net_images[i])
297
                for j in range(3):
298
                    data.extend(cD.layerDetection(imdir, layer=j, cell_only=cell_only, by_nucleus=False))
299
                df = pd.DataFrame(data)
300
                save = os.path.join(save_loc, "nc_measurements_"+os.path.splitext(self.net_images[i])[0]+'.xlsx')
301
                df.to_excel(save)
302
                print("Successfully scraped data. Saving to... ",save)
303
        print(log)
304
        return df
305
    
306
    
307
    #aims to clean data, by destroying anaomalous results in a given dataset. This function recieves the file location of the dataset as input.
308
    # anomalous results are defined by having highly atypical cell area, perimeter and circularity characterisitics.
309
    def dataCleaner(self,save_loc = None,filename=None,cell_only=True, basal_circularity_bounds=(0.5,1.05), basal_area_bounds = (40,350),
310
                    prickle_circularity_bounds = (0.5,1.05),prickle_area_bounds=(60,1400), superficial_circularity_bounds=(0.5,1.05),
311
                    superficial_area_bounds=(60,1500), solidity_bound = 0.7):
312
        
313
            
314
        if save_loc == None:
315
            save_loc = self.save_fold
316
            
317
        if not os.path.exists(save_loc):
318
            raise ValueError("Save directory is illegitimate:", save_loc,". Please enter the full filepath of an existing directory containing CellSegmentor's outputs.\n")
319
        
320
        if filename != None:
321
            open_ = os.path.join(save_loc, filename)
322
            non_cleaned = [open_]
323
        else:
324
            os.chdir(save_loc)
325
            non_cleaned = glob.glob("*xlsx*")
326
            non_cleaned = [k for k in non_cleaned if 'nc_measurement' in k]
327
            
328
        
329
        print(non_cleaned)
330
        layers = ['Basal','Prickle','Superficial']
331
        cyto_bounds =[[basal_circularity_bounds,basal_area_bounds],
332
                       [prickle_circularity_bounds, prickle_area_bounds],
333
                       [superficial_circularity_bounds, superficial_area_bounds]]
334
        
335
        nuc_bounds = [[basal_circularity_bounds,(basal_area_bounds[0]/1.5,basal_area_bounds[1])],
336
                       [prickle_circularity_bounds, (prickle_area_bounds[0]/1.5, prickle_area_bounds[1]/1.5)],
337
                       [superficial_circularity_bounds, (superficial_area_bounds[0]*1.5,superficial_area_bounds[1]/1.5) ]]
338
        
339
        for image in non_cleaned:
340
            non_clean =  os.path.join(save_loc,image)
341
            df = pd.read_excel( non_clean,index=False)
342
            save_name = os.path.join(save_loc,"clean_data_"+os.path.splitext(image)[0].replace("nc_measurements_","")+'.xlsx')
343
            cell_store = {'Basal':0, 'Prickle':0, 'Superficial':0}
344
            for i,layer in enumerate(layers):
345
                cells = df[(df['Layer']==layer) & (df['Type'] == 'cell')]
346
                cells.reset_index(drop=True, inplace=True)
347
                cells = cells[(cells['Circularity']>cyto_bounds[i][0][0]) & (cells['Circularity']<cyto_bounds[i][0][1])]
348
                cells = cells[(cells['Area']>cyto_bounds[i][1][0]) & (cells['Area']<cyto_bounds[i][1][1])]
349
                cells = cells[(cells['Circularity']>cyto_bounds[i][0][0]*1.1) & (cells['Area']<cyto_bounds[i][1][1]*0.9)]
350
                cells = cells[(cells['Circularity']<cyto_bounds[i][0][1]*0.9) & (cells['Area']>cyto_bounds[i][1][0]*1.1)]
351
                cells = cells[((cells['Solidity']>solidity_bound) | (cells['Area']> 2*cyto_bounds[i][1][0]))]
352
                cell_store[layer] = cells
353
                
354
            if not cell_only:
355
                nuclei_store = {'Basal':0, 'Prickle':0, 'Superficial':0}
356
                for i,layer in enumerate(layers):
357
                    nuclei = df[(df['Layer']==layer) & (df['Type'] == 'nucleus')]
358
                    nuclei.reset_index(drop=True, inplace=True) 
359
                    nuclei = nuclei[(nuclei['Circularity']>nuc_bounds[i][0][0]) & (nuclei['Circularity']<nuc_bounds[i][0][1])]
360
                    nuclei = nuclei[(nuclei['Area']>nuc_bounds[i][1][0]) & (nuclei['Area']<nuc_bounds[i][1][1])]
361
                    nuclei = nuclei[(nuclei['Circularity']>nuc_bounds[i][0][0]*1.1) & (nuclei['Area']<nuc_bounds[i][1][1]*0.9)]
362
                    nuclei = nuclei[(nuclei['Circularity']<nuc_bounds[i][0][1]*0.9) & (nuclei['Area']>nuc_bounds[i][1][0]*1.1)]
363
                    nuclei = nuclei[((nuclei['Solidity']>solidity_bound) | (nuclei['Area']> 2*cyto_bounds[i][1][0]))]
364
                    nuclei_store[layer] = nuclei
365
                frames = list(cell_store.values())+list(nuclei_store.values())
366
            else:
367
                frames = list(cell_store.values())
368
            output_df = pd.concat(frames)
369
            output_df.to_excel(save_name)
370
            os.remove(non_clean)
371
    
372
        
373
if __name__ == '__main__':
374
    output_loc = "C:\\Users\\Billy\\Downloads\\Prepared_SVS"
375
    picLoc = os.path.join(output_loc, 'predicted_cropped_Normal buccal mucosa_0_mag20.png')
376
    
377
    cD = CellDetector(output_loc)
378
    time.sleep(2)
379
    
380
    cD.predictOne(picLoc)
381
382
    cD.predictAll(output_loc, isOpt=True, cell_only=False)
383
    
384
    cD.layerDetection(picLoc,layer=0, plot=['AI','Watershed','Original','Binary'],
385
                      by_nucleus=False, cell_only=True,subplot_shape=(2,2),
386
                      param_open=0,  param_erosion = 3, param_close = 0)
387
    
388
    data = cD.dataCleaner(output_loc, cell_only=False)
389
390
    
391
    
392