Diff of /DataPreparation.py [000000] .. [5021a4]

Switch to unified view

a b/DataPreparation.py
1
# -*- coding: utf-8 -*-
2
"""
3
Created on Fri Jul 10 13:14:31 2020
4
5
@author: Billy
6
"""
7
8
import slideio
9
import glob
10
import os
11
import time
12
import numpy as np
13
import gc
14
import progressbar
15
from PIL import Image
16
from skimage import filters
17
18
19
class DataPreparation:
    """Generate cropped .png images from a folder of .svs slides.

    The aim of this class is to produce .png images at (by default) x20
    magnification from .svs slides, whilst cutting out as much background
    and non-epithelial area as possible.

    Attributes set by ``__init__``:
        svs_loc:    folder that contains the .svs files.
        svs_images: sorted list of the .svs file names found in ``svs_loc``.
        png_fold:   folder that receives the .png images and the log file.
    """

    # A pixel counts as content in BackgroundReducer when its darkest channel
    # is below this level.
    _CONTENT_LEVEL = 220
    # Runs shorter than this fraction of the profile length are merged into a
    # neighbouring run by ObjectSplitter.
    _SHORT_RUN_FRACTION = 0.02
    # A neighbouring object run must be longer than this fraction of the
    # profile length to be preferred as the merge target.
    _ANCHOR_RUN_FRACTION = 0.05

    def __init__(self, svs_loc, png_loc=None):
        """Locate the .svs files and make sure the output folder exists.

        Args:
            svs_loc: path (str) of an existing folder containing .svs files.
            png_loc: output folder. Defaults to a ``Prepared_SVS`` folder
                that is a sibling of ``svs_loc``. Created when missing.

        Raises:
            TypeError: ``svs_loc`` is not a string.
            ValueError: ``svs_loc`` is not an existing directory, or it
                contains no .svs files.
        """
        if not isinstance(svs_loc, str):
            raise TypeError(
                "svs_loc must be a string, got {}".format(type(svs_loc).__name__))

        if not os.path.isdir(svs_loc):
            raise ValueError(
                "SVS directory is illegitimate: {}. Please enter the full "
                "filepath of an existing directory containing .svs "
                "files.".format(svs_loc))

        # The folder is listed without os.chdir(): changing the working
        # directory of the whole process would silently break any relative
        # path that is resolved afterwards.
        self.svs_images = self._list_svs_images(svs_loc)
        if not self.svs_images:
            raise ValueError(
                "Directory {} exists, but there are no .svs images in this "
                "location.".format(svs_loc))
        self.svs_loc = svs_loc

        print("Found the following outputs:", self.svs_images, "\n")

        if png_loc is None:
            # abspath() drops any trailing separator, so the parent really is
            # the folder that contains svs_loc.
            parent_dir = os.path.dirname(os.path.abspath(svs_loc))
            png_loc = os.path.join(parent_dir, "Prepared_SVS")

        os.makedirs(png_loc, exist_ok=True)
        self.png_fold = png_loc

    @staticmethod
    def _list_svs_images(svs_loc):
        """Return the sorted file names (not paths) of the .svs files in svs_loc."""
        pattern = os.path.join(glob.escape(svs_loc), "*.svs")
        return sorted(os.path.basename(path) for path in glob.glob(pattern))

    def AutocropAll(self, svs_loc=None, png_loc=None, max_mag=20):
        """Crop every slide into per-object .png images and write a log file.

        For each .svs file a sub-folder named after the slide is created in
        ``png_loc``. Detected objects are saved there as
        ``<name>_<j>_<k>_mag<mag>.png`` where ``j`` numbers the column band
        and ``k`` the object inside that band. Files that already exist are
        not regenerated. ``log.txt`` in ``png_loc`` is overwritten and
        receives one record per slide describing the down-sampled image.

        Args:
            svs_loc: folder containing the slides. Defaults to the folder
                given to ``__init__``; when given explicitly the folder is
                listed again so that the file names match it.
            png_loc: output folder. Defaults to ``self.png_fold``.
            max_mag: the slide is halved in size until its magnification is
                no larger than this value.

        Raises:
            ValueError: ``svs_loc`` is not an existing directory or holds no
                .svs files.
        """
        if svs_loc is None:
            svs_loc = self.svs_loc
            svs_images = self.svs_images
        else:
            if not os.path.isdir(svs_loc):
                raise ValueError(
                    "SVS directory is illegitimate: {}. Please enter the "
                    "full filepath of an existing directory containing .svs "
                    "files.".format(svs_loc))
            svs_images = self._list_svs_images(svs_loc)
            if not svs_images:
                raise ValueError(
                    "Directory {} exists, but there are no .svs images in "
                    "this location.".format(svs_loc))

        if png_loc is None:
            png_loc = self.png_fold

        if not os.path.exists(png_loc):
            print("File Location", png_loc, "does not exist. Making this directory...")
            os.makedirs(png_loc)
            print("Successfully create .png save directory.")

        widgets = [
            'Cropping: ', progressbar.Percentage(),
            ' ', progressbar.AnimatedMarker(),
            ' ', progressbar.ETA(),
        ]
        bar = progressbar.ProgressBar(
            widgets=widgets,
            maxval=len(svs_images)).start()

        log_loc = os.path.join(png_loc, 'log.txt')
        with open(log_loc, 'w') as log_file:
            for i, svs_name in enumerate(svs_images):
                bar.update(i)
                self._crop_slide(svs_loc, svs_name, png_loc, max_mag, log_file)
        bar.finish()

    def _crop_slide(self, svs_loc, svs_name, png_loc, max_mag, log_file):
        """Down-sample one slide, log its geometry and save its objects."""
        name, _ext = os.path.splitext(svs_name)

        slide = slideio.open_slide(os.path.join(svs_loc, svs_name), 'SVS')
        scene = slide.get_scene(0)
        mag = scene.magnification
        pixel_size = scene.resolution[0]
        _, _, width, height = scene.rect

        img_fold = os.path.join(png_loc, name)
        os.makedirs(img_fold, exist_ok=True)

        # Halve the requested size until the magnification is small enough;
        # each halving doubles the size covered by one pixel.
        while mag > max_mag:
            width = int(np.round(width / 2))
            height = int(np.round(height / 2))
            mag = mag / 2
            pixel_size = pixel_size * 2

        image = scene.read_block(scene.rect, (width, height))
        # Darkest channel per pixel, used as the grey-level image.
        image_data_bw = image.min(axis=2)

        information = {}
        information['ImageName'] = name
        information['Magnification'] = mag
        information['ImagePixelHeight'] = height
        information['ImagePixelWidth'] = width
        information['PixelSizeMeters'] = pixel_size
        log_file.write(str(information))
        log_file.write(' \n ')
        log_file.write('#####')
        log_file.write(' \n ')

        # axis=0 sums over rows, so these pairs are column ranges.
        column_bands = self.ObjectSplitter(image_data_bw, axis=0)
        for j, (col_start, col_stop) in enumerate(column_bands):
            # Every row of the band is kept; slicing rows by a dimension read
            # from the array shape would truncate tall images.
            band = image_data_bw[:, col_start:col_stop]
            row_bands = self.ObjectSplitter(band, axis=1)

            for k, (row_start, row_stop) in enumerate(row_bands):
                # k is part of the name so that several objects found in the
                # same column band do not collide on one file name.
                file_name = "{}_{}_{}_mag{}.png".format(name, j, k, int(mag))
                full_path = os.path.join(img_fold, file_name)
                if not os.path.exists(full_path):
                    self.BackgroundReducer(
                        image[row_start:row_stop, col_start:col_stop],
                        full_path)

    def ObjectSplitter(self, image_arr, percentage_threshold=2, axis=0):
        """Return (start, stop) index pairs of the objects found along an axis.

        The image is thresholded with Otsu's method and the pixels below the
        threshold are counted along ``axis``. Entries of the resulting
        profile are classed as object (1) or background (0), very short runs
        are merged into a neighbour, and the remaining object runs are
        returned as half-open index ranges into the other axis.

        Args:
            image_arr: 2-D grey-level image.
            percentage_threshold: a profile entry is background when its
                count is below this percentage of ``n`` (default 2%).
            axis: axis that is summed over (0 gives column ranges, 1 gives
                row ranges).

        Returns:
            A list of ``(start, stop)`` tuples; empty for an empty image.
        """
        image_arr = np.asarray(image_arr)
        if image_arr.size == 0:
            return []

        val = filters.threshold_otsu(image_arr)
        profile = np.sum(image_arr < val, axis=axis)
        n = profile.shape[0]
        # NOTE(review): n is the length of the profile, not the number of
        # pixels summed into each entry (image_arr.shape[axis]); confirm that
        # this denominator is the intended one before changing it.
        occupied = np.where(profile < (percentage_threshold / 100) * n, 0, 1)

        run_starts, run_lengths, run_values = self._run_length_encode(occupied)
        return self._merge_short_runs(run_starts, run_lengths, run_values, n)

    @staticmethod
    def _run_length_encode(values):
        """Return (starts, lengths, values) lists of the runs in a 1-D array."""
        n = values.shape[0]
        is_run_start = np.empty(n, dtype=bool)
        is_run_start[0] = True
        np.not_equal(values[:-1], values[1:], out=is_run_start[1:])
        starts = np.nonzero(is_run_start)[0]
        lengths = np.diff(np.append(starts, n))
        return starts.tolist(), lengths.tolist(), values[is_run_start].tolist()

    @classmethod
    def _merge_short_runs(cls, run_starts, run_lengths, run_values, n):
        """Merge short runs into neighbours and return the object ranges.

        A run shorter than ``_SHORT_RUN_FRACTION * n`` (other than the first
        run) is absorbed by the previous run when that is a long object run,
        otherwise by the next run when that is a long object run, otherwise
        by the previous run. A short final run that the previous run does not
        claim is left alone.

        Returns:
            ``(start, stop)`` tuples for every remaining run whose value is 1.
        """
        starts = list(run_starts)
        lengths = list(run_lengths)
        values = list(run_values)
        short_limit = cls._SHORT_RUN_FRACTION * n
        anchor_limit = cls._ANCHOR_RUN_FRACTION * n

        idx = 0
        while idx < len(starts):
            if idx == 0 or lengths[idx] >= short_limit:
                idx += 1
                continue

            prev_is_anchor = lengths[idx - 1] > anchor_limit and values[idx - 1] == 1
            if not prev_is_anchor:
                if idx >= len(starts) - 1:
                    idx += 1
                    continue
                if lengths[idx + 1] > anchor_limit and values[idx + 1] == 1:
                    # The next run grows backwards, so it must also take over
                    # the start of the run it absorbs; otherwise its range
                    # would be shifted past its real end.
                    starts[idx + 1] = starts[idx]
                    lengths[idx + 1] += lengths[idx]
                    del starts[idx], lengths[idx], values[idx]
                    continue

            # Absorb into the previous run. idx is not advanced because the
            # following run has moved into this position.
            lengths[idx - 1] += lengths[idx]
            del starts[idx], lengths[idx], values[idx]

        return [
            (start, start + length)
            for start, length, value in zip(starts, lengths, values)
            if value == 1
        ]

    def BackgroundReducer(self, image, png_save, true_boundary=0.01):
        """Trim the background border of an object image and save it as .png.

        Args:
            image: colour image array of shape (rows, columns, channels).
            png_save: path of the .png file to write.
            true_boundary: fraction used to decide whether a row or column
                holds enough content to be kept.

        Nothing is saved, and a message is printed, when no row or no column
        holds enough content.
        """
        bounds = self._content_bounds(image, true_boundary)
        if bounds is None:
            print("Improper Object found. Moving on...")
            gc.collect()
            return

        top, bottom, left, right = bounds
        Image.fromarray(image[top:bottom + 1, left:right + 1, :]).save(png_save)
        gc.collect()

    @staticmethod
    def _content_bounds(image, true_boundary):
        """Return (top, bottom, left, right) inclusive content bounds, or None.

        A column is kept when it has more than ``true_boundary * n_columns``
        content pixels and a row when it has more than
        ``true_boundary * n_rows``. The bounds run from the first to the last
        kept row/column, so gaps in between stay inside the crop.
        """
        # NOTE(review): the previous implementation passed these index sets
        # through a "longest consecutive run" helper, but handed it the tuple
        # returned by np.where, which made the helper a no-op. The effective
        # behaviour (full extent) is kept; confirm whether cropping to the
        # longest run was actually wanted.
        # NOTE(review): the column test scales by the number of columns and
        # the row test by the number of rows; confirm these denominators.
        content = image.min(axis=2) < DataPreparation._CONTENT_LEVEL
        n_rows, n_cols = content.shape
        kept_cols = np.where(content.sum(axis=0) > true_boundary * n_cols)[0]
        kept_rows = np.where(content.sum(axis=1) > true_boundary * n_rows)[0]

        if kept_rows.size == 0 or kept_cols.size == 0:
            return None
        return (int(kept_rows[0]), int(kept_rows[-1]),
                int(kept_cols[0]), int(kept_cols[-1]))
226
        
227
                
228
if __name__ == '__main__':
    # Script entry point: prepare the slides stored in the author's local
    # data folder.
    preparer = DataPreparation("C:\\Users\\Billy\\Downloads\\Data")
    # Short pause between listing the slides and starting the cropping run.
    time.sleep(2)
    preparer.AutocropAll()
232