# -*- coding: utf-8 -*-
"""
Created on Fri Jul 10 13:14:31 2020
@author: Billy
"""
import slideio
import glob
import os
import time
import numpy as np
import gc
import progressbar
from PIL import Image
from skimage import filters
class DataPreparation:
#this function initialises a data cleaner
#the job of this class is to generate x20 magnification .png images from svs slides,
#whilst cutting out as much background and non-epithelial area as possible.
#
#this function needs the location of the folder that contains svs images as an input
def __init__(self, svs_loc, png_loc = None):
assert type(svs_loc) == type('')
if not os.path.exists(svs_loc):
raise ValueError("SVS directory is illegitimate:", svs_loc,". Please enter the full filepath of an existing directory containing .svs files.\n")
else:
os.chdir(svs_loc)
self.svs_images = glob.glob("*.svs")
if len(self.svs_images) == 0:
raise ValueError("Directory", svs_loc," exists, but there are no .svs images in this location.")
self.svs_loc = svs_loc
print("Found the following outputs:", self.svs_images,"\n")
if png_loc == None:
parent_dir, dir_ = os.path.split(svs_loc)
png_loc = os.path.join(parent_dir,"Prepared_SVS")
if not os.path.exists(png_loc):
os.mkdir(png_loc)
self.png_fold = png_loc
#this function receives the file location of a folder that contains svs images,
#and generates a subling subfolder populated with png images.
#
#this function also saves a log file, to inform the user about the new image's geometric properties.
def AutocropAll(self, svs_loc = None, png_loc= None, max_mag= 20):
if svs_loc == None:
svs_loc = self.svs_loc
if png_loc == None:
png_loc = self.png_fold
if not os.path.exists(svs_loc):
raise ValueError("SVS directory is illegitimate:", svs_loc,". Please enter the full filepath of an existing directory containing .svs files.\n")
if not os.path.exists(png_loc):
print("File Location", png_loc,"does not exist. Making this directory...")
os.mkdir(png_loc)
print("Successfully create .png save directory.")
def consecutive(data, stepsize=1):
arr_consec= np.split(data, np.where(np.diff(data) != stepsize)[0]+1)
return max(arr_consec, key = len)
widgets = [
'Cropping: ', progressbar.Percentage(),
' ', progressbar.AnimatedMarker(),
' ', progressbar.ETA(),
]
bar = progressbar.ProgressBar(
widgets=widgets,
maxval=len(self.svs_images)).start()
log_loc = os.path.join(png_loc, 'log.txt')
with open(log_loc, 'w') as filetowrite:
for i in range(len(self.svs_images)):
bar.update(i)
information ={}
file,svs = os.path.splitext(self.svs_images[i])
pic1_loc = os.path.join(svs_loc, self.svs_images[i])
slide= slideio.open_slide(pic1_loc, 'SVS')
scene = slide.get_scene(0)
mag = scene.magnification
pixel_size = scene.resolution[0]
_,_,width,height = scene.rect
img_fold = os.path.join(png_loc, file)
if not os.path.exists(img_fold):
os.makedirs(img_fold)
while mag>max_mag:
width = int(np.round(width/2))
height = int(np.round(height/2))
mag = mag/2
pixel_size = pixel_size*2
image= scene.read_block(scene.rect,(width,height))
image_data_bw = image.min(axis=2)
information['ImageName'] = file
information['Magnification'] = mag
information['ImagePixelHeight'] = height
information['ImagePixelWidth'] = width
information['PixelSizeMeters'] = pixel_size
filetowrite.write(str(information))
filetowrite.write(' \n ')
filetowrite.write('#####')
filetowrite.write(' \n ')
object_h = self.ObjectSplitter(image_data_bw, axis = 0)
width,height=np.shape(image_data_bw)
for j,indices_v in enumerate(object_h):
real_objects = self.ObjectSplitter(image_data_bw[0:height,indices_v[0]:indices_v[1]], axis = 1)
for k,indices_h in enumerate(real_objects):
full_path = os.path.join(img_fold, file+"_"+str(j)+"_mag"+str(int(mag))+".png")
if not os.path.exists(full_path):
self.BackgroundReducer(image[indices_h[0]:indices_h[1],indices_v[0]:indices_v[1]], full_path)
bar.finish()
#return the indices splitting pairs of an image array depending on the percentage that of pixel 'completion' along a given axis.
#This is to say, this function generates indices that an imaged should be cropped between,
#either vertically or horziontally, based upon the percentage of white background in the image
#the default percentage is 2%
def ObjectSplitter(self, image_arr,percentage_threshold=2, axis =0):
val = filters.threshold_otsu(image_arr)
data = np.sum(image_arr < val,axis=axis)
n = data.shape[0]
data = np.where(data<(percentage_threshold/100)*n,0,1)
loc_run_start = np.empty(n, dtype=bool)
loc_run_start[0] = True
np.not_equal(data[:-1], data[1:], out=loc_run_start[1:])
run_starts = np.nonzero(loc_run_start)[0].tolist()
# find run values
run_values = data[loc_run_start].tolist()
# find run lengths
run_lengths = np.diff(np.append(run_starts, n)).tolist()
counter = 0
for i in range(len(run_starts)):
idx = i-counter
if run_lengths[idx]<0.02*n:
if idx==0:continue
if run_lengths[idx-1]>0.05*n and run_values[idx-1]==1:
run_lengths[idx-1] += run_lengths[idx]
run_lengths.pop(idx)
run_starts.pop(idx)
run_values.pop(idx)
counter+=1
continue
if idx>=len(run_starts)-1:continue
if run_lengths[idx+1]>0.05*n and run_values[idx+1]==1:
run_lengths[idx+1] += run_lengths[idx]
run_lengths.pop(idx)
run_starts.pop(idx)
run_values.pop(idx)
counter+=1
continue
run_lengths[idx-1] += run_lengths[idx]
run_lengths.pop(idx)
run_starts.pop(idx)
run_values.pop(idx)
counter+=1
object_pairs = []
for i in range(len(run_values)):
if not run_values[i]==1:continue
object_pairs.append((run_starts[i], run_starts[i]+run_lengths[i]))
return object_pairs
#this function uses the indices splitting pairs to split input images.
def BackgroundReducer(self, image, png_save, true_boundary = 0.01):
def consecutive(data, stepsize=1):
arr_consec= np.split(data, np.where(np.diff(data) != stepsize)[0]+1)
return max(arr_consec, key = len)
image_data_bw = image.min(axis=2)
gc.collect()
non_empty = np.where(image_data_bw<220,True, False)
non_empty_columns = np.where(np.sum(non_empty,axis=0)>true_boundary*np.shape(non_empty)[1])
non_empty_rows = np.where(np.sum(non_empty,axis=1)>true_boundary*np.shape(non_empty)[0])
non_empty_cols_consec = consecutive(non_empty_columns)
non_empty_rows_consec = consecutive(non_empty_rows)
try:
cropBox = (np.min(non_empty_rows_consec), np.max(non_empty_rows_consec), np.min(non_empty_cols_consec), np.max(non_empty_cols_consec))
except:
print("Improper Object found. Moving on...")
gc.collect()
return
Image.fromarray(image[cropBox[0]:cropBox[1]+1, cropBox[2]:cropBox[3]+1 , :]).save(png_save)
gc.collect()
if __name__ == '__main__':
a= DataPreparation("C:\\Users\\Billy\\Downloads\\Data")
time.sleep(2)
a.AutocropAll()