[e9500f]: / deprecated_work / deprecated.py

Download this file

147 lines (134 with data), 7.4 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
def extract_patch_info(basename, input_dir='./', annotations=[], threshold=0.5, patch_size=224, generate_finetune_segmentation=False, target_class=0, intensity_threshold=100., target_threshold=0.):
#from collections import OrderedDict
#annotations=OrderedDict(annotations)
#from dask.multiprocessing import get
import time
from dask import dataframe as dd
import multiprocessing
from shapely.ops import unary_union
from shapely.geometry import MultiPolygon
from itertools import product
arr, masks = load_dataset(join(input_dir,'{}.zarr'.format(basename)),join(input_dir,'{}_mask.pkl'.format(basename)))
if 'annotations' in masks:
segmentation = True
if generate_finetune_segmentation:
segmentation_mask = npy2da(join(input_dir,'{}_mask.npy'.format(basename)))
else:
segmentation = False
#masks=np.load(masks['annotations'])
#npy_file = join(input_dir,'{}.npy'.format(basename))
purple_mask = create_purple_mask(arr)
x_max = float(arr.shape[0])
y_max = float(arr.shape[1])
x_steps = int((x_max-patch_size) / patch_size )
y_steps = int((y_max-patch_size) / patch_size )
for annotation in annotations:
try:
masks[annotation]=[unary_union(masks[annotation])] if masks[annotation] else []
except:
masks[annotation]=[MultiPolygon(masks[annotation])] if masks[annotation] else []
#@pysnooper.snoop("process_line.log")
def return_line_info(row):
xs = row['x']
ys = row['y']
xf = xs + patch_size
yf = ys + patch_size
print(basename,xs,ys)
#if is_valid_patch((purple_mask[xs:xf,ys:yf]>=intensity_threshold).compute(), threshold):#.compute()
#print(xs,ys, 'valid_patch')
if segmentation:
row['annotation']='segment'
#info=[basename,xs,ys,patch_size,'segment']
seg=segmentation_mask[xs:xf,ys:yf].compute()
#info=info+
row.iloc[-target_class:]=[(seg==i).mean() for i in range(target_class)]
#if generate_finetune_segmentation:
else:
row.iloc[-len(annotations):]=[is_coords_in_box(coords=np.array([xs,ys]),patch_size=patch_size,boxes=masks[annotation]) for annotation in annotations]
row['annotation']=annotations[row.iloc[-len(annotations):].argmax()]#[np.argmax(annotation_areas)]
#info=[basename,xs,ys,patch_size,main_annotation]+annotation_areas
"""else:
if segmentation:
info=[basename,xs,ys,patch_size,'NA']+[0. for i in range(target_class)]
else:
info=[basename,xs,ys,patch_size,'NA']+[0. for i in range(len(annotations))]"""
return row#info
def seg_line(xs,ys,patch_size,segmentation_mask,target_class):
xf=xs+patch_size
yf=ys+patch_size
seg=segmentation_mask[xs:xf,ys:yf]
return [(seg==i).mean() for i in range(target_class)]
def annot_line(xs,ys,patch_size,masks,annotations):
return [is_coords_in_box(coords=np.array([xs,ys]),patch_size=patch_size,boxes=masks[annotation]) for annotation in annotations]
patch_info=pd.DataFrame([([basename,i*patch_size,j*patch_size,patch_size,'NA']+[0.]*(target_class if segmentation else len(annotations))) for i,j in product(range(x_steps+1),range(y_steps+1))],columns=(['ID','x','y','patch_size','annotation']+(annotations if not segmentation else list([str(i) for i in range(target_class)]))))#[dask.delayed(return_line_info)(i,j) for (i,j) in product(range(x_steps+1),range(y_steps+1))]
valid_patches=[]
for xs,ys in patch_info[['x','y']].values.tolist():
valid_patches.append(dask.delayed(is_valid_patch)(xs,ys,patch_size,purple_mask,intensity_threshold,threshold))
patch_info=patch_info.loc[np.array(dask.compute(valid_patches))]
area_info=[]
if segmentation:
patch_info.loc[:,'annotation']='segment'
for xs,ys in patch_info[['x','y']].values.tolist():
area_info.append(dask.delayed(seg_line)(xs,ys,patch_size,segmentation_mask,target_class))
else:
for xs,ys in patch_info[['x','y']].values.tolist():
area_info.append([dask.delayed(is_coords_in_box)(xs,ys,patch_size,masks,annotation) for annotation in annotations])
patch_info.iloc[:,6:]=np.array(dask.compute(area_info))
annot=list(patch_info.iloc[:,6:])
patch_info.loc[:,'annotation']=np.vectorize(lambda i: annot[patch_info.iloc[i,6:].argmax()])(np.arange(patch_info.shape[0]))#patch_info[np.arange(target_class).astype(str).tolist()].values.argmax(1).astype(str)
if 0:
patch_info=dd.from_pandas(patch_info, npartitions=2*multiprocessing.cpu_count())
meta_info=[('ID',str),('x',int),('y',int),('patch_size',int),('annotation',str)]+([(annotation,np.float) for annotation in annotations] if not segmentation else list([(str(i),np.float) for i in range(target_class)]))
#patch_info = dd.from_delayed(patch_info,meta=meta_info).compute()
patch_info = patch_info.map_partitions(lambda df: df.apply(return_line_info,axis=1), meta=meta_info)\
.compute(scheduler='processes')#.values
#patch_info=patch_info.apply(return_line_info,axis=1)
patch_info=patch_info.loc[patch_info['annotation']!='NA']
if segmentation:
a=1
if 0:
patch_info=dd.from_pandas(patch_info, npartitions=2*multiprocessing.cpu_count())
meta_info=[('ID',str),('x',int),('y',int),('patch_size',int),('annotation',str)]+([(annotation,np.float) for annotation in annotations] if not segmentation else list([(str(i),np.float) for i in range(target_class)]))
#patch_info = dd.from_delayed(patch_info,meta=meta_info).compute()
patch_info = patch_info.map_partitions(lambda df: df.apply(return_line_info,axis=1), meta=meta_info)\
.compute(scheduler='processes')#.values
#patch_info=patch_info.apply(return_line_info,axis=1)
patch_info=patch_info.loc[patch_info['annotation']!='NA']
if segmentation:
a=1
if 0:
from parallel_utils import extract_patch_info
patch_info=extract_patch_info(basename, input_dir, annotations, threshold, patch_size, generate_finetune_segmentation, target_class, intensity_threshold, target_threshold)
#@pysnooper.snoop("process_line.log")
def return_line_info(row):
xs = row['x']
ys = row['y']
xf = xs + patch_size
yf = ys + patch_size
print(basename,xs,ys)
#if is_valid_patch((purple_mask[xs:xf,ys:yf]>=intensity_threshold).compute(), threshold):#.compute()
#print(xs,ys, 'valid_patch')
if segmentation:
row['annotation']='segment'
#info=[basename,xs,ys,patch_size,'segment']
seg=segmentation_mask[xs:xf,ys:yf].compute()
#info=info+
row.iloc[-target_class:]=[(seg==i).mean() for i in range(target_class)]
#if generate_finetune_segmentation:
else:
row.iloc[-len(annotations):]=[is_coords_in_box(coords=np.array([xs,ys]),patch_size=patch_size,boxes=masks[annotation]) for annotation in annotations]
row['annotation']=annotations[row.iloc[-len(annotations):].argmax()]#[np.argmax(annotation_areas)]
#info=[basename,xs,ys,patch_size,main_annotation]+annotation_areas
"""else:
if segmentation:
info=[basename,xs,ys,patch_size,'NA']+[0. for i in range(target_class)]
else:
info=[basename,xs,ys,patch_size,'NA']+[0. for i in range(len(annotations))]"""
return row#info
def seg_line(xs,ys,patch_size,segmentation_mask,target_class):
xf=xs+patch_size
yf=ys+patch_size
seg=segmentation_mask[xs:xf,ys:yf]
return [(seg==i).mean() for i in range(target_class)]
def annot_line(xs,ys,patch_size,masks,annotations):
return [is_coords_in_box(coords=np.array([xs,ys]),patch_size=patch_size,boxes=masks[annotation]) for annotation in annotations]