Switch to side-by-side view

--- a
+++ b/deprecated_work/deprecated.py
@@ -0,0 +1,146 @@
+
+def extract_patch_info(basename, input_dir='./', annotations=[], threshold=0.5, patch_size=224, generate_finetune_segmentation=False, target_class=0, intensity_threshold=100., target_threshold=0.):
+	#from collections import OrderedDict
+	#annotations=OrderedDict(annotations)
+	#from dask.multiprocessing import get
+
+	import time
+	from dask import dataframe as dd
+	import multiprocessing
+	from shapely.ops import unary_union
+	from shapely.geometry import MultiPolygon
+	from itertools import product
+
+	arr, masks = load_dataset(join(input_dir,'{}.zarr'.format(basename)),join(input_dir,'{}_mask.pkl'.format(basename)))
+	if 'annotations' in masks:
+		segmentation = True
+		if generate_finetune_segmentation:
+			segmentation_mask = npy2da(join(input_dir,'{}_mask.npy'.format(basename)))
+	else:
+		segmentation = False
+		#masks=np.load(masks['annotations'])
+	#npy_file = join(input_dir,'{}.npy'.format(basename))
+	purple_mask = create_purple_mask(arr)
+	x_max = float(arr.shape[0])
+	y_max = float(arr.shape[1])
+	x_steps = int((x_max-patch_size) / patch_size )
+	y_steps = int((y_max-patch_size) / patch_size )
+	for annotation in annotations:
+		try:
+			masks[annotation]=[unary_union(masks[annotation])] if masks[annotation] else []
+		except:
+			masks[annotation]=[MultiPolygon(masks[annotation])] if masks[annotation] else []
+
+	#@pysnooper.snoop("process_line.log")
+	def return_line_info(row):
+		xs = row['x']
+		ys = row['y']
+		xf = xs + patch_size
+		yf = ys + patch_size
+		print(basename,xs,ys)
+		#if is_valid_patch((purple_mask[xs:xf,ys:yf]>=intensity_threshold).compute(), threshold):#.compute()
+		#print(xs,ys, 'valid_patch')
+		if segmentation:
+			row['annotation']='segment'
+			#info=[basename,xs,ys,patch_size,'segment']
+			seg=segmentation_mask[xs:xf,ys:yf].compute()
+			#info=info+
+			row.iloc[-target_class:]=[(seg==i).mean() for i in range(target_class)]
+			#if generate_finetune_segmentation:
+		else:
+			row.iloc[-len(annotations):]=[is_coords_in_box(coords=np.array([xs,ys]),patch_size=patch_size,boxes=masks[annotation]) for annotation in annotations]
+			row['annotation']=annotations[row.iloc[-len(annotations):].argmax()]#[np.argmax(annotation_areas)]
+			#info=[basename,xs,ys,patch_size,main_annotation]+annotation_areas
+		"""else:
+			if segmentation:
+				info=[basename,xs,ys,patch_size,'NA']+[0. for i in range(target_class)]
+			else:
+				info=[basename,xs,ys,patch_size,'NA']+[0. for i in range(len(annotations))]"""
+		return row#info
+
+	def seg_line(xs,ys,patch_size,segmentation_mask,target_class):
+		xf=xs+patch_size
+		yf=ys+patch_size
+		seg=segmentation_mask[xs:xf,ys:yf]
+		return [(seg==i).mean() for i in range(target_class)]
+
+	def annot_line(xs,ys,patch_size,masks,annotations):
+		return [is_coords_in_box(coords=np.array([xs,ys]),patch_size=patch_size,boxes=masks[annotation]) for annotation in annotations]
+
+	patch_info=pd.DataFrame([([basename,i*patch_size,j*patch_size,patch_size,'NA']+[0.]*(target_class if segmentation else len(annotations))) for i,j in product(range(x_steps+1),range(y_steps+1))],columns=(['ID','x','y','patch_size','annotation']+(annotations if not segmentation else list([str(i) for i in range(target_class)]))))#[dask.delayed(return_line_info)(i,j) for (i,j) in product(range(x_steps+1),range(y_steps+1))]
+	valid_patches=[]
+	for xs,ys in patch_info[['x','y']].values.tolist():
+		valid_patches.append(dask.delayed(is_valid_patch)(xs,ys,patch_size,purple_mask,intensity_threshold,threshold))
+	patch_info=patch_info.loc[np.array(dask.compute(valid_patches))]
+	area_info=[]
+	if segmentation:
+		patch_info.loc[:,'annotation']='segment'
+		for xs,ys in patch_info[['x','y']].values.tolist():
+			area_info.append(dask.delayed(seg_line)(xs,ys,patch_size,segmentation_mask,target_class))
+	else:
+		for xs,ys in patch_info[['x','y']].values.tolist():
+			area_info.append([dask.delayed(is_coords_in_box)(xs,ys,patch_size,masks,annotation) for annotation in annotations])
+	patch_info.iloc[:,6:]=np.array(dask.compute(area_info))
+	annot=list(patch_info.iloc[:,6:])
+	patch_info.loc[:,'annotation']=np.vectorize(lambda i: annot[patch_info.iloc[i,6:].argmax()])(np.arange(patch_info.shape[0]))#patch_info[np.arange(target_class).astype(str).tolist()].values.argmax(1).astype(str)
+	if 0:
+		patch_info=dd.from_pandas(patch_info, npartitions=2*multiprocessing.cpu_count())
+		meta_info=[('ID',str),('x',int),('y',int),('patch_size',int),('annotation',str)]+([(annotation,np.float) for annotation in annotations] if not segmentation else list([(str(i),np.float) for i in range(target_class)]))
+		#patch_info = dd.from_delayed(patch_info,meta=meta_info).compute()
+		patch_info = patch_info.map_partitions(lambda df: df.apply(return_line_info,axis=1), meta=meta_info)\
+										.compute(scheduler='processes')#.values
+		#patch_info=patch_info.apply(return_line_info,axis=1)
+		patch_info=patch_info.loc[patch_info['annotation']!='NA']
+		if segmentation:
+			a=1
+
+
+	if 0:
+		patch_info=dd.from_pandas(patch_info, npartitions=2*multiprocessing.cpu_count())
+		meta_info=[('ID',str),('x',int),('y',int),('patch_size',int),('annotation',str)]+([(annotation,np.float) for annotation in annotations] if not segmentation else list([(str(i),np.float) for i in range(target_class)]))
+		#patch_info = dd.from_delayed(patch_info,meta=meta_info).compute()
+		patch_info = patch_info.map_partitions(lambda df: df.apply(return_line_info,axis=1), meta=meta_info)\
+										.compute(scheduler='processes')#.values
+		#patch_info=patch_info.apply(return_line_info,axis=1)
+		patch_info=patch_info.loc[patch_info['annotation']!='NA']
+		if segmentation:
+			a=1
+	if 0:
+		from parallel_utils import extract_patch_info
+		patch_info=extract_patch_info(basename, input_dir, annotations, threshold, patch_size, generate_finetune_segmentation, target_class, intensity_threshold, target_threshold)
+
+	#@pysnooper.snoop("process_line.log")
+	def return_line_info(row):
+		xs = row['x']
+		ys = row['y']
+		xf = xs + patch_size
+		yf = ys + patch_size
+		print(basename,xs,ys)
+		#if is_valid_patch((purple_mask[xs:xf,ys:yf]>=intensity_threshold).compute(), threshold):#.compute()
+		#print(xs,ys, 'valid_patch')
+		if segmentation:
+			row['annotation']='segment'
+			#info=[basename,xs,ys,patch_size,'segment']
+			seg=segmentation_mask[xs:xf,ys:yf].compute()
+			#info=info+
+			row.iloc[-target_class:]=[(seg==i).mean() for i in range(target_class)]
+			#if generate_finetune_segmentation:
+		else:
+			row.iloc[-len(annotations):]=[is_coords_in_box(coords=np.array([xs,ys]),patch_size=patch_size,boxes=masks[annotation]) for annotation in annotations]
+			row['annotation']=annotations[row.iloc[-len(annotations):].argmax()]#[np.argmax(annotation_areas)]
+			#info=[basename,xs,ys,patch_size,main_annotation]+annotation_areas
+		"""else:
+			if segmentation:
+				info=[basename,xs,ys,patch_size,'NA']+[0. for i in range(target_class)]
+			else:
+				info=[basename,xs,ys,patch_size,'NA']+[0. for i in range(len(annotations))]"""
+		return row#info
+
+	def seg_line(xs,ys,patch_size,segmentation_mask,target_class):
+		xf=xs+patch_size
+		yf=ys+patch_size
+		seg=segmentation_mask[xs:xf,ys:yf]
+		return [(seg==i).mean() for i in range(target_class)]
+
+	def annot_line(xs,ys,patch_size,masks,annotations):
+		return [is_coords_in_box(coords=np.array([xs,ys]),patch_size=patch_size,boxes=masks[annotation]) for annotation in annotations]