|
a |
|
b/features.py |
|
|
1 |
import os |
|
|
2 |
from multiprocessing import Pool as ProcessPool |
|
|
3 |
|
|
|
4 |
import cv2 as cv |
|
|
5 |
import numpy as np |
|
|
6 |
import pandas as pd |
|
|
7 |
import scipy.ndimage |
|
|
8 |
from PIL import Image |
|
|
9 |
from tqdm import tqdm |
|
|
10 |
|
|
|
11 |
# Per Pillow's documentation, setting MAX_IMAGE_PIXELS to None turns off its
# decompression-bomb size check so very large images can be opened.
# NOTE(review): the code visible in this file reads images with cv2, not PIL —
# confirm this override is still needed.
Image.MAX_IMAGE_PIXELS = None
|
|
12 |
|
|
|
13 |
|
|
|
14 |
def get_x_and_y(name):
    """Parse the two trailing ``_``-separated integers out of a file name.

    The extension (if any) is dropped first, so ``"tile_3_4.png"`` and
    ``"tile_3_4"`` both give ``(3, 4)``.

    Raises:
        ValueError: if the stem has fewer than two ``_``-separated fields or
            either of the last two fields is not an integer.
    """
    stem, _extension = os.path.splitext(name)
    x_field, y_field = stem.split('_')[-2:]
    return int(x_field), int(y_field)
|
|
17 |
|
|
|
18 |
|
|
|
19 |
def flatten_list(l):
    """Concatenate the items of every sub-iterable in ``l`` into one new list.

    Only one level of nesting is removed.
    """
    flat = []
    for sublist in l:
        flat.extend(sublist)
    return flat
|
|
21 |
|
|
|
22 |
|
|
|
23 |
class NucleiFeatures:
    """Per-nucleus feature extraction for tiled, labelled segmentation masks.

    For every entry in ``tif_folder`` the label image ``<name>.tif`` and the
    colour image ``<png_folder>/<name>/images/<name>.png`` are loaded. Each
    integer label ``i >= 1`` in the mask (the pixels where ``mask == i``) is
    treated as one object and produces one feature row. Tile names must end in
    ``_<x>_<y>``; those indices, multiplied by the tile shape and offset by
    ``x_min`` / ``y_min``, turn tile-local coordinates into global ones.

    Labels are enumerated as ``1 .. mask.max()`` inclusive; a label value that
    does not occur in the mask still yields a row computed from an empty mask.

    Args:
        tif_folder: directory holding the ``.tif`` label images.
        png_folder: root directory of the colour images.
        features: ``'all'``, a single feature name, or an iterable of feature
            names (keys of ``feature_dict``).
        x_min: constant added to every global x coordinate.
        y_min: constant added to every global y coordinate.
    """

    # BT.601 luma weights, ordered for OpenCV's B, G, R channel layout.
    _BGR_LUMA_WEIGHTS = (0.114, 0.587, 0.299)

    def __init__(self, tif_folder, png_folder, features, x_min=0, y_min=0):
        self.tif_folder = tif_folder
        self.png_folder = png_folder
        self.computed_features = None
        # feature name -> (extractor, column names produced by that extractor)
        self.feature_dict = {
            'position': (self.position, ['x', 'y']),
            'size': (self.size, ['size']),
            'ellipse': (
                self.ellips,
                ['first_axis', 'second_axis', 'ellipse_x', 'ellipse_y', 'ellipse_angle'],
            ),
            # cv.imread returns channels in B, G, R order, so the names follow it.
            'color': (
                self.color,
                ['Blue_mean', 'Green_mean', 'Red_mean', 'Blue_std', 'Green_std', 'Red_std'],
            ),
            'color_gray': (self.color_gray, ['Color_mean', 'Color_std']),
        }
        if isinstance(features, str):
            # 'all' selects everything; any other string is one feature name.
            self.features = list(self.feature_dict) if features == 'all' else [features]
        else:
            # Materialise so the selection can be iterated repeatedly and pickled.
            self.features = list(features)

        self.x_min = x_min
        self.y_min = y_min

    # ------------------------------------------------------------------ #
    # Feature extractors: each takes (mask, colour image, x_tile=, y_tile=)
    # and returns a flat list matching its column names in feature_dict.
    # ------------------------------------------------------------------ #

    def _to_global(self, x, y, shape, tile):
        """Shift tile-local ``(x, y)`` by tile index * tile shape plus the origin."""
        x = x + self.x_min + tile['x_tile'] * shape[0]
        y = y + self.y_min + tile['y_tile'] * shape[1]
        return x, y

    def position(self, img, orig, **kwargs):
        """Return ``[x, y]``: the mask's centre of mass in global coordinates."""
        x, y = scipy.ndimage.center_of_mass(img)
        x, y = self._to_global(x, y, img.shape, kwargs)
        return [x, y]

    def ellips(self, img, orig, **kwargs):
        """Fit an ellipse to the mask's first external contour.

        Returns ``[first_axis, second_axis, x, y, angle]`` with the centre in
        global coordinates, or five zeros when no ellipse can be fitted.
        """
        try:
            # findContours returns (image, contours, hierarchy) on OpenCV 3 and
            # (contours, hierarchy) on OpenCV 4; [-2] is the contour list on both.
            contours = cv.findContours(
                img.astype(np.uint8).T.copy(), cv.RETR_EXTERNAL, cv.CHAIN_APPROX_NONE
            )[-2]
            cont = contours[0][:, 0, :]
            ellipse_center, axles, angle = cv.fitEllipse(cont)
            x, y = self._to_global(ellipse_center[0], ellipse_center[1], img.shape, kwargs)
            # NOTE(review): a second axis above 100 is replaced by a fixed
            # 30 x 30 — presumably to reject runaway fits; confirm the values.
            if axles[1] > 100:
                axles = (30, 30)
            return [*axles, x, y, angle]
        except Exception:
            # Best effort: an empty mask or too few contour points makes the
            # fit fail; report zeros instead of aborting the whole run.
            return [0] * 5

    def size(self, img, orig, **kwargs):
        """Return ``[n]`` where ``n`` is the number of pixels set in the mask."""
        return [img.sum()]

    def color(self, img, orig, **kwargs):
        """Return per-channel means followed by per-channel standard deviations
        of the colour-image pixels selected by the mask."""
        cell_pixels = orig[img]
        return [*cell_pixels.mean(axis=0), *cell_pixels.std(axis=0)]

    def color_gray(self, img, orig, **kwargs):
        """Return ``[mean, std]`` of the grey level of the pixels under the mask.

        NOTE(review): this feature used to be mapped to ``color``, which
        returned six values for the two declared columns. Grey level is taken
        here as the BT.601 luma of the B, G, R channels — confirm that this is
        the intended definition.
        """
        cell_pixels = orig[img]
        if cell_pixels.ndim == 2:
            cell_pixels = cell_pixels @ np.asarray(self._BGR_LUMA_WEIGHTS)
        return [cell_pixels.mean(), cell_pixels.std()]

    @property
    def feature_names(self):
        """Column names for the selected features, in extraction order."""
        return [name for f in self.features for name in self.feature_dict[f][1]]

    # ------------------------------------------------------------------ #
    # Per-file pipeline shared by the serial and the parallel entry points.
    # ------------------------------------------------------------------ #

    def _base_names(self):
        """Extension-less names of all entries in ``tif_folder``."""
        return [os.path.splitext(i)[0] for i in os.listdir(self.tif_folder)]

    def _load_pair(self, filename):
        """Read the label image and its colour image for one tile."""
        img = cv.imread(f'{self.tif_folder}/{filename}.tif', -1)
        orig = cv.imread(f'{self.png_folder}/{filename}/images/{filename}.png', 1)

        # rot90(k=3) followed by a flip along axis 1 swaps the row and column
        # axes; the same change is applied to both images so they stay aligned.
        img = np.flip(np.rot90(img, k=3), axis=1)
        orig = np.flip(np.rot90(orig, k=3), axis=1)
        return img, orig

    def _features_for_image(self, img, orig, x_tile, y_tile):
        """Return one feature row per label ``1 .. img.max()`` (inclusive)."""
        # int() avoids NumPy scalar overflow when adding 1 to the dtype's maximum.
        last_label = int(img.max())
        extractors = [self.feature_dict[f][0] for f in self.features]
        rows = []
        for label in range(1, last_label + 1):
            mask = (img == label)
            row = []
            for extract in extractors:
                row += extract(mask, orig, x_tile=x_tile, y_tile=y_tile)
            rows.append(row)
        return rows

    def _compute_file(self, filename):
        """Load one tile and return its feature rows."""
        img, orig = self._load_pair(filename)
        if img.max() < 1:
            # Nothing labelled: skip parsing the tile indices altogether.
            return []
        x_tile, y_tile = get_x_and_y(filename)
        return self._features_for_image(img, orig, x_tile, y_tile)

    def compute(self):
        """Compute features for every tile serially; returns ``self``."""
        self.computed_features = []
        for filename in tqdm(self._base_names()):
            self.computed_features.extend(self._compute_file(filename))
        return self

    def compute_multipricess(self, n_workers=10):
        """Compute features for every tile with a pool of worker processes.

        Only file names are sent to the workers; each worker loads its own
        images. The worker is a bound method, so it is pickled together with
        ``self`` and does not depend on the ``fork`` start method.

        Args:
            n_workers: number of worker processes.

        Returns:
            ``self``, with ``computed_features`` filled in listing order.
        """
        base_names = self._base_names()
        with ProcessPool(n_workers) as pool:
            per_file = list(tqdm(pool.imap(self._compute_file, base_names), total=len(base_names)))
        self.computed_features = flatten_list(per_file)

        return self

    # Correctly spelled alias; the original name is kept for existing callers.
    compute_multiprocess = compute_multipricess

    def df(self):
        """Return the features as a DataFrame, running ``compute()`` first if
        nothing has been computed yet."""
        if self.computed_features is None:
            self.compute()
        return pd.DataFrame(self.computed_features, columns=self.feature_names)