Diff of /features.py [000000] .. [16dd74]

Switch to unified view

a b/features.py
1
import os
2
from multiprocessing import Pool as ProcessPool
3
4
import cv2 as cv
5
import numpy as np
6
import pandas as pd
7
import scipy.ndimage
8
from PIL import Image
9
from tqdm import tqdm
10
11
Image.MAX_IMAGE_PIXELS = None
12
13
14
def get_x_and_y(name):
    """Return the ``(x, y)`` tile indices encoded at the end of a tile name.

    The extension is stripped, the remainder is split on ``'_'`` and the last
    two fields are parsed as integers, e.g. ``'slide_3_7.tif' -> (3, 7)``.

    Args:
        name: File name (with or without extension) ending in ``_<x>_<y>``.

    Returns:
        Tuple ``(x, y)`` of ints.

    Raises:
        ValueError: If the name does not end in two ``_``-separated integers.
    """
    stem = os.path.splitext(name)[0]
    fields = stem.split('_')
    if len(fields) < 2:
        raise ValueError(
            f"cannot read tile indices from {name!r}: expected a name ending in '_<x>_<y>'"
        )
    x_field, y_field = fields[-2:]
    try:
        return int(x_field), int(y_field)
    except ValueError as err:
        # Same exception type as before, but say which file name was malformed.
        raise ValueError(
            f"cannot read tile indices from {name!r}: expected a name ending in '_<x>_<y>'"
        ) from err
17
18
19
def flatten_list(l):
    """Flatten one level of nesting.

    Args:
        l: Iterable of iterables.

    Returns:
        A new list holding the items of every inner iterable, in order.
    """
    return [item for sublist in l for item in sublist]
21
22
23
class NucleiFeatures:
    """Per-nucleus feature extraction over a folder of labelled mask tiles.

    Every ``<name>.tif`` in ``tif_folder`` is read unchanged as a label image
    and paired with ``<png_folder>/<name>/images/<name>.png`` read as a
    3-channel colour image. For each label ``1 .. max(label image)`` a boolean
    mask is built and passed to the selected feature functions; their outputs
    are concatenated into one row per label.

    Each feature function has the signature ``f(img, orig, **kwargs)`` where
    ``img`` is the boolean mask of one label, ``orig`` the colour tile and
    ``kwargs`` carries ``x_tile``/``y_tile`` (tile indices parsed from the
    file name). It returns a flat list of values whose length matches the
    column names registered in ``feature_dict``.
    """

    def __init__(self, tif_folder, png_folder, features, x_min=0, y_min=0):
        """Configure the extractor.

        Args:
            tif_folder: Directory containing the ``.tif`` label images.
            png_folder: Directory containing ``<name>/images/<name>.png``.
            features: ``'all'``, a single feature name, or an iterable of
                feature names (keys of ``feature_dict``).
            x_min: Offset added to every x coordinate.
            y_min: Offset added to every y coordinate.
        """
        self.tif_folder = tif_folder
        self.png_folder = png_folder
        self.computed_features = None
        self.feature_dict = {
            'position': (self.position, ['x', 'y']),
            'size': (self.size, ['size']),
            'ellipse': (
                self.ellips,
                ['first_axis', 'second_axis', 'ellipse_x', 'ellipse_y', 'ellipse_angle'],
            ),
            # cv.imread returns channels in B, G, R order, so the per-channel
            # statistics come out in that order; the labels follow it.
            'color': (
                self.color,
                ['Blue_mean', 'Green_mean', 'Red_mean', 'Blue_std', 'Green_std', 'Red_std'],
            ),
            'color_gray': (self.color_gray, ['Color_mean', 'Color_std']),
        }
        if isinstance(features, str):
            # 'all' selects everything; any other string is one feature name.
            self.features = list(self.feature_dict) if features == 'all' else [features]
        else:
            # A concrete list (rather than a dict view or one-shot iterator)
            # can be iterated repeatedly and pickled for worker processes.
            self.features = list(features)

        self.x_min = x_min
        self.y_min = y_min

    # ------------------------------------------------------------------
    # Feature functions
    # ------------------------------------------------------------------

    def position(self, img, orig, **kwargs):
        """Return ``[x, y]``: the mask's centre of mass in global coordinates.

        The in-tile centre of mass is shifted by ``x_min``/``y_min`` and by
        the tile index times the mask extent along the matching axis.
        """
        x, y = scipy.ndimage.center_of_mass(img)
        x = x + self.x_min + kwargs['x_tile'] * img.shape[0]
        y = y + self.y_min + kwargs['y_tile'] * img.shape[1]
        return [x, y]

    def ellips(self, img, orig, **kwargs):
        """Return ``[axis_1, axis_2, x, y, angle]`` of an ellipse fitted to the mask.

        The ellipse is fitted to the first external contour of the mask and
        its centre is moved to global coordinates the same way as in
        ``position``. Returns five zeros when no ellipse can be fitted.
        """
        try:
            # findContours returns (image, contours, hierarchy) on OpenCV 3
            # but (contours, hierarchy) on OpenCV 2 and 4; [-2] is the
            # contour list on all of them.
            contours = cv.findContours(
                img.astype(np.uint8).T.copy(), cv.RETR_EXTERNAL, cv.CHAIN_APPROX_NONE
            )[-2]
            cont = contours[0][:, 0, :]
            ellipse_center, axles, angle = cv.fitEllipse(cont)
            x, y = ellipse_center
            x = x + self.x_min + kwargs['x_tile'] * img.shape[0]
            y = y + self.y_min + kwargs['y_tile'] * img.shape[1]
            if axles[1] > 100:
                # NOTE(review): axes above 100 are replaced by a fixed (30, 30);
                # presumably a guard against degenerate fits - confirm the constants.
                axles = (30, 30)
            return [*axles, x, y, angle]
        except Exception:
            # Best effort: a mask that cannot be fitted must not abort the
            # whole run. Unlike a bare ``except`` this lets
            # KeyboardInterrupt/SystemExit through.
            return [0] * 5

    def size(self, img, orig, **kwargs):
        """Return ``[n]`` where ``n`` is the number of pixels in the mask."""
        return [img.sum()]

    def color(self, img, orig, **kwargs):
        """Return per-channel means followed by per-channel standard deviations.

        Statistics are taken over the pixels of ``orig`` selected by the mask;
        ``orig`` is expected to have a channel axis after the two spatial axes.
        """
        cell_pixels = orig[img]
        return [*cell_pixels.mean(axis=0), *cell_pixels.std(axis=0)]

    def color_gray(self, img, orig, **kwargs):
        """Return ``[mean, std]`` of the single-channel intensity under the mask.

        When ``orig`` has a channel axis the channels are first averaged
        (unweighted) per pixel; a 2-D ``orig`` is used as is.
        """
        cell_pixels = orig[img]
        if cell_pixels.ndim > 1:
            cell_pixels = cell_pixels.mean(axis=1)
        return [cell_pixels.mean(), cell_pixels.std()]

    # ------------------------------------------------------------------
    # Bookkeeping
    # ------------------------------------------------------------------

    @property
    def feature_names(self):
        """Column names of the selected features, in output order."""
        return [name for f in self.features for name in self.feature_dict[f][1]]

    def _tile_names(self):
        """Return the sorted base names of the ``.tif`` files in ``tif_folder``."""
        return sorted(
            stem
            for stem, ext in map(os.path.splitext, os.listdir(self.tif_folder))
            if ext.lower() == '.tif'
        )

    @staticmethod
    def _orient(arr):
        """Rotate by three quarter turns, then flip axis 1.

        The net effect is that the first two axes are swapped.
        """
        return np.flip(np.rot90(arr, k=3), axis=1)

    def _load_tile(self, filename):
        """Read and orient the label image and colour image of one tile.

        Raises:
            FileNotFoundError: If either image cannot be read.
        """
        tif_path = f'{self.tif_folder}/{filename}.tif'
        png_path = f'{self.png_folder}/{filename}/images/{filename}.png'
        img = cv.imread(tif_path, -1)
        orig = cv.imread(png_path, 1)
        # cv.imread signals failure by returning None instead of raising.
        if img is None:
            raise FileNotFoundError(f'cannot read label image {tif_path}')
        if orig is None:
            raise FileNotFoundError(f'cannot read colour image {png_path}')
        return self._orient(img), self._orient(orig)

    def _compute_tile(self, data):
        """Compute the feature rows of one tile.

        Args:
            data: Tuple ``(img, orig, filename)``: oriented label image,
                oriented colour image and tile base name. A single tuple
                argument lets this be used directly with ``Pool.imap``.

        Returns:
            One list of feature values per label ``1 .. img.max()``.
        """
        img, orig, filename = data
        x_tile, y_tile = get_x_and_y(filename)
        extractors = [self.feature_dict[f][0] for f in self.features]
        rows = []
        # int() first: adding 1 to a small unsigned NumPy scalar could wrap.
        # The upper bound is inclusive so the highest label is not dropped.
        for label in range(1, int(img.max()) + 1):
            mask = (img == label)
            row = []
            for extract in extractors:
                row += extract(mask, orig, x_tile=x_tile, y_tile=y_tile)
            rows.append(row)
        return rows

    # ------------------------------------------------------------------
    # Entry points
    # ------------------------------------------------------------------

    def compute(self):
        """Compute features for every tile sequentially.

        Results are stored in ``computed_features``.

        Returns:
            ``self``, to allow chaining (e.g. ``.compute().df()``).
        """
        self.computed_features = []
        for filename in tqdm(self._tile_names()):
            img, orig = self._load_tile(filename)
            self.computed_features.extend(self._compute_tile((img, orig, filename)))
        return self

    def compute_multipricess(self, n_workers=10):
        """Compute features for every tile using a pool of worker processes.

        All tiles are loaded in the calling process and then distributed to
        ``n_workers`` processes; results are stored in ``computed_features``.

        Args:
            n_workers: Number of worker processes.

        Returns:
            ``self``, to allow chaining.
        """
        tasks = []
        for filename in self._tile_names():
            img, orig = self._load_tile(filename)
            tasks.append((img, orig, filename))

        # The bound method is pickled together with ``self`` for every task;
        # drop stale results first so they are not shipped to the workers.
        self.computed_features = None
        with ProcessPool(n_workers) as p:
            result = list(tqdm(p.imap(self._compute_tile, tasks), total=len(tasks)))
        self.computed_features = flatten_list(result)

        return self

    # Correctly spelled alias; the original name is kept for existing callers.
    compute_multiprocess = compute_multipricess

    def df(self):
        """Return the computed features as a DataFrame.

        Runs ``compute()`` first if nothing has been computed yet. Columns
        are ``feature_names``.
        """
        if self.computed_features is None:
            self.compute()
        return pd.DataFrame(self.computed_features, columns=self.feature_names)