Diff of /features.py [000000] .. [16dd74]

Switch to side-by-side view

--- a
+++ b/features.py
@@ -0,0 +1,147 @@
+import os
+from multiprocessing import Pool as ProcessPool
+
+import cv2 as cv
+import numpy as np
+import pandas as pd
+import scipy.ndimage
+from PIL import Image
+from tqdm import tqdm
+
+Image.MAX_IMAGE_PIXELS = None
+
+
+def get_x_and_y(name):
+    x, y = os.path.splitext(name)[0].split('_')[-2:]
+    return int(x), int(y)
+
+
+def flatten_list(l):
+    return [item for sublist in l for item in sublist]
+
+
+class NucleiFeatures:
+
+    def position(self, img, orig, **kwargs):
+        x, y = scipy.ndimage.measurements.center_of_mass(img)
+        x = x + self.x_min + kwargs['x_tile'] * img.shape[0]
+        y = y + self.y_min + kwargs['y_tile'] * img.shape[1]
+        return [x, y]
+
+    def ellips(self, img, orig, **kwargs):
+        try:
+            cont = cv.findContours(img.astype(np.uint8).T.copy(), cv.RETR_EXTERNAL, cv.CHAIN_APPROX_NONE)[1][0][:, 0, :]
+            ellipse_center, axles, angle = cv.fitEllipse(cont)
+            x, y = ellipse_center
+            x = x + self.x_min + kwargs['x_tile'] * img.shape[0]
+            y = y + self.y_min + kwargs['y_tile'] * img.shape[1]
+            if axles[1] > 100:
+                axles = (30, 30)
+            return [*axles, x, y, angle]
+        except:
+            return [0] * 5
+
+    def size(self, img, orig, **kwargs):
+        return [img.sum()]
+
+    def color(self, img, orig, **kwargs):
+        cell_pixels = orig[img]
+        return [*cell_pixels.mean(axis=0), *cell_pixels.std(axis=0)]
+
+    def __init__(self, tif_folder, png_folder, features, x_min=0, y_min=0):
+        self.tif_folder = tif_folder
+        self.png_folder = png_folder
+        self.computed_features = None
+        self.feature_dict = {'position': (self.position, ['x', 'y']),
+                             'size': (self.size, ['size']),
+                             'ellipse': (
+                                 self.ellips, ['first_axis', 'second_axis', 'ellipse_x', 'ellipse_y', 'ellipse_angle']),
+                             'color': (
+                                 self.color,
+                                 ['Blue_mean', 'Red_mean', 'Green_mean', 'Blue_std', 'Red_std', 'Green_std']),
+                             'color_gray': (
+                                 self.color,
+                                 ['Color_mean', 'Color_std'])
+                             }
+        if features == 'all':
+            self.features = self.feature_dict.keys()
+        else:
+            self.features = features
+
+        self.x_min = x_min
+        self.y_min = y_min
+
+    @property
+    def feature_names(self):
+        names = []
+        for f in self.features:
+            names += self.feature_dict[f][1]
+        return names
+
+    def compute(self):
+        self.computed_features = []
+        base_names = [os.path.splitext(i)[0] for i in os.listdir(self.tif_folder)]
+        for filename in tqdm(base_names):
+            img = cv.imread(f'{self.tif_folder}/{filename}.tif', -1)
+            orig = cv.imread(f'{self.png_folder}/{filename}/images/{filename}.png', 1)
+
+            img = np.rot90(img, k=3)
+            img = np.flip(img, axis=1)
+            orig = np.rot90(orig, k=3)
+            orig = np.flip(orig, axis=1)
+            for i in range(1, img.max()):
+                tmp_img = (img == i)
+                tmp = []
+                x_tile, y_tile = get_x_and_y(filename)
+                for f in self.features:
+                    tmp += self.feature_dict[f][0](tmp_img, orig, x_tile=x_tile, y_tile=y_tile)
+                self.computed_features.append(tmp)
+        return self
+
+    def compute_multipricess(self, n_workers=10):
+
+        orig_list = []
+        img_list = []
+        filenames = []
+        base_names = [os.path.splitext(i)[0] for i in os.listdir(self.tif_folder)]
+        for filename in base_names:
+            img = cv.imread(f'{self.tif_folder}/{filename}.tif', -1)
+            orig = cv.imread(f'{self.png_folder}/{filename}/images/{filename}.png', 1)
+
+            img = np.rot90(img, k=3)
+            img = np.flip(img, axis=1)
+            orig = np.rot90(orig, k=3)
+            orig = np.flip(orig, axis=1)
+
+            orig_list.append(orig)
+            img_list.append(img)
+            filenames.append(filename)
+
+        global compute_one
+
+        def compute_one(data):
+            computed_features = []
+            img = data[0]
+            orig = data[1]
+            filename = data[2]
+
+            for i in range(1, img.max()):
+                tmp_img = (img == i)
+                tmp = []
+                x_tile, y_tile = get_x_and_y(filename)
+                for f in self.features:
+                    tmp += self.feature_dict[f][0](tmp_img, orig, x_tile=x_tile, y_tile=y_tile)
+                computed_features.append(tmp)
+            return computed_features
+
+        with ProcessPool(n_workers) as p:
+            result = list(tqdm(p.imap(compute_one, zip(img_list, orig_list, filenames)), total=len(orig_list)))
+            self.computed_features = flatten_list(result)
+
+        return self
+
+    def df(self):
+        if self.computed_features is None:
+            self.compute()
+        df = pd.DataFrame(self.computed_features, columns=self.feature_names)
+        return df