|
a |
|
b/preprocess/measure_slices.py |
|
|
1 |
import numpy as np |
|
|
2 |
import pandas as pd |
|
|
3 |
from PIL import Image |
|
|
4 |
from tqdm import tqdm |
|
|
5 |
from preprocessingutils import pwr_transform |
|
|
6 |
import os |
|
|
7 |
from pathlib import Path |
|
|
8 |
|
|
|
9 |
from argparse import ArgumentParser |
|
|
10 |
# Command-line interface: a single optional --dir argument pointing at the
# dataset directory that is measured by this script.
parser = ArgumentParser()
parser.add_argument(
    '--dir',
    default='',
    type=str,
    # The help text states what the script actually reads: images from
    # <dir>/imgs, masks from <dir>/masks, and the fallback location used
    # when the argument is left empty.
    help=("location of files, should contain the subdirs 'imgs' and 'masks'; "
          "if empty, defaults to data/nodules2d under the parent of the "
          "current working directory"),
)
|
|
13 |
|
|
|
14 |
|
|
|
15 |
def measure_slice(x, mask=None):
    '''
    Calculate the size and intensity statistics of a (masked) slice.

    Parameters
    ----------
    x : np.ndarray
        Slice values.
    mask : np.ndarray, optional
        Array whose non-zero entries select the elements of ``x`` to
        measure. When omitted, every element of ``x`` is measured.

    Returns
    -------
    tuple
        ``(size, variance, img_max, img_min, img_mean)`` where ``size`` is
        ``mask.sum()`` and the remaining values are computed over the
        selected elements of ``x``. Note that ``variance`` is the standard
        deviation (``.std()``) of the selection; the name is kept for
        backward compatibility. If the mask selects nothing, the four
        statistics are NaN instead of raising.
    '''
    assert isinstance(x, np.ndarray)
    if mask is None:
        mask = np.ones_like(x)
    assert isinstance(mask, np.ndarray)

    size = mask.sum()

    # Select the masked values once and reuse them for every statistic.
    selected = x[np.nonzero(mask)]

    # max()/min() raise on an empty selection; report NaN statistics so a
    # single empty mask does not abort a whole batch of measurements.
    if selected.size == 0:
        nan = float('nan')
        return size, nan, nan, nan, nan

    variance = selected.std()
    img_max = selected.max()
    img_min = selected.min()
    img_mean = selected.mean()

    return size, variance, img_max, img_min, img_mean
|
|
31 |
|
|
|
32 |
# Resolve the dataset directory. An empty --dir falls back to
# <parent of the current working directory>/data/nodules2d.
args = parser.parse_args()
if args.dir == '':
    args.dir = Path.cwd().parent / 'data' / 'nodules2d'
data_dir = Path(args.dir)

# Only PNG files found in <dir>/imgs are measured; each one is paired with
# the file of the same name in <dir>/masks.
imgs = [name for name in os.listdir(data_dir / "imgs") if name.endswith('.png')]

# One row per image, one column per statistic returned by measure_slice.
# A float array keeps every column float64 in the resulting CSV.
stat_columns = ['size', 'variance', 'max', 'min', 'mean']
stats = np.zeros((len(imgs), len(stat_columns)))

# tqdm wraps the list itself (not the enumerate object) so it knows the
# total number of items and can display progress and an ETA.
for i, img_name in enumerate(tqdm(imgs)):
    # Context managers make sure the image files are closed after reading.
    with Image.open(data_dir / "imgs" / img_name) as img_file:
        # Scale by 255 (presumably 8-bit images mapped to [0, 1] - confirm).
        img = np.array(img_file) / 255
    with Image.open(data_dir / "masks" / img_name) as mask_file:
        # The integer cast truncates, so for masks with values in [0, 255]
        # only pixels equal to 255 end up as 1.
        mask = (np.array(mask_file) / 255).astype(np.int16)

    stats[i] = measure_slice(img, mask)

df = pd.DataFrame(stats, columns=stat_columns)
df.insert(0, 'name', imgs)

# NOTE: power-transform normalisation of the scalar columns (pwr_transform)
# is currently disabled; the raw measurements are written out.
df.to_csv(data_dir / "measurements.csv", index=False)
|
|
66 |
|
|
|
67 |
|
|
|
68 |
|