# preprocess/measure_slices.py
1
import numpy as np
2
import pandas as pd
3
from PIL import Image
4
from tqdm import tqdm
5
from preprocessingutils import pwr_transform
6
import os
7
from pathlib import Path
8
9
from argparse import ArgumentParser
10
# Command-line interface: a single optional --dir argument pointing at the
# dataset folder that is measured by this script.
parser = ArgumentParser()
parser.add_argument(
    '--dir',
    default='',
    type=str,
    help=(
        "location of files, should contain the subdirs 'imgs' and 'masks'; "
        "if empty, defaults to ../data/nodules2d relative to the current "
        "working directory"
    ),
)
13
14
15
def measure_slice(x, mask=None):
    '''
    calculate slice size and intensity statistics inside a mask

    Parameters
    ----------
    x : np.ndarray
        image slice to measure
    mask : np.ndarray, optional
        array whose nonzero entries select the pixels of ``x`` that are
        measured; if None, every pixel of ``x`` is used

    Returns
    -------
    tuple
        ``(size, variance, img_max, img_min, img_mean)`` where ``size`` is
        ``mask.sum()`` and the remaining values are computed over the
        selected pixels. NOTE: ``variance`` is the standard deviation
        (``.std()``); the name is kept for compatibility with callers.
        If the mask selects no pixels, the four statistics are NaN.

    Raises
    ------
    TypeError
        if ``x`` or ``mask`` is not a numpy array
    '''
    # Explicit checks instead of assert: asserts are stripped under -O.
    if not isinstance(x, np.ndarray):
        raise TypeError(f"x must be a numpy.ndarray, got {type(x).__name__}")
    if mask is None:
        mask = np.ones_like(x)
    if not isinstance(mask, np.ndarray):
        raise TypeError(f"mask must be a numpy.ndarray, got {type(mask).__name__}")

    size = mask.sum()

    # Select the masked pixels once and reuse them for every statistic.
    values = x[np.nonzero(mask)]

    if values.size == 0:
        # max()/min() raise on an empty selection while std()/mean() return
        # NaN with a warning; report NaN consistently for all statistics.
        nan = float('nan')
        return size, nan, nan, nan, nan

    return size, values.std(), values.max(), values.min(), values.mean()
31
32
# Script body: measure every PNG slice in <dir>/imgs using the mask with the
# same file name in <dir>/masks, and write the results to
# <dir>/measurements.csv.
args = parser.parse_args()
if args.dir == '':
    # Default data location: ../data/nodules2d relative to the working dir.
    args.dir = Path.cwd().parent / 'data' / 'nodules2d'

img_dir = os.path.join(args.dir, "imgs")
mask_dir = os.path.join(args.dir, "masks")

# os.listdir returns entries in arbitrary order; sort so that the row order
# of the output CSV is reproducible between runs.
imgs = sorted(x for x in os.listdir(img_dir) if x.endswith('.png'))

rows = []
# Wrapping the list itself (not enumerate) lets tqdm show the total count.
for img_name in tqdm(imgs):
    # Context managers make sure the image files are closed again.
    with Image.open(os.path.join(img_dir, img_name)) as im:
        img = np.array(im) / 255
    with Image.open(os.path.join(mask_dir, img_name)) as im:
        # The integer cast truncates, so after dividing by 255 only pixels
        # with value >= 255 remain nonzero (presumably masks are 8-bit
        # 0/255 images -- TODO confirm).
        mask = (np.array(im) / 255).astype(np.int16)

    size, variance, img_max, img_min, img_mean = measure_slice(img, mask)
    # Store everything as float so the CSV formatting matches float columns.
    rows.append((img_name, float(size), float(variance),
                 float(img_max), float(img_min), float(img_mean)))

df = pd.DataFrame(rows,
                  columns=['name', 'size', 'variance', 'max', 'min', 'mean'])

# NOTE: values are written unnormalized; the power-transform normalization
# step (pwr_transform) is currently disabled.
df.to_csv(os.path.join(args.dir, "measurements.csv"), index=False)
66
67
68