Diff of /data/batch_check.py [000000] .. [139527]

Switch to unified view

a b/data/batch_check.py
1
# Check batches
2
import numpy as np
3
import pandas as pd
4
from glob import glob
5
from tqdm import tqdm
6
tqdm.pandas()
7
from PIL import Image
8
import random
9
import sys
10
import os
11
import cv2
12
import pdb
13
import shutil
14
15
def batch_check(dataset: str, num_batches: int) -> None:
    """Copy the images of randomly chosen batches aside and report empty masks.

    Reads the CSV at ``dataset`` and picks ``num_batches`` distinct values
    from its ``batch`` column. For each chosen batch, every file listed in
    ``image_path`` is copied into ``batch_check/batch<id>/`` under the name
    ``<case>_<day>_slice<slice_id>.png``, and the percentage of masks
    (loaded from ``mask_path`` with ``np.load``) whose elements sum to zero
    is printed.

    Args:
        dataset: Path to a CSV file with the columns ``batch``, ``case``,
            ``day``, ``slice_id``, ``image_path`` and ``mask_path``.
        num_batches: How many distinct batches to sample.

    Raises:
        ValueError: If ``num_batches`` exceeds the number of distinct batch
            ids present in the dataset (or is negative).
    """
    data = pd.read_csv(dataset)

    # Sample only from batch ids that actually occur. Sampling from
    # range(min, max + 1) can pick an id that no row carries, which yields an
    # empty subset and a division by zero in the percentage below.
    available = sorted(data['batch'].dropna().unique().tolist())
    if num_batches > len(available):
        raise ValueError(
            "Requested {} batches but the dataset only contains {}".format(
                num_batches, len(available)
            )
        )
    batch_list = random.sample(available, num_batches)

    # Make the output root folder if it doesn't already exist
    os.makedirs("batch_check", exist_ok=True)

    for b in batch_list:
        subset = data[data['batch'] == b]
        # Build the names from the subset itself rather than assigning a new
        # column into a slice of `data` (avoids SettingWithCopyWarning and
        # needless work on rows outside this batch).
        new_names = (
            subset['case'] + "_" + subset['day']
            + "_slice" + subset['slice_id'].astype(str)
        )
        path = os.path.join("batch_check", "batch" + str(b))

        # Start from a clean folder so files from an earlier run don't linger.
        if os.path.exists(path):
            shutil.rmtree(path)           # Removes all the subdirectories!
        os.makedirs(path)

        empty_count = 0
        for image, name, mask_path in zip(
            subset['image_path'], new_names, subset['mask_path']
        ):
            # save images in new folder
            shutil.copy(image, os.path.join(path, name + ".png"))

            # check if mask is all black
            if np.load(mask_path).sum() == 0:
                empty_count += 1

        # `subset` is non-empty here because `b` was drawn from existing ids.
        empty_pct = 100 * empty_count / len(subset)
        print("For batch {}, {}% of masks are empty".format(b, empty_pct))
49
50
51
if __name__ == '__main__':
    # usage: python batch_check.py dataset_name.csv num_batches
    # Fail with a readable usage message instead of a bare IndexError /
    # ValueError traceback when the arguments are missing or malformed.
    if len(sys.argv) < 3:
        sys.exit("usage: python batch_check.py dataset_name.csv num_batches")

    dataset = sys.argv[1]
    try:
        num_batches = int(sys.argv[2])
    except ValueError:
        sys.exit(
            "num_batches must be an integer, got {!r}".format(sys.argv[2])
        )

    batch_check(dataset, num_batches)
57
58