[70b6b3]: / sandbox / validation_splits.py

Download this file

36 lines (26 with data), 873 Bytes

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import pathfinder
import glob
import random
import utils_lung
import utils
# Fraction (not a count) of the discovered scan files whose patient ids are
# held out for validation; it is multiplied by the number of files below.
VALIDATION_SET_SIZE = 0.2
def read_split(path):
    """Load a pickled split from ``path`` and print its validation ids.

    The file is read with ``utils.load_pkl`` and is expected to contain a
    mapping with a ``'valid'`` entry (as written by
    ``make_luna_validation_split``).

    Args:
        path: Location of the pickled split file.

    Returns:
        None. The ``'valid'`` entry is written to stdout.
    """
    d = utils.load_pkl(path)
    # Parenthesised single-argument form prints the same text on Python 2
    # and is valid syntax on Python 3 as well.
    print(d['valid'])
    # The training ids are intentionally not printed; enable for debugging:
    # print(d['train'])
def make_luna_validation_split():
    """Create and save a train/validation split of the LUNA patient ids.

    Every ``*.mhd`` file directly under ``pathfinder.LUNA_DATA_PATH`` is
    mapped to a patient id with ``utils_lung.extract_pid_filename``. A
    sample of ``int(VALIDATION_SET_SIZE * number_of_files)`` ids is drawn
    with a fixed random seed for validation; the remaining unique ids form
    the training set.

    The result, a dict with keys ``'valid'`` and ``'train'``, is written to
    ``pathfinder.LUNA_VALIDATION_SPLIT_PATH`` with ``utils.save_pkl``.

    Returns:
        None.
    """
    luna_path = pathfinder.LUNA_DATA_PATH
    # glob returns matches in arbitrary order; sorting makes the seeded
    # sample below depend only on the set of files present.
    file_list = sorted(glob.glob(luna_path + "/*.mhd"))

    # Fixed seed so the same files always yield the same validation ids.
    # NOTE: this reseeds the module-level generator, as the original did.
    random.seed(317070)

    all_pids = [utils_lung.extract_pid_filename(f) for f in file_list]
    n_valid = int(VALIDATION_SET_SIZE * len(file_list))
    validation_pids = random.sample(all_pids, n_valid)

    # Set iteration order is arbitrary (and varies between runs when string
    # hash randomisation is active), so sort to keep the stored training
    # list reproducible, matching the intent of the fixed seed above.
    train_pids = sorted(set(all_pids) - set(validation_pids))

    split = {'valid': validation_pids, 'train': train_pids}
    utils.save_pkl(split, pathfinder.LUNA_VALIDATION_SPLIT_PATH)
def make_kaggle_validation_split():
    """Placeholder for a Kaggle split builder; currently does nothing."""
    return None
if __name__ == '__main__':
    # Script entry point: rebuild the LUNA split file, then load it back and
    # print its validation ids as a quick sanity check.
    make_luna_validation_split()
    split_path = pathfinder.LUNA_VALIDATION_SPLIT_PATH
    read_split(split_path)