|
a |
|
b/sandbox/validation_splits.py |
|
|
1 |
import pathfinder |
|
|
2 |
import glob |
|
|
3 |
import random |
|
|
4 |
import utils_lung |
|
|
5 |
import utils |
|
|
6 |
|
|
|
7 |
# Fraction of the globbed LUNA files whose ids are sampled into the validation
# set; the count is int(VALIDATION_SET_SIZE * number_of_files), i.e. truncated.
VALIDATION_SET_SIZE = 0.2
|
|
8 |
|
|
|
9 |
|
|
|
10 |
def read_split(path):
    """Load a saved split from ``path`` and print its ``'valid'`` entry.

    :param path: location passed straight to ``utils.load_pkl``.

    The loaded object is indexed with the ``'valid'`` key, so it is expected
    to be a mapping shaped like the one ``make_luna_validation_split`` saves.
    NOTE(review): ``utils.load_pkl`` presumably unpickles the file -- only
    point this at split files you trust.
    """
    d = utils.load_pkl(path)
    # Parenthesized form prints the same on Python 2 and is valid on Python 3.
    print(d['valid'])
|
|
14 |
|
|
|
15 |
|
|
|
16 |
def make_luna_validation_split():
    """Build a train/validation split of the LUNA ids and save it.

    Globs ``*.mhd`` files under ``pathfinder.LUNA_DATA_PATH``, maps each file
    to an id with ``utils_lung.extract_pid_filename``, samples
    ``int(VALIDATION_SET_SIZE * number_of_files)`` ids as the validation set
    using a fixed seed, and treats the remaining ids as the training set.

    The result, ``{'valid': [...], 'train': [...]}``, is written with
    ``utils.save_pkl`` to ``pathfinder.LUNA_VALIDATION_SPLIT_PATH``.
    Nothing is returned.
    """
    luna_path = pathfinder.LUNA_DATA_PATH
    # Sorted so the seeded sample below does not depend on glob's ordering.
    file_list = sorted(glob.glob(luna_path + "/*.mhd"))
    # Fixed seed keeps the split reproducible (this reseeds the global RNG).
    random.seed(317070)
    all_pids = [utils_lung.extract_pid_filename(f) for f in file_list]
    validation_pids = random.sample(all_pids, int(VALIDATION_SET_SIZE * len(file_list)))
    # sorted() rather than list(): set iteration order is arbitrary and can
    # differ between interpreter runs, which would make the saved 'train'
    # list non-reproducible even though the seed is fixed.
    # Assumes the ids are mutually comparable (e.g. all strings) -- TODO confirm.
    train_pids = sorted(set(all_pids) - set(validation_pids))
    d = {
        'valid': validation_pids,
        'train': train_pids,
    }
    utils.save_pkl(d, pathfinder.LUNA_VALIDATION_SPLIT_PATH)
|
|
27 |
|
|
|
28 |
|
|
|
29 |
def make_kaggle_validation_split():
    """Placeholder for the Kaggle validation split; not implemented yet.

    Currently does nothing and returns ``None``.
    """
    # TODO: implement the Kaggle split.
    pass
|
|
31 |
|
|
|
32 |
|
|
|
33 |
if __name__ == '__main__':
    # Script entry point: regenerate the LUNA split file, then load it back
    # and print its validation ids as a quick sanity check.
    make_luna_validation_split()
    read_split(pathfinder.LUNA_VALIDATION_SPLIT_PATH)