[d986f2]: / experiments / toy_exp / generate_toys.py

Download this file

139 lines (107 with data), 5.4 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#!/usr/bin/env python
# Copyright 2018 Division of Medical Image Computing, German Cancer Research Center (DKFZ).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import os, time
import numpy as np
import pandas as pd
import pickle
import argparse
from multiprocessing import Pool
# Switch for parallel image creation: when True, generate_dataset hands the
# create_image calls to a multiprocessing Pool; when False, it runs them one
# after another in the calling process.
DO_MP = True
def create_image(out_dir, six, foreground_margin, class_diameters, mode, noisy_bg):
    """Generate one toy sample (image + segmentation mask) and write it to disk.

    A filled disk of one of two classes is drawn at a random position on a
    320x320 canvas. Pixels inside the disk are brightened by 0.2 and marked
    with 1 in the mask. For 'donuts' modes, class-1 disks additionally get a
    small hole in their center where the brightening is undone.

    Args:
        out_dir: existing directory that receives the output files.
        six: sample index; used in the output file names and stored (as a
            string) as the sample id in the meta info.
        foreground_margin: minimum distance in pixels between the disk center
            and any image edge.
        class_diameters: two sizes, indexed by the randomly drawn class id
            (0 or 1). NOTE: despite the name, the value is compared against
            the distance from the center, i.e. it acts as a radius.
        mode: dataset variant. If it contains 'donuts', class-1 objects get a
            hole; if it equals 'donuts_shape', the hole is also cleared in
            the segmentation mask.
        noisy_bg: if True the background is uniform random noise, otherwise
            it is all zeros.

    Side effects:
        Writes '<six>.npy' holding an array of shape (2, 320, 320) (image
        first, mask second) and 'meta_info_<six>.pickle' holding the list
        [npy path, class id, str(six)].
    """
    print('\rprocessing {} {}'.format(out_dir, six), end="", flush=True)

    img = np.random.rand(320, 320) if noisy_bg else np.zeros((320, 320))
    seg = np.zeros((320, 320)).astype('uint8')
    height, width = img.shape

    # x runs along columns (axis 1) and y along rows (axis 0). The order of
    # the random draws (x, y, class) is kept so seeded runs stay reproducible.
    center_x = np.random.randint(foreground_margin, width - foreground_margin)
    center_y = np.random.randint(foreground_margin, height - foreground_margin)
    class_id = np.random.randint(0, 2)

    # Squared distance of every pixel to the object center, computed once for
    # the whole canvas instead of looping over pixels in Python.
    rows, cols = np.ogrid[:height, :width]
    dist_sq = (cols - center_x) ** 2 + (rows - center_y) ** 2

    disk = dist_sq < class_diameters[class_id] ** 2
    img[disk] += 0.2
    seg[disk] = 1

    if 'donuts' in mode and class_id == 1:
        hole_diameter = 4  # used as a radius, like class_diameters
        hole = dist_sq < hole_diameter ** 2
        img[hole] -= 0.2
        if mode == 'donuts_shape':
            # only in this mode is the hole part of the annotated shape.
            seg[hole] = 0

    out = np.concatenate((img[None], seg[None]))
    out_path = os.path.join(out_dir, '{}.npy'.format(six))
    np.save(out_path, out)

    with open(os.path.join(out_dir, 'meta_info_{}.pickle'.format(six)), 'wb') as handle:
        pickle.dump([out_path, class_id, str(six)], handle)
def generate_dataset(cf, exp_name, n_train_images, n_test_images, mode, class_diameters=(20, 20), noisy_bg=False):
    """Create a train and a test split of toy images below cf.root_dir/exp_name.

    Args:
        cf: config object; only its `root_dir` attribute is read here.
        exp_name: name of the dataset sub-directory inside cf.root_dir.
        n_train_images: number of samples written to '<exp_name>/train'.
        n_test_images: number of samples written to '<exp_name>/test'.
        mode: dataset variant, forwarded unchanged to create_image.
        class_diameters: object sizes per class, forwarded to create_image.
        noisy_bg: whether samples get a noisy background, forwarded to
            create_image.

    Raises:
        Exception: if the train or the test directory already exists, so an
            existing dataset is never mixed with freshly generated samples.
    """
    train_dir = os.path.join(cf.root_dir, exp_name, 'train')
    test_dir = os.path.join(cf.root_dir, exp_name, 'test')
    if os.path.isdir(train_dir) or os.path.isdir(test_dir):
        raise Exception("A dataset directory already exists at {}. ".format(cf.root_dir)+
                        "Please make sure to generate data in an empty or new directory.")
    os.makedirs(train_dir, exist_ok=False)
    os.makedirs(test_dir, exist_ok=False)

    # enforced distance between object center and image edge.
    foreground_margin = int(np.ceil(np.max(class_diameters) / 1.25))

    # one argument list per sample; sample indices restart at 0 in each split.
    info = []
    info += [[train_dir, six, foreground_margin, class_diameters, mode, noisy_bg] for six in range(n_train_images)]
    info += [[test_dir, six, foreground_margin, class_diameters, mode, noisy_bg] for six in range(n_test_images)]

    print('starting creation of {} images'.format(len(info)))
    if DO_MP:
        # Leave one core free, but never ask for fewer than one worker:
        # os.cpu_count() may return None, and on a single-core machine
        # cpu_count() - 1 would be 0, which Pool rejects.
        n_workers = max(1, (os.cpu_count() or 1) - 1)
        # The context manager tears the pool down even if a worker raises.
        with Pool(processes=n_workers) as pool:
            pool.starmap(create_image, info, chunksize=1)
    else:
        for inputs in info:
            create_image(*inputs)
    # create_image prints its progress without a newline; finish that line.
    print()

    aggregate_meta_info(train_dir)
    aggregate_meta_info(test_dir)
def aggregate_meta_info(exp_dir):
    """Collect all per-sample meta info pickles of a directory into one table.

    Every file in `exp_dir` whose name contains 'meta_info' is unpickled and
    becomes one row with the columns 'path', 'class_id' and 'pid'. The table
    is saved as 'info_df.pickle' in the same directory.

    Files are processed in lexicographic file-name order, so the row order of
    the saved table is reproducible regardless of the order in which the
    file system lists the directory.

    Args:
        exp_dir: directory holding the meta info pickles; also receives the
            aggregated 'info_df.pickle'.
    """
    meta_files = sorted(name for name in os.listdir(exp_dir) if 'meta_info' in name)

    records = []
    for name in meta_files:
        # NOTE: unpickling can execute arbitrary code; only run this on
        # directories whose contents were produced by a trusted source.
        with open(os.path.join(exp_dir, name), 'rb') as handle:
            records.append(pickle.load(handle))

    # build the frame in one go instead of growing it row by row.
    df = pd.DataFrame(records, columns=['path', 'class_id', 'pid'])
    df.to_pickle(os.path.join(exp_dir, 'info_df.pickle'))
    print("aggregated meta info to df with length", len(df))
if __name__ == '__main__':
    # Script entry point: read the command line options, load the project
    # config and generate one dataset per requested mode, then report the
    # total wall-clock runtime.
    start_time = time.time()

    # make the project's utils package importable; the path is resolved
    # against the current working directory.
    import sys
    sys.path.append("../..")
    import utils.exp_utils as utils

    available_modes = ['donuts_shape', 'donuts_pattern', 'circles_scale']
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', '--modes', nargs='+', type=str, default=available_modes, choices=available_modes)
    parser.add_argument('--noise', action='store_true', help="if given, add noise to the sample bg.")
    parser.add_argument('--n_train', type=int, default=2500, help="Nr. of train images to generate.")
    parser.add_argument('--n_test', type=int, default=1000, help="Nr. of test images to generate.")
    args = parser.parse_args()

    cf = utils.import_module("cf", "configs.py").configs()

    # object sizes of (class 0, class 1) for each dataset variant.
    diameters_by_mode = {
        'donuts_shape': (20, 20),
        'donuts_pattern': (20, 20),
        'circles_scale': (19, 20)
    }

    # noisy datasets are stored under a separate experiment name.
    name_suffix = "_noise" if args.noise else ""
    for mode in args.modes:
        generate_dataset(
            cf,
            mode + name_suffix,
            n_train_images=args.n_train,
            n_test_images=args.n_test,
            mode=mode,
            class_diameters=diameters_by_mode[mode],
            noisy_bg=args.noise,
        )

    # split the elapsed seconds into hours, minutes and seconds for display.
    elapsed = time.time() - start_time
    total_mins, secs = divmod(elapsed, 60)
    hours, mins = divmod(total_mins, 60)
    runtime = "{:d}h:{:02d}m:{:02d}s".format(int(hours), int(mins), int(secs))
    print("{} total runtime: {}".format(os.path.basename(__file__), runtime))