[bb7f56]: / experiments / lidc_exp / pack_dataset.py

Download this file

83 lines (65 with data), 2.8 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/usr/bin/env python
# Copyright 2018 Division of Medical Image Computing, German Cancer Research Center (DKFZ).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import numpy as np
from multiprocessing import Pool
import os
import subprocess
def get_case_identifiers(folder):
case_identifiers = [i[:-4] for i in os.listdir(folder) if i.endswith("npz")]
return case_identifiers
def convert_to_npy(npz_file):
if not os.path.isfile(npz_file[:-3] + "npy"):
a = np.load(npz_file)['data']
np.save(npz_file[:-3] + "npy", a)
def unpack_dataset(folder, threads=8):
case_identifiers = get_case_identifiers(folder)
p = Pool(threads)
npz_files = [os.path.join(folder, i + ".npz") for i in case_identifiers]
p.map(convert_to_npy, npz_files)
p.close()
p.join()
def delete_npy(folder):
case_identifiers = get_case_identifiers(folder)
npy_files = [os.path.join(folder, i + ".npy") for i in case_identifiers]
npy_files = [i for i in npy_files if os.path.isfile(i)]
for n in npy_files:
os.remove(n)
def mp_pack(inputs):
ix , f = inputs
file_path, source_dir, target_dir = f
print('packing file number: {}'.format(ix))
if 'npy' in file_path:
source_path = os.path.join(source_dir, file_path)
target_path = os.path.join(target_dir, file_path.split('.')[0] + '.npz')
arr = np.load(source_path, mmap_mode='r')
np.savez_compressed(target_path, data=arr)
print('target_path', target_path)
if __name__ == '__main__':
use_previous = False
source_dir = '/mnt/hdd2/lidc/test_pp_rounding/'
target_dir = '/mnt/hdd2/lidc/test_pp_rounding_packed/'
if use_previous:
file_list = [ii for ii in os.listdir(source_dir) if not ii in os.listdir(target_dir)]
else:
file_list = os.listdir(source_dir)
info_list = [[ii, source_dir, target_dir] for ii in file_list]
if not os.path.exists(target_dir):
os.mkdir(target_dir)
pool = Pool(processes=12)
p1 = pool.map(mp_pack, enumerate(info_list), chunksize=1)
pool.close()
pool.join()
subprocess.call('cp {} {}'.format(os.path.join(source_dir, 'info_df.pickle'), os.path.join(target_dir, 'info_df.pickle')), shell=True)