BleedDetection / Git / Diff of /experiments/lidc_exp/pack

Models:
DavidFeaster/
BleedDetection
Downloads: 1
Diff of /experiments/lidc_exp/pack_dataset.py [000000] .. [bb7f56]
Switch to side-by-side view

--- a
+++ b/experiments/lidc_exp/pack_dataset.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python
+# Copyright 2018 Division of Medical Image Computing, German Cancer Research Center (DKFZ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import numpy as np
+from multiprocessing import Pool
+import os
+import subprocess
+
+
+def get_case_identifiers(folder):
+    case_identifiers = [i[:-4] for i in os.listdir(folder) if i.endswith("npz")]
+    return case_identifiers
+
+
+def convert_to_npy(npz_file):
+    if not os.path.isfile(npz_file[:-3] + "npy"):
+        a = np.load(npz_file)['data']
+        np.save(npz_file[:-3] + "npy", a)
+
+
+def unpack_dataset(folder, threads=8):
+    case_identifiers = get_case_identifiers(folder)
+    p = Pool(threads)
+    npz_files = [os.path.join(folder, i + ".npz") for i in case_identifiers]
+    p.map(convert_to_npy, npz_files)
+    p.close()
+    p.join()
+
+
+def delete_npy(folder):
+    case_identifiers = get_case_identifiers(folder)
+    npy_files = [os.path.join(folder, i + ".npy") for i in case_identifiers]
+    npy_files = [i for i in npy_files if os.path.isfile(i)]
+    for n in npy_files:
+        os.remove(n)
+
+
+def mp_pack(inputs):
+    ix , f = inputs
+    file_path, source_dir, target_dir = f
+    print('packing file number: {}'.format(ix))
+    if 'npy' in file_path:
+        source_path = os.path.join(source_dir, file_path)
+        target_path = os.path.join(target_dir, file_path.split('.')[0] + '.npz')
+        arr = np.load(source_path, mmap_mode='r')
+        np.savez_compressed(target_path, data=arr)
+        print('target_path', target_path)
+
+
+if __name__ == '__main__':
+
+    use_previous = False
+    source_dir = '/mnt/hdd2/lidc/test_pp_rounding/'
+    target_dir = '/mnt/hdd2/lidc/test_pp_rounding_packed/'
+
+    if use_previous:
+        file_list = [ii for ii in os.listdir(source_dir) if not ii in os.listdir(target_dir)]
+    else:
+        file_list = os.listdir(source_dir)
+    info_list = [[ii, source_dir, target_dir] for ii in file_list]
+
+    if not os.path.exists(target_dir):
+        os.mkdir(target_dir)
+
+    pool = Pool(processes=12)
+    p1 = pool.map(mp_pack, enumerate(info_list), chunksize=1)
+    pool.close()
+    pool.join()
+
+    subprocess.call('cp {} {}'.format(os.path.join(source_dir, 'info_df.pickle'), os.path.join(target_dir, 'info_df.pickle')), shell=True)
\ No newline at end of file