--- a +++ b/mat_to_numpy.py @@ -0,0 +1,83 @@ +""" +Read all the Matlab files in the 'data' directory and export 3 numpy arrays: +- labels.npy +- images.npy +- masks.npy +Usage: python matlab_to_numpy.py ~/brain_tumor_dataset +""" + +import os +import argparse +import sys +import numpy as np +import hdf5storage +import cv2 + + +class NoDataFound(Exception): + pass + + +def dir_path(path): + """Check the path and the existence of a data directory""" + # replace '\' in path for Windows users + path = path.replace('\\', '/') + data_path = os.path.join(path, 'data').replace('\\', '/') + + if os.path.isdir(data_path): + return path + elif os.path.isdir(path): + raise NoDataFound('Could not find a "data" folder inside directory. {} does not exist.' + .format(data_path)) + else: + raise NotADirectoryError(path) + + +parser = argparse.ArgumentParser() +parser.add_argument('path', help='path to the brain_tumor_dataset directory', type=dir_path) +parser.add_argument('--image-dimension', '-d', default=512, help='dimension of the image', type=int) +args = parser.parse_args() + +labels = [] +images = [] +masks = [] + +data_dir = os.path.join(args.path, 'data').replace('\\', '/') +files = os.listdir(data_dir) +for i, file in enumerate(files, start=1): + if i % 10 == 0: + # print the percentage of images loaded + sys.stdout.write('\r[{}/{}] images loaded: {:.1f} %' + .format(i, len(files), i / float(len(files)) * 100)) + sys.stdout.flush() + + # load matlab file with hdf5storage as scipy.io.loadmat does not support v7.3 files + mat_file = hdf5storage.loadmat(os.path.join(data_dir, file))['cjdata'][0] + + # resize image and mask to a unique size + image = cv2.resize(mat_file[2], dsize=(args.image_dimension, args.image_dimension), + interpolation=cv2.INTER_CUBIC) + mask = cv2.resize(mat_file[4].astype('uint8'), dsize=(args.image_dimension, args.image_dimension), + interpolation=cv2.INTER_CUBIC) + + labels.append(int(mat_file[0])) + images.append(image) + masks.append(mask.astype(bool)) + +sys.stdout.write('\r[{}/{}] images loaded: {:.1f} %' + .format(i, len(files), i / float(len(files)) * 100)) +sys.stdout.flush() + +labels = np.array(labels) +images = np.array(images) +masks = np.array(masks) + +print('\nlabels:', labels.shape) +print('images:', images.shape) +print('masks:', masks.shape) + +np.save(os.path.join(args.path, 'labels.npy'), labels) +np.save(os.path.join(args.path, 'images.npy'), images) +np.save(os.path.join(args.path, 'masks.npy'), masks) + +print('labels.npy, images.npy, masks.npy saved in', args.path)