--- a
+++ b/matlab_to_numpy.py
@@ -0,0 +1,84 @@
+"""
+Read all the Matlab files in the 'data' directory and export 3 numpy arrays:
+- labels.npy
+- images.npy
+- masks.npy
+
+Usage: python matlab_to_numpy.py ~/brain_tumor_dataset
+"""
+
+import os
+import argparse
+import sys
+import numpy as np
+import hdf5storage
+import cv2
+
+
+class NoDataFound(Exception):
+    """Signal that the expected dataset content could not be found."""
+
+
+def dir_path(path):
+    """Return *path* (backslashes turned into '/') if it holds a 'data' directory.
+
+    Raises NoDataFound when only 'data' is missing, NotADirectoryError otherwise.
+    """
+    root = path.replace('\\', '/')
+    data_path = os.path.join(root, 'data').replace('\\', '/')
+    if not os.path.isdir(data_path):
+        if not os.path.isdir(root):
+            raise NotADirectoryError(root)
+        message = 'Could not find a "data" folder inside directory. {} does not exist.'
+        raise NoDataFound(message.format(data_path))
+    return root
+
+
+# Command line: dataset root (validated by dir_path) and the square output size in pixels.
+parser = argparse.ArgumentParser()
+parser.add_argument('path', help='path to the brain_tumor_dataset directory', type=dir_path)
+parser.add_argument('--image-dimension', '-d', default=512, help='dimension of the image', type=int)
+args = parser.parse_args()
+
+labels = []
+images = []
+masks = []
+
+data_dir = os.path.join(args.path, 'data').replace('\\', '/')
+# os.listdir returns entries in arbitrary order; sort by name so the saved arrays are reproducible
+files = sorted(os.listdir(data_dir))
+total = len(files)
+if total == 0:
+    # fail early with a clear message instead of a NameError on the final progress line
+    raise NoDataFound('No files found in {}'.format(data_dir))
+size = (args.image_dimension, args.image_dimension)
+for i, file in enumerate(files, start=1):
+    # load matlab file with hdf5storage as scipy.io.loadmat does not support v7.3 files
+    mat_file = hdf5storage.loadmat(os.path.join(data_dir, file))['cjdata'][0]
+
+    # resize image (field 2) and mask (field 4) to a unique size; field 0 is used as the label
+    image = cv2.resize(mat_file[2], dsize=size, interpolation=cv2.INTER_CUBIC)
+    mask = cv2.resize(mat_file[4].astype('uint8'), dsize=size, interpolation=cv2.INTER_CUBIC)
+
+    labels.append(int(mat_file[0]))
+    images.append(image)
+    masks.append(mask.astype(bool))
+
+    if i % 10 == 0 or i == total:
+        # rewrite the same console line ('\r') every 10 files and once more after the last file
+        sys.stdout.write('\r[{}/{}] images loaded: {:.1f} %'.format(i, total, i / float(total) * 100))
+        sys.stdout.flush()
+
+labels = np.array(labels)
+images = np.array(images)
+masks = np.array(masks)
+
+print('\nlabels:', labels.shape)
+print('images:', images.shape)
+print('masks:', masks.shape)
+
+np.save(os.path.join(args.path, 'labels.npy'), labels)
+np.save(os.path.join(args.path, 'images.npy'), images)
+np.save(os.path.join(args.path, 'masks.npy'), masks)
+
+print('labels.npy, images.npy, masks.npy saved in', args.path)