--- a
+++ b/Segmentation/load_datasets.py
@@ -0,0 +1,237 @@
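+"""Utilities for assembling the sarcoma MR segmentation datasets.
+
+Collects matched [T1, T1c, T2, label] filename sets from the image
+directories and loads the volumes as 3D training blocks.
+"""
+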
+import os
+import difflib
+import numpy as np
+import pandas as pd
+from glob2 import glob
+from tqdm import tqdm
+from Segmentation.model_keras import *
+from Radiomics.Nifti_float_to_16bit import get_foldernames, get_filenames
+from adaptive_hist import adaptive_hist_eq_3D
+
+
+def load_filenames(base_path):
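+    """Collect matched [T1, T1c, T2, label] filename sets under base_path.
+
+    Each subdirectory is searched for the T1, contrast-enhanced T1 (T1c),
+    and T2 images and the label file of both scans (filename patterns
+    '*1_*' and '*2_*'), and the four filenames of each scan are grouped
+    into a single [T1, T1c, T2, label] entry.
+    """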
+
+    # Get subdirs
+    subdirs = get_foldernames(base_path)
+
+    # Get filenames
+    image1T1_regexp = '*1_T1.nii'
+    image1T1c_regexp = '*1_T1*C*'
+    image1T2_regexp = '*1_T2*'
+    image2T1_regexp = '*2_T1.nii'
+    image2T1c_regexp = '*2_T1*C*'
+    image2T2_regexp = '*2_T2*'
+    label1_regexp = '*1-label.nii'
+    label2_regexp = '*2-label.nii'
+
+    image1T1 = [get_filenames(sub, image1T1_regexp) for sub in subdirs]
+    image1T1c = [get_filenames(sub, image1T1c_regexp) for sub in subdirs]
+    image1T2 = [get_filenames(sub, image1T2_regexp) for sub in subdirs]
+    image2T1 = [get_filenames(sub, image2T1_regexp) for sub in subdirs]
+    image2T1c = [get_filenames(sub, image2T1c_regexp) for sub in subdirs]
+    image2T2 = [get_filenames(sub, image2T2_regexp) for sub in subdirs]
+    label1 = [get_filenames(sub, label1_regexp) for sub in subdirs]
+    label2 = [get_filenames(sub, label2_regexp) for sub in subdirs]
+
+    # Concatenate lists
+    imageT1 = image1T1 + image2T1
+    imageT1c = image1T1c + image2T1c
+    imageT2 = image1T2 + image2T2
+    label = label1 + label2
+
+    # Flatten the list of filenames
+    imageT1 = [y for x in imageT1 for y in x]
+    imageT1c = [y for x in imageT1c for y in x]
+    imageT2 = [y for x in imageT2 for y in x]
+    label = [y for x in label for y in x]
+
+    # Check for consistency: matched T1, T1c, T2, and label filenames should
+    # differ only in their sequence/label suffix, so a low similarity ratio
+    # flags a mismatched set of files.
+    for i in range(len(imageT1)):
+
+        t1t2_seq = difflib.SequenceMatcher(None, imageT1[i].lower(),
+                                           imageT2[i].lower())
+        t1t1c_seq = difflib.SequenceMatcher(None, imageT1[i].lower(),
+                                            imageT1c[i].lower())
+        t1lab_seq = difflib.SequenceMatcher(None, imageT1[i].lower(),
+                                            label[i].lower())
+
+        ratios = [t1t2_seq.ratio(), t1t1c_seq.ratio(), t1lab_seq.ratio()]
+        if any(ratio < 0.85 for ratio in ratios):
+            print('File mismatch found!')
+            print('Ratio: ', ratios)
+            print('%s\t%s\t%s\t%s' % (os.path.split(imageT1[i])[1],
+                                      os.path.split(imageT1c[i])[1],
+                                      os.path.split(imageT2[i])[1],
+                                      os.path.split(label[i])[1]))
+
+    # Group the four filenames for each case as [T1, T1c, T2, label]
+    filenames = []
+    for i in range(len(imageT1)):
+        filenames.append([imageT1[i], imageT1c[i], imageT2[i], label[i]])
+
+    print('%d files found!' % len(filenames))
+
+    return filenames
+
+
+def load_filenames_2nd(base_path, seg_list='/home/matt/Documents/SegSarcoma/anna_segmentations.txt'):
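+    """Collect filename sets from both batches of segmentations.
+
+    The first batch is gathered from the subdirectories of base_path in the
+    same way as load_filenames(). The second batch is read from the list of
+    segmentation files in seg_list, and the matching T1, T1c, and T2 images
+    are looked up in the folder of each segmentation.
+    """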
+
+    # Load first dataset - Stephanie
+    # Get subdirs
+    subdirs = get_foldernames(base_path)
+
+    # Get filenames
+    image1T1_regexp = '*1_T1.nii'
+    image1T1c_regexp = '*1_T1*C*'
+    image1T2_regexp = '*1_T2*'
+    image2T1_regexp = '*2_T1.nii'
+    image2T1c_regexp = '*2_T1*C*'
+    image2T2_regexp = '*2_T2*'
+    label1_regexp = '*1-label.nii'
+    label2_regexp = '*2-label.nii'
+
+    image1T1 = [get_filenames(sub, image1T1_regexp) for sub in subdirs]
+    image1T1c = [get_filenames(sub, image1T1c_regexp) for sub in subdirs]
+    image1T2 = [get_filenames(sub, image1T2_regexp) for sub in subdirs]
+    image2T1 = [get_filenames(sub, image2T1_regexp) for sub in subdirs]
+    image2T1c = [get_filenames(sub, image2T1c_regexp) for sub in subdirs]
+    image2T2 = [get_filenames(sub, image2T2_regexp) for sub in subdirs]
+    label1 = [get_filenames(sub, label1_regexp) for sub in subdirs]
+    label2 = [get_filenames(sub, label2_regexp) for sub in subdirs]
+
+    # Concatenate lists
+    imageT1 = image1T1 + image2T1
+    imageT1c = image1T1c + image2T1c
+    imageT2 = image1T2 + image2T2
+    label = label1 + label2
+
+    # Flatten the list of filenames
+    imageT1 = [y for x in imageT1 for y in x]
+    imageT1c = [y for x in imageT1c for y in x]
+    imageT2 = [y for x in imageT2 for y in x]
+    label = [y for x in label for y in x]
+
+    # Initialize and append to the list of filenames
+    filenames = []
+    for ii in range(len(imageT1)):
+
+        filenames.append([imageT1[ii], imageT1c[ii], imageT2[ii], label[ii]])
+
+    print('First batch of segmentations\n\t%d files found!' % len(filenames))
+
+    # Load second dataset - Ana
+
+    # Load the list of fixed segmentation files
+    with open(seg_list, 'r') as f:
+        seg_files = f.readlines()
+
+    # Strip whitespace and newlines from the filenames
+    seg_files = [line.strip() for line in seg_files]
+
+    print('Second batch of segmentations\n\t{} files found!'.format(len(seg_files)))
+
+    # Loop over segmentations
+    for seg_file in seg_files:
+
+        # Split the path name
+        path, _ = os.path.split(seg_file)
+
+        # Get image names
+        imageT1 = glob(os.path.join(path, 'T1.nii*'))[0]
+        imageT1c = glob(os.path.join(path, 'T1c.nii*'))[0]
+        imageT2 = glob(os.path.join(path, 'T2_cor.nii*'))[0]
+
+        # Append names to the list of files
+        filenames.append([imageT1, imageT1c, imageT2, seg_file])
+
+    print('Total: %d files!' % len(filenames))
+
+    return filenames
+
+
+def load_data(filenames, block_size, oversamp, lab_trun, adptive_hist=False):
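+    """Load, crop, and block the image/label volumes listed in filenames.
+
+    Each entry of filenames is a list whose last element is the label file
+    and whose preceding elements are the input image files. Volumes whose
+    image and label dimensions match are cropped, optionally adaptive
+    histogram equalized, and split into blocks of size block_size. Returns
+    the stacked blocks X and Y and the shape of the last cropped volume.
+    """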
+    # print('Loading datasets')
+    X = None
+    Y = None
+    orig_size = None
+    for file in tqdm(filenames, ncols=100):
+
+        tmpx = load_data_3D(file[:-1])
+        sz = tmpx.shape
+
+        tmpy = load_label_3D(file[-1], sz)
+
+        # import matplotlib
+        # matplotlib.use('TkAgg')
+        # plt.figure()
+        # plt.imshow(np.concatenate((tmpx[35, :, :, 0].T, tmpy[35, :, :, 0].T), axis=1), cmap='gray')
+        # plt.show()
+
+        # If the image dimensions of the label and inputs match, proceed
+        if tmpx.shape[:3] == tmpy.shape[:3]:
+
+            # Crop the data
+            mask = np.zeros(shape=(sz[1]), dtype=bool)
+            mask[30:-30] = True
+            tmpx = tmpx[:, :, mask, :]
+            tmpy = tmpy[:, :, mask, :]
+            orig_size = tmpx.shape
+
+            # Adaptive histogram equalization
+            if adptive_hist:
+                tmpx = adaptive_hist_eq_3D(tmpx, patch_size=[20, 20, 20])
+
+            tmpx, tmpy = make_train_3D(tmpx, tmpy, block_size=block_size,
+                                       oversamp=oversamp,
+                                       lab_trun=lab_trun)
+
+            # Stack blocks from this case onto the running training arrays
+            if X is None:
+                X = tmpx
+                Y = tmpy
+            else:
+                X = np.concatenate((X, tmpx), axis=0)
+                Y = np.concatenate((Y, tmpy), axis=0)
+
+    return X, Y, orig_size
+
+
+def keep_t2(filenames):
+    """
+    Keeps only filenames which pertain to T2 weighted images and labels
+    Args:
+        filenames (list): a list of a list of filenames. T2 images are found
+            in indexes filenames[i][2].
+
+    Returns:
+        list of list: a list with len(filenames). Each element is a list of
+            two strings, T2 and label images.
+
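+    Example (hypothetical filenames):
+        >>> keep_t2([['s1_T1.nii', 's1_T1C.nii', 's1_T2.nii', 's1-label.nii']])
+        [['s1_T2.nii', 's1-label.nii']]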
+    """
+
+    files = []
+
+    for file in filenames:
+
+        files.append(file[2:])
+
+    return files
+
+
+if __name__ == "__main__":
+
+    image_base_path = '/media/matt/Seagate Expansion Drive/MR Data/MR_Images_Sarcoma'
+
+    filenames = load_filenames_2nd(base_path=image_base_path)
+
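+    # Note: only block_size, oversamp, and lab_trun are used by load_data
+    # below; the remaining settings are defined here but not used in this
+    # script.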
+    epochs = 600
+    batch_size = 20
+    block_size = [18, 142, 142]
+    oversamp = 1.0
+    oversamp_test = 1.0
+    lab_trun = 2
+    im_freq = 50
+    val_split = 0.2
+    test_split = 0.1
+    lr = 2e-4
+    X, Y, orig_sz = load_data(filenames, block_size, oversamp, lab_trun)