--- a
+++ b/generating_matched_tiles_msi_he.py
@@ -0,0 +1,196 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Apr  9 14:34:02 2019
+
+@author: m.beuque
+"""
+import numpy as np
+import cv2
+from pyimzML.ImzmlParser import ImzMLParser
+from matplotlib import pyplot as plt
+from PIL import Image, ImageDraw
+from utils import extract_transform_matrix,process_spotlist,polygon_extraction
+from tqdm import tqdm
+import os
+
+def generator_msi_data(slide, plot, generate,main_path):
+    # main_path : initialize the paths with the MSI slide folder
+    
+    #find all necessary files
+    slide_files = os.listdir(os.path.join(main_path,slide))
+    sample_name = slide
+    path_slide = os.path.join(main_path, slide)
+    for file in slide_files:
+        if file.endswith('imzML'):
+            imzMLfile = os.path.join(path_slide,file)
+        elif file.endswith('mis'):
+            path_mis = os.path.join(path_slide,file)
+        elif file.endswith('txt'):
+            path_spotlist = os.path.join(path_slide,file)
+        elif file.endswith('jpg'):
+            path_jpg = os.path.join(path_slide,file)
+    histo_img = cv2.imread(path_jpg)
+    path_tiles_dir = os.path.join(path_slide, 'tiles')
+    if not os.path.exists(path_tiles_dir):
+        os.makedirs(path_tiles_dir)
+    
+    ##get informations from IMZML file
+    imzML = ImzMLParser(imzMLfile)
+    
+    nr_of_datapoints = len(imzML.mzLengths)
+    nr_of_spectra = imzML.mzLengths[0] 
+    array_coord = np.array(imzML.coordinates)
+    cols  = max(array_coord[:,0])
+    rows  = max(array_coord[:,1])
+    
+    ##reading and storing MSI data and read pixel coordinates
+    msidata = np.zeros((nr_of_datapoints,nr_of_spectra))
+    xy_positions = np.zeros((nr_of_datapoints, 2))
+    for idx, (x,y,z) in enumerate(imzML.coordinates):
+        mzs, intensities = imzML.getspectrum(idx)
+        msidata[idx, :] = intensities
+        xy_positions[idx,:]= [x, y]
+    msidata = np.transpose(msidata)
+    
+    ##calculate total ion count values for normalization
+    tics = np.sum(msidata,0)
+    canvas = np.zeros((rows,cols))
+    for i in range(nr_of_datapoints):
+        canvas[int(xy_positions[i,1]-1), int(xy_positions[i,0]-1)] = tics[i]
+    
+    if plot : 
+        plt.figure(figsize=(12,12))
+        plt.imshow(np.squeeze(canvas))
+        plt.title('Canvas')
+        plt.legend()
+        plt.show()
+        
+    if plot : 
+        plt.figure(figsize=(12,12))
+        plt.imshow(np.squeeze(histo_img))
+        plt.title('Canvas')
+        plt.legend()
+        plt.show()
+        
+    #normalize msi_data
+    repeated_tics =np.repeat(tics, list( msidata.shape)[0], axis = 0)
+    repeated_tics = repeated_tics.reshape(msidata.shape)
+    msidata = np.divide(msidata, repeated_tics)
+    
+    del repeated_tics
+   
+    ## create geometric transformations
+    fixed_points_optical, moving_points_motor,moving_points_optical,fixed_points_histo = extract_transform_matrix(path_mis)
+    motor2optical = cv2.getAffineTransform(np.float32(moving_points_motor), np.float32(fixed_points_optical))
+    optical2histo =  cv2.getAffineTransform(np.float32(moving_points_optical), np.float32(fixed_points_histo))
+    
+    spotlist = process_spotlist(path_spotlist)
+    
+    moving_points_msi = np.float32(spotlist[:,[2,3]][[0, int(nr_of_datapoints/2), nr_of_datapoints -1]])
+    fixed_points_motor = np.float32(spotlist[:,[0,1]][[0, int(nr_of_datapoints/2), nr_of_datapoints -1]])
+    msi2motor = cv2.getAffineTransform(moving_points_msi, fixed_points_motor)
+    
+    new_msi2motor = np.concatenate((np.transpose(msi2motor), np.array([[0],[0],[1]])), axis = 1)
+    new_motor2optical = np.concatenate((np.transpose(motor2optical), np.array([[0],[0],[1]])), axis = 1)
+    new_optical2histo = np.concatenate((np.transpose(optical2histo), np.array([[0],[0],[1]])), axis = 1)
+    
+    
+    new_msi3Dhisto = np.dot(np.dot(new_msi2motor,new_motor2optical), new_optical2histo)
+    new_msi2Dhisto = new_msi3Dhisto[:,0:2]
+    
+    #overlay MSI image and histopathology image
+    img_rows, img_cols, ch = histo_img.shape
+    transform_img = cv2.warpAffine(canvas,np.transpose(new_msi2Dhisto),(img_cols,img_rows))
+    overlay = cv2.addWeighted(np.float64(histo_img[:,:,1]), 1, transform_img, 0.01, 0) 
+    
+    if plot :
+        plt.figure()
+        plt.imshow(transform_img)
+        plt.title('resized canvas')
+        plt.legend()
+        plt.show()
+        plt.figure()
+        plt.imshow(overlay)
+        plt.title('resized canvas and overlay')
+        plt.legend()
+        plt.show()
+    
+    #create dictionnary with raster number and label
+    dict_roi = polygon_extraction(path_mis)
+    # consider region of interest 
+    #find maximum coordinates for polygon
+    max_x = 0
+    max_y = 0
+    for i, poly in enumerate(list(dict_roi.keys())) : 
+        polygon = np.array(dict_roi[poly])
+        if max(polygon[:,0]) > max_x :
+            max_x = max(polygon[:,0])
+        if max(polygon[:,1]) > max_y :
+            max_y = max(polygon[:,1])
+            
+    img_test = Image.new('L', (max_x,max_y), 0)
+    for i, poly in enumerate(list(dict_roi.keys())) : 
+        polygon = dict_roi[poly]
+        ImageDraw.Draw(img_test).polygon(polygon, outline=i+1, fill=i+1)
+    mask = np.array(img_test)
+    transform_poly = cv2.warpAffine(mask,np.transpose(new_optical2histo[:,0:2]),(img_cols,img_rows))
+    
+    
+    if plot :
+        plt.figure()
+        plt.imshow(mask)
+        plt.title('drawing of all the regions of interest')
+        plt.legend()
+        plt.show()
+        plt.figure()
+        plt.imshow(transform_poly)
+        plt.title('drawing of all the regions of interest resized')
+        plt.legend()
+        plt.show()
+    
+    
+    if generate:
+        #create the information table
+        collect_points = []
+        for i in tqdm(range(nr_of_datapoints)):
+            y_ms, x_ms = int(xy_positions[i,1]-1), int(xy_positions[i,0]-1)
+            x_histo = x_ms*np.transpose(new_msi2Dhisto)[1,0] + y_ms*np.transpose(new_msi2Dhisto)[1,1] + np.transpose(new_msi2Dhisto)[1,2]
+            y_histo = x_ms*np.transpose(new_msi2Dhisto)[0,0] + y_ms*np.transpose(new_msi2Dhisto)[0,1] + np.transpose(new_msi2Dhisto)[0,2]
+            collect_points.append([x_histo, y_histo])
+            
+        collect_points = np.array(collect_points)
+        labels = []
+        size_image = 96
+        tile = np.float64(histo_img)
+        name_rois =list( dict_roi.keys())
+        histo_img = np.float64(histo_img)
+        with open(os.path.join(r'.\msi_tables','table_for_sample_' + sample_name + '.txt'), "a", encoding="utf-8") as file:
+            header = list(np.array(mzs, dtype = 'U25')) + ['label', 'image_name', 'x', 'y']
+            header = ';'.join(header)
+            file.write(header)
+            file.write("\n")
+            file.close()
+        for i in tqdm(range(nr_of_datapoints)):
+            with open(os.path.join(r'.\msi_tables','table_for_sample_' + sample_name + '.txt'), "a", encoding="utf-8") as file:
+                if int(collect_points[i,1] - size_image//2) > 0 and int(collect_points[i,0] - size_image//2) > 0 and collect_points[i,1] < img_cols and collect_points[i,0] < img_rows:
+                    labeled_tile = transform_poly[int(collect_points[i,0] - size_image//2): int(collect_points[i,0] + size_image//2) ,int(collect_points[i,1] - size_image//2): int(collect_points[i,1] + size_image//2) ]
+                    length, width = labeled_tile.shape
+                    temp_label = transform_poly[int(collect_points[i,0]), int(collect_points[i,1])] 
+                    labels.append(temp_label)
+                    if temp_label > 0:
+                        temp_title = "tile_"+ str(int(xy_positions[i,1]-1)) + "_" + str(int(xy_positions[i,0]-1)) + ".jpg"
+                        if not os.path.isfile(os.path.join(path_tiles_dir,temp_title)):
+                            cv2.imwrite(os.path.join(path_tiles_dir,temp_title), tile[int(collect_points[i,0] - size_image//2): int(collect_points[i,0] + size_image//2) ,int(collect_points[i,1] - size_image//2): int(collect_points[i,1] + size_image//2) ])
+                        row = np.concatenate((msidata[:,i], np.array([name_rois[temp_label-1], temp_title, int(xy_positions[i,1]-1), int(xy_positions[i,0]-1)])), axis = None)
+                        row = ';'.join(list(row))
+                        file.write(row)
+                        file.write("\n")
+                        file.close()
+        del histo_img
+    return('sample ' + sample_name + ' done')
+    
+
+main_path = '.'    
+for slide in os.listdir(main_path):
+    done = generator_msi_data(slide, plot = False, generate = True,main_path)
+    print(done)
\ No newline at end of file