ML-and-MS-in-esophageal-c / Git / [0fa88a] /generating_matched_tiles_msi

Models:
DanielG/
ML-and-MS-in-esophageal-c
Downloads: 1
[0fa88a]: / generating_matched_tiles_msi_he.py
History
Download this file
196 lines (168 with data), 8.7 kB

# -*- coding: utf-8 -*-
"""
Created on Tue Apr  9 14:34:02 2019

@author: m.beuque
"""
import numpy as np
import cv2
from pyimzML.ImzmlParser import ImzMLParser
from matplotlib import pyplot as plt
from PIL import Image, ImageDraw
from utils import extract_transform_matrix,process_spotlist,polygon_extraction
from tqdm import tqdm
import os

def generator_msi_data(slide, plot, generate,main_path):
    # main_path : initialize the paths with the MSI slide folder
    
    #find all necessary files
    slide_files = os.listdir(os.path.join(main_path,slide))
    sample_name = slide
    path_slide = os.path.join(main_path, slide)
    for file in slide_files:
        if file.endswith('imzML'):
            imzMLfile = os.path.join(path_slide,file)
        elif file.endswith('mis'):
            path_mis = os.path.join(path_slide,file)
        elif file.endswith('txt'):
            path_spotlist = os.path.join(path_slide,file)
        elif file.endswith('jpg'):
            path_jpg = os.path.join(path_slide,file)
    histo_img = cv2.imread(path_jpg)
    path_tiles_dir = os.path.join(path_slide, 'tiles')
    if not os.path.exists(path_tiles_dir):
        os.makedirs(path_tiles_dir)
    
    ##get informations from IMZML file
    imzML = ImzMLParser(imzMLfile)
    
    nr_of_datapoints = len(imzML.mzLengths)
    nr_of_spectra = imzML.mzLengths[0] 
    array_coord = np.array(imzML.coordinates)
    cols  = max(array_coord[:,0])
    rows  = max(array_coord[:,1])
    
    ##reading and storing MSI data and read pixel coordinates
    msidata = np.zeros((nr_of_datapoints,nr_of_spectra))
    xy_positions = np.zeros((nr_of_datapoints, 2))
    for idx, (x,y,z) in enumerate(imzML.coordinates):
        mzs, intensities = imzML.getspectrum(idx)
        msidata[idx, :] = intensities
        xy_positions[idx,:]= [x, y]
    msidata = np.transpose(msidata)
    
    ##calculate total ion count values for normalization
    tics = np.sum(msidata,0)
    canvas = np.zeros((rows,cols))
    for i in range(nr_of_datapoints):
        canvas[int(xy_positions[i,1]-1), int(xy_positions[i,0]-1)] = tics[i]
    
    if plot : 
        plt.figure(figsize=(12,12))
        plt.imshow(np.squeeze(canvas))
        plt.title('Canvas')
        plt.legend()
        plt.show()
        
    if plot : 
        plt.figure(figsize=(12,12))
        plt.imshow(np.squeeze(histo_img))
        plt.title('Canvas')
        plt.legend()
        plt.show()
        
    #normalize msi_data
    repeated_tics =np.repeat(tics, list( msidata.shape)[0], axis = 0)
    repeated_tics = repeated_tics.reshape(msidata.shape)
    msidata = np.divide(msidata, repeated_tics)
    
    del repeated_tics
   
    ## create geometric transformations
    fixed_points_optical, moving_points_motor,moving_points_optical,fixed_points_histo = extract_transform_matrix(path_mis)
    motor2optical = cv2.getAffineTransform(np.float32(moving_points_motor), np.float32(fixed_points_optical))
    optical2histo =  cv2.getAffineTransform(np.float32(moving_points_optical), np.float32(fixed_points_histo))
    
    spotlist = process_spotlist(path_spotlist)
    
    moving_points_msi = np.float32(spotlist[:,[2,3]][[0, int(nr_of_datapoints/2), nr_of_datapoints -1]])
    fixed_points_motor = np.float32(spotlist[:,[0,1]][[0, int(nr_of_datapoints/2), nr_of_datapoints -1]])
    msi2motor = cv2.getAffineTransform(moving_points_msi, fixed_points_motor)
    
    new_msi2motor = np.concatenate((np.transpose(msi2motor), np.array([[0],[0],[1]])), axis = 1)
    new_motor2optical = np.concatenate((np.transpose(motor2optical), np.array([[0],[0],[1]])), axis = 1)
    new_optical2histo = np.concatenate((np.transpose(optical2histo), np.array([[0],[0],[1]])), axis = 1)
    
    
    new_msi3Dhisto = np.dot(np.dot(new_msi2motor,new_motor2optical), new_optical2histo)
    new_msi2Dhisto = new_msi3Dhisto[:,0:2]
    
    #overlay MSI image and histopathology image
    img_rows, img_cols, ch = histo_img.shape
    transform_img = cv2.warpAffine(canvas,np.transpose(new_msi2Dhisto),(img_cols,img_rows))
    overlay = cv2.addWeighted(np.float64(histo_img[:,:,1]), 1, transform_img, 0.01, 0) 
    
    if plot :
        plt.figure()
        plt.imshow(transform_img)
        plt.title('resized canvas')
        plt.legend()
        plt.show()
        plt.figure()
        plt.imshow(overlay)
        plt.title('resized canvas and overlay')
        plt.legend()
        plt.show()
    
    #create dictionnary with raster number and label
    dict_roi = polygon_extraction(path_mis)
    # consider region of interest 
    #find maximum coordinates for polygon
    max_x = 0
    max_y = 0
    for i, poly in enumerate(list(dict_roi.keys())) : 
        polygon = np.array(dict_roi[poly])
        if max(polygon[:,0]) > max_x :
            max_x = max(polygon[:,0])
        if max(polygon[:,1]) > max_y :
            max_y = max(polygon[:,1])
            
    img_test = Image.new('L', (max_x,max_y), 0)
    for i, poly in enumerate(list(dict_roi.keys())) : 
        polygon = dict_roi[poly]
        ImageDraw.Draw(img_test).polygon(polygon, outline=i+1, fill=i+1)
    mask = np.array(img_test)
    transform_poly = cv2.warpAffine(mask,np.transpose(new_optical2histo[:,0:2]),(img_cols,img_rows))
    
    
    if plot :
        plt.figure()
        plt.imshow(mask)
        plt.title('drawing of all the regions of interest')
        plt.legend()
        plt.show()
        plt.figure()
        plt.imshow(transform_poly)
        plt.title('drawing of all the regions of interest resized')
        plt.legend()
        plt.show()
    
    
    if generate:
        #create the information table
        collect_points = []
        for i in tqdm(range(nr_of_datapoints)):
            y_ms, x_ms = int(xy_positions[i,1]-1), int(xy_positions[i,0]-1)
            x_histo = x_ms*np.transpose(new_msi2Dhisto)[1,0] + y_ms*np.transpose(new_msi2Dhisto)[1,1] + np.transpose(new_msi2Dhisto)[1,2]
            y_histo = x_ms*np.transpose(new_msi2Dhisto)[0,0] + y_ms*np.transpose(new_msi2Dhisto)[0,1] + np.transpose(new_msi2Dhisto)[0,2]
            collect_points.append([x_histo, y_histo])
            
        collect_points = np.array(collect_points)
        labels = []
        size_image = 96
        tile = np.float64(histo_img)
        name_rois =list( dict_roi.keys())
        histo_img = np.float64(histo_img)
        with open(os.path.join(r'.\msi_tables','table_for_sample_' + sample_name + '.txt'), "a", encoding="utf-8") as file:
            header = list(np.array(mzs, dtype = 'U25')) + ['label', 'image_name', 'x', 'y']
            header = ';'.join(header)
            file.write(header)
            file.write("\n")
            file.close()
        for i in tqdm(range(nr_of_datapoints)):
            with open(os.path.join(r'.\msi_tables','table_for_sample_' + sample_name + '.txt'), "a", encoding="utf-8") as file:
                if int(collect_points[i,1] - size_image//2) > 0 and int(collect_points[i,0] - size_image//2) > 0 and collect_points[i,1] < img_cols and collect_points[i,0] < img_rows:
                    labeled_tile = transform_poly[int(collect_points[i,0] - size_image//2): int(collect_points[i,0] + size_image//2) ,int(collect_points[i,1] - size_image//2): int(collect_points[i,1] + size_image//2) ]
                    length, width = labeled_tile.shape
                    temp_label = transform_poly[int(collect_points[i,0]), int(collect_points[i,1])] 
                    labels.append(temp_label)
                    if temp_label > 0:
                        temp_title = "tile_"+ str(int(xy_positions[i,1]-1)) + "_" + str(int(xy_positions[i,0]-1)) + ".jpg"
                        if not os.path.isfile(os.path.join(path_tiles_dir,temp_title)):
                            cv2.imwrite(os.path.join(path_tiles_dir,temp_title), tile[int(collect_points[i,0] - size_image//2): int(collect_points[i,0] + size_image//2) ,int(collect_points[i,1] - size_image//2): int(collect_points[i,1] + size_image//2) ])
                        row = np.concatenate((msidata[:,i], np.array([name_rois[temp_label-1], temp_title, int(xy_positions[i,1]-1), int(xy_positions[i,0]-1)])), axis = None)
                        row = ';'.join(list(row))
                        file.write(row)
                        file.write("\n")
                        file.close()
        del histo_img
    return('sample ' + sample_name + ' done')
    

main_path = '.'    
for slide in os.listdir(main_path):
    done = generator_msi_data(slide, plot = False, generate = True,main_path)
    print(done)