|
a |
|
b/generating_matched_tiles_msi_he.py |
|
|
1 |
# -*- coding: utf-8 -*- |
|
|
2 |
""" |
|
|
3 |
Created on Tue Apr 9 14:34:02 2019 |
|
|
4 |
|
|
|
5 |
@author: m.beuque |
|
|
6 |
""" |
|
|
7 |
import numpy as np |
|
|
8 |
import cv2 |
|
|
9 |
from pyimzML.ImzmlParser import ImzMLParser |
|
|
10 |
from matplotlib import pyplot as plt |
|
|
11 |
from PIL import Image, ImageDraw |
|
|
12 |
from utils import extract_transform_matrix,process_spotlist,polygon_extraction |
|
|
13 |
from tqdm import tqdm |
|
|
14 |
import os |
|
|
15 |
|
|
|
16 |
def generator_msi_data(slide, plot, generate,main_path): |
|
|
17 |
# main_path : initialize the paths with the MSI slide folder |
|
|
18 |
|
|
|
19 |
#find all necessary files |
|
|
20 |
slide_files = os.listdir(os.path.join(main_path,slide)) |
|
|
21 |
sample_name = slide |
|
|
22 |
path_slide = os.path.join(main_path, slide) |
|
|
23 |
for file in slide_files: |
|
|
24 |
if file.endswith('imzML'): |
|
|
25 |
imzMLfile = os.path.join(path_slide,file) |
|
|
26 |
elif file.endswith('mis'): |
|
|
27 |
path_mis = os.path.join(path_slide,file) |
|
|
28 |
elif file.endswith('txt'): |
|
|
29 |
path_spotlist = os.path.join(path_slide,file) |
|
|
30 |
elif file.endswith('jpg'): |
|
|
31 |
path_jpg = os.path.join(path_slide,file) |
|
|
32 |
histo_img = cv2.imread(path_jpg) |
|
|
33 |
path_tiles_dir = os.path.join(path_slide, 'tiles') |
|
|
34 |
if not os.path.exists(path_tiles_dir): |
|
|
35 |
os.makedirs(path_tiles_dir) |
|
|
36 |
|
|
|
37 |
##get informations from IMZML file |
|
|
38 |
imzML = ImzMLParser(imzMLfile) |
|
|
39 |
|
|
|
40 |
nr_of_datapoints = len(imzML.mzLengths) |
|
|
41 |
nr_of_spectra = imzML.mzLengths[0] |
|
|
42 |
array_coord = np.array(imzML.coordinates) |
|
|
43 |
cols = max(array_coord[:,0]) |
|
|
44 |
rows = max(array_coord[:,1]) |
|
|
45 |
|
|
|
46 |
##reading and storing MSI data and read pixel coordinates |
|
|
47 |
msidata = np.zeros((nr_of_datapoints,nr_of_spectra)) |
|
|
48 |
xy_positions = np.zeros((nr_of_datapoints, 2)) |
|
|
49 |
for idx, (x,y,z) in enumerate(imzML.coordinates): |
|
|
50 |
mzs, intensities = imzML.getspectrum(idx) |
|
|
51 |
msidata[idx, :] = intensities |
|
|
52 |
xy_positions[idx,:]= [x, y] |
|
|
53 |
msidata = np.transpose(msidata) |
|
|
54 |
|
|
|
55 |
##calculate total ion count values for normalization |
|
|
56 |
tics = np.sum(msidata,0) |
|
|
57 |
canvas = np.zeros((rows,cols)) |
|
|
58 |
for i in range(nr_of_datapoints): |
|
|
59 |
canvas[int(xy_positions[i,1]-1), int(xy_positions[i,0]-1)] = tics[i] |
|
|
60 |
|
|
|
61 |
if plot : |
|
|
62 |
plt.figure(figsize=(12,12)) |
|
|
63 |
plt.imshow(np.squeeze(canvas)) |
|
|
64 |
plt.title('Canvas') |
|
|
65 |
plt.legend() |
|
|
66 |
plt.show() |
|
|
67 |
|
|
|
68 |
if plot : |
|
|
69 |
plt.figure(figsize=(12,12)) |
|
|
70 |
plt.imshow(np.squeeze(histo_img)) |
|
|
71 |
plt.title('Canvas') |
|
|
72 |
plt.legend() |
|
|
73 |
plt.show() |
|
|
74 |
|
|
|
75 |
#normalize msi_data |
|
|
76 |
repeated_tics =np.repeat(tics, list( msidata.shape)[0], axis = 0) |
|
|
77 |
repeated_tics = repeated_tics.reshape(msidata.shape) |
|
|
78 |
msidata = np.divide(msidata, repeated_tics) |
|
|
79 |
|
|
|
80 |
del repeated_tics |
|
|
81 |
|
|
|
82 |
## create geometric transformations |
|
|
83 |
fixed_points_optical, moving_points_motor,moving_points_optical,fixed_points_histo = extract_transform_matrix(path_mis) |
|
|
84 |
motor2optical = cv2.getAffineTransform(np.float32(moving_points_motor), np.float32(fixed_points_optical)) |
|
|
85 |
optical2histo = cv2.getAffineTransform(np.float32(moving_points_optical), np.float32(fixed_points_histo)) |
|
|
86 |
|
|
|
87 |
spotlist = process_spotlist(path_spotlist) |
|
|
88 |
|
|
|
89 |
moving_points_msi = np.float32(spotlist[:,[2,3]][[0, int(nr_of_datapoints/2), nr_of_datapoints -1]]) |
|
|
90 |
fixed_points_motor = np.float32(spotlist[:,[0,1]][[0, int(nr_of_datapoints/2), nr_of_datapoints -1]]) |
|
|
91 |
msi2motor = cv2.getAffineTransform(moving_points_msi, fixed_points_motor) |
|
|
92 |
|
|
|
93 |
new_msi2motor = np.concatenate((np.transpose(msi2motor), np.array([[0],[0],[1]])), axis = 1) |
|
|
94 |
new_motor2optical = np.concatenate((np.transpose(motor2optical), np.array([[0],[0],[1]])), axis = 1) |
|
|
95 |
new_optical2histo = np.concatenate((np.transpose(optical2histo), np.array([[0],[0],[1]])), axis = 1) |
|
|
96 |
|
|
|
97 |
|
|
|
98 |
new_msi3Dhisto = np.dot(np.dot(new_msi2motor,new_motor2optical), new_optical2histo) |
|
|
99 |
new_msi2Dhisto = new_msi3Dhisto[:,0:2] |
|
|
100 |
|
|
|
101 |
#overlay MSI image and histopathology image |
|
|
102 |
img_rows, img_cols, ch = histo_img.shape |
|
|
103 |
transform_img = cv2.warpAffine(canvas,np.transpose(new_msi2Dhisto),(img_cols,img_rows)) |
|
|
104 |
overlay = cv2.addWeighted(np.float64(histo_img[:,:,1]), 1, transform_img, 0.01, 0) |
|
|
105 |
|
|
|
106 |
if plot : |
|
|
107 |
plt.figure() |
|
|
108 |
plt.imshow(transform_img) |
|
|
109 |
plt.title('resized canvas') |
|
|
110 |
plt.legend() |
|
|
111 |
plt.show() |
|
|
112 |
plt.figure() |
|
|
113 |
plt.imshow(overlay) |
|
|
114 |
plt.title('resized canvas and overlay') |
|
|
115 |
plt.legend() |
|
|
116 |
plt.show() |
|
|
117 |
|
|
|
118 |
#create dictionnary with raster number and label |
|
|
119 |
dict_roi = polygon_extraction(path_mis) |
|
|
120 |
# consider region of interest |
|
|
121 |
#find maximum coordinates for polygon |
|
|
122 |
max_x = 0 |
|
|
123 |
max_y = 0 |
|
|
124 |
for i, poly in enumerate(list(dict_roi.keys())) : |
|
|
125 |
polygon = np.array(dict_roi[poly]) |
|
|
126 |
if max(polygon[:,0]) > max_x : |
|
|
127 |
max_x = max(polygon[:,0]) |
|
|
128 |
if max(polygon[:,1]) > max_y : |
|
|
129 |
max_y = max(polygon[:,1]) |
|
|
130 |
|
|
|
131 |
img_test = Image.new('L', (max_x,max_y), 0) |
|
|
132 |
for i, poly in enumerate(list(dict_roi.keys())) : |
|
|
133 |
polygon = dict_roi[poly] |
|
|
134 |
ImageDraw.Draw(img_test).polygon(polygon, outline=i+1, fill=i+1) |
|
|
135 |
mask = np.array(img_test) |
|
|
136 |
transform_poly = cv2.warpAffine(mask,np.transpose(new_optical2histo[:,0:2]),(img_cols,img_rows)) |
|
|
137 |
|
|
|
138 |
|
|
|
139 |
if plot : |
|
|
140 |
plt.figure() |
|
|
141 |
plt.imshow(mask) |
|
|
142 |
plt.title('drawing of all the regions of interest') |
|
|
143 |
plt.legend() |
|
|
144 |
plt.show() |
|
|
145 |
plt.figure() |
|
|
146 |
plt.imshow(transform_poly) |
|
|
147 |
plt.title('drawing of all the regions of interest resized') |
|
|
148 |
plt.legend() |
|
|
149 |
plt.show() |
|
|
150 |
|
|
|
151 |
|
|
|
152 |
if generate: |
|
|
153 |
#create the information table |
|
|
154 |
collect_points = [] |
|
|
155 |
for i in tqdm(range(nr_of_datapoints)): |
|
|
156 |
y_ms, x_ms = int(xy_positions[i,1]-1), int(xy_positions[i,0]-1) |
|
|
157 |
x_histo = x_ms*np.transpose(new_msi2Dhisto)[1,0] + y_ms*np.transpose(new_msi2Dhisto)[1,1] + np.transpose(new_msi2Dhisto)[1,2] |
|
|
158 |
y_histo = x_ms*np.transpose(new_msi2Dhisto)[0,0] + y_ms*np.transpose(new_msi2Dhisto)[0,1] + np.transpose(new_msi2Dhisto)[0,2] |
|
|
159 |
collect_points.append([x_histo, y_histo]) |
|
|
160 |
|
|
|
161 |
collect_points = np.array(collect_points) |
|
|
162 |
labels = [] |
|
|
163 |
size_image = 96 |
|
|
164 |
tile = np.float64(histo_img) |
|
|
165 |
name_rois =list( dict_roi.keys()) |
|
|
166 |
histo_img = np.float64(histo_img) |
|
|
167 |
with open(os.path.join(r'.\msi_tables','table_for_sample_' + sample_name + '.txt'), "a", encoding="utf-8") as file: |
|
|
168 |
header = list(np.array(mzs, dtype = 'U25')) + ['label', 'image_name', 'x', 'y'] |
|
|
169 |
header = ';'.join(header) |
|
|
170 |
file.write(header) |
|
|
171 |
file.write("\n") |
|
|
172 |
file.close() |
|
|
173 |
for i in tqdm(range(nr_of_datapoints)): |
|
|
174 |
with open(os.path.join(r'.\msi_tables','table_for_sample_' + sample_name + '.txt'), "a", encoding="utf-8") as file: |
|
|
175 |
if int(collect_points[i,1] - size_image//2) > 0 and int(collect_points[i,0] - size_image//2) > 0 and collect_points[i,1] < img_cols and collect_points[i,0] < img_rows: |
|
|
176 |
labeled_tile = transform_poly[int(collect_points[i,0] - size_image//2): int(collect_points[i,0] + size_image//2) ,int(collect_points[i,1] - size_image//2): int(collect_points[i,1] + size_image//2) ] |
|
|
177 |
length, width = labeled_tile.shape |
|
|
178 |
temp_label = transform_poly[int(collect_points[i,0]), int(collect_points[i,1])] |
|
|
179 |
labels.append(temp_label) |
|
|
180 |
if temp_label > 0: |
|
|
181 |
temp_title = "tile_"+ str(int(xy_positions[i,1]-1)) + "_" + str(int(xy_positions[i,0]-1)) + ".jpg" |
|
|
182 |
if not os.path.isfile(os.path.join(path_tiles_dir,temp_title)): |
|
|
183 |
cv2.imwrite(os.path.join(path_tiles_dir,temp_title), tile[int(collect_points[i,0] - size_image//2): int(collect_points[i,0] + size_image//2) ,int(collect_points[i,1] - size_image//2): int(collect_points[i,1] + size_image//2) ]) |
|
|
184 |
row = np.concatenate((msidata[:,i], np.array([name_rois[temp_label-1], temp_title, int(xy_positions[i,1]-1), int(xy_positions[i,0]-1)])), axis = None) |
|
|
185 |
row = ';'.join(list(row)) |
|
|
186 |
file.write(row) |
|
|
187 |
file.write("\n") |
|
|
188 |
file.close() |
|
|
189 |
del histo_img |
|
|
190 |
return('sample ' + sample_name + ' done') |
|
|
191 |
|
|
|
192 |
|
|
|
193 |
main_path = '.' |
|
|
194 |
for slide in os.listdir(main_path): |
|
|
195 |
done = generator_msi_data(slide, plot = False, generate = True,main_path) |
|
|
196 |
print(done) |