|
a |
|
b/code/esmlib.py |
|
|
1 |
#Warnings filter |
|
|
2 |
import warnings |
|
|
3 |
warnings.filterwarnings("ignore", category=DeprecationWarning) |
|
|
4 |
warnings.filterwarnings("ignore", category=RuntimeWarning) |
|
|
5 |
|
|
|
6 |
#General library |
|
|
7 |
import numpy as np |
|
|
8 |
import matplotlib.pyplot as plt |
|
|
9 |
|
|
|
10 |
#Path |
|
|
11 |
import os, os.path |
|
|
12 |
import pickle |
|
|
13 |
import shutil |
|
|
14 |
import glob |
|
|
15 |
|
|
|
16 |
# Interactive input |
|
|
17 |
from tkinter.filedialog import askopenfilename |
|
|
18 |
from tkinter.filedialog import askdirectory |
|
|
19 |
|
|
|
20 |
#Modeling |
|
|
21 |
from sklearn.cluster import KMeans |
|
|
22 |
from sklearn.cluster import DBSCAN |
|
|
23 |
from sklearn.decomposition import PCA |
|
|
24 |
from sklearn import svm, datasets |
|
|
25 |
from sklearn.metrics import auc |
|
|
26 |
from sklearn.metrics import plot_roc_curve |
|
|
27 |
from sklearn.model_selection import StratifiedKFold |
|
|
28 |
from sklearn.linear_model import LogisticRegression |
|
|
29 |
from sklearn.tree import DecisionTreeClassifier |
|
|
30 |
from sklearn.svm import SVC |
|
|
31 |
from sklearn.ensemble import RandomForestClassifier |
|
|
32 |
from sklearn.naive_bayes import GaussianNB |
|
|
33 |
from sklearn.model_selection import train_test_split |
|
|
34 |
from sklearn.metrics import accuracy_score |
|
|
35 |
from sklearn.metrics import classification_report |
|
|
36 |
from sklearn.metrics import plot_confusion_matrix |
|
|
37 |
from sklearn.metrics import confusion_matrix |
|
|
38 |
|
|
|
39 |
# Scipy |
|
|
40 |
from scipy.misc import face |
|
|
41 |
from scipy import ndimage as ndi |
|
|
42 |
from skimage.morphology import ball, disk, dilation, binary_erosion, remove_small_objects, erosion, closing, reconstruction, binary_closing |
|
|
43 |
from skimage.segmentation import clear_border, mark_boundaries |
|
|
44 |
from skimage.filters import roberts, sobel |
|
|
45 |
from scipy.signal.signaltools import wiener |
|
|
46 |
|
|
|
47 |
#Image |
|
|
48 |
import cv2 |
|
|
49 |
from PIL import Image |
|
|
50 |
import matplotlib.pyplot as plt |
|
|
51 |
|
|
|
52 |
#Medical Image |
|
|
53 |
import pydicom |
|
|
54 |
from pydicom import dcmread |
|
|
55 |
from pydicom.data import get_testdata_file |
|
|
56 |
from roipoly import RoiPoly |
|
|
57 |
import mahotas |
|
|
58 |
import mahotas.demos |
|
|
59 |
import mahotas as mh |
|
|
60 |
import numpy as np |
|
|
61 |
from pylab import imshow, show |
|
|
62 |
|
|
|
63 |
# ipywidgets for some interactive plots |
|
|
64 |
from ipywidgets.widgets import * |
|
|
65 |
import ipywidgets as widgets |
|
|
66 |
|
|
|
67 |
# Load the model |
|
|
68 |
pickle_in = open('svc_pca_classifier.pickle',"rb") |
|
|
69 |
model = pickle.load(pickle_in) |
|
|
70 |
|
|
|
71 |
# Load the PCA object |
|
|
72 |
pickle_in = open('pca.pickle',"rb") |
|
|
73 |
pca = pickle.load(pickle_in) |
|
|
74 |
|
|
|
75 |
# Function for read and selects three scans from a folder exam |
|
|
76 |
def select_paths(path): |
|
|
77 |
lista = glob.glob(path + "\*.dcm") |
|
|
78 |
list_idx = [] |
|
|
79 |
n_dcm = len(lista) |
|
|
80 |
if n_dcm > 50: |
|
|
81 |
n_dcm_interval = int(n_dcm / 12) |
|
|
82 |
else: |
|
|
83 |
print("The selected folder has less than 50 images.. try to select another folder that contain more scans") |
|
|
84 |
lista_idx = [n_dcm_interval*4, n_dcm_interval*5, n_dcm_interval*6] |
|
|
85 |
return lista, lista_idx |
|
|
86 |
|
|
|
87 |
# This function extract and print some metadata related to the patient |
|
|
88 |
def get_metadata(folder_path): |
|
|
89 |
lista = glob.glob(folder_path + "\\" + "*.dcm") |
|
|
90 |
n = len(lista) |
|
|
91 |
ds = dcmread(lista[0]) |
|
|
92 |
print("PatientID: " + str(ds.PatientID)) |
|
|
93 |
print("\tSex: " + str(ds.PatientSex)) |
|
|
94 |
print("\tAge: " + str(int(ds.PatientAge[:-1]))) |
|
|
95 |
print("\nStudy Description: " + str(ds.StudyDescription)) |
|
|
96 |
print("\tExam Modality: " + str(ds.Modality)) |
|
|
97 |
print("\tNumber of scans: " + str(len(lista))) |
|
|
98 |
print("\tImage size " + str(ds.Rows) + "x" + str(ds.Columns)) |
|
|
99 |
print("\tPatient Position: " + str(ds.PatientPosition)) |
|
|
100 |
print("\tSlice Thickness: " + str(ds.SliceThickness)) |
|
|
101 |
print("\tSpace between slices: " + str(ds.SpacingBetweenSlices)) |
|
|
102 |
return None |
|
|
103 |
|
|
|
104 |
# This function load and process the image related to the specified path |
|
|
105 |
def load_and_preprocess_dcm(path): |
|
|
106 |
ds = dcmread(path) |
|
|
107 |
img = ds.pixel_array |
|
|
108 |
img = ds.RescaleIntercept + img * ds.RescaleSlope |
|
|
109 |
minimum = np.amin(np.amin(img)) |
|
|
110 |
maximum = np.amax(np.amax(img)) |
|
|
111 |
img[img == minimum] = -1000 |
|
|
112 |
# Normalization |
|
|
113 |
img = (img - minimum) / maximum |
|
|
114 |
new_img = ((img - img.min()) * (1/(img.max() - img.min()) * 255)).astype('uint8') |
|
|
115 |
new_img = new_img[50:460,30:480] |
|
|
116 |
return new_img |
|
|
117 |
|
|
|
118 |
# A simple function to show the three images selected in a subplot |
|
|
119 |
def select_and_show_3(lista, lista_idx): |
|
|
120 |
fig, ([ax1, ax2, ax3]) = plt.subplots(1,3, figsize=(15,12)) |
|
|
121 |
img1 = load_and_preprocess_dcm(lista[lista_idx[0]]) |
|
|
122 |
img2 = load_and_preprocess_dcm(lista[lista_idx[1]]) |
|
|
123 |
img3 = load_and_preprocess_dcm(lista[lista_idx[2]]) |
|
|
124 |
ax1.imshow(img1, cmap=plt.cm.gray) |
|
|
125 |
ax1.set_title("Scansione n: " + str(lista_idx[0])) |
|
|
126 |
ax2.imshow(img2, cmap=plt.cm.gray) |
|
|
127 |
ax2.set_title("Scansione n: " + str(lista_idx[1])) |
|
|
128 |
ax3.imshow(img3, cmap=plt.cm.gray) |
|
|
129 |
ax3.set_title("Scansione n: " + str(lista_idx[2])) |
|
|
130 |
return [img1, img2, img3] |
|
|
131 |
|
|
|
132 |
# A simple function to show the three mask in a subplot |
|
|
133 |
def show_3_mask(mask_list, idx_sel): |
|
|
134 |
fig, ([ax1, ax2, ax3]) = plt.subplots(1,3, figsize=(15,12)) |
|
|
135 |
ax1.imshow(mask_list[0], cmap=plt.cm.gray) |
|
|
136 |
ax1.set_title("Maschera della scansione n: " + str(idx_sel[0])) |
|
|
137 |
ax2.imshow(mask_list[1], cmap=plt.cm.gray) |
|
|
138 |
ax2.set_title("Maschera della scansione n: " + str(idx_sel[1])) |
|
|
139 |
ax3.imshow(mask_list[2], cmap=plt.cm.gray) |
|
|
140 |
ax3.set_title("Maschera della scansione n: " + str(idx_sel[2])) |
|
|
141 |
return None |
|
|
142 |
|
|
|
143 |
# This function allow to create a lung mask with the kmeans clustering algorithm |
|
|
144 |
def mask_kmeans(img_array): |
|
|
145 |
img_reshaped = img_array.reshape(-1,1) |
|
|
146 |
kmeans = KMeans(n_clusters = 2, random_state = 99).fit(img_reshaped) |
|
|
147 |
kmeans_y = kmeans.fit_predict(img_reshaped) |
|
|
148 |
lab = kmeans.labels_ |
|
|
149 |
mask = lab.reshape(img_array.shape) |
|
|
150 |
# Control the mask |
|
|
151 |
if mask[1,1] == 0: |
|
|
152 |
mask = abs(mask-1) |
|
|
153 |
return mask |
|
|
154 |
|
|
|
155 |
# Function to make the mask more robust |
|
|
156 |
def fill_mask(first_mask): |
|
|
157 |
cleared = clear_border(first_mask) |
|
|
158 |
selem = disk(1.5) |
|
|
159 |
binary = binary_erosion(cleared, selem) |
|
|
160 |
selem = disk(10) |
|
|
161 |
binary = binary_closing(binary, selem) |
|
|
162 |
edges = roberts(binary) |
|
|
163 |
binary = ndi.binary_fill_holes(edges) |
|
|
164 |
final_mask = binary.astype('uint8') |
|
|
165 |
# Control the mask |
|
|
166 |
n_0 = len(final_mask[final_mask==0]) |
|
|
167 |
n_1 = len(final_mask[final_mask==1]) |
|
|
168 |
if (n_1/(n_0 + n_1)) < 0.1 or (n_1/(n_0 + n_1)) > 0.9: |
|
|
169 |
print("The selected scan is not good enough") |
|
|
170 |
return final_mask |
|
|
171 |
|
|
|
172 |
# Function to manually select the lungh mask or any other ROI |
|
|
173 |
def manually_roi_select(lista_img, mask_list, n_mask): |
|
|
174 |
fig = plt.figure() |
|
|
175 |
plt.imshow(lista_img[int(n_mask)-1], interpolation='nearest', cmap=plt.cm.gray) |
|
|
176 |
plt.colorbar() |
|
|
177 |
plt.title("left click: line segment right click or double click: close region") |
|
|
178 |
roi1 = RoiPoly(color='r', fig=fig) |
|
|
179 |
mask = roi1.get_mask(mask_list[int(n_mask)-1]) |
|
|
180 |
mask_list[int(n_mask)-1] = mask |
|
|
181 |
return roi1, mask |
|
|
182 |
|
|
|
183 |
# Function to show the manually selected ROI |
|
|
184 |
def show_roi_selected(roi1, mask, n_mask, lista_img): |
|
|
185 |
fig, ([ax1, ax2]) = plt.subplots(1,2, figsize=(15,12)) |
|
|
186 |
roi1.display_roi() |
|
|
187 |
ax1.imshow(mask, cmap=plt.cm.gray) |
|
|
188 |
ax1.set_title("Maschera") |
|
|
189 |
ax2.imshow(lista_img[int(n_mask)-1], cmap=plt.cm.gray) |
|
|
190 |
ax2.set_title("Maschera sull'immagine originale") |
|
|
191 |
return None |
|
|
192 |
|
|
|
193 |
# This function take the image and extract the Haralick Feature matrix (4x13) |
|
|
194 |
def extract_haralick(img): |
|
|
195 |
# adding gaussian filter |
|
|
196 |
img_test = mahotas.gaussian_filter(img, .5, cval = 100, mode = "reflect") |
|
|
197 |
# setting threshold (threshed is the mask) |
|
|
198 |
threshed = (img_test > np.quantile(img_test[img_test!=0],.95)) |
|
|
199 |
# making is labeled image |
|
|
200 |
feature_map, n = mahotas.label(threshed) |
|
|
201 |
# getting haralick features |
|
|
202 |
h_feature = mahotas.features.haralick(feature_map, distance = 2) |
|
|
203 |
return feature_map, h_feature |
|
|
204 |
|
|
|
205 |
# Function for extract and show the equalized and filtered image (dst) and the Haralick map |
|
|
206 |
# for the three scans selected for a patient |
|
|
207 |
def features_and_plot(lista_img, lista_mask, printing=False): |
|
|
208 |
|
|
|
209 |
id_feat_list = [] |
|
|
210 |
row, col = lista_img[0].shape |
|
|
211 |
|
|
|
212 |
for cont in range(0,len(lista_img)): |
|
|
213 |
# Equalized hist of image |
|
|
214 |
dst = cv2.equalizeHist(lista_img[cont]*lista_mask[cont]) |
|
|
215 |
# Wiener filter |
|
|
216 |
filtered_img = wiener(dst, (3, 3), noise=10) |
|
|
217 |
# Extract Haralick Features |
|
|
218 |
feature_map, h_feature = extract_haralick(filtered_img) |
|
|
219 |
id_feat_list.append(h_feature) |
|
|
220 |
|
|
|
221 |
# showing the features |
|
|
222 |
if printing: |
|
|
223 |
fig, ([ax1, ax2]) = plt.subplots(1,2, figsize=(12,10)) |
|
|
224 |
fig.suptitle("Scans: " + str(cont + 1), y = 0.8, fontsize=16) |
|
|
225 |
ax1.imshow(dst, cmap=plt.cm.gray) |
|
|
226 |
ax1.set_title("Equalized image (Masked)") |
|
|
227 |
ax2.imshow(feature_map, cmap=plt.cm.viridis) |
|
|
228 |
ax2.set_title("Haralick Features Map") |
|
|
229 |
plt.tight_layout() |
|
|
230 |
return id_feat_list |
|
|
231 |
|
|
|
232 |
# Function to predict the probability that the patient is positive or negative to Covid-19 |
|
|
233 |
def covid_predict(feat): |
|
|
234 |
lista_pat=[] |
|
|
235 |
for elem in feat: |
|
|
236 |
lista_pat.append(elem.reshape(-1)) |
|
|
237 |
pat_array = np.vstack(lista_pat) |
|
|
238 |
pat_array = np.reshape(lista_pat, (156)) |
|
|
239 |
pat_array = pat_array.reshape(1,-1) |
|
|
240 |
input_model = pca.transform(pat_array) |
|
|
241 |
idx_predicted = model.predict(input_model) |
|
|
242 |
proba = model.predict_proba(input_model)[0,idx_predicted] |
|
|
243 |
proba = np.around(proba,4) * 100 |
|
|
244 |
if idx_predicted == 0: |
|
|
245 |
print("The patient is negative to Covid-19 with a confidence of " + str(proba[0]) + "%") |
|
|
246 |
else: |
|
|
247 |
print("The patient is positive to Covid-19 with a confidence of " + str(proba[0]) + "%") |
|
|
248 |
return None |
|
|
249 |
|
|
|
250 |
|
|
|
251 |
|