Diff of /code/esmlib.py [000000] .. [8bc14c]

Switch to side-by-side view

--- a
+++ b/code/esmlib.py
@@ -0,0 +1,251 @@
+#Warnings filter
+import warnings
+warnings.filterwarnings("ignore", category=DeprecationWarning) 
+warnings.filterwarnings("ignore", category=RuntimeWarning) 
+
+#General library
+import numpy as np
+import matplotlib.pyplot as plt
+
+#Path
+import os, os.path
+import pickle
+import shutil
+import glob
+
+# Interactive input
+from tkinter.filedialog import askopenfilename
+from tkinter.filedialog import askdirectory
+
+#Modeling
+from sklearn.cluster import KMeans 
+from sklearn.cluster import DBSCAN 
+from sklearn.decomposition import PCA
+from sklearn import svm, datasets
+from sklearn.metrics import auc
+from sklearn.metrics import plot_roc_curve
+from sklearn.model_selection import StratifiedKFold
+from sklearn.linear_model import LogisticRegression
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.svm import SVC
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.naive_bayes import GaussianNB
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import accuracy_score
+from sklearn.metrics import classification_report
+from sklearn.metrics import plot_confusion_matrix
+from sklearn.metrics import confusion_matrix
+
+# Scipy
+from scipy.misc import face
+from scipy import ndimage as ndi
+from skimage.morphology import ball, disk, dilation, binary_erosion, remove_small_objects, erosion, closing, reconstruction, binary_closing
+from skimage.segmentation import clear_border, mark_boundaries
+from skimage.filters import roberts, sobel
+from scipy.signal.signaltools import wiener
+
+#Image
+import cv2
+from PIL import Image
+import matplotlib.pyplot as plt
+
+#Medical Image
+import pydicom
+from pydicom import dcmread
+from pydicom.data import get_testdata_file
+from roipoly import RoiPoly
+import mahotas
+import mahotas.demos
+import mahotas as mh
+import numpy as np
+from pylab import imshow, show
+
+# ipywidgets for some interactive plots
+from ipywidgets.widgets import * 
+import ipywidgets as widgets
+
+# Load the model
+pickle_in = open('svc_pca_classifier.pickle',"rb")
+model = pickle.load(pickle_in)
+
+# Load the PCA object
+pickle_in = open('pca.pickle',"rb")
+pca = pickle.load(pickle_in)
+
+# Function for read and selects three scans from a folder exam
+def select_paths(path):
+    lista = glob.glob(path + "\*.dcm")
+    list_idx = []
+    n_dcm = len(lista)
+    if n_dcm > 50:
+        n_dcm_interval = int(n_dcm / 12)
+    else:
+        print("The selected folder has less than 50 images.. try to select another folder that contain more scans")
+    lista_idx = [n_dcm_interval*4, n_dcm_interval*5, n_dcm_interval*6]
+    return lista, lista_idx
+
+# This function extract and print some metadata related to the patient
+def get_metadata(folder_path):
+    lista = glob.glob(folder_path + "\\" + "*.dcm")
+    n = len(lista)
+    ds = dcmread(lista[0])
+    print("PatientID: " + str(ds.PatientID))
+    print("\tSex: " + str(ds.PatientSex))
+    print("\tAge: " + str(int(ds.PatientAge[:-1])))
+    print("\nStudy Description: " + str(ds.StudyDescription))
+    print("\tExam Modality: " + str(ds.Modality))
+    print("\tNumber of scans: " + str(len(lista)))
+    print("\tImage size " + str(ds.Rows) + "x" + str(ds.Columns))
+    print("\tPatient Position: " + str(ds.PatientPosition))
+    print("\tSlice Thickness: " + str(ds.SliceThickness))
+    print("\tSpace between slices: " + str(ds.SpacingBetweenSlices))
+    return None
+
+# This function load and process the image related to the specified path
+def load_and_preprocess_dcm(path):
+    ds = dcmread(path)
+    img = ds.pixel_array
+    img = ds.RescaleIntercept + img * ds.RescaleSlope
+    minimum = np.amin(np.amin(img))
+    maximum = np.amax(np.amax(img))
+    img[img == minimum] = -1000
+    # Normalization
+    img = (img - minimum) / maximum
+    new_img = ((img - img.min()) * (1/(img.max() - img.min()) * 255)).astype('uint8')
+    new_img = new_img[50:460,30:480]
+    return new_img
+
+# A simple function to show the three images selected in a subplot
+def select_and_show_3(lista, lista_idx):
+    fig, ([ax1, ax2, ax3]) = plt.subplots(1,3, figsize=(15,12))
+    img1 = load_and_preprocess_dcm(lista[lista_idx[0]])
+    img2 = load_and_preprocess_dcm(lista[lista_idx[1]])
+    img3 = load_and_preprocess_dcm(lista[lista_idx[2]])
+    ax1.imshow(img1, cmap=plt.cm.gray)
+    ax1.set_title("Scansione n: " + str(lista_idx[0]))
+    ax2.imshow(img2, cmap=plt.cm.gray)
+    ax2.set_title("Scansione n: " + str(lista_idx[1]))
+    ax3.imshow(img3, cmap=plt.cm.gray)
+    ax3.set_title("Scansione n: " + str(lista_idx[2]))
+    return [img1, img2, img3]
+
+# A simple function to show the three mask in a subplot
+def show_3_mask(mask_list, idx_sel):
+    fig, ([ax1, ax2, ax3]) = plt.subplots(1,3, figsize=(15,12))
+    ax1.imshow(mask_list[0], cmap=plt.cm.gray)
+    ax1.set_title("Maschera della scansione n: " + str(idx_sel[0]))
+    ax2.imshow(mask_list[1], cmap=plt.cm.gray)
+    ax2.set_title("Maschera della scansione n: " + str(idx_sel[1]))
+    ax3.imshow(mask_list[2], cmap=plt.cm.gray)
+    ax3.set_title("Maschera della scansione n: " + str(idx_sel[2]))
+    return None
+
+# This function allow to create a lung mask with the kmeans clustering algorithm
+def mask_kmeans(img_array):
+    img_reshaped = img_array.reshape(-1,1)
+    kmeans = KMeans(n_clusters = 2, random_state = 99).fit(img_reshaped)
+    kmeans_y = kmeans.fit_predict(img_reshaped)
+    lab = kmeans.labels_
+    mask = lab.reshape(img_array.shape)
+    # Control the mask
+    if mask[1,1] == 0:
+        mask = abs(mask-1)
+    return mask
+
+# Function to make the mask more robust
+def fill_mask(first_mask):
+    cleared = clear_border(first_mask)
+    selem = disk(1.5)
+    binary = binary_erosion(cleared, selem)
+    selem = disk(10)
+    binary = binary_closing(binary, selem)
+    edges = roberts(binary)
+    binary = ndi.binary_fill_holes(edges)
+    final_mask = binary.astype('uint8')
+    # Control the mask
+    n_0 = len(final_mask[final_mask==0])
+    n_1 = len(final_mask[final_mask==1])
+    if (n_1/(n_0 + n_1)) < 0.1 or (n_1/(n_0 + n_1)) > 0.9:
+        print("The selected scan is not good enough")
+    return final_mask
+
+# Function to manually select the lungh mask or any other ROI
+def manually_roi_select(lista_img, mask_list, n_mask):
+    fig = plt.figure()
+    plt.imshow(lista_img[int(n_mask)-1], interpolation='nearest', cmap=plt.cm.gray)
+    plt.colorbar()
+    plt.title("left click: line segment right click or double click: close region")
+    roi1 = RoiPoly(color='r', fig=fig)
+    mask = roi1.get_mask(mask_list[int(n_mask)-1])
+    mask_list[int(n_mask)-1] = mask
+    return roi1, mask
+
+# Function to show the manually selected ROI
+def show_roi_selected(roi1, mask, n_mask, lista_img):
+    fig, ([ax1, ax2]) = plt.subplots(1,2, figsize=(15,12))
+    roi1.display_roi()
+    ax1.imshow(mask, cmap=plt.cm.gray)
+    ax1.set_title("Maschera")
+    ax2.imshow(lista_img[int(n_mask)-1], cmap=plt.cm.gray)
+    ax2.set_title("Maschera sull'immagine originale")
+    return None
+
+# This function take the image and extract the Haralick Feature matrix (4x13)
+def extract_haralick(img):
+    # adding gaussian filter
+    img_test = mahotas.gaussian_filter(img, .5, cval = 100, mode = "reflect")
+    # setting threshold (threshed is the mask)
+    threshed = (img_test > np.quantile(img_test[img_test!=0],.95))
+    # making is labeled image
+    feature_map, n = mahotas.label(threshed)
+    # getting haralick features
+    h_feature = mahotas.features.haralick(feature_map, distance = 2)    
+    return feature_map, h_feature
+
+# Function for extract and show the equalized and filtered image (dst) and the Haralick map
+# for the three scans selected for a patient
+def features_and_plot(lista_img, lista_mask, printing=False):
+    
+    id_feat_list = []
+    row, col = lista_img[0].shape
+    
+    for cont in range(0,len(lista_img)):
+        # Equalized hist of image
+        dst = cv2.equalizeHist(lista_img[cont]*lista_mask[cont])
+        # Wiener filter
+        filtered_img = wiener(dst, (3, 3), noise=10)
+        # Extract Haralick Features
+        feature_map, h_feature = extract_haralick(filtered_img)
+        id_feat_list.append(h_feature)
+
+        # showing the features
+        if printing:
+            fig, ([ax1, ax2]) = plt.subplots(1,2, figsize=(12,10))
+            fig.suptitle("Scans: " + str(cont + 1), y = 0.8, fontsize=16)
+            ax1.imshow(dst, cmap=plt.cm.gray)
+            ax1.set_title("Equalized image (Masked)")
+            ax2.imshow(feature_map, cmap=plt.cm.viridis)
+            ax2.set_title("Haralick Features Map")
+            plt.tight_layout()         
+    return id_feat_list   
+
+# Function to predict the probability that the patient is positive or negative to Covid-19
+def covid_predict(feat):
+    lista_pat=[]
+    for elem in feat:
+        lista_pat.append(elem.reshape(-1))
+    pat_array = np.vstack(lista_pat)
+    pat_array = np.reshape(lista_pat, (156))
+    pat_array = pat_array.reshape(1,-1)
+    input_model = pca.transform(pat_array)
+    idx_predicted = model.predict(input_model)
+    proba = model.predict_proba(input_model)[0,idx_predicted]
+    proba = np.around(proba,4) * 100
+    if idx_predicted == 0:
+        print("The patient is negative to Covid-19 with a confidence of " + str(proba[0]) + "%")
+    else:
+        print("The patient is positive to Covid-19 with a confidence of " + str(proba[0]) + "%")
+    return None
+
+
+