Covid-Prediction-Lung-CT / Git / [8bc14c] /code/esmlib.py

Models:
RichardZick/
Covid-Prediction-Lung-CT
Downloads: 1
[8bc14c]: / code / esmlib.py
History
Download this file
252 lines (223 with data), 9.5 kB

#Warnings filter
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning) 
warnings.filterwarnings("ignore", category=RuntimeWarning) 

#General library
import numpy as np
import matplotlib.pyplot as plt

#Path
import os, os.path
import pickle
import shutil
import glob

# Interactive input
from tkinter.filedialog import askopenfilename
from tkinter.filedialog import askdirectory

#Modeling
from sklearn.cluster import KMeans 
from sklearn.cluster import DBSCAN 
from sklearn.decomposition import PCA
from sklearn import svm, datasets
from sklearn.metrics import auc
from sklearn.metrics import plot_roc_curve
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import confusion_matrix

# Scipy
from scipy.misc import face
from scipy import ndimage as ndi
from skimage.morphology import ball, disk, dilation, binary_erosion, remove_small_objects, erosion, closing, reconstruction, binary_closing
from skimage.segmentation import clear_border, mark_boundaries
from skimage.filters import roberts, sobel
from scipy.signal.signaltools import wiener

#Image
import cv2
from PIL import Image
import matplotlib.pyplot as plt

#Medical Image
import pydicom
from pydicom import dcmread
from pydicom.data import get_testdata_file
from roipoly import RoiPoly
import mahotas
import mahotas.demos
import mahotas as mh
import numpy as np
from pylab import imshow, show

# ipywidgets for some interactive plots
from ipywidgets.widgets import * 
import ipywidgets as widgets

# Load the model
pickle_in = open('svc_pca_classifier.pickle',"rb")
model = pickle.load(pickle_in)

# Load the PCA object
pickle_in = open('pca.pickle',"rb")
pca = pickle.load(pickle_in)

# Function for read and selects three scans from a folder exam
def select_paths(path):
    lista = glob.glob(path + "\*.dcm")
    list_idx = []
    n_dcm = len(lista)
    if n_dcm > 50:
        n_dcm_interval = int(n_dcm / 12)
    else:
        print("The selected folder has less than 50 images.. try to select another folder that contain more scans")
    lista_idx = [n_dcm_interval*4, n_dcm_interval*5, n_dcm_interval*6]
    return lista, lista_idx

# This function extract and print some metadata related to the patient
def get_metadata(folder_path):
    lista = glob.glob(folder_path + "\\" + "*.dcm")
    n = len(lista)
    ds = dcmread(lista[0])
    print("PatientID: " + str(ds.PatientID))
    print("\tSex: " + str(ds.PatientSex))
    print("\tAge: " + str(int(ds.PatientAge[:-1])))
    print("\nStudy Description: " + str(ds.StudyDescription))
    print("\tExam Modality: " + str(ds.Modality))
    print("\tNumber of scans: " + str(len(lista)))
    print("\tImage size " + str(ds.Rows) + "x" + str(ds.Columns))
    print("\tPatient Position: " + str(ds.PatientPosition))
    print("\tSlice Thickness: " + str(ds.SliceThickness))
    print("\tSpace between slices: " + str(ds.SpacingBetweenSlices))
    return None

# This function load and process the image related to the specified path
def load_and_preprocess_dcm(path):
    ds = dcmread(path)
    img = ds.pixel_array
    img = ds.RescaleIntercept + img * ds.RescaleSlope
    minimum = np.amin(np.amin(img))
    maximum = np.amax(np.amax(img))
    img[img == minimum] = -1000
    # Normalization
    img = (img - minimum) / maximum
    new_img = ((img - img.min()) * (1/(img.max() - img.min()) * 255)).astype('uint8')
    new_img = new_img[50:460,30:480]
    return new_img

# A simple function to show the three images selected in a subplot
def select_and_show_3(lista, lista_idx):
    fig, ([ax1, ax2, ax3]) = plt.subplots(1,3, figsize=(15,12))
    img1 = load_and_preprocess_dcm(lista[lista_idx[0]])
    img2 = load_and_preprocess_dcm(lista[lista_idx[1]])
    img3 = load_and_preprocess_dcm(lista[lista_idx[2]])
    ax1.imshow(img1, cmap=plt.cm.gray)
    ax1.set_title("Scansione n: " + str(lista_idx[0]))
    ax2.imshow(img2, cmap=plt.cm.gray)
    ax2.set_title("Scansione n: " + str(lista_idx[1]))
    ax3.imshow(img3, cmap=plt.cm.gray)
    ax3.set_title("Scansione n: " + str(lista_idx[2]))
    return [img1, img2, img3]

# A simple function to show the three mask in a subplot
def show_3_mask(mask_list, idx_sel):
    fig, ([ax1, ax2, ax3]) = plt.subplots(1,3, figsize=(15,12))
    ax1.imshow(mask_list[0], cmap=plt.cm.gray)
    ax1.set_title("Maschera della scansione n: " + str(idx_sel[0]))
    ax2.imshow(mask_list[1], cmap=plt.cm.gray)
    ax2.set_title("Maschera della scansione n: " + str(idx_sel[1]))
    ax3.imshow(mask_list[2], cmap=plt.cm.gray)
    ax3.set_title("Maschera della scansione n: " + str(idx_sel[2]))
    return None

# This function allow to create a lung mask with the kmeans clustering algorithm
def mask_kmeans(img_array):
    img_reshaped = img_array.reshape(-1,1)
    kmeans = KMeans(n_clusters = 2, random_state = 99).fit(img_reshaped)
    kmeans_y = kmeans.fit_predict(img_reshaped)
    lab = kmeans.labels_
    mask = lab.reshape(img_array.shape)
    # Control the mask
    if mask[1,1] == 0:
        mask = abs(mask-1)
    return mask

# Function to make the mask more robust
def fill_mask(first_mask):
    cleared = clear_border(first_mask)
    selem = disk(1.5)
    binary = binary_erosion(cleared, selem)
    selem = disk(10)
    binary = binary_closing(binary, selem)
    edges = roberts(binary)
    binary = ndi.binary_fill_holes(edges)
    final_mask = binary.astype('uint8')
    # Control the mask
    n_0 = len(final_mask[final_mask==0])
    n_1 = len(final_mask[final_mask==1])
    if (n_1/(n_0 + n_1)) < 0.1 or (n_1/(n_0 + n_1)) > 0.9:
        print("The selected scan is not good enough")
    return final_mask

# Function to manually select the lungh mask or any other ROI
def manually_roi_select(lista_img, mask_list, n_mask):
    fig = plt.figure()
    plt.imshow(lista_img[int(n_mask)-1], interpolation='nearest', cmap=plt.cm.gray)
    plt.colorbar()
    plt.title("left click: line segment right click or double click: close region")
    roi1 = RoiPoly(color='r', fig=fig)
    mask = roi1.get_mask(mask_list[int(n_mask)-1])
    mask_list[int(n_mask)-1] = mask
    return roi1, mask

# Function to show the manually selected ROI
def show_roi_selected(roi1, mask, n_mask, lista_img):
    fig, ([ax1, ax2]) = plt.subplots(1,2, figsize=(15,12))
    roi1.display_roi()
    ax1.imshow(mask, cmap=plt.cm.gray)
    ax1.set_title("Maschera")
    ax2.imshow(lista_img[int(n_mask)-1], cmap=plt.cm.gray)
    ax2.set_title("Maschera sull'immagine originale")
    return None

# This function take the image and extract the Haralick Feature matrix (4x13)
def extract_haralick(img):
    # adding gaussian filter
    img_test = mahotas.gaussian_filter(img, .5, cval = 100, mode = "reflect")
    # setting threshold (threshed is the mask)
    threshed = (img_test > np.quantile(img_test[img_test!=0],.95))
    # making is labeled image
    feature_map, n = mahotas.label(threshed)
    # getting haralick features
    h_feature = mahotas.features.haralick(feature_map, distance = 2)    
    return feature_map, h_feature

# Function for extract and show the equalized and filtered image (dst) and the Haralick map
# for the three scans selected for a patient
def features_and_plot(lista_img, lista_mask, printing=False):
    
    id_feat_list = []
    row, col = lista_img[0].shape
    
    for cont in range(0,len(lista_img)):
        # Equalized hist of image
        dst = cv2.equalizeHist(lista_img[cont]*lista_mask[cont])
        # Wiener filter
        filtered_img = wiener(dst, (3, 3), noise=10)
        # Extract Haralick Features
        feature_map, h_feature = extract_haralick(filtered_img)
        id_feat_list.append(h_feature)

        # showing the features
        if printing:
            fig, ([ax1, ax2]) = plt.subplots(1,2, figsize=(12,10))
            fig.suptitle("Scans: " + str(cont + 1), y = 0.8, fontsize=16)
            ax1.imshow(dst, cmap=plt.cm.gray)
            ax1.set_title("Equalized image (Masked)")
            ax2.imshow(feature_map, cmap=plt.cm.viridis)
            ax2.set_title("Haralick Features Map")
            plt.tight_layout()         
    return id_feat_list   

# Function to predict the probability that the patient is positive or negative to Covid-19
def covid_predict(feat):
    lista_pat=[]
    for elem in feat:
        lista_pat.append(elem.reshape(-1))
    pat_array = np.vstack(lista_pat)
    pat_array = np.reshape(lista_pat, (156))
    pat_array = pat_array.reshape(1,-1)
    input_model = pca.transform(pat_array)
    idx_predicted = model.predict(input_model)
    proba = model.predict_proba(input_model)[0,idx_predicted]
    proba = np.around(proba,4) * 100
    if idx_predicted == 0:
        print("The patient is negative to Covid-19 with a confidence of " + str(proba[0]) + "%")
    else:
        print("The patient is positive to Covid-19 with a confidence of " + str(proba[0]) + "%")
    return None