from __future__ import print_function, division
from ast import literal_eval
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential, layers
from tensorflow.keras.layers import Dense
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import pydicom as dicom
import shutil
import cv2
import numpy as np
import os
import psutil
import sys
import tkinter as tk
import tkinter.font as tkFont
import random
from tkinter import ttk
import GUI
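# the GUI module is expected to expose useDefaults, boolList, varList_txt,
# varList_bool, and txtEntry_list (all referenced below)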
# un-comment to show all of pandas dataframe
#pd.set_option('display.max_rows', None)
#pd.set_option('display.max_columns', None)
# un-comment to show all of numpy array
#np.set_printoptions(threshold=sys.maxsize)
useDefaults = GUI.useDefaults
if useDefaults:
    # defaults requested: variables are specified directly in this file below
    useFront = False
else:
    # otherwise the values entered in the GUI front end are used
    useFront = True
if useFront == False:
# SPECIFY VARIABLES HERE - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
save_fit = False
load_fit = False
    model_save_loc = r"D:\Cancer_Project\Team8_Cancer_ML\HNSCC-HN1\saved_model (CNN)"
    main_data = r"D:\Cancer_Project\Team8_Cancer_ML\HNSCC\Patient and Treatment Characteristics.csv"
sec_data = ""
test_file = "test_2.csv"
# list with strings or a single string may be inputted
target_variables = "Received Concurrent Chemoradiotherapy?"
# if true, converted images will be in png format instead of jpg
png = False
    # folder containing Cancer Imagery
    load_dir = r"D:\Cancer_Project\Cancer Imagery\HNSCC"
    # directory to save data such as converted images
    save_dir = r"D:\Cancer_Project\converted_img"
    # directory to save imagery array
    img_array_save = r"D:\Cancer_Project\converted_img"
# if true, numpy image array will be searched for in img_array_save
load_numpy_img = True
    # if true, an attempt will be made to convert dicom files to jpg, png, or directly to npy
    convert_imgs = False
#if true, converted dicom images will be deleted after use
del_converted_imgs = False
    # if true, image model will be run instead of clinical-only model
    run_img_model = True
# if true, two data files will be expected for input
two_datasets = False
# if true, an additional file will be expected for testing
use_additional_test_file = False
# where image id is located in image names (start,end)
# only applies if using image model
img_id_name_loc = (3,6)
# Column of IDs in dataset. Acceptable values include "index" or a column name.
ID_dataset_col = "TCIA ID"
# tuple with dimension of imagery. All images must equal this dimension
img_dimensions = (512, 512)
# if true, every column in data will be inputted for target variable
target_all = False
# save location for data/graphs
data_save_loc = "D:\\Cancer_Project\\Team8_Cancer_ML\\result_graphs"
# if true, graphs will be shown after training model
show_figs = True
# if true, graphs will be saved after training model
save_figs = True
# if true, convert dicom directly to numpy. Otherwise, convert to jpg or png first in save_dir
dcmDirect = True
# number of epochs in model
num_epochs = 10
# if true, CNN will be used
useCNN = True
# END VARIABLES - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
else:
    boolList = GUI.boolList
    # convert every element in boolList to a proper boolean
    boolList = [bool(b) for b in boolList]
    dictTxt = dict(zip(GUI.varList_txt, GUI.txtEntry_list))
    dictBool = dict(zip(GUI.varList_bool, boolList))
    save_fit = dictBool["save_fit "]
    # load_fit and useCNN are needed later but had no entry in this branch;
    # the key names (with trailing spaces) are assumed to match the GUI labels
    load_fit = dictBool.get("load_fit ", False)
    useCNN = dictBool.get("useCNN ", True)
    model_save_loc = dictTxt["model_save_loc "]
    main_data = dictTxt["main_data "]
    sec_data = dictTxt["sec_data "]
    test_file = dictTxt["test_file "]
    # a list with strings or a single string may be inputted
    # detect a list literal such as "['a','b']"; find returns -1 if not found
    if dictTxt["target_variables "].find("[") != -1 and dictTxt["target_variables "].find(",") != -1:
        target_variables = dictTxt["target_variables "][1:-1].split(",")
        # remove excess quotes and whitespace
        target_variables = [v.strip().strip("'").strip('"') for v in target_variables]
    else:
        target_variables = dictTxt["target_variables "]
# if true, converted images will be in png format instead of jpg
png = dictBool["png "]
# folder containing Cancer Imagery
load_dir = dictTxt["load_dir "]
# directory to save data such as converted images
save_dir = dictTxt["save_dir "]
# directory to save imagery array
img_array_save = dictTxt["img_array_save "]
# if true, numpy image array will be searched for in img_array_save
load_numpy_img = dictBool["load_numpy_img "]
# if true, attempt will be made to convert dicom files to jpg or png
convert_imgs = dictBool["convert_imgs "]
#if true, converted dicom images will be deleted after use
del_converted_imgs = dictBool["del_converted_imgs "]
    # if true, image model will be run instead of clinical-only model
    run_img_model = dictBool["run_img_model "]
# if true, two data files will be expected for input
two_datasets = dictBool["two_datasets "]
# if true, an additional file will be expected for testing
use_additional_test_file = dictBool["use_additional_test_file "]
    # where image id is located in image names (start,end)
    # only applies if using image model
    # the GUI supplies these as text, so parse them into real tuples
    img_id_name_loc = literal_eval(dictTxt["img_id_name_loc "])
    # Column of IDs in dataset. Acceptable values include "index" or a column name.
    ID_dataset_col = dictTxt["ID_dataset_col "]
    # tuple with dimension of imagery. All images must equal this dimension
    img_dimensions = literal_eval(dictTxt["img_dimensions "])
# if true, every column in data will be inputted for target variable
target_all = dictBool["target_all "]
# save location for data/graphs
data_save_loc = dictTxt["data_save_loc "]
# if true, graphs will be shown after training model
show_figs = dictBool["show_figs "]
# if true, graphs will be saved after training model
save_figs = dictBool["save_figs "]
    # if true, convert dicom directly to numpy instead of to jpg/png first
    dcmDirect = dictBool["dcmDirect"]
    # number of epochs in model
    num_epochs = int(dictTxt["num_epochs "])
mainPath = main_data
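# mainPath keeps the raw csv path, since main_data is replaced below by the
# encoded DataFrame
# encodeText maps every string cell to a numeric stand-in so the clinical
# data can be fed to the network; codeDict records each mapping so
# predictions can later be decoded back to text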
def cleanData(pd_dataset):
df = pd_dataset.dropna()
return df
codeDict = {}
def encodeText(dataset):
global codeDict
    if isinstance(dataset, str):
        dataset = pd.read_csv(dataset, low_memory=False)
dataset = cleanData(dataset)
dShape = dataset.shape
axis1 = dShape[0]
axis2 = dShape[1]
if axis1 >= axis2:
longestAxis = axis1
shortestAxis = axis2
else:
longestAxis = axis2
shortestAxis = axis1
for i in range(longestAxis):
for n in range(shortestAxis):
if longestAxis == axis1:
data = dataset.iloc[i,n]
else:
data = dataset.iloc[n,i]
            if isinstance(data, str):
                # concatenate the ordinal of every character
                strData = ""
                for c in data:
                    strData = strData + str(ord(c))
                strData = int(strData)
                # turn values into decimals to scale down
                divisor = 10 ** len(str(strData))
                strData = strData / divisor
                codeDict[data] = strData
                if longestAxis == axis1:
                    dataset.iloc[i, n] = strData
                else:
                    dataset.iloc[n, i] = strData
for cols in list(dataset.columns):
colType = str(dataset[cols].dtype)
if colType == "object":
dataset[cols] = dataset[cols].astype(float)
return dataset
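# example: a cell "Yes" becomes int("89101115") (ord('Y')=89, ord('e')=101,
# ord('s')=115) scaled down to 0.89101115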
main_data = encodeText(main_data)
col = None
# function for determining if target variable(s) are binary val
# returns bool if single var
# returns list of bools in corresponding order to target variables list if multiple vars
def isBinary(target_var):
    global col
    orgPD = pd.read_csv(mainPath)
    orgPD = orgPD.dropna()
    # a variable counts as binary if it is non-numeric with exactly two
    # unique values
    # check if param is a list of multiple vars
    if isinstance(target_var, list) and len(target_var) > 1:
        # initialize the list of bools before the loop so every variable's
        # result is kept
        areBinary = []
        for var in target_var:
            # remove duplicates
            col = list(set(orgPD[var]))
            # check if the data is numerical
            numeric = all(isinstance(v, (int, float)) for v in col)
            areBinary.append(not numeric and len(col) == 2)
        isBinary = areBinary
    else:
        # remove duplicates
        col = list(set(orgPD[target_var]))
        # check if the original data is numerical
        numeric = all(isinstance(v, (int, float)) for v in col)
        isBinary = not numeric and len(col) == 2
    return isBinary
isBinary = isBinary(target_variables)
# make dictionary with definitions for only the target var(s)
convCol = main_data.loc[:, target_variables]
if isinstance(target_variables, list) and len(target_variables) > 1:
    valList = []
    # gather encoded values column by column (iterating a DataFrame
    # directly would yield column names, not values)
    for colName in convCol.columns:
        valList.extend(convCol[colName].tolist())
    valList = list(set(valList))
    smNum = min(valList)
    lgNum = max(valList)
    valList[valList.index(smNum)] = 0
    valList[valList.index(lgNum)] = 1
    orgPD = pd.read_csv(mainPath)
    orgPD = orgPD.dropna()
    orgList = []
    for colName in orgPD.loc[:, target_variables].columns:
        orgList.extend(orgPD[colName].tolist())
    orgList = list(set(orgList))
    # note: this pairing relies on the two sets iterating in matching order
    targetDict = dict(zip(valList, orgList))
else:
valList = []
for vals in list(convCol):
valList.append(vals)
valList = list(set(valList))
smNum = min(valList)
lgNum = max(valList)
valList[valList.index(smNum)] = 0
valList[valList.index(lgNum)] = 1
orgPD = pd.read_csv(mainPath)
orgPD = orgPD.dropna()
orgList = []
for vals in orgPD.loc[:,target_variables]:
orgList.append(vals)
orgList = list(set(orgList))
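    # note: this pairing relies on the two sets iterating in matching order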
targetDict = dict(zip(valList,orgList))
# function to decode post-training vals into text
# only use with binary values
# function rounds vals to convert
def decode(iterable,codeDict):
    if isinstance(iterable, list):
        iterable = np.array(iterable)
initialShape = iterable.shape
iterable = iterable.flatten()
iterable = np.around(iterable,decimals=0)
dictKeys = list(codeDict.keys())
dictVals = list(codeDict.values())
# determine type of vals
# initialize text bool as false
textKeys = False
    for keys in dictKeys:
        if isinstance(keys, str):
            textKeys = True
    if not textKeys:
        i = 0
        for keys in dictKeys:
            dictKeys[i] = round(keys, 0)
            i = i + 1
    else:
        i = 0
        for vals in dictVals:
            # round where possible; leave strings untouched, but always
            # advance the index
            try:
                dictVals[i] = round(vals, 0)
            except TypeError:
                pass
            i = i + 1
    roundedDict = dict(zip(dictKeys, dictVals))
convIt = []
for vals in iterable:
tran = roundedDict[vals]
convIt.append(tran)
convIt = np.array(convIt)
# make array back into initial shape
convIt = np.reshape(convIt,initialShape)
return convIt
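# example: with a targetDict of {0: 'No', 1: 'Yes'}, decode(np.array([0.2, 0.8]),
# targetDict) rounds the values to [0, 1] and returns array(['No', 'Yes'])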
# function that returns percentage accuracy from rounded values
def percentageAccuracy(iterable1, iterable2):
    def roundList(iterable):
        if isinstance(iterable, tf.Tensor):
            iterable = iterable.numpy()
        # flatten any number of dimensions, then round every value
        arr = np.asarray(iterable).flatten()
        return [round(float(i), 0) for i in arr]
rounded1 = roundList(iterable1)
rounded2 = roundList(iterable2)
    # replace negative zeros with positive zeros
    rounded1 = [abs(v) if v == 0 else v for v in rounded1]
    rounded2 = [abs(v) if v == 0 else v for v in rounded2]
numCorrect = len([i for i, j in zip(rounded1, rounded2) if i == j])
listLen = len(rounded1)
percentCorr = numCorrect/listLen
percentCorr = percentCorr * 100
percentCorr = round(percentCorr,2)
return percentCorr
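# example: percentageAccuracy([1.2, 0.1, 0.9], [1, 1, 1]) rounds both
# inputs and returns 66.67, since two of the three pairs match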
def GUI_varConnector(dataset1, dataset2):
    if isinstance(dataset1, str):
        dataset1 = pd.read_csv(dataset1)
    if isinstance(dataset2, str):
        dataset2 = pd.read_csv(dataset2)
vars1 = list(dataset1.columns)
vars2 = list(dataset2.columns)
vars1.remove(ID_dataset_col)
vars2.remove(ID_dataset_col)
for element in target_variables:
if element in vars1:
vars1.remove(element)
if element in vars2:
vars2.remove(element)
# list of colors for buttons to choose from
colors = ["red", "blue", "purple", "orange", "green", "gray",
"gainsboro", "dark salmon", "LemonChiffon2", "ivory3",
"SteelBlue1", "DarkOliveGreen3", "gold2", "plum1"]
window = tk.Tk()
window.title("Variable Connector")
    window.iconbitmap(r"D:\Cancer_Project\Team8_Cancer_ML\cancer_icon.ico")
main_frame = tk.Frame(window)
main_frame.pack(fill=tk.BOTH,expand=1)
canvas = tk.Canvas(main_frame)
canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=1)
# Add a scrollbars to the canvas
scrollbar = ttk.Scrollbar(main_frame, orient=tk.VERTICAL, command=canvas.yview)
scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
scrollbar_x = ttk.Scrollbar(main_frame,orient=tk.HORIZONTAL, command=canvas.xview)
scrollbar_x.pack(side=tk.BOTTOM, fill=tk.X)
# Configure the canvas
canvas.configure(xscrollcommand=scrollbar_x.set)
canvas.configure(yscrollcommand=scrollbar.set)
canvas.bind("<Configure>", lambda e: canvas.configure(scrollregion=canvas.bbox("all")))
second_frame = tk.Frame(canvas)
canvas.create_window((0,0), window=second_frame, anchor="nw")
buttonFont = tkFont.Font(family="Georgia", size=20)
font = tkFont.Font(family="Georgia",size=25)
title = tk.Label(text="Select matching variables", font=font, fg="#0352fc")
title.place(relx=0.2,rely=0)
    pressedVars = []
    buttonList = []
def makeButtons(var_name, x, y):
var = var_name
def trackVars():
pressedVars.append(var)
button.config(bg=random.choice(colors))
button = tk.Button(master=second_frame,text=var_name, fg="white", bg="black", width=30, height=1,
command=trackVars,font=buttonFont)
button.grid(column=x,row=y,padx=105,pady=50)
buttonList.append(button)
y = 1
for var in vars1:
makeButtons(var, 10, y)
y = y + 10
y = 1
for var2 in vars2:
makeButtons(var2, 20, y)
y = y + 10
exitButton = tk.Button(master=second_frame,text="Done",fg="white",bg="orange",width=30,height=3,
command=window.destroy)
exitButton.grid(row=1,column=100)
window.mainloop()
# function used to convert list to dictionary
def Convert(lst):
res_dct = {lst[i]: lst[i + 1] for i in range(0, len(lst), 2)}
return res_dct
pressedVars_dict = Convert(pressedVars)
return pressedVars_dict
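# note: GUI_varConnector assumes the buttons are pressed in matching pairs,
# e.g. pressedVars of ["age", "Age at Diagnosis"] yields
# {"age": "Age at Diagnosis"}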
if two_datasets == True:
varMatches = GUI_varConnector(main_data,sec_data)
print(varMatches)
def collect_img_dirs(data_folder):
img_directories = []
for root, dirs, files, in os.walk(data_folder):
for name in files:
dir = os.path.join(root,name)
img_directories.append(dir)
return img_directories
if convert_imgs == True:
load_dirs = collect_img_dirs(load_dir)
def convert_img(png_boolean,dirs_list,save_path):
png = png_boolean
print("starting image conversion process")
num_converted_img = 0
for image in dirs_list:
# filter out incompatible images
if os.path.basename(image) != "1-1.dcm":
ds = dicom.dcmread(image)
pixel_array_numpy = ds.pixel_array
            if png == False:
                image = image.replace(".dcm", ".jpg")
            else:
                image = image.replace(".dcm", ".png")
cv2.imwrite(os.path.join(save_path,ds.PatientID+"_"+os.path.basename(image)),pixel_array_numpy)
## Loading info
num_imgs = len(dirs_list)
num_converted_img = num_converted_img + 1
percentage_done = (num_converted_img/num_imgs) * 100
print(str(round(percentage_done,2)) + " percent completed")
def convert_npy(dirs_list,save_path):
print("appending dicom files directly to numpy array")
img_array = np.array([])
img_conv = 0
for f in dirs_list:
# filter incompatible images
if os.path.basename(f) != "1-1.dcm":
ds = dicom.dcmread(f)
pixel_array_numpy = ds.pixel_array
id = ds.PatientID
            # strip non-digit characters from the PatientID
            for s in id:
                if not s.isdigit():
                    id = id.replace(s, '')
            # ids with a leading zero keep only their last four digits
            if id and id[0] == '0':
                id = id[-4:]
if pixel_array_numpy.shape == img_dimensions:
pixel_array_numpy = pixel_array_numpy.flatten()
pixel_array_numpy = np.insert(pixel_array_numpy,len(pixel_array_numpy),id)
img_array = np.append(img_array,pixel_array_numpy)
print(psutil.virtual_memory().percent)
# memory optimization
if psutil.virtual_memory().percent >= 50:
break
## Loading info
num_imgs = len(dirs_list)
img_conv = img_conv + 1
percentage_done = (img_conv / num_imgs) * 100
print(str(round(percentage_done, 2)) + " percent completed")
np.save(os.path.join(save_path, "img_array"), img_array)
if convert_imgs == True and dcmDirect == False:
convert_img(png, load_dirs,save_dir)
elif convert_imgs == True and load_numpy_img == False and dcmDirect == True:
convert_npy(load_dirs,save_dir)
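# prep_data indexes the clinical file(s) by patient ID and, when two
# datasets are supplied, joins them on their common IDs and drops the
# duplicate variables matched through the GUI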
def prep_data(data_file_1, data_file_2):
    if not isinstance(data_file_1, pd.DataFrame):
        file_1 = pd.read_csv(data_file_1)
    else:
        file_1 = data_file_1
common_ids = []
if ID_dataset_col != "index":
file_1 = file_1.set_index(ID_dataset_col)
ids_1 = file_1.index
if two_datasets == True:
        if not isinstance(data_file_2, pd.DataFrame):
            file_2 = pd.read_csv(data_file_2)
        else:
            file_2 = data_file_2
file_2 = file_2.set_index(ID_dataset_col)
ids_2 = file_2.index
        # determine the largest dataset to put first in the for statement
        if ids_1.shape[0] >= ids_2.shape[0]:
            longest_ids = ids_1.values.tolist()
            shortest_ids = ids_2.values.tolist()
        else:
            longest_ids = ids_2.values.tolist()
            shortest_ids = ids_1.values.tolist()
for i in longest_ids:
for z in shortest_ids:
if int(i) == int(z):
common_ids.append(i)
adapted_1 = file_1.loc[common_ids]
adapted_2 = file_2.loc[common_ids]
combined_dataset = adapted_1.join(adapted_2)
# eliminate duplicate variables
for i in varMatches.values():
combined_dataset = combined_dataset.drop(i,axis=1)
data = combined_dataset
else:
data = file_1
return data
if two_datasets == True:
main_data = prep_data(main_data,sec_data)
elif two_datasets == False:
main_data = prep_data(main_data,None)
resultList = []
prediction = []
def feature_selection(pd_dataset, target_vars, num_features):
    # initialize bool as false
    multiple_targets = False
    if isinstance(target_vars, list) and len(target_vars) > 1:
        multiple_targets = True
    # get the top features with the highest absolute correlation
    if multiple_targets == False:
        features = list(pd_dataset.corr().abs().nlargest(num_features, target_vars).index)
    else:
        features = []
        for vars in target_vars:
            f = pd_dataset.corr().abs().nlargest(num_features, vars).index
            f = list(f)
            features.append(f)
        features = sum(features, [])
    # get the matching correlation values from the target column itself,
    # not from whichever column happens to be last
    if multiple_targets:
        corrVals = []
        for vars in target_vars:
            c = pd_dataset.corr().abs().nlargest(num_features, vars)[vars]
            c = list(c)
            corrVals.append(c)
        corrVals = sum(corrVals, [])
    else:
        corrVals = list(pd_dataset.corr().abs().nlargest(num_features, target_vars)[target_vars])
    # make a dictionary out of the two lists
    featureDict = dict(zip(features, corrVals))
    return featureDict
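# example: feature_selection(df, "Outcome", 10) (for a hypothetical
# "Outcome" column) returns a dict mapping the 10 columns most correlated
# in absolute value with "Outcome" to their correlation values; the target
# itself is included since it correlates perfectly with itself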
def model(data_file, test_file, target_vars, epochs_num):
    # initialize bool as false
    multiple_targets = False
    if isinstance(target_vars, list) and len(target_vars) > 1:
        multiple_targets = True
if multiple_targets == False:
# get top 10 most correlated features to utilize
features = list(feature_selection(data_file,target_vars,10).keys())
else:
# initialize list
features = []
# make list with top 10 most correlated features from both vars.
# Ex. 20 total features for 2 target vars
for vars in target_vars:
featuresVar = list(feature_selection(data_file,vars,10).keys())
features = features + featuresVar
# remove duplicates
features = list(set(features))
# only use features determined by feature_selection
data_file = data_file[data_file.columns.intersection(features)]
    def format_data(data_file, test_file, target_var):
        if isinstance(data_file, pd.DataFrame):
            df = data_file
        elif isinstance(data_file, str) and data_file.endswith(".csv"):
            df = pd.read_csv(data_file)
        if use_additional_test_file == True:
            # recognize which variables are in the test data
            input_data = pd.read_csv(test_file)
            input_vars = input_data.columns.tolist()
            # collect data for those variables from the main dataset
            dataset = df[input_vars]
            # append y data for the target column to the new dataset
            y_data = df[target_var]
            dataset = dataset.assign(target_variables=y_data)
            target_name = str(target_var)
            # rename in place; assigning the result of inplace=True would
            # set dataset to None
            dataset.rename(columns={'target_variables': target_name}, inplace=True)
        elif use_additional_test_file == False:
            dataset = df
        return dataset
adapted_dataset = format_data(data_file, test_file, target_vars)
# initiate negative_vals as False
negative_vals = False
# determine activation function (relu or tanh) from if there are negative numbers in target variable
df_values = adapted_dataset.values
df_values = df_values.flatten()
for val in df_values:
val = float(val)
if val < 0:
negative_vals = True
if negative_vals == True:
act_func = "tanh"
else:
act_func = 'relu'
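    # relu outputs are non-negative, so tanh is chosen when targets can be negative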
print(act_func)
def NN(data_file, target_vars, epochs_num,activation_function):
global resultList
global prediction
# Get data. Data must already be in a Pandas Dataframe
df = data_file
#y data
labels = df.loc[:,target_vars]
#x data
features = df.drop(target_vars,axis=1)
X = features
y = labels
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)
# split test data into validation and test
X_test, X_val = train_test_split(X_test, test_size=0.5, random_state=34)
y_test, y_val = train_test_split(y_test, test_size=0.5, random_state=34)
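        # the matching random_state keeps the X and y rows aligned across
        # the two separate splits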
        # normalize data: fit the scaler on the training split only and
        # reuse it for test/validation to avoid leaking their statistics
        min_max_scaler = MinMaxScaler()
        X_train = min_max_scaler.fit_transform(X_train)
        X_test = min_max_scaler.transform(X_test)
        X_val = min_max_scaler.transform(X_val)
        if multiple_targets:
            y_scaler = MinMaxScaler()
            y_train = y_scaler.fit_transform(y_train)
            y_test = y_scaler.transform(y_test)
            y_val = y_scaler.transform(y_val)
        if isinstance(y_train, pd.DataFrame):
            y_train = y_train.to_numpy()
        if isinstance(y_test, pd.DataFrame):
            y_test = y_test.to_numpy()
        # check data for nans/non-compatible objects
        def hasNan(array):
            try:
                containsNan = bool(np.isnan(np.asarray(array, dtype=float)).any())
            except (TypeError, ValueError):
                # values that cannot be cast to float count as incompatible
                containsNan = True
            if containsNan:
                print("Data contains nan values")
            else:
                print("Data does not contain nan values")
hasNan(y_train)
        if not load_fit:
            if isinstance(target_vars, list) and len(target_vars) > 1:
                # shape must be a tuple, not a bare int
                input = keras.Input(shape=(X_train.shape[1],))
def add_target(Input):
x = layers.Dense(40,activation=activation_function)(Input)
x = layers.Dense(40,activation=activation_function)(x)
x = layers.Dense(35,activation=activation_function)(x)
x = layers.Dense(35,activation=activation_function)(x)
return x
output_list = []
for vars in range(len(target_vars)):
x = add_target(input)
output_list.append(x)
x = layers.Concatenate()(output_list)
output_list.clear()
x = layers.Dense(12,activation='relu')(x)
for vars in range(len(target_vars)):
y = layers.Dense(1,activation='linear')(x)
output_list.append(y)
model = keras.Model(inputs=input,outputs=output_list)
                model.compile(optimizer='SGD',
                              loss='mean_absolute_error',
                              metrics=['accuracy'])
                # one target array per output head, as Keras expects
                fit = model.fit(X_train, [y_train[:, i] for i in range(len(target_vars))], epochs=epochs_num, batch_size=5)
else:
print(X_train.shape[1])
# set input shape to dimension of data
input = keras.layers.Input(shape=(X_train.shape[1],))
x = Dense(20,activation=activation_function)(input)
x = Dense(15,activation=activation_function)(x)
x = Dense(6,activation=activation_function)(x)
x = Dense(4,activation=activation_function)(x)
x = Dense(2,activation=activation_function)(x)
output = Dense(1, activation='linear')(x)
model = keras.Model(input, output)
model.compile(optimizer='SGD',
loss='mean_squared_error',
metrics=['accuracy'])
fit = model.fit(X_train, y_train, epochs=epochs_num, batch_size=32)
# plotting
history = fit
def plot(model_history,metric,graph_title):
history = model_history
plt.plot(history.history[metric])
plt.title(graph_title)
plt.ylabel(metric)
plt.xlabel('epoch')
save_path = os.path.join(data_save_loc, str(target_vars) + " " + metric + ".jpg")
if "?" in save_path:
save_path = save_path.replace("?","")
if save_figs == True:
plt.savefig(save_path)
if show_figs == True:
plt.show()
else:
plt.clf()
plot(history,'loss','model loss')
def save_fitted_model(model,save_location):
model.save(save_location)
if save_fit == True:
save_fitted_model(model,model_save_loc)
else:
model = keras.models.load_model(model_save_loc)
# utilize validation data
prediction = model.predict(X_val, batch_size=1)
roundedPred = np.around(prediction,0)
        # adding 0.0 turns any negative zeros into positive zeros
        roundedPred = roundedPred + 0.0
        # drop a trailing singleton dimension if present
        if roundedPred.ndim == 3 and roundedPred.shape[2] == 1:
            roundedPred = np.reshape(roundedPred, roundedPred.shape[:2])
print("Validation Metrics")
print("- - - - - - - - - - - - - Unrounded Prediction - - - - - - - - - - - - -")
print(prediction)
print("- - - - - - - - - - - - - Rounded Prediction - - - - - - - - - - - - -")
print(roundedPred)
print("- - - - - - - - - - - - - y val - - - - - - - - - - - - -")
print(y_val)
        if isinstance(prediction, list):
            prediction = np.array([prediction])
percentAcc = percentageAccuracy(roundedPred, y_val)
print("- - - - - - - - - - - - - Percentage Accuracy - - - - - - - - - - - - -")
print(percentAcc)
resultList.append(str(prediction))
resultList.append(str(roundedPred))
resultList.append(str(y_val))
resultList.append(str(percentAcc))
# utilize test data
prediction = model.predict(X_test,batch_size=1)
roundedPred = np.around(prediction,0)
        # adding 0.0 turns any negative zeros into positive zeros
        roundedPred = roundedPred + 0.0
        # drop a trailing singleton dimension if present
        if roundedPred.ndim == 3 and roundedPred.shape[2] == 1:
            roundedPred = np.reshape(roundedPred, roundedPred.shape[:2])
print("Test Metrics")
print("- - - - - - - - - - - - - Unrounded Prediction - - - - - - - - - - - - -")
print(prediction)
print("- - - - - - - - - - - - - Rounded Prediction - - - - - - - - - - - - -")
print(roundedPred)
print("- - - - - - - - - - - - - y test - - - - - - - - - - - - -")
print(y_test)
        if isinstance(prediction, list):
            prediction = np.array([prediction])
percentAcc = percentageAccuracy(roundedPred,y_test)
print("- - - - - - - - - - - - - Percentage Accuracy - - - - - - - - - - - - -")
print(percentAcc)
resultList.append(str(prediction))
resultList.append(str(roundedPred))
resultList.append(str(y_test))
resultList.append(str(percentAcc))
        if multiple_targets == True and isinstance(isBinary, list):
# initialize var as error message
decodedPrediction = "One or all of the target variables are non-binary and/or numeric"
i = 0
for bools in isBinary:
if bools == True:
decodedPrediction = decode(prediction[0,i],targetDict)
i = i + 1
else:
if isBinary:
decodedPrediction = decode(prediction,targetDict)
else:
decodedPrediction = "One or all of the target variables are non-binary and/or numeric"
print("- - - - - - - - - - - - - Translated Prediction - - - - - - - - - - - - -")
print(decodedPrediction)
NN(adapted_dataset, target_vars, epochs_num, act_func)
if run_img_model == False and target_all == False:
model(main_data,test_file,target_variables,num_epochs)
elif run_img_model == False and target_all == True:
# collect columns in data
cols = list(main_data.columns)
for column in cols:
model(main_data,test_file,column,num_epochs)
def image_model(save_loc,data_file,test_file,target_vars,epochs_num):
print("starting image model")
features = list(feature_selection(data_file, target_vars,10).keys())
# only use features determined by feature_selection in clinical data
data_file = data_file[data_file.columns.intersection(features)]
def format_data(data_file, test_file, target_vars):
        if isinstance(data_file, pd.DataFrame):
            df = data_file
        elif isinstance(data_file, str) and data_file.endswith(".csv"):
            df = pd.read_csv(data_file)
if use_additional_test_file == True:
#Recognizing what variables are in the input data
input_data = pd.read_csv(test_file)
input_vars = input_data.columns.tolist()
#collect data for the variables from main dataset
dataset = df[input_vars]
# Append y data for target column into new dataset
y_data = df[target_vars]
dataset = dataset.assign(target_variables=y_data)
target_name = str(target_vars)
dataset.rename(columns={'target_variables':target_name},inplace=True)
elif use_additional_test_file == False:
dataset = df
return dataset
adapted_dataset = format_data(data_file, test_file,target_vars)
adapted_dataset.index.names = ["ID"]
img_array = np.array([])
matching_ids = []
img_list = os.listdir(save_loc)
# number of images that match proper resolution
num_usable_img = 0
# used for loading info
imgs_processed = 0
if load_numpy_img == True:
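        # load the first file found in img_array_save, assumed to be the
        # img_array .npy file saved on a previous run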
img_array = np.load(os.path.join(img_array_save,os.listdir(img_array_save)[0]))
if len(img_dimensions) == 3:
flat_res = int((img_dimensions[0]*img_dimensions[1]*img_dimensions[2])+1)
elif len(img_dimensions) == 2:
flat_res = int((img_dimensions[0]*img_dimensions[1])+1)
num_img = int(img_array.shape[0]/flat_res)
img_array = np.reshape(img_array,(num_img,flat_res))
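        # each row of img_array is one flattened image with its patient ID
        # appended as the last element (hence the +1 in flat_res)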
## retrieving ids
img_df = pd.DataFrame(data=img_array)
cols = list(img_df.columns)
id_col = img_df[cols[-1]].tolist()
dataset_id = adapted_dataset.index.tolist()
        # determine what to put first in loop
        if len(id_col) >= len(dataset_id):
            longest = id_col
            shortest = dataset_id
        else:
            longest = dataset_id
            shortest = id_col
for id in longest:
for id2 in shortest:
if int(id) == int(id2):
matching_ids.append(id)
    elif load_numpy_img == False:
        # Collect/convert corresponding imagery whose id appears in the
        # clinical data
        print("starting data preparation process")
        dataset_ids = [int(i) for i in adapted_dataset.index]
        for imgs in img_list:
            img_id = int(imgs[img_id_name_loc[0]:img_id_name_loc[1]])
            if img_id in dataset_ids:
                img = load_img(os.path.join(save_loc, imgs))
                img_numpy_array = img_to_array(img)
                # only keep images that match the expected resolution
                if img_numpy_array.shape == img_dimensions:
                    img_numpy_array = img_numpy_array.flatten()
                    img_numpy_array = np.insert(img_numpy_array, len(img_numpy_array), img_id)
                    matching_ids.append(img_id)
                    num_usable_img = num_usable_img + 1
                    img_array = np.append(img_array, img_numpy_array, axis=0)
            imgs_processed = imgs_processed + 1
            ## Memory optimization
            if psutil.virtual_memory().percent >= 50:
                break
            ## loading info
            total_img = len(img_list)
            percent_conv = (imgs_processed / total_img) * 100
            print(str(round(percent_conv, 2)) + " percent converted")
            print(str(psutil.virtual_memory()))
        # remove duplicate ids
        matching_ids = list(dict.fromkeys(matching_ids))
        # save the array
        np.save(os.path.join(img_array_save, "img_array"), img_array)
        # reshape the flat array into one row per usable image
        img_array = np.reshape(img_array, (num_usable_img, int(img_array.size / num_usable_img)))
    adapted_dataset = adapted_dataset.loc[matching_ids]
# initialize negative_vals as false
negative_vals = False
# determine activation function (relu or tanh) from if there are negative numbers in target variable
df_values = adapted_dataset.values
df_values = df_values.flatten()
for val in df_values:
val = float(val)
if val < 0:
negative_vals = True
if negative_vals == True:
act_func = "tanh"
else:
act_func = 'relu'
def model(pd_data,input_imagery,target_vars,activation_function):
global resultList
global prediction
        # initialize bool as false
        multiple_targets = False
        if isinstance(target_vars, list) and len(target_vars) > 1:
            multiple_targets = True
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Clinical
# Get data
df = pd_data
# y data
labels = df[target_vars]
# x data
features = df.drop(target_vars,axis=1)
X = features
y = labels
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)
# split test data into validation and test
X_test, X_val = train_test_split(X_test, test_size=0.5, random_state=53)
y_test, y_val = train_test_split(y_test, test_size=0.5, random_state=53)
        # normalize data: fit the scaler on the training split only and
        # reuse it for test/validation to avoid leaking their statistics
        min_max_scaler = MinMaxScaler()
        X_train = min_max_scaler.fit_transform(X_train)
        X_test = min_max_scaler.transform(X_test)
        X_val = min_max_scaler.transform(X_val)
        if multiple_targets:
            y_scaler = MinMaxScaler()
            y_train = y_scaler.fit_transform(y_train)
            y_test = y_scaler.transform(y_test)
            y_val = y_scaler.transform(y_val)
        if isinstance(y_train, pd.DataFrame):
            y_train = y_train.to_numpy()
        if isinstance(y_test, pd.DataFrame):
            y_test = y_test.to_numpy()
y_test = np.asarray(y_test).astype(np.float32)
y_train = np.asarray(y_train).astype(np.float32)
X_train = np.asarray(X_train).astype(np.float32)
X_test = np.asarray(X_test).astype(np.float32)
y_test = tf.convert_to_tensor(y_test)
y_train = tf.convert_to_tensor(y_train)
X_train = tf.convert_to_tensor(X_train)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Image
X_train_img, X_test_img = train_test_split(input_imagery,test_size=0.4,random_state=42)
        # same random_state as the clinical split so image rows stay aligned
        X_test_img, X_val_img = train_test_split(X_test_img, test_size=0.5, random_state=53)
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def remove_ids(dataset):
# initialize empty array
newImg = np.empty((0, img_dimensions[0] * img_dimensions[1]))
# remove ids from img data
i = 0
for arr in dataset:
arr = np.delete(arr, -1)
newImg = np.insert(newImg, i, arr, axis=0)
i = i + 1
return newImg
if useCNN:
X_train_img = remove_ids(X_train_img)
X_test_img = remove_ids(X_test_img)
X_val_img = remove_ids(X_val_img)
            # normalize data: fit the scaler on the training images only
            min_max_scaler = MinMaxScaler()
            X_train_img = min_max_scaler.fit_transform(X_train_img)
            X_test_img = min_max_scaler.transform(X_test_img)
            X_val_img = min_max_scaler.transform(X_val_img)
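            # reshape the flat vectors back into (height, width, 1) arrays
            # so the CNN can apply 2-D convolutions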
X_train_img = np.reshape(X_train_img,(X_train_img.shape[0],img_dimensions[0],img_dimensions[1],1))
X_test_img = np.reshape(X_test_img,(X_test_img.shape[0],img_dimensions[0],img_dimensions[1],1))
X_val_img = np.reshape(X_val_img,(X_val_img.shape[0],img_dimensions[0],img_dimensions[1],1))
X_train = X_train_img
X_test = X_test_img
X_val = X_val_img
if not useCNN:
X_train_img = remove_ids(X_train_img)
X_test_img = remove_ids(X_test_img)
X_val_img = remove_ids(X_val_img)
            # keep the same column order (clinical then imagery) for all splits
            X_train = np.concatenate((X_train, X_train_img), axis=1)
            X_test = np.concatenate((X_test, X_test_img), axis=1)
            X_val = np.concatenate((X_val, X_val_img), axis=1)
            # normalize the combined data, again fitting only on train
            min_max_scaler = MinMaxScaler()
            X_train = min_max_scaler.fit_transform(X_train)
            X_test = min_max_scaler.transform(X_test)
            X_val = min_max_scaler.transform(X_val)
            if multiple_targets:
                y_scaler = MinMaxScaler()
                y_train = y_scaler.fit_transform(y_train)
                y_test = y_scaler.transform(y_test)
                y_val = y_scaler.transform(y_val)
print(activation_function)
if not load_fit:
if not useCNN:
            if not isinstance(target_vars, list) or len(target_vars) == 1:
# set input shape to dimension of data
input = keras.layers.Input(shape=(X_train.shape[1],))
x = Dense(150, activation=activation_function)(input)
x = Dense(150, activation=activation_function)(x)
x = Dense(150, activation=activation_function)(x)
x = Dense(120, activation=activation_function)(x)
x = Dense(120, activation=activation_function)(x)
x = Dense(100, activation=activation_function)(x)
x = Dense(100, activation=activation_function)(x)
x = Dense(80, activation=activation_function)(x)
x = Dense(80, activation=activation_function)(x)
x = Dense(45, activation=activation_function)(x)
output = Dense(1, activation='linear')(x)
model = keras.Model(input, output)
model.compile(optimizer='adam',
loss='mean_squared_error',
metrics=['accuracy'])
fit = model.fit(X_train,y_train,epochs=epochs_num,batch_size=64)
else:
input = keras.layers.Input(shape=(X_train.shape[1],))
def add_target(Input):
x = layers.Dense(90,activation=activation_function)(Input)
x = layers.Dense(60, activation=activation_function)(x)
x = layers.Dense(45, activation=activation_function)(x)
x = layers.Dense(35, activation=activation_function)(x)
x = layers.Dense(20, activation=activation_function)(x)
return x
output_list = []
for vars in range(len(target_vars)):
x = add_target(input)
output_list.append(x)
x = layers.Concatenate()(output_list)
output_list.clear()
x = layers.Dense(12,activation=activation_function)(x)
for vars in range(len(target_vars)):
# create output layer
y = layers.Dense(1,activation='linear')(x)
output_list.append(y)
model = keras.Model(inputs=input,outputs=output_list)
model.compile(optimizer='adam',
loss='mean_squared_error',
metrics=['accuracy'])
                    # one target array per output head, as Keras expects
                    fit = model.fit(X_train, [y_train[:, i] for i in range(len(target_vars))], epochs=epochs_num, batch_size=5)
else:
model = Sequential()
model.add(layers.Conv2D(64,(3,3),input_shape=X_train.shape[1:]))
model.add(layers.Activation('relu'))
model.add(layers.MaxPooling2D(pool_size=(2,2)))
model.add(layers.Conv2D(64,(3,3)))
model.add(layers.Activation('relu'))
model.add(layers.MaxPooling2D(pool_size=(2,2)))
model.add(layers.Flatten())
model.add(layers.Dense(64))
model.add(layers.Activation('relu'))
model.add(layers.Dense(1))
model.add(layers.Activation('linear'))
model.compile(loss='mean_squared_error',
optimizer='adam',
metrics=['accuracy'])
fit = model.fit(X_train,y_train,epochs=epochs_num)
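                # note: the CNN path trains on imagery alone; clinical
                # features are only concatenated with the image data in the
                # non-CNN path above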
#plotting
history = fit
def plot(model_history, metric, graph_title):
history = model_history
plt.plot(history.history[metric])
plt.title(graph_title)
plt.ylabel(metric)
plt.xlabel('epoch')
save_path = os.path.join(data_save_loc,str(target_vars) + " " + metric + ".jpg")
if "?" in save_path:
save_path = save_path.replace("?","")
if save_figs == True:
plt.savefig(save_path)
if show_figs == True:
plt.show()
else:
plt.clf()
plot(history, 'loss', 'model loss')
def save_fitted_model(model, save_location):
model.save(save_location)
if save_fit == True:
save_fitted_model(model, model_save_loc)
else:
model = keras.models.load_model(model_save_loc)
# utilize validation data
prediction = model.predict(X_val, batch_size=1)
roundedPred = np.around(prediction,0)
        # adding 0.0 turns any negative zeros into positive zeros
        roundedPred = roundedPred + 0.0
        # drop a trailing singleton dimension if present
        if roundedPred.ndim == 3 and roundedPred.shape[2] == 1:
            roundedPred = np.reshape(roundedPred, roundedPred.shape[:2])
print("Validation Metrics")
print("- - - - - - - - - - - - - Unrounded Prediction - - - - - - - - - - - - -")
print(prediction)
print("- - - - - - - - - - - - - Rounded Prediction - - - - - - - - - - - - -")
print(roundedPred)
print("- - - - - - - - - - - - - y val - - - - - - - - - - - - -")
print(y_val)
        if isinstance(prediction, list):
            prediction = np.array([prediction])
        # compare the rounded prediction against the validation labels
        percentAcc = percentageAccuracy(roundedPred, y_val)
print("- - - - - - - - - - - - - Percentage Accuracy - - - - - - - - - - - - -")
print(percentAcc)
resultList.append(str(prediction))
resultList.append(str(roundedPred))
resultList.append(str(y_val))
resultList.append(str(percentAcc))
        # utilize test data
        prediction = model.predict(X_test, batch_size=1)
        roundedPred = np.around(prediction, 0)
        # adding 0.0 turns any negative zeros into positive zeros
        roundedPred = roundedPred + 0.0
        # drop a trailing singleton dimension if present
        if roundedPred.ndim == 3 and roundedPred.shape[2] == 1:
            roundedPred = np.reshape(roundedPred, roundedPred.shape[:2])
print("Test Metrics")
print("- - - - - - - - - - - - - Unrounded Prediction - - - - - - - - - - - - -")
print(prediction)
print("- - - - - - - - - - - - - Rounded Prediction - - - - - - - - - - - - -")
print(roundedPred)
print("- - - - - - - - - - - - - y test - - - - - - - - - - - - -")
print(y_test)
        if isinstance(prediction, list):
            prediction = np.array([prediction])
percentAcc = percentageAccuracy(roundedPred, y_test)
print("- - - - - - - - - - - - - Percentage Accuracy - - - - - - - - - - - - -")
print(percentAcc)
resultList.append(str(prediction))
resultList.append(str(roundedPred))
resultList.append(str(y_test))
resultList.append(str(percentAcc))
        if multiple_targets == True and isinstance(isBinary, list):
# initialize var as error message
decodedPrediction = "One or all of the target variables are non-binary and/or numeric"
i = 0
for bools in isBinary:
if bools == True:
decodedPrediction = decode(prediction[0,i],targetDict)
i = i + 1
else:
if isBinary:
decodedPrediction = decode(prediction,targetDict)
else:
decodedPrediction = "One or all of the target variables are non-binary and/or numeric"
print("- - - - - - - - - - - - - Translated Prediction - - - - - - - - - - - - -")
print(decodedPrediction)
model(adapted_dataset,img_array,target_vars,act_func)
if run_img_model == True and target_all == False:
image_model(save_dir,main_data,test_file,target_variables,num_epochs)
elif run_img_model == True and target_all == True:
    # collect columns in data
    cols = list(main_data.columns)
    for column in cols:
        # train one image model per column as the target
        image_model(save_dir, main_data, test_file, column, num_epochs)
def ValResultPage():
root = tk.Tk()
root.title("Results - Validation")
    root.iconbitmap(r"D:\Cancer_Project\Team8_Cancer_ML\cancer_icon.ico")
# MAKE SCROLLBAR
main_frame = tk.Frame(root)
main_frame.pack(fill=tk.BOTH, expand=1)
canvas = tk.Canvas(main_frame)
canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=1)
# Add a scrollbars to the canvas
scrollbar = ttk.Scrollbar(main_frame, orient=tk.VERTICAL, command=canvas.yview)
scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
scrollbar_x = ttk.Scrollbar(main_frame, orient=tk.HORIZONTAL, command=canvas.xview)
scrollbar_x.pack(side=tk.BOTTOM, fill=tk.X)
# Configure the canvas
canvas.configure(xscrollcommand=scrollbar_x.set)
canvas.configure(yscrollcommand=scrollbar.set)
canvas.bind("<Configure>", lambda e: canvas.configure(scrollregion=canvas.bbox("all")))
second_frame = tk.Frame(canvas)
canvas.create_window((0, 0), window=second_frame, anchor="nw")
# define fonts
titleFont = tkFont.Font(family="Georgia",size=20)
titleColor = "#f29c2c"
resultFont = tkFont.Font(family="Consolas",size=16)
# ADD WIDGETS
prediction = resultList[0]
roundedPred = resultList[1]
y_val = resultList[2]
percentAcc = resultList[3]
def placeResults(txt):
result = tk.Label(second_frame,text=txt,font=resultFont,bg='black',fg='white')
result.grid(pady=40)
def destroy():
root.quit()
resultTitle = tk.Label(second_frame,text="Prediction",font=titleFont,fg=titleColor)
resultTitle.grid()
placeResults(prediction)
resultTitle = tk.Label(second_frame,text="Rounded Prediction",font=titleFont,fg=titleColor)
resultTitle.grid()
placeResults(roundedPred)
resultTitle = tk.Label(second_frame,text="y_val",font=titleFont,fg=titleColor)
resultTitle.grid()
placeResults(y_val)
resultTitle = tk.Label(second_frame,text="Percentage Accuracy",font=titleFont,fg=titleColor)
resultTitle.grid()
placeResults(percentAcc)
exitButton = tk.Button(second_frame,text="Next",font=titleFont,fg=titleColor,command=destroy)
exitButton.grid()
def quit_window():
root.quit()
root.destroy()
root.protocol("WM_DELETE_WINDOW",quit_window)
root.mainloop()
ValResultPage()
def trainResultPage():
root = tk.Tk()
root.title("Results - Test")
    root.iconbitmap(r"D:\Cancer_Project\Team8_Cancer_ML\cancer_icon.ico")
# Make scrollbar
main_frame = tk.Frame(root)
main_frame.pack(fill=tk.BOTH, expand=1)
canvas = tk.Canvas(main_frame)
canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=1)
# Add a scrollbars to the canvas
scrollbar = ttk.Scrollbar(main_frame, orient=tk.VERTICAL, command=canvas.yview)
scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
scrollbar_x = ttk.Scrollbar(main_frame, orient=tk.HORIZONTAL, command=canvas.xview)
scrollbar_x.pack(side=tk.BOTTOM, fill=tk.X)
# Configure the canvas
canvas.configure(xscrollcommand=scrollbar_x.set)
canvas.configure(yscrollcommand=scrollbar.set)
canvas.bind("<Configure>", lambda e: canvas.configure(scrollregion=canvas.bbox("all")))
second_frame = tk.Frame(canvas)
canvas.create_window((0, 0), window=second_frame, anchor="nw")
# define fonts
titleFont = tkFont.Font(family="Georgia", size=20)
titleColor = "#f29c2c"
resultFont = tkFont.Font(family="Consolas", size=16)
# ADD WIDGETS
prediction = resultList[4]
roundedPred = resultList[5]
y_test = resultList[6]
percentAcc = resultList[7]
def placeResults(txt):
result = tk.Label(second_frame,text=txt,font=resultFont,bg='black',fg='white')
result.grid(pady=40)
def destroy():
root.quit()
resultTitle = tk.Label(second_frame, text="Prediction", font=titleFont, fg=titleColor)
resultTitle.grid()
placeResults(prediction)
resultTitle = tk.Label(second_frame, text="Rounded Prediction", font=titleFont, fg=titleColor)
resultTitle.grid()
placeResults(roundedPred)
resultTitle = tk.Label(second_frame, text="y_test", font=titleFont, fg=titleColor)
resultTitle.grid()
placeResults(y_test)
resultTitle = tk.Label(second_frame, text="Percentage Accuracy", font=titleFont, fg=titleColor)
resultTitle.grid()
placeResults(percentAcc)
exitButton = tk.Button(second_frame, text="Exit", font=titleFont, fg=titleColor, command=destroy)
exitButton.grid()
def quit_window():
root.quit()
root.destroy()
root.protocol("WM_DELETE_WINDOW", quit_window)
root.mainloop()
trainResultPage()
# delete converted dicom images after use if boolean is true
if del_converted_imgs == True:
folder = save_dir
for filename in os.listdir(folder):
file_path = os.path.join(folder, filename)
try:
if os.path.isfile(file_path) or os.path.islink(file_path):
os.unlink(file_path)
elif os.path.isdir(file_path):
shutil.rmtree(file_path)
except Exception as e:
print('Failed to delete %s. Reason: %s' % (file_path, e))