print('\n\n @ @ @ @ @ @ START @ @ @ @ @ @ @ ');
print('file: script_start.lua');
print('author Davide Chicco <davide.chicco@gmail.com>');
print(os.date("%c", os.time()));
MAX_MSE = 4
-- RANDOM_SEED = 42
-- torch.manualSeed(RANDOM_SEED)
-- math.randomseed(RANDOM_SEED)
local timeStart = os.time()
-- createPerceptron
function createPerceptron(this_input_number, this_hidden_units, this_hidden_layers, this_output_number)
perceptron = nn.Sequential()
perceptron:add(nn.Linear(this_input_number, this_hidden_units))
-- perceptron:add(nn.Sigmoid())
perceptron:add(nn.ReLU())
if DROPOUT_FLAG==true then perceptron:add(nn.Dropout()) end
for w=1,this_hidden_layers do
perceptron:add(nn.Linear(this_hidden_units, this_hidden_units))
-- perceptron:add(nn.Sigmoid())
perceptron:add(nn.ReLU())
if DROPOUT_FLAG==true then perceptron:add(nn.Dropout()) end
end
perceptron:add(nn.Linear(this_hidden_units, this_output_number))
if XAVIER_INITIALIZATION==true then
print("XAVIER_INITIALIZATION = "..tostring(XAVIER_INITIALIZATION))
perceptron = require("./weight-init.lua")(perceptron, 'xavier') -- XAVIER
end
return perceptron;
end
-- function executeTest
function executeTest(testPerceptron, dataset_patient_profile)
local correctPredictions = 0
local atleastOneTrue = false
local atleastOneFalse = false
local predictionTestVect = {}
local truthVect = {}
for i=1,#dataset_patient_profile do
local current_label = dataset_patient_profile[i][2][1]
local original_prediction = testPerceptron:forward(dataset_patient_profile[i][1])[1]
-- io.write("original_prediction = ".. original_prediction)
prediction = original_prediction --(original_prediction+1)/2
predictionTestVect[i] = prediction
truthVect[i] = current_label
-- io.write(" prediction = ".. round(prediction,2))
-- io.write(" current_label = ".. current_label.."\n")
-- io.flush()
local labelResult = false
if current_label >= THRESHOLD and prediction >= THRESHOLD then
labelResult = true
elseif current_label < THRESHOLD and prediction < THRESHOLD then
labelResult = true
end
if labelResult==true then correctPredictions = correctPredictions + 1; end
if prediction>=THRESHOLD then
atleastOneTrue = true
else
atleastOneFalse = true
end
end
print("\nCorrect predictions = "..round(correctPredictions*100/#dataset_patient_profile,2).."%")
if atleastOneTrue==false then print("ATTENTION: all the predictions are FALSE") end
if atleastOneFalse==false then print("ATTENTION: all the predictions are TRUE") end
require './metrics_ROC_AUC_computer.lua'
local output_AUC_computer = metrics_ROC_AUC_computer(predictionTestVect, truthVect)
local auroc = output_AUC_computer[1]
local aupr = output_AUC_computer[2]
local printValues = false
local output_confusion_matrix = confusion_matrix(predictionTestVect, truthVect, THRESHOLD, printValues)
return {output_confusion_matrix[4], output_confusion_matrix[1], output_confusion_matrix[5], auroc, aupr};
-- MCC, accuracy, f1_score, AUROC, AUPR
end
-- Function sleep
function sleep(n) os.execute("sleep " .. tonumber(n)); end
-- Function table.contains
function table.contains(table, element)
local count = 1
for _, value in pairs(table) do
-- print("value: "..tostring(value).." element: "..tostring(element));
if tostring(value) == tostring(element) or value==element then
return {true,count}
end
count = count + 1
end
return {false,-1}
end
-- Function that prints
function printTime(timeStart, stringToPrint)
timeEnd = os.time();
duration = timeEnd - timeStart;
print('\nduration '..stringToPrint.. ': '.. comma_value(tonumber(duration)).. ' seconds');
io.flush();
print('duration '..stringToPrint.. ': '..string.format("%.2d days, %.2d hours, %.2d minutes, %.2d seconds", (duration/(60*60))/24, duration/(60*60)%24, duration/60%60, duration%60))
io.flush();
return duration;
end
-- Function that reads a value and returns the string of the signed value
function signedValueFunction(value)
local value = tonumber(value);
--print("value = "..value);
local charPlus = ""
if tonumber(value) >= 0 then charPlus = "+"; end
local outputString = charPlus..""..tostring(round(value,2));
--print("outputString = "..outputString);
return tostring(outputString);
end
-- from sam_lie
-- Compatible with Lua 5.0 and 5.1.
-- Disclaimer : use at own risk especially for hedge fund reports :-)
--============================================================
-- add comma to separate thousands
--
function comma_value(amount)
local formatted = amount
while true do
formatted, k = string.gsub(formatted, "^(-?%d+)(%d%d%d)", '%1,%2')
if (k==0) then
break
end
end
return formatted
end
-- function that computes the confusion matrix
function confusion_matrix(predictionTestVect, truthVect, threshold, printValues)
local tp = 0
local tn = 0
local fp = 0
local fn = 0
local f1_score = -2
local MatthewsCC = -2
local accuracy = -2
local arrayFPindices = {}
local arrayFPvalues = {}
local arrayTPvalues = {}
local areaRoc = 0
local fpRateVett = {}
local tpRateVett = {}
local precisionVett = {}
local recallVett = {}
for i=1,#predictionTestVect do
if printValues == true then
io.write("predictionTestVect["..i.."] = ".. round(predictionTestVect[i],4).."\ttruthVect["..i.."] = "..truthVect[i].." ");
io.flush();
end
if predictionTestVect[i] >= threshold and truthVect[i] >= threshold then
tp = tp + 1
arrayTPvalues[#arrayTPvalues+1] = predictionTestVect[i]
if printValues == true then print(" TP ") end
elseif predictionTestVect[i] < threshold and truthVect[i] >= threshold then
fn = fn + 1
if printValues == true then print(" FN ") end
elseif predictionTestVect[i] >= threshold and truthVect[i] < threshold then
fp = fp + 1
if printValues == true then print(" FP ") end
arrayFPindices[#arrayFPindices+1] = i;
arrayFPvalues[#arrayFPvalues+1] = predictionTestVect[i]
elseif predictionTestVect[i] < threshold and truthVect[i] < threshold then
tn = tn + 1
if printValues == true then print(" TN ") end
end
end
print("TOTAL:")
print(" FN = "..comma_value(fn).." / "..comma_value(tonumber(fn+tp)).."\t (truth == 1) & (prediction < threshold)");
print(" TP = "..comma_value(tp).." / "..comma_value(tonumber(fn+tp)).."\t (truth == 1) & (prediction >= threshold)\n");
print(" FP = "..comma_value(fp).." / "..comma_value(tonumber(fp+tn)).."\t (truth == 0) & (prediction >= threshold)");
print(" TN = "..comma_value(tn).." / "..comma_value(tonumber(fp+tn)).."\t (truth == 0) & (prediction < threshold)\n");
local continueLabel = true
if continueLabel then
upperMCC = (tp*tn) - (fp*fn)
innerSquare = (tp+fp)*(tp+fn)*(tn+fp)*(tn+fn)
lowerMCC = math.sqrt(innerSquare)
MatthewsCC = -2
if lowerMCC>0 then MatthewsCC = upperMCC/lowerMCC end
local signedMCC = signedValueFunction(MatthewsCC);
-- print("signedMCC = "..signedMCC);
if MatthewsCC > -2 then print("\n::::\tMatthews correlation coefficient = "..signedMCC.."\t::::\n");
else print("Matthews correlation coefficient = NOT computable"); end
accuracy = (tp + tn)/(tp + tn +fn + fp)
print("accuracy = "..round(accuracy,2).. " = (tp + tn) / (tp + tn +fn + fp) \t \t [worst = 0, best = 1]");
recall = tp / (tp + fn)
print("true positive rate = recall = "..round(recall,2).. " = tp / (tp + fn) \t \t [worst = 0, best = 1]");
specificity = tn / (tn + fp)
print("true negative rate = specificity = "..round(specificity,2).. " = tn / (tn + fp) \t \t [worst = 0, best = 1]");
f1_score = -2
if (tp+fp+fn)>0 then
f1_score = (2*tp) / (2*tp+fp+fn)
print("f1_score = "..round(f1_score,2).." = (2*tp) / (2*tp+fp+fn) \t [worst = 0, best = 1]");
else
print("f1_score CANNOT be computed because (tp+fp+fn)==0")
end
print("\n\nMCC \t\t F1_score \t accuracy \t TP_rate \t TN_rate")
print(round(signedMCC,2).. " \t\t "..round(f1_score,2).." \t\t "..round(accuracy,2).." \t\t "..round(recall,2).." \t\t ".. round(specificity,2).."\n\n")
-- local numberOfPredictedOnes = tp + fp;
-- print("numberOfPredictedOnes = (TP + FP) = "..comma_value(numberOfPredictedOnes).." = "..round(numberOfPredictedOnes*100/(tp + tn + fn + fp),2).."%");
--
-- io.write("\nDiagnosis: ");
-- if (fn >= tp and (fn+tp)>0) then print("too many FN false negatives"); end
-- if (fp >= tn and (fp+tn)>0) then print("too many FP false positives"); end
--
--
-- if (tn > (10*fp) and tp > (10*fn)) then print("Excellent ! ! !");
-- elseif (tn > (5*fp) and tp > (5*fn)) then print("Very good ! !");
-- elseif (tn > (2*fp) and tp > (2*fn)) then print("Good !");
-- elseif (tn >= fp and tp >= fn) then print("Alright");
-- else print("Baaaad"); end
end
return {accuracy, arrayFPindices, arrayFPvalues, MatthewsCC, f1_score};
end
-- Permutations
-- tab = {1,2,3,4,5,6,7,8,9,10}
-- permute(tab, 10, 10)
function permute(tab, n, count)
n = n or #tab
for i = 1, count or n do
local j = math.random(i, n)
tab[i], tab[j] = tab[j], tab[i]
end
return tab
end
-- round a real value
function round(num, idp)
local mult = 10^(idp or 0)
return math.floor(num * mult + 0.5) / mult
end
-- ##############################
local profile_vett = {}
local csv = require("csv")
local fileName = "../data/LungCancerDataset_AllRecords_NORM_reduced_features.csv"
-- tostring(arg[1])
-- cervical_arranged_NORM.csv
-- cervical_arranged_NORM_ONLY_BIOPSY_TARGET.csv
print("Readin' "..tostring(fileName));
local f = csv.open(fileName)
local column_names = {}
local j = 0
for fields in f:lines() do
if j>0 then
profile_vett[j] = {}
for i, v in ipairs(fields) do
profile_vett[j][i] = tonumber(v);
end
j = j + 1
else
for i, v in ipairs(fields) do
column_names[i] = v
end
j = j + 1
end
end
OPTIM_PACKAGE = true
MAX_VALUE = 1
local output_number = 1
THRESHOLD = 0.5 -- ORIGINAL
-- THRESHOLD = 0.1529
XAVIER_INITIALIZATION = false
DROPOUT_FLAG = false
MOMENTUM = false
MOMENTUM_ALPHA = 0.5
LEARN_RATE = 0.01 -- default was 0.01
ITERATIONS = 200 -- default was 200 -- I'M ANALYZING THIS PARAMETER IN THIS ANALYSIS
local hidden_units = 50 -- default was 50
local mcc = "mcc"
local aupr = "aupr"
OPTIMIZE_SCORE = mcc
print("\nOPTIM_PACKAGE = ".. tostring(OPTIM_PACKAGE));
print("XAVIER_INITIALIZATION = ".. tostring(XAVIER_INITIALIZATION));
print("DROPOUT_FLAG = ".. tostring(DROPOUT_FLAG));
print("MOMENTUM_ALPHA = ".. tostring(MOMENTUM_ALPHA));
print("MOMENTUM = ".. tostring(MOMENTUM));
print("LEARN_RATE = ".. tostring(LEARN_RATE));
print("ITERATIONS = ".. tostring(ITERATIONS));
-- local hidden_layers = 1 -- best is 1
local hiddenUnitVect = {5,50,100,150,200,250,300,350,400}
local hiddenLayerVect = {1, 2, 3}
-- local hiddenLayerVect = {1}
local max_values = {}
-- filePointer = io.open("normalized_data_file.csv", "w")
local profile_vett_data = {}
local label_vett = {}
for i=1,#profile_vett do
profile_vett_data[i] = {}
-- io.write("#"..i.."# ")
-- io.flush()
for j=1,#(profile_vett[1]) do
if j<#(profile_vett[1]) then
profile_vett_data[i][j] = (profile_vett[i][j]/MAX_VALUE)
-- io.write("profile_vett_data["..i.."]["..j.."] = "..profile_vett_data[i][j].." ")
-- filePointer:write(round(profile_vett_data[i][j],2)..",")
-- io.flush()
else
label_vett[i] = profile_vett[i][j]
-- filePointer:write(round(label_vett[i],2)..",")
-- io.flush()
end
end
-- filePointer:write("\n")
-- io.flush()
end
print("Number of value profiles (rows) = "..#profile_vett_data);
print("Number features (columns) = "..#(profile_vett_data[1]));
print("Number of targets (rows) = "..#label_vett);
if NORMALIZATION==true and #max_values ~= #(profile_vett_data[1]) then
print("Error: different number of max_values and features. The program will stop");
os.exit();
end
local patient_outcome = label_vett
local patients_vett = profile_vett_data
-- print(patients_vett)
-- filePointer:close()
-- os.exit()
-- ########################################################
-- START
local timeStart = os.time();
local indexVect = {};
for i=1, #patients_vett do indexVect[i] = i; end
permutedIndexVect = permute(indexVect, #indexVect, #indexVect);
-- VALIDATION_SET_PERC = 20
-- TEST_SET_SIZE = 100
--
-- local validation_set_size = round((VALIDATION_SET_PERC*(#patients_vett-TEST_SET_SIZE))/100)
--
-- print("training_set_size = "..((#patients_vett-TEST_SET_SIZE)-validation_set_size).." elements");
-- print("validation_set_size = "..validation_set_size.." elements\n");
--
-- print("TEST_SET_SIZE = "..TEST_SET_SIZE.." elements\n");
print("#patients_vett = "..#patients_vett);
TRAINING_SET_PERC = 60
VALIDATION_SET_PERC = 20
TEST_SET_PERC = 20
local training_set_size = round((TRAINING_SET_PERC*(#patients_vett))/100)
local validation_set_size = round((VALIDATION_SET_PERC*(#patients_vett))/100)
local test_set_size = #patients_vett - validation_set_size - training_set_size
print("\ntraining_set_size = "..training_set_size);
print("validation_set_size = "..validation_set_size);
print("test_set_size = "..test_set_size.."\n");
-- os.exit()
train_patient_profile = {}
validation_patient_profile = {}
test_patient_profile = {}
modelFileVect = {}
local original_validation_indexes = {}
for i=1,#patients_vett do
if i>=1 and i<=(training_set_size) then
train_patient_profile[#train_patient_profile+1] = {torch.Tensor(patients_vett[permutedIndexVect[i]]), torch.Tensor{patient_outcome[permutedIndexVect[i]]}}
--print("training outcome["..#train_patient_profile.."] = "..train_patient_profile[#train_patient_profile][2][1]);
elseif i>= (training_set_size+1) and i <= (training_set_size+validation_set_size) then
original_validation_indexes[#original_validation_indexes+1] = permutedIndexVect[i];
-- print("original_validation_indexes =".. permutedIndexVect[i]);
validation_patient_profile[#validation_patient_profile+1] = {torch.Tensor(patients_vett[permutedIndexVect[i]]), torch.Tensor{patient_outcome[permutedIndexVect[i]]}}
--print("validation outcome["..#validation_patient_profile.."] = "..validation_patient_profile[#validation_patient_profile][2][1]);
else
test_patient_profile[#test_patient_profile+1] = {torch.Tensor(patients_vett[permutedIndexVect[i]]), torch.Tensor{patient_outcome[permutedIndexVect[i]]}}
end
end
require 'nn'
input_number = (#(train_patient_profile[1][1]))[1]
function train_patient_profile:size() return #train_patient_profile end
function validation_patient_profile:size() return #validation_patient_profile end
local printError = false
local fileName = nil
local filePointer = nil
if printError == true then
fileName = "./mse_log/positive_error_progress"..tostring(os.time())..".csv"
filePointer = io.open(fileName, "w")
end
-- OPTIMIZATION LOOPS
local MCC_vect = {}
local f1score_vect = {}
local auroc_vett = {}
local aupr_vett = {}
local hus_vect = {}
local hl_vect = {}
for b=1,#hiddenLayerVect do
for a=1,#hiddenUnitVect do
local hidden_units = hiddenUnitVect[a]
local hidden_layers = hiddenLayerVect[b]
print("$$$ hidden_units = "..hidden_units.."\t hidden_layers = "..hidden_layers.." $$$")
local perceptron = createPerceptron(input_number, hidden_units, hidden_layers, output_number)
local criterion = nn.MSECriterion()
local lossSum = 0
local positiveLossSum = 0
local error_progress = 0
local numberOfOnes = 0
local positiveErrorProgress = 0
if OPTIM_PACKAGE == false then
myTrainer = nn.StochasticGradient(perceptron, criterion)
myTrainer.learningRate = LEARN_RATE
myTrainer.maxIteration = ITERATIONS
myTrainer:train(train_patient_profile)
else
require 'optim'
local params, gradParams = perceptron:getParameters()
local optimState = nil
if MOMENTUM==true then
optimState = {learningRate = LEARN_RATE}
else
optimState = {learningRate = LEARN_RATE,
momentum = MOMENTUM_ALPHA }
end
local total_runs = ITERATIONS*#train_patient_profile
local loopIterations = 1
for epoch=1,ITERATIONS do
for k=1,#train_patient_profile do
-- Function feval
local function feval(params)
gradParams:zero()
local thisProfile = train_patient_profile[k][1]
local thisLabel = train_patient_profile[k][2]
local thisPrediction = perceptron:forward(thisProfile)
-- [-1,+1] -> [0,1]
thisPrediction = (thisPrediction+1)/2
local loss = criterion:forward(thisPrediction, thisLabel)
lossSum = lossSum + loss
error_progress = lossSum*100 / (loopIterations*MAX_MSE)
--print("thisLabel[1] = "..thisLabel[1].." positiveLossSum = "..positiveLossSum.." numberOfOnes = "..numberOfOnes);
if thisLabel[1]==1 then
positiveLossSum = positiveLossSum + loss
numberOfOnes = numberOfOnes + 1
end
if (numberOfOnes > 0 ) then
positiveErrorProgress = positiveLossSum*100 / (numberOfOnes*MAX_MSE)
end
if ((loopIterations*100/total_runs)*25)%100==0 then
io.write("completion: ", round((loopIterations*100/total_runs),2).."%" )
io.write(" (epoch="..epoch..")(element="..k..") loss = "..round(loss,3).." ")
io.write("\terror progress = "..round(error_progress,5).."%\n")
end
if printError== true then
filePointer:write(loopIterations..","..positiveErrorProgress.."\n")
end
local dloss_doutput = criterion:backward(thisPrediction, thisLabel)
perceptron:backward(thisProfile, dloss_doutput)
return loss,gradParams
end
optim.sgd(feval, params, optimState)
loopIterations = loopIterations+1
end
end
end
print("\n\n### executeTest(perceptron, validation_patient_profile)")
local testOutput = executeTest(perceptron, validation_patient_profile)
MCC_vect[#MCC_vect+1] = testOutput[1]
f1score_vect[#f1score_vect+1] = testOutput[3]
auroc_vett[#auroc_vett+1] = testOutput[4]
aupr_vett[#aupr_vett+1] = testOutput[5]
hus_vect[#hus_vect+1] = hidden_units
hl_vect[#hl_vect+1] = hidden_layers
local modelFile = "./models/model_hus"..hidden_units.."_hl"..hidden_layers.."_time"..tostring(os.time());
torch.save(tostring(modelFile), perceptron);
print("Saved model file: "..tostring(modelFile).."\n");
modelFileVect[#modelFileVect+1] = modelFile;
end
end
local maxMCC = -1
local maxMCCpos = -1
local max_aupr = -1
local max_aupr_pos = -1
for k=1,#MCC_vect do
io.write("@ @ ["..k.."] ")
io.write("\tAUPR = "..round(aupr_vett[k],2).."% ")
io.write("\tMCC = "..round(MCC_vect[k],2))
io.write("\tF1_score = "..round(f1score_vect[k],2))
io.write("\tAUROC = "..round(auroc_vett[k],2).."% ")
io.write("\thidden units = "..hus_vect[k].." ")
io.write("\thidden layers = "..hl_vect[k].." ")
io.write(" @ @ \n")
io.flush()
if MCC_vect[k]>=maxMCC then
maxMCC = MCC_vect[k]
maxMCCpos = k
end
if aupr_vett[k]>=max_aupr then
max_aupr = aupr_vett[k]
max_aupr_pos = k
end
end
-- CHOOSING THE MODEL BY OPTIMZING THE MCC OR AUPR
local modelFileToLoad = nil
if OPTIMIZE_SCORE == mcc then
modelFileToLoad = tostring(modelFileVect[maxMCCpos])
print("\nmodelFileVect["..maxMCCpos.."]\nmodelFileToLoad ="..modelFileToLoad)
elseif OPTIMIZE_SCORE == aupr then
modelFileToLoad = tostring(modelFileVect[max_aupr_pos])
print("\nmodelFileVect["..max_aupr_pos.."]\nmodelFileToLoad ="..modelFileToLoad)
end
local loadedModel = torch.load(modelFileToLoad)
print("\n\n### executeTest(loadedModel, test_patient_profile)")
local executeTestOutput = executeTest(loadedModel, test_patient_profile)
local lastMCC = executeTestOutput[1]
local lastAccuracy = executeTestOutput[2]
local lastF1score = executeTestOutput[3]
print("':':':':' lastMCC = "..round(lastMCC,2).." lastF1score = "..round(lastF1score,2).." ':':':':'")
for i=1,#modelFileVect do
local command = "rm "..tostring(modelFileVect[i])
io.write("command: "..command.." \n")
local res = sys.execute(command)
-- print("command response: "..res)
end
if printError == true then
filePointer:close()
end
printTime(timeStart, " complete execution")