Diff of /bin/cart.r [000000] .. [868c5d]

Switch to side-by-side view

--- a
+++ b/bin/cart.r
@@ -0,0 +1,69 @@
+setwd(".")
+options(stringsAsFactors = FALSE)
+
+list.of.packages <- c("PRROC", "e1071", "clusterSim","rpart")
+new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
+if(length(new.packages)) install.packages(new.packages)
+
+library("clusterSim")
+library("PRROC")
+library("e1071")
+library("rpart")
+
+source("./confusion_matrix_rates.r")
+source("./utils.r")
+
+
+
+threshold <- 0.5
+
+fileName <- "../data/LungCancerDataset_AllRecords_NORM_27reduced_features.csv"
+mesothelioma_datatable <- read.csv(fileName, header = TRUE, sep =",");
+
+target_index <- dim(mesothelioma_datatable)[2]
+
+cat("fileName: ", fileName, "\n", sep="")
+
+original_mesothelioma_datatable <- mesothelioma_datatable
+
+# shuffle the rows
+mesothelioma_datatable <- original_mesothelioma_datatable[sample(nrow(original_mesothelioma_datatable)),] 
+
+# Allocation of the size of the training set
+perce_training_set <- 80
+size_training_set <- round(dim(mesothelioma_datatable)[1]*(perce_training_set/100))
+
+cat("perce_training_set = ",perce_training_set,"%", sep="")
+
+# Allocation of the training set and of the test set
+training_set <- (mesothelioma_datatable[1:size_training_set,])
+test_set_index_start <- size_training_set+1
+test_set_index_end <- dim(mesothelioma_datatable)[1]
+test_set  <- mesothelioma_datatable[test_set_index_start:test_set_index_end,]
+
+test_labels <- mesothelioma_datatable[test_set_index_start:test_set_index_end, target_index]   # NEW
+
+
+print("dim(training_set)")
+print(dim(training_set))
+
+print("dim(test_set)")
+print(dim(test_set))
+
+
+# Generation of the CART model
+# cart_model <- rpart(class.of.diagnosis ~ keep.side + platelet.count..PLT., method="class", data=training_set);
+cart_model <- rpart(Metastasis ~ ., method="class", data=training_set);
+
+pred_test_predictions <- as.numeric(predict(cart_model, test_set, typ="class"))-1
+pred_test_set_labels <- as.numeric(test_set$Metastasis)
+
+prc_data_test_PRED_binary <- as.numeric(pred_test_predictions)
+
+prc_data_test_PRED_binary[prc_data_test_PRED_binary>=threshold]=1
+prc_data_test_PRED_binary[prc_data_test_PRED_binary<threshold]=0
+# mcc_outcome <- mcc(pred_test_set_labels, prc_data_test_PRED_binary)
+# confusion_matrix_rates(pred_test_set_labels, prc_data_test_PRED_binary)
+
+confusion_matrix_rates(test_labels, pred_test_predictions, "@@@ Test set @@@")
+