Diff of /bin/cart.r [000000] .. [868c5d]

Switch to unified view

a b/bin/cart.r
1
# NOTE: the relative paths used by this script ("./..." and "../data/...")
# are resolved against the current working directory, so the script has to
# be launched from the directory that contains it. The former `setwd(".")`
# was a no-op and has been dropped.

# Keep character columns as plain strings when data frames are created
# (this is already the default from R 4.0 on; harmless on newer versions).
options(stringsAsFactors = FALSE)

# Packages needed by this script; install whichever ones are missing.
list.of.packages <- c("PRROC", "e1071", "clusterSim", "rpart")
new.packages <- list.of.packages[
  !(list.of.packages %in% installed.packages()[, "Package"])
]
if (length(new.packages) > 0) {
  # In a non-interactive session install.packages() stops with
  # "trying to use CRAN without setting a mirror" when no repository has
  # been configured, so fall back to the CRAN cloud mirror in that case only.
  # A repository already chosen by the user is left untouched.
  cran_repos <- getOption("repos")
  if (is.null(cran_repos)) {
    cran_repos <- c(CRAN = "https://cloud.r-project.org")
  } else if (identical(unname(cran_repos["CRAN"]), "@CRAN@")) {
    cran_repos["CRAN"] <- "https://cloud.r-project.org"
  }
  install.packages(new.packages, repos = cran_repos)
}
7
8
library("clusterSim")
9
library("PRROC")
10
library("e1071")
11
library("rpart")
12
13
source("./confusion_matrix_rates.r")
14
source("./utils.r")
15
16
17
18
# Cut-off applied further down when scores are mapped to 0/1
threshold <- 0.5

# Location of the input table, relative to the working directory
fileName <- "../data/LungCancerDataset_AllRecords_NORM_27reduced_features.csv"

# read.csv() defaults to header = TRUE and sep = ",", which is what we need
mesothelioma_datatable <- read.csv(file = fileName)

# Index of the last column of the table
target_index <- ncol(mesothelioma_datatable)

cat(paste0("fileName: ", fileName, "\n"))

# Keep the table exactly as read from disk; the working copy gets shuffled
original_mesothelioma_datatable <- mesothelioma_datatable
28
29
# Shuffle the rows so that the split below is a random one.
n_rows <- nrow(original_mesothelioma_datatable)
mesothelioma_datatable <- original_mesothelioma_datatable[sample(n_rows), ]

# Share of the rows (in percent) that goes into the training set.
perce_training_set <- 80
size_training_set <- round(n_rows * (perce_training_set / 100))

# The trailing newline keeps the following print() output on its own line.
cat("perce_training_set = ", perce_training_set, "%\n", sep = "")

# Training set: the first `size_training_set` shuffled rows.
# Test set: all remaining rows.
# Row indices are built with seq_len() rather than `a:b`, because `a:b`
# counts backwards when a > b (e.g. an empty test partition) and would then
# select an out-of-range row together with the last training row.
training_set <- mesothelioma_datatable[seq_len(size_training_set), ]
test_set_index_start <- size_training_set + 1
test_set_index_end <- n_rows
test_rows <- size_training_set + seq_len(n_rows - size_training_set)
test_set <- mesothelioma_datatable[test_rows, ]

# Values of the last column for the test rows.
test_labels <- mesothelioma_datatable[test_rows, target_index]

print("dim(training_set)")
print(dim(training_set))

print("dim(test_set)")
print(dim(test_set))
52
53
54
# Generation of the CART model: a classification tree (method = "class")
# that predicts `Metastasis` from every other column of the training set.
# Earlier variant, kept for reference:
# cart_model <- rpart(class.of.diagnosis ~ keep.side + platelet.count..PLT., method="class", data=training_set);
cart_model <- rpart(Metastasis ~ ., method = "class", data = training_set)

# predict(..., type = "class") returns a factor of predicted labels.
# Convert through as.character() so the numeric value is the label itself
# and not the internal level code: `as.numeric(factor) - 1` is only right
# when the levels happen to be exactly "0" and "1".
# The argument is spelled out as `type` instead of relying on partial
# matching of `typ`.
predicted_classes <- predict(cart_model, test_set, type = "class")
pred_test_predictions <- as.numeric(as.character(predicted_classes))

# Ground truth is read from the same column the model was trained on, so
# that predictions and labels always refer to the same target even if
# `Metastasis` is not the last column of the table.
pred_test_set_labels <- as.numeric(test_set$Metastasis)

# Binarise the predictions with the global cut-off (a no-op for 0/1 class
# predictions, kept so the same code path also works with scores).
prc_data_test_PRED_binary <- as.numeric(pred_test_predictions >= threshold)

# mcc_outcome <- mcc(pred_test_set_labels, prc_data_test_PRED_binary)

# confusion_matrix_rates() comes from ./confusion_matrix_rates.r; argument
# order (true labels, predicted labels, title) follows the existing call in
# this script -- verify against that file.
confusion_matrix_rates(
  pred_test_set_labels,
  prc_data_test_PRED_binary,
  "@@@ Test set @@@"
)
69