# random_forest.r
#
# Fits a random forest that models `Metastasis` from every other column of the
# lung-cancer feature table, then reports the variable importances both as a
# table sorted by "%IncMSE" (ascending) and as a plot.

library(randomForest)

# Fix the RNG seed so the bootstrap samples and per-split feature draws, and
# therefore the importance ranking printed below, are the same on every run.
set.seed(1L)

# NOTE: the path is relative, so the script must be started from a working
# directory in which "../data" resolves to the data folder.
fileName <- "../data/LungCancerDataset_AllRecords_NORM_reduced_features.csv"
if (!file.exists(fileName)) {
  stop("Input file not found: ", fileName, call. = FALSE)
}

lung_cancer_datatable <- read.csv(
  fileName,
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)

# Disabled preprocessing steps, kept for reference:
# lung_cancer_datatable$"Typeoffollow.upexpected" <- NULL
# lung_cancer_datatable$"SiterecwithKaposiandmesothelioma" <- NULL
#
# lung_cancer_datatable$Metastasis <- lung_cancer_datatable$M
# lung_cancer_datatable$M <- NULL

# Fail early with a clear message instead of a formula-evaluation error.
if (!"Metastasis" %in% names(lung_cancer_datatable)) {
  stop("Column 'Metastasis' is missing from ", fileName, call. = FALSE)
}

# importance = TRUE is required for the permutation-based "%IncMSE" measure
# used below.
# NOTE(review): proximity = TRUE makes randomForest keep an n-by-n proximity
# matrix; nothing in this script reads it, so it can probably be dropped to
# save memory -- confirm that no downstream code uses rf_output$proximity.
rf_output <- randomForest(
  Metastasis ~ .,
  data = lung_cancer_datatable,
  importance = TRUE,
  proximity = TRUE
)

dd <- as.data.frame(rf_output$importance)

# "%IncMSE" is only reported for regression forests. Without this check a
# missing column would make order() receive NULL and silently print an empty
# table.
if (!"%IncMSE" %in% names(dd)) {
  stop(
    "Importance table has no '%IncMSE' column (found: ",
    paste(names(dd), collapse = ", "),
    "); is 'Metastasis' numeric?",
    call. = FALSE
  )
}

# Ascending order: the most important variables end up at the bottom.
dd_sorted <- dd[order(dd[["%IncMSE"]]), , drop = FALSE]

print(dd_sorted)

varImpPlot(rf_output)