# feature_selection/boruta.R
1
library(Boruta)
2
library(ranger)
3
4
# Read the input table; the path is resolved against the working directory.
mydata <- read.csv("./ki-67.csv", header = TRUE)

# Keep only the rows that contain no missing values.
mydata <- mydata[complete.cases(mydata), ]

# Columns 1-83 are taken as the predictors, column 84 as the response.
predictors <- data.frame(mydata[1:83])
decision <- data.frame(mydata[, 84])

# Rebuild the modelling frame with the response stored as a factor named
# `decision`, which is the name the model formula refers to.
mydata <- data.frame(predictors[1:83], decision = factor(decision[, 1]))
10
11
# Seed R's random number generator before the run.
set.seed(1)

# Run Boruta with `decision` as the response and every other column of
# `mydata` as a candidate predictor. Per the Boruta documentation, `doTrace`
# sets the verbosity of progress reporting and `maxRuns` caps the number of
# importance source runs; `ntree` is passed on through `...` to the
# importance function (presumably the default ranger-based one - confirm).
Boruta.mydata <- Boruta(
  decision ~ .,
  data = mydata,
  doTrace = 2,
  maxRuns = 200,
  ntree = 500
)
13
# Default Boruta box plot, followed by the importance history plot.
plot(Boruta.mydata)
plotImpHistory(Boruta.mydata)

# Draw the box plot again without its x axis so that rotated attribute
# labels can be added by hand below.
plot(Boruta.mydata, xlab = "", xaxt = "n")

# For every column of the importance history keep only the finite entries.
lz <- lapply(seq_len(ncol(Boruta.mydata$ImpHistory)), function(i) {
  Boruta.mydata$ImpHistory[is.finite(Boruta.mydata$ImpHistory[, i]), i]
})
names(lz) <- colnames(Boruta.mydata$ImpHistory)

# Median importance per attribute, sorted ascending; the names of this
# vector become the tick labels.
Labels <- sort(vapply(lz, median, numeric(1)))

# Vertical (las = 2) labels, one tick per label. Positions are derived from
# `Labels` itself so `at` and `labels` always have the same length.
axis(
  side = 1, las = 2, labels = names(Labels),
  at = seq_along(Labels), cex.axis = 0.7
)
22
23
# Settle attributes left as tentative using Boruta's rough fix (see the
# TentativeRoughFix documentation for the rule it applies), then report.
final.boruta <- TentativeRoughFix(Boruta.mydata)
print(final.boruta)

# Names of the selected attributes, leaving tentative ones out.
getSelectedAttributes(final.boruta, withTentative = FALSE)

# Per-attribute statistics of the fixed result.
boruta.df <- attStats(final.boruta)
boruta.df