|
a |
|
b/partyMod/R/crossvalidation.R |
|
|
1 |
|
|
|
2 |
# library("mboost") |
|
|
3 |
if (FALSE) { |
|
|
4 |
cvrisk2 <- function(object, folds = cv(object@weights), |
|
|
5 |
mincriterion = seq(from = 0.6, to = 0.95, by = 0.05), |
|
|
6 |
papply = if (require("multicore")) mclapply else lapply, |
|
|
7 |
...){ |
|
|
8 |
|
|
|
9 |
weights <- object@weights |
|
|
10 |
|
|
|
11 |
if (any(weights == 0)) |
|
|
12 |
warning("zero weights") |
|
|
13 |
if (is.null(folds)) { |
|
|
14 |
folds <- rmultinom(25, length(weights), weights/sum(weights)) |
|
|
15 |
} else { |
|
|
16 |
stopifnot(is.matrix(folds) && nrow(folds) == length(weights)) |
|
|
17 |
} |
|
|
18 |
fitfct <- object@update |
|
|
19 |
oobrisk <- matrix(0, nrow = ncol(folds), ncol = length(grid)) |
|
|
20 |
|
|
|
21 |
dummyfct <- function(weights) { |
|
|
22 |
mod <- fitfct(weights = weights) |
|
|
23 |
sapply(mincriterion, function(m) { |
|
|
24 |
p <- predict(mod, mincriterion = m) |
|
|
25 |
if (is.factor(p)) |
|
|
26 |
err <- p != mod@responses@variables[[1]] |
|
|
27 |
if (is.numeric(p)) |
|
|
28 |
err <- (p - mod@responses@variables[[1]])^2 |
|
|
29 |
mean(err[weights == 0]) |
|
|
30 |
}) |
|
|
31 |
} |
|
|
32 |
|
|
|
33 |
OOBweights <- matrix(rep(weights, ncol(folds)), ncol = ncol(folds)) |
|
|
34 |
OOBweights[folds > 0] <- 0 |
|
|
35 |
oobrisk <- papply(1:ncol(folds), |
|
|
36 |
function(i) dummyfct(weights = folds[, i])) |
|
|
37 |
|
|
|
38 |
oobrisk <- t(as.data.frame(oobrisk)) |
|
|
39 |
oobrisk <- oobrisk / colSums(OOBweights) |
|
|
40 |
colnames(oobrisk) <- mincriterion |
|
|
41 |
rownames(oobrisk) <- 1:nrow(oobrisk) |
|
|
42 |
oobrisk |
|
|
43 |
} |
|
|
44 |
|
|
|
45 |
set.seed(290875) |
|
|
46 |
|
|
|
47 |
### regression |
|
|
48 |
airq <- subset(airquality, !is.na(Ozone)) |
|
|
49 |
airct <- ctree(Ozone ~ ., data = airq, |
|
|
50 |
controls = ctree_control(maxsurrogate = 3, mincriterion = 0.5)) |
|
|
51 |
### bootstrap |
|
|
52 |
cvm <- cvrisk2(airct, mincriterion = 50:95/100) |
|
|
53 |
boxplot(cvm) |
|
|
54 |
|
|
|
55 |
### 10-fold CV |
|
|
56 |
cvm <- cvrisk2(airct, mincriterion = 50:95/100, cv(airct@weights, type = "kfold")) |
|
|
57 |
boxplot(cvm) |
|
|
58 |
|
|
|
59 |
} |
|
|
60 |
|