Switch to unified view

a b/partyMod/tests/Splits-regtest.R
1
2
set.seed(290875)
3
library("party")
4
library("coin")
5
6
"hohnloser" <-
7
structure(list(EF = as.integer(c(11, 11, 12, 13, 13, 13, 15,
8
17, 20, 20, 20, 20, 20, 21, 22, 22, 22, 22, 23, 24, 24, 24, 24,
9
24, 24, 24, 25, 25, 26, 26, 26, 27, 28, 30, 30, 31, 31, 32, 33,
10
33, 33, 33, 34, 34, 34, 34, 36, 37, 38, 38, 38, 39, 40, 41, 41,
11
41, 43, 43, 43, 44, 44, 49, 50, 51, 51, 51, 52, 52, 52, 56, 56,
12
56, 57, 57, 58, 58, 58, 59, 60, 60, 61, 64, 64, 64, 64, 65, 70,
13
70, 72, 75, 77, 77, 80, 93)), month = as.integer(c(1, 5, 14,
14
2, 10, 39, 16, 17, 1, 1, 1, 8, 29, 22, 1, 3, 11, 15, 13, 1, 1,
15
3, 5, 7, 11, 33, 3, 16, 1, 13, 23, 20, 12, 1, 1, 18, 20, 23,
16
9, 12, 17, 21, 1, 5, 14, 38, 6, 1, 3, 12, 18, 8, 19, 3, 10, 15,
17
19, 31, 33, 23, 24, 5, 13, 4, 21, 28, 3, 16, 37, 1, 3, 33, 23,
18
29, 5, 9, 36, 19, 1, 10, 7, 1, 6, 7, 14, 6, 5, 23, 36, 30, 10,
19
20, 7, 22)), cens = as.integer(c(0, 1, 0, 1, 0, 0, 1, 0, 1, 1,
20
1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0,
21
0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1,
22
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0,
23
0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
24
))), .Names = c("EF", "month", "cens"), class = "data.frame", row.names =
25
c("1",
26
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
27
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24",
28
"25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35",
29
"36", "37", "38", "39", "40", "41", "42", "43", "44", "45", "46",
30
"47", "48", "49", "50", "51", "52", "53", "54", "55", "56", "57",
31
"58", "59", "60", "61", "62", "63", "64", "65", "66", "67", "68",
32
"69", "70", "71", "72", "73", "74", "75", "76", "77", "78", "79",
33
"80", "81", "82", "83", "84", "85", "86", "87", "88", "89", "90",
34
"91", "92", "93", "94"))
35
36
37
### get rid of the NAMESPACE
38
attach(asNamespace("party"))
39
40
### 
41
###
42
###    Regression tests for cutpoint search
43
###    
44
###    functions defined in file `./src/Splits.c'    
45
46
### tests for function C_Split
47
x <- rnorm(100)
48
y <- rnorm(100)
49
weights <- rep(1, length(x))
50
splitctrl <- new("SplitControl")
51
split <- Split(x, y, weights, splitctrl)
52
mydata <- data.frame(y, x)
53
ms <- show(maxstat_test(y ~ x, data = mydata, distribution = approximate(10)))
54
stopifnot(isequal(split[[1]], ms$estimate[[1]]))
55
stopifnot(isequal(split[[2]], ms$statistic))
56
stopifnot(isequal(max(split[[3]]), ms$statistic))
57
58
### Hohnloser data
59
ms <-  show(maxstat_test(Surv(month, cens) ~ EF, data = hohnloser,
60
distribution = approximate(10)))
61
splitctrl <- new("SplitControl")
62
splitctrl@minprob <- 0.1
63
splitctrl@minsplit <- as.integer(5)
64
65
split <- Split(hohnloser$EF, logrank_trafo(Surv(hohnloser$month, hohnloser$cens)),
66
               rep(1, nrow(hohnloser)), splitctrl)
67
stopifnot(isequal(split[[1]], ms$estimate[[1]]))
68
stopifnot(isequal(split[[2]], ms$statistic))
69
stopifnot(isequal(max(split[[3]]), ms$statistic))
70
71
### categorical splits
72
n <- 100
73
xf <- gl(5, 100/5)
74
yf <- gl(4, 100/4)[sample(1:length(xf))]
75
weights <- rep(1, length(xf))
76
splitctrl <- new("SplitControl")
77
splitctrl@minprob <- 0.1
78
splitctrl@minsplit <- as.integer(5)
79
split <- Split(xf, yf, weights, splitctrl)
80
split
81
82
### Check if the statistic used for selecting the split is
83
### correct: For the ranks of a continuous response the statistic
84
### needs to be equal to the standardized Wilcoxon statistic
85
86
y <- rnorm(100) + c(rep(0, 25), rep(1, 25), rep(0, 25), rep(1, 25))
87
x <- gl(4, 25)
88
weights <- rep(1, length(y))
89
split <- Split(x, rank(y), weights, splitctrl)
90
levelset <- levels(x)[split[[4]] == 1]
91
tstat <- split[[2]]
92
p <- wilcox.test(y ~ I(x %in% levelset),corr = FALSE,
93
                alternative = "less")$p.value
94
stopifnot(isequal(round(abs(qnorm(p)), 6), round(tstat, 6)))
95
96
y <- rnorm(100) + c(rep(0, 25), rep(1, 25), rep(0, 25), rep(1, 25))
97
x <- rnorm(100)
98
weights <- rep(1, length(y))
99
split <- Split(x, rank(y), weights, splitctrl)
100
tstat <- split[[2]]
101
p <- wilcox.test(y ~ I(x <= split[[1]]), corr = FALSE,
102
                alternative = "less")$p.value
103
stopifnot(isequal(round(abs(qnorm(p)), 6), round(tstat, 6)))