Switch to unified view

a b/partyMod/tests/Splits-regtest.Rout.save
1
2
R Under development (unstable) (2014-06-29 r66051) -- "Unsuffered Consequences"
3
Copyright (C) 2014 The R Foundation for Statistical Computing
4
Platform: x86_64-unknown-linux-gnu (64-bit)
5
6
R is free software and comes with ABSOLUTELY NO WARRANTY.
7
You are welcome to redistribute it under certain conditions.
8
Type 'license()' or 'licence()' for distribution details.
9
10
R is a collaborative project with many contributors.
11
Type 'contributors()' for more information and
12
'citation()' on how to cite R or R packages in publications.
13
14
Type 'demo()' for some demos, 'help()' for on-line help, or
15
'help.start()' for an HTML browser interface to help.
16
Type 'q()' to quit R.
17
18
> 
19
> set.seed(290875)
20
> library("party")
21
Loading required package: grid
22
Loading required package: zoo
23
24
Attaching package: 'zoo'
25
26
The following objects are masked from 'package:base':
27
28
    as.Date, as.Date.numeric
29
30
Loading required package: sandwich
31
Loading required package: strucchange
32
Loading required package: modeltools
33
Loading required package: stats4
34
> library("coin")
35
Loading required package: survival
36
Loading required package: splines
37
> 
38
> "hohnloser" <-
39
+ structure(list(EF = as.integer(c(11, 11, 12, 13, 13, 13, 15,
40
+ 17, 20, 20, 20, 20, 20, 21, 22, 22, 22, 22, 23, 24, 24, 24, 24,
41
+ 24, 24, 24, 25, 25, 26, 26, 26, 27, 28, 30, 30, 31, 31, 32, 33,
42
+ 33, 33, 33, 34, 34, 34, 34, 36, 37, 38, 38, 38, 39, 40, 41, 41,
43
+ 41, 43, 43, 43, 44, 44, 49, 50, 51, 51, 51, 52, 52, 52, 56, 56,
44
+ 56, 57, 57, 58, 58, 58, 59, 60, 60, 61, 64, 64, 64, 64, 65, 70,
45
+ 70, 72, 75, 77, 77, 80, 93)), month = as.integer(c(1, 5, 14,
46
+ 2, 10, 39, 16, 17, 1, 1, 1, 8, 29, 22, 1, 3, 11, 15, 13, 1, 1,
47
+ 3, 5, 7, 11, 33, 3, 16, 1, 13, 23, 20, 12, 1, 1, 18, 20, 23,
48
+ 9, 12, 17, 21, 1, 5, 14, 38, 6, 1, 3, 12, 18, 8, 19, 3, 10, 15,
49
+ 19, 31, 33, 23, 24, 5, 13, 4, 21, 28, 3, 16, 37, 1, 3, 33, 23,
50
+ 29, 5, 9, 36, 19, 1, 10, 7, 1, 6, 7, 14, 6, 5, 23, 36, 30, 10,
51
+ 20, 7, 22)), cens = as.integer(c(0, 1, 0, 1, 0, 0, 1, 0, 1, 1,
52
+ 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0,
53
+ 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1,
54
+ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0,
55
+ 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
56
+ ))), .Names = c("EF", "month", "cens"), class = "data.frame", row.names =
57
+ c("1",
58
+ "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
59
+ "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24",
60
+ "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35",
61
+ "36", "37", "38", "39", "40", "41", "42", "43", "44", "45", "46",
62
+ "47", "48", "49", "50", "51", "52", "53", "54", "55", "56", "57",
63
+ "58", "59", "60", "61", "62", "63", "64", "65", "66", "67", "68",
64
+ "69", "70", "71", "72", "73", "74", "75", "76", "77", "78", "79",
65
+ "80", "81", "82", "83", "84", "85", "86", "87", "88", "89", "90",
66
+ "91", "92", "93", "94"))
67
> 
68
> 
69
> ### get rid of the NAMESPACE
70
> attach(asNamespace("party"))
71
The following objects are masked from package:party:
72
73
    cforest, cforest_classical, cforest_control, cforest_unbiased,
74
    conditionalTree, ctree, ctree_control, ctree_memory, edge_simple,
75
    mob, mob_control, node_barplot, node_bivplot, node_boxplot,
76
    node_density, node_hist, node_inner, node_scatterplot, node_surv,
77
    node_terminal, proximity, ptrafo, reweight, sctest.mob, varimp,
78
    varimpAUC
79
80
> 
81
> ### 
82
> ###
83
> ###    Regression tests for cutpoint search
84
> ###    
85
> ###    functions defined in file `./src/Splits.c'    
86
> 
87
> ### tests for function C_Split
88
> x <- rnorm(100)
89
> y <- rnorm(100)
90
> weights <- rep(1, length(x))
91
> splitctrl <- new("SplitControl")
92
> split <- Split(x, y, weights, splitctrl)
93
> mydata <- data.frame(y, x)
94
> ms <- show(maxstat_test(y ~ x, data = mydata, distribution = approximate(10)))
95
96
    Approximative Maxstat Test
97
98
data:  y by x
99
maxT = 1.1008, p-value = 0.9
100
sample estimates:
101
$cutpoint
102
[1] 1.337766
103
104
105
> stopifnot(isequal(split[[1]], ms$estimate[[1]]))
106
> stopifnot(isequal(split[[2]], ms$statistic))
107
> stopifnot(isequal(max(split[[3]]), ms$statistic))
108
> 
109
> ### Hohnloser data
110
> ms <-  show(maxstat_test(Surv(month, cens) ~ EF, data = hohnloser,
111
+ distribution = approximate(10)))
112
113
    Approximative Maxstat Test
114
115
data:  Surv(month, cens) by EF
116
maxT = 3.5647, p-value = 0.1
117
sample estimates:
118
$cutpoint
119
[1] 39
120
121
122
> splitctrl <- new("SplitControl")
123
> splitctrl@minprob <- 0.1
124
> splitctrl@minsplit <- as.integer(5)
125
> 
126
> split <- Split(hohnloser$EF, logrank_trafo(Surv(hohnloser$month, hohnloser$cens)),
127
+                rep(1, nrow(hohnloser)), splitctrl)
128
> stopifnot(isequal(split[[1]], ms$estimate[[1]]))
129
> stopifnot(isequal(split[[2]], ms$statistic))
130
> stopifnot(isequal(max(split[[3]]), ms$statistic))
131
> 
132
> ### categorical splits
133
> n <- 100
134
> xf <- gl(5, 100/5)
135
> yf <- gl(4, 100/4)[sample(1:length(xf))]
136
> weights <- rep(1, length(xf))
137
> splitctrl <- new("SplitControl")
138
> splitctrl@minprob <- 0.1
139
> splitctrl@minsplit <- as.integer(5)
140
> split <- Split(xf, yf, weights, splitctrl)
141
> split
142
[[1]]
143
[1] 1
144
145
[[2]]
146
[1] 4.021194
147
148
[[3]]
149
  [1] 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
150
  [9] 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
151
 [17] 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
152
 [25] 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
153
 [33] 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.876166 0.000000
154
 [41] 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
155
 [49] 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
156
 [57] 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
157
 [65] 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
158
 [73] 0.000000 0.000000 0.000000 0.000000 0.000000 4.021194 0.000000 0.000000
159
 [81] 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
160
 [89] 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
161
 [97] 0.000000 0.000000 2.297825 0.000000
162
163
[[4]]
164
[1] 1 1 1 0 1
165
166
> 
167
> ### Check if the statistic used for selecting the split is
168
> ### correct: For the ranks of a continuous response the statistic
169
> ### needs to be equal to the standardized Wilcoxon statistic
170
> 
171
> y <- rnorm(100) + c(rep(0, 25), rep(1, 25), rep(0, 25), rep(1, 25))
172
> x <- gl(4, 25)
173
> weights <- rep(1, length(y))
174
> split <- Split(x, rank(y), weights, splitctrl)
175
> levelset <- levels(x)[split[[4]] == 1]
176
> tstat <- split[[2]]
177
> p <- wilcox.test(y ~ I(x %in% levelset),corr = FALSE,
178
+                 alternative = "less")$p.value
179
> stopifnot(isequal(round(abs(qnorm(p)), 6), round(tstat, 6)))
180
> 
181
> y <- rnorm(100) + c(rep(0, 25), rep(1, 25), rep(0, 25), rep(1, 25))
182
> x <- rnorm(100)
183
> weights <- rep(1, length(y))
184
> split <- Split(x, rank(y), weights, splitctrl)
185
> tstat <- split[[2]]
186
> p <- wilcox.test(y ~ I(x <= split[[1]]), corr = FALSE,
187
+                 alternative = "less")$p.value
188
> stopifnot(isequal(round(abs(qnorm(p)), 6), round(tstat, 6)))
189
> 
190
> proc.time()
191
   user  system elapsed 
192
  0.708   0.056   0.765