|
a |
|
b/partyMod/tests/IndependenceTest-regtest.Rout.save |
|
|
1 |
|
|
|
2 |
R Under development (unstable) (2014-06-29 r66051) -- "Unsuffered Consequences" |
|
|
3 |
Copyright (C) 2014 The R Foundation for Statistical Computing |
|
|
4 |
Platform: x86_64-unknown-linux-gnu (64-bit) |
|
|
5 |
|
|
|
6 |
R is free software and comes with ABSOLUTELY NO WARRANTY. |
|
|
7 |
You are welcome to redistribute it under certain conditions. |
|
|
8 |
Type 'license()' or 'licence()' for distribution details. |
|
|
9 |
|
|
|
10 |
R is a collaborative project with many contributors. |
|
|
11 |
Type 'contributors()' for more information and |
|
|
12 |
'citation()' on how to cite R or R packages in publications. |
|
|
13 |
|
|
|
14 |
Type 'demo()' for some demos, 'help()' for on-line help, or |
|
|
15 |
'help.start()' for an HTML browser interface to help. |
|
|
16 |
Type 'q()' to quit R. |
|
|
17 |
|
|
|
18 |
> |
|
|
19 |
> set.seed(290875) |
|
|
20 |
> library("party") |
|
|
21 |
Loading required package: grid |
|
|
22 |
Loading required package: zoo |
|
|
23 |
|
|
|
24 |
Attaching package: 'zoo' |
|
|
25 |
|
|
|
26 |
The following objects are masked from 'package:base': |
|
|
27 |
|
|
|
28 |
as.Date, as.Date.numeric |
|
|
29 |
|
|
|
30 |
Loading required package: sandwich |
|
|
31 |
Loading required package: strucchange |
|
|
32 |
Loading required package: modeltools |
|
|
33 |
Loading required package: stats4 |
|
|
34 |
> |
|
|
35 |
> ### get rid of the NAMESPACE |
|
|
36 |
> attach(asNamespace("party")) |
|
|
37 |
The following objects are masked from package:party: |
|
|
38 |
|
|
|
39 |
cforest, cforest_classical, cforest_control, cforest_unbiased, |
|
|
40 |
conditionalTree, ctree, ctree_control, ctree_memory, edge_simple, |
|
|
41 |
mob, mob_control, node_barplot, node_bivplot, node_boxplot, |
|
|
42 |
node_density, node_hist, node_inner, node_scatterplot, node_surv, |
|
|
43 |
node_terminal, proximity, ptrafo, reweight, sctest.mob, varimp, |
|
|
44 |
varimpAUC |
|
|
45 |
|
|
|
46 |
> |
|
|
47 |
> ### |
|
|
48 |
> ### |
|
|
49 |
> ### Regression tests for independence tests |
|
|
50 |
> ### |
|
|
51 |
> ### functions defined in file `./src/IndependenceTest.c' |
|
|
52 |
> |
|
|
53 |
> ### tests for function C_IndependenceTest |
|
|
54 |
> xval <- rnorm(60) |
|
|
55 |
> x <- matrix(rank(xval), ncol = 1) |
|
|
56 |
> yf <- gl(2, 30) |
|
|
57 |
> y <- sapply(levels(yf), function(l) as.numeric(yf == l))[,1,drop = FALSE] |
|
|
58 |
> weights <- rep(1, nrow(x)) |
|
|
59 |
> |
|
|
60 |
> varctrl <- new("VariableControl") |
|
|
61 |
> lec <- new("LinStatExpectCovar", ncol(x), ncol(y)) |
|
|
62 |
> |
|
|
63 |
> res <- .Call("R_IndependenceTest", x, y, weights, |
|
|
64 |
+ lec, varctrl, PACKAGE = "party") |
|
|
65 |
> print(res) |
|
|
66 |
[1] 0.3991795 0.6897610 |
|
|
67 |
> wmw <- wilcox.test(xval ~ yf, exact = FALSE, correct = FALSE) |
|
|
68 |
> print(wmw) |
|
|
69 |
|
|
|
70 |
Wilcoxon rank sum test |
|
|
71 |
|
|
|
72 |
data: xval by yf |
|
|
73 |
W = 477, p-value = 0.6898 |
|
|
74 |
alternative hypothesis: true location shift is not equal to 0 |
|
|
75 |
|
|
|
76 |
> stopifnot(isequal(res[2], wmw$p.value)) |
|
|
77 |
> |
|
|
78 |
> xval <- rnorm(60) |
|
|
79 |
> x <- matrix(rank(xval), ncol = 1) |
|
|
80 |
> yf <- gl(3, 20) |
|
|
81 |
> y <- sapply(levels(yf), function(l) as.numeric(yf == l)) |
|
|
82 |
> weights <- rep(1, nrow(x)) |
|
|
83 |
> |
|
|
84 |
> varctrl <- new("VariableControl") |
|
|
85 |
> varctrl@teststat <- factor("quad", levels = c("max", "quad")) |
|
|
86 |
> print(varctrl) |
|
|
87 |
An object of class "VariableControl" |
|
|
88 |
Slot "teststat": |
|
|
89 |
[1] quad |
|
|
90 |
Levels: max quad |
|
|
91 |
|
|
|
92 |
Slot "pvalue": |
|
|
93 |
[1] TRUE |
|
|
94 |
|
|
|
95 |
Slot "tol": |
|
|
96 |
[1] 1e-10 |
|
|
97 |
|
|
|
98 |
Slot "maxpts": |
|
|
99 |
[1] 25000 |
|
|
100 |
|
|
|
101 |
Slot "abseps": |
|
|
102 |
[1] 1e-04 |
|
|
103 |
|
|
|
104 |
Slot "releps": |
|
|
105 |
[1] 0 |
|
|
106 |
|
|
|
107 |
> lec <- new("LinStatExpectCovarMPinv", ncol(x), ncol(y)) |
|
|
108 |
> lec@rank <- 0 |
|
|
109 |
> lec@MPinv <- matrix(0, nrow = ncol(x) * ncol(y), ncol = ncol(x) * ncol(y)) |
|
|
110 |
> lec@svdmem <- new("svd_mem", ncol(x) * ncol(y)) |
|
|
111 |
> |
|
|
112 |
> res <- .Call("R_IndependenceTest", x, y, weights, |
|
|
113 |
+ lec, varctrl, PACKAGE = "party") |
|
|
114 |
> print(res) |
|
|
115 |
[1] 0.6219672 0.7327259 |
|
|
116 |
> kw <- kruskal.test(xval ~ yf) |
|
|
117 |
> print(kw) |
|
|
118 |
|
|
|
119 |
Kruskal-Wallis rank sum test |
|
|
120 |
|
|
|
121 |
data: xval by yf |
|
|
122 |
Kruskal-Wallis chi-squared = 0.622, df = 2, p-value = 0.7327 |
|
|
123 |
|
|
|
124 |
> stopifnot(isequal(res[2], kw$p.value)) |
|
|
125 |
> stopifnot(isequal(lec@rank, kw$parameter)) |
|
|
126 |
> |
|
|
127 |
> tmp <- x |
|
|
128 |
> x <- y |
|
|
129 |
> y <- tmp |
|
|
130 |
> varctrl <- new("VariableControl") |
|
|
131 |
> varctrl@teststat <- factor("quad", levels = c("max", "quad")) |
|
|
132 |
> print(varctrl) |
|
|
133 |
An object of class "VariableControl" |
|
|
134 |
Slot "teststat": |
|
|
135 |
[1] quad |
|
|
136 |
Levels: max quad |
|
|
137 |
|
|
|
138 |
Slot "pvalue": |
|
|
139 |
[1] TRUE |
|
|
140 |
|
|
|
141 |
Slot "tol": |
|
|
142 |
[1] 1e-10 |
|
|
143 |
|
|
|
144 |
Slot "maxpts": |
|
|
145 |
[1] 25000 |
|
|
146 |
|
|
|
147 |
Slot "abseps": |
|
|
148 |
[1] 1e-04 |
|
|
149 |
|
|
|
150 |
Slot "releps": |
|
|
151 |
[1] 0 |
|
|
152 |
|
|
|
153 |
> lec <- new("LinStatExpectCovarMPinv", ncol(x), ncol(y)) |
|
|
154 |
> lec@rank <- 0 |
|
|
155 |
> lec@MPinv <- matrix(0, nrow = ncol(x) * ncol(y), ncol = ncol(x) * ncol(y)) |
|
|
156 |
> lec@svdmem <- new("svd_mem", ncol(x) * ncol(y)) |
|
|
157 |
> |
|
|
158 |
> res <- .Call("R_IndependenceTest", x, y, weights, |
|
|
159 |
+ lec, varctrl, PACKAGE = "party") |
|
|
160 |
> print(res) |
|
|
161 |
[1] 0.6219672 0.7327259 |
|
|
162 |
> kw <- kruskal.test(xval ~ yf) |
|
|
163 |
> print(kw) |
|
|
164 |
|
|
|
165 |
Kruskal-Wallis rank sum test |
|
|
166 |
|
|
|
167 |
data: xval by yf |
|
|
168 |
Kruskal-Wallis chi-squared = 0.622, df = 2, p-value = 0.7327 |
|
|
169 |
|
|
|
170 |
> stopifnot(isequal(res[2], kw$p.value)) |
|
|
171 |
> |
|
|
172 |
> xf <- gl(3, 2000) |
|
|
173 |
> x <- sapply(levels(xf), function(l) as.numeric(xf == l)) |
|
|
174 |
> yf <- gl(3, 2000)[sample(1:6000)] |
|
|
175 |
> y <- sapply(levels(yf), function(l) as.numeric(yf == l)) |
|
|
176 |
> weights <- rep(1, nrow(x)) |
|
|
177 |
> |
|
|
178 |
> varctrl <- new("VariableControl") |
|
|
179 |
> varctrl@teststat <- factor("quad", levels = c("max", "quad")) |
|
|
180 |
> print(varctrl) |
|
|
181 |
An object of class "VariableControl" |
|
|
182 |
Slot "teststat": |
|
|
183 |
[1] quad |
|
|
184 |
Levels: max quad |
|
|
185 |
|
|
|
186 |
Slot "pvalue": |
|
|
187 |
[1] TRUE |
|
|
188 |
|
|
|
189 |
Slot "tol": |
|
|
190 |
[1] 1e-10 |
|
|
191 |
|
|
|
192 |
Slot "maxpts": |
|
|
193 |
[1] 25000 |
|
|
194 |
|
|
|
195 |
Slot "abseps": |
|
|
196 |
[1] 1e-04 |
|
|
197 |
|
|
|
198 |
Slot "releps": |
|
|
199 |
[1] 0 |
|
|
200 |
|
|
|
201 |
> lec <- new("LinStatExpectCovarMPinv", ncol(x), ncol(y)) |
|
|
202 |
> lec@rank <- 0 |
|
|
203 |
> lec@MPinv <- matrix(0, nrow = ncol(x) * ncol(y), ncol = ncol(x) * ncol(y)) |
|
|
204 |
> lec@svdmem <- new("svd_mem", ncol(x) * ncol(y)) |
|
|
205 |
> |
|
|
206 |
> res <- .Call("R_IndependenceTest", x, y, weights, |
|
|
207 |
+ lec, varctrl, PACKAGE = "party") |
|
|
208 |
> print(res) |
|
|
209 |
[1] 1.334778 0.855448 |
|
|
210 |
> chis <- chisq.test(table(xf, yf), correct = FALSE) |
|
|
211 |
> print(chis) |
|
|
212 |
|
|
|
213 |
Pearson's Chi-squared test |
|
|
214 |
|
|
|
215 |
data: table(xf, yf) |
|
|
216 |
X-squared = 1.335, df = 4, p-value = 0.8554 |
|
|
217 |
|
|
|
218 |
> stopifnot(isequal(round(res[2],3), round(chis$p.value,3))) |
|
|
219 |
> |
|
|
220 |
> ### unbalanced data |
|
|
221 |
> xval <- rnorm(60) |
|
|
222 |
> x <- matrix(rank(xval), ncol = 1) |
|
|
223 |
> yf <- factor(rnorm(60) > 1) |
|
|
224 |
> y <- sapply(levels(yf), function(l) as.numeric(yf == l)) #[,1,drop = FALSE] |
|
|
225 |
> weights <- rep(1, nrow(x)) |
|
|
226 |
> |
|
|
227 |
> varctrl <- new("VariableControl") |
|
|
228 |
> lec <- new("LinStatExpectCovar", ncol(x), ncol(y)) |
|
|
229 |
> |
|
|
230 |
> res <- .Call("R_IndependenceTest", x, y, weights, |
|
|
231 |
+ lec, varctrl, PACKAGE = "party") |
|
|
232 |
> print(res) |
|
|
233 |
[1] 1.5378968 0.1240738 |
|
|
234 |
> wmw <- wilcox.test(xval ~ yf, exact = FALSE, correct = FALSE) |
|
|
235 |
> print(wmw) |
|
|
236 |
|
|
|
237 |
Wilcoxon rank sum test |
|
|
238 |
|
|
|
239 |
data: xval by yf |
|
|
240 |
W = 80, p-value = 0.1241 |
|
|
241 |
alternative hypothesis: true location shift is not equal to 0 |
|
|
242 |
|
|
|
243 |
> stopifnot(isequal(res[2], wmw$p.value)) |
|
|
244 |
> |
|
|
245 |
> varctrl <- new("VariableControl") |
|
|
246 |
> lec <- new("LinStatExpectCovar", ncol(y), ncol(x)) |
|
|
247 |
> |
|
|
248 |
> res <- .Call("R_IndependenceTest", y, x, weights, |
|
|
249 |
+ lec, varctrl, PACKAGE = "party") |
|
|
250 |
> print(res) |
|
|
251 |
[1] 1.5378968 0.1240738 |
|
|
252 |
> wmw <- wilcox.test(xval ~ yf, exact = FALSE, correct = FALSE) |
|
|
253 |
> print(wmw) |
|
|
254 |
|
|
|
255 |
Wilcoxon rank sum test |
|
|
256 |
|
|
|
257 |
data: xval by yf |
|
|
258 |
W = 80, p-value = 0.1241 |
|
|
259 |
alternative hypothesis: true location shift is not equal to 0 |
|
|
260 |
|
|
|
261 |
> stopifnot(isequal(res[2], wmw$p.value)) |
|
|
262 |
> |
|
|
263 |
> xf <- factor(cut(rnorm(6000), breaks = c(-Inf, -2, 0.5, Inf))) |
|
|
264 |
> x <- sapply(levels(xf), function(l) as.numeric(xf == l)) |
|
|
265 |
> yf <- factor(cut(rnorm(6000), breaks = c(-Inf, -0.5, 1.5, Inf))) |
|
|
266 |
> y <- sapply(levels(yf), function(l) as.numeric(yf == l)) |
|
|
267 |
> weights <- rep(1, nrow(x)) |
|
|
268 |
> |
|
|
269 |
> varctrl <- new("VariableControl") |
|
|
270 |
> varctrl@teststat <- factor("quad", levels = c("max", "quad")) |
|
|
271 |
> print(varctrl) |
|
|
272 |
An object of class "VariableControl" |
|
|
273 |
Slot "teststat": |
|
|
274 |
[1] quad |
|
|
275 |
Levels: max quad |
|
|
276 |
|
|
|
277 |
Slot "pvalue": |
|
|
278 |
[1] TRUE |
|
|
279 |
|
|
|
280 |
Slot "tol": |
|
|
281 |
[1] 1e-10 |
|
|
282 |
|
|
|
283 |
Slot "maxpts": |
|
|
284 |
[1] 25000 |
|
|
285 |
|
|
|
286 |
Slot "abseps": |
|
|
287 |
[1] 1e-04 |
|
|
288 |
|
|
|
289 |
Slot "releps": |
|
|
290 |
[1] 0 |
|
|
291 |
|
|
|
292 |
> lec <- new("LinStatExpectCovarMPinv", ncol(x), ncol(y)) |
|
|
293 |
> lec@rank <- 0 |
|
|
294 |
> lec@MPinv <- matrix(0, nrow = ncol(x) * ncol(y), ncol = ncol(x) * ncol(y)) |
|
|
295 |
> lec@svdmem <- new("svd_mem", ncol(x) * ncol(y)) |
|
|
296 |
> |
|
|
297 |
> res <- .Call("R_IndependenceTest", x, y, weights, |
|
|
298 |
+ lec, varctrl, PACKAGE = "party") |
|
|
299 |
> print(res) |
|
|
300 |
[1] 2.1737015 0.7038467 |
|
|
301 |
> chis <- chisq.test(table(xf, yf), correct = FALSE) |
|
|
302 |
> print(chis) |
|
|
303 |
|
|
|
304 |
Pearson's Chi-squared test |
|
|
305 |
|
|
|
306 |
data: table(xf, yf) |
|
|
307 |
X-squared = 2.1741, df = 4, p-value = 0.7038 |
|
|
308 |
|
|
|
309 |
> stopifnot(isequal(round(res[2],3), round(chis$p.value,3))) |
|
|
310 |
> |
|
|
311 |
> |
|
|
312 |
> ### Multiple Variables |
|
|
313 |
> gtctrl <- new("GlobalTestControl") |
|
|
314 |
> tlev <- levels(gtctrl@testtype) |
|
|
315 |
> gtctrl@testtype <- factor("Univariate", levels = tlev) |
|
|
316 |
> |
|
|
317 |
> mydata = data.frame(y = gl(2, 50), x1 = rnorm(100), |
|
|
318 |
+ x2 = rnorm(100), x3 = rnorm(100)) |
|
|
319 |
> inp <- initVariableFrame(mydata[,c("x1", "x2", "x3"),drop = FALSE], |
|
|
320 |
+ trafo = function(data) ptrafo(data, numeric_trafo = rank)) |
|
|
321 |
> resp <- initVariableFrame(mydata[,"y",drop = FALSE], trafo = NULL, response = TRUE) |
|
|
322 |
> ls <- new("LearningSample", inputs = inp, responses = resp, |
|
|
323 |
+ weights = rep(1, inp@nobs), nobs = nrow(mydata), |
|
|
324 |
+ ninputs = inp@ninputs) |
|
|
325 |
> tm <- ctree_memory(ls) |
|
|
326 |
> varctrl <- new("VariableControl") |
|
|
327 |
> pvals <- .Call("R_GlobalTest", ls, ls@weights, tm, varctrl, gtctrl, PACKAGE = "party")[[2]] |
|
|
328 |
> wpvals <- rep(0, 3) |
|
|
329 |
> wpvals[1] <- wilcox.test(x1 ~ y, data = mydata, |
|
|
330 |
+ correct = FALSE, exact = FALSE)$p.value |
|
|
331 |
> wpvals[2] <- wilcox.test(x2 ~ y, data = mydata, |
|
|
332 |
+ correct = FALSE, exact = FALSE)$p.value |
|
|
333 |
> wpvals[3] <- wilcox.test(x3 ~ y, data = mydata, |
|
|
334 |
+ correct = FALSE, exact = FALSE)$p.value |
|
|
335 |
> stopifnot(isequal(wpvals, 1 - pvals)) |
|
|
336 |
> |
|
|
337 |
> varctrl <- new("VariableControl") |
|
|
338 |
> gtctrl@testtype <- factor("MonteCarlo", levels = tlev) |
|
|
339 |
> gtctrl@nresample <- as.integer(19999) |
|
|
340 |
> inp <- initVariableFrame(mydata[,"x1",drop = FALSE], trafo = function(data) |
|
|
341 |
+ ptrafo(data, numeric_trafo = rank)) |
|
|
342 |
> resp <- initVariableFrame(mydata[,"y",drop = FALSE], trafo = NULL, response = TRUE) |
|
|
343 |
> ls <- new("LearningSample", inputs = inp, responses = resp, |
|
|
344 |
+ weights = rep(1, inp@nobs), nobs = nrow(mydata), |
|
|
345 |
+ ninputs = as.integer(1)) |
|
|
346 |
> pvals <- .Call("R_GlobalTest", ls, ls@weights, tm, varctrl, gtctrl, PACKAGE = "party")[[2]] |
|
|
347 |
> stopifnot(abs((1 - pvals) - wilcox.test(x1 ~ y, data = mydata, |
|
|
348 |
+ exact = TRUE)$p.value) < 1e-2) |
|
|
349 |
> |
|
|
350 |
> proc.time() |
|
|
351 |
user system elapsed |
|
|
352 |
0.964 0.048 1.012 |