Switch to unified view

a b/partyMod/tests/IndependenceTest-regtest.Rout.save
1
2
R Under development (unstable) (2014-06-29 r66051) -- "Unsuffered Consequences"
3
Copyright (C) 2014 The R Foundation for Statistical Computing
4
Platform: x86_64-unknown-linux-gnu (64-bit)
5
6
R is free software and comes with ABSOLUTELY NO WARRANTY.
7
You are welcome to redistribute it under certain conditions.
8
Type 'license()' or 'licence()' for distribution details.
9
10
R is a collaborative project with many contributors.
11
Type 'contributors()' for more information and
12
'citation()' on how to cite R or R packages in publications.
13
14
Type 'demo()' for some demos, 'help()' for on-line help, or
15
'help.start()' for an HTML browser interface to help.
16
Type 'q()' to quit R.
17
18
> 
19
> set.seed(290875)
20
> library("party")
21
Loading required package: grid
22
Loading required package: zoo
23
24
Attaching package: 'zoo'
25
26
The following objects are masked from 'package:base':
27
28
    as.Date, as.Date.numeric
29
30
Loading required package: sandwich
31
Loading required package: strucchange
32
Loading required package: modeltools
33
Loading required package: stats4
34
> 
35
> ### get rid of the NAMESPACE
36
> attach(asNamespace("party"))
37
The following objects are masked from package:party:
38
39
    cforest, cforest_classical, cforest_control, cforest_unbiased,
40
    conditionalTree, ctree, ctree_control, ctree_memory, edge_simple,
41
    mob, mob_control, node_barplot, node_bivplot, node_boxplot,
42
    node_density, node_hist, node_inner, node_scatterplot, node_surv,
43
    node_terminal, proximity, ptrafo, reweight, sctest.mob, varimp,
44
    varimpAUC
45
46
> 
47
> ### 
48
> ###
49
> ###    Regression tests for independence tests
50
> ###    
51
> ###    functions defined in file `./src/IndependenceTest.c'    
52
> 
53
> ### tests for function C_IndependenceTest
54
> xval <- rnorm(60)
55
> x <- matrix(rank(xval), ncol = 1)
56
> yf <- gl(2, 30)
57
> y <- sapply(levels(yf), function(l) as.numeric(yf == l))[,1,drop = FALSE]
58
> weights <- rep(1, nrow(x))
59
> 
60
> varctrl <- new("VariableControl")
61
> lec <- new("LinStatExpectCovar", ncol(x), ncol(y))
62
> 
63
> res <- .Call("R_IndependenceTest", x, y, weights,  
64
+               lec, varctrl, PACKAGE = "party")
65
> print(res)
66
[1] 0.3991795 0.6897610
67
> wmw <- wilcox.test(xval ~ yf, exact = FALSE, correct = FALSE)
68
> print(wmw)
69
70
    Wilcoxon rank sum test
71
72
data:  xval by yf
73
W = 477, p-value = 0.6898
74
alternative hypothesis: true location shift is not equal to 0
75
76
> stopifnot(isequal(res[2], wmw$p.value))
77
> 
78
> xval <- rnorm(60)
79
> x <- matrix(rank(xval), ncol = 1)
80
> yf <- gl(3, 20)
81
> y <- sapply(levels(yf), function(l) as.numeric(yf == l))
82
> weights <- rep(1, nrow(x))
83
> 
84
> varctrl <- new("VariableControl")
85
> varctrl@teststat <- factor("quad", levels = c("max", "quad"))
86
> print(varctrl)
87
An object of class "VariableControl"
88
Slot "teststat":
89
[1] quad
90
Levels: max quad
91
92
Slot "pvalue":
93
[1] TRUE
94
95
Slot "tol":
96
[1] 1e-10
97
98
Slot "maxpts":
99
[1] 25000
100
101
Slot "abseps":
102
[1] 1e-04
103
104
Slot "releps":
105
[1] 0
106
107
> lec <- new("LinStatExpectCovarMPinv", ncol(x), ncol(y))
108
> lec@rank <- 0
109
> lec@MPinv <- matrix(0, nrow = ncol(x) * ncol(y), ncol = ncol(x) * ncol(y))
110
> lec@svdmem <- new("svd_mem", ncol(x) * ncol(y))
111
> 
112
> res <- .Call("R_IndependenceTest", x, y, weights,  
113
+               lec, varctrl, PACKAGE = "party")
114
> print(res)
115
[1] 0.6219672 0.7327259
116
> kw <- kruskal.test(xval ~ yf)
117
> print(kw)
118
119
    Kruskal-Wallis rank sum test
120
121
data:  xval by yf
122
Kruskal-Wallis chi-squared = 0.622, df = 2, p-value = 0.7327
123
124
> stopifnot(isequal(res[2], kw$p.value))
125
> stopifnot(isequal(lec@rank, kw$parameter))
126
> 
127
> tmp <- x
128
> x <- y
129
> y <- tmp
130
> varctrl <- new("VariableControl")
131
> varctrl@teststat <- factor("quad", levels = c("max", "quad"))
132
> print(varctrl)
133
An object of class "VariableControl"
134
Slot "teststat":
135
[1] quad
136
Levels: max quad
137
138
Slot "pvalue":
139
[1] TRUE
140
141
Slot "tol":
142
[1] 1e-10
143
144
Slot "maxpts":
145
[1] 25000
146
147
Slot "abseps":
148
[1] 1e-04
149
150
Slot "releps":
151
[1] 0
152
153
> lec <- new("LinStatExpectCovarMPinv", ncol(x), ncol(y))
154
> lec@rank <- 0
155
> lec@MPinv <- matrix(0, nrow = ncol(x) * ncol(y), ncol = ncol(x) * ncol(y))
156
> lec@svdmem <- new("svd_mem", ncol(x) * ncol(y))
157
> 
158
> res <- .Call("R_IndependenceTest", x, y, weights,  
159
+               lec, varctrl, PACKAGE = "party")
160
> print(res)
161
[1] 0.6219672 0.7327259
162
> kw <- kruskal.test(xval ~ yf)
163
> print(kw)
164
165
    Kruskal-Wallis rank sum test
166
167
data:  xval by yf
168
Kruskal-Wallis chi-squared = 0.622, df = 2, p-value = 0.7327
169
170
> stopifnot(isequal(res[2], kw$p.value))
171
> 
172
> xf <- gl(3, 2000)
173
> x <- sapply(levels(xf), function(l) as.numeric(xf == l))
174
> yf <- gl(3, 2000)[sample(1:6000)]
175
> y <- sapply(levels(yf), function(l) as.numeric(yf == l))
176
> weights <- rep(1, nrow(x))
177
> 
178
> varctrl <- new("VariableControl")
179
> varctrl@teststat <- factor("quad", levels = c("max", "quad"))
180
> print(varctrl)
181
An object of class "VariableControl"
182
Slot "teststat":
183
[1] quad
184
Levels: max quad
185
186
Slot "pvalue":
187
[1] TRUE
188
189
Slot "tol":
190
[1] 1e-10
191
192
Slot "maxpts":
193
[1] 25000
194
195
Slot "abseps":
196
[1] 1e-04
197
198
Slot "releps":
199
[1] 0
200
201
> lec <- new("LinStatExpectCovarMPinv", ncol(x), ncol(y))
202
> lec@rank <- 0
203
> lec@MPinv <- matrix(0, nrow = ncol(x) * ncol(y), ncol = ncol(x) * ncol(y))
204
> lec@svdmem <- new("svd_mem", ncol(x) * ncol(y))
205
> 
206
> res <- .Call("R_IndependenceTest", x, y, weights,  
207
+               lec, varctrl, PACKAGE = "party")
208
> print(res)
209
[1] 1.334778 0.855448
210
> chis <- chisq.test(table(xf, yf), correct = FALSE)
211
> print(chis)
212
213
    Pearson's Chi-squared test
214
215
data:  table(xf, yf)
216
X-squared = 1.335, df = 4, p-value = 0.8554
217
218
> stopifnot(isequal(round(res[2],3), round(chis$p.value,3)))
219
> 
220
> ### unbalanced data
221
> xval <- rnorm(60)
222
> x <- matrix(rank(xval), ncol = 1)
223
> yf <- factor(rnorm(60) > 1)
224
> y <- sapply(levels(yf), function(l) as.numeric(yf == l)) #[,1,drop = FALSE]
225
> weights <- rep(1, nrow(x))
226
> 
227
> varctrl <- new("VariableControl")
228
> lec <- new("LinStatExpectCovar", ncol(x), ncol(y))
229
> 
230
> res <- .Call("R_IndependenceTest", x, y, weights,
231
+               lec, varctrl, PACKAGE = "party")
232
> print(res)
233
[1] 1.5378968 0.1240738
234
> wmw <- wilcox.test(xval ~ yf, exact = FALSE, correct = FALSE)
235
> print(wmw)
236
237
    Wilcoxon rank sum test
238
239
data:  xval by yf
240
W = 80, p-value = 0.1241
241
alternative hypothesis: true location shift is not equal to 0
242
243
> stopifnot(isequal(res[2], wmw$p.value))
244
> 
245
> varctrl <- new("VariableControl")
246
> lec <- new("LinStatExpectCovar", ncol(y), ncol(x))
247
> 
248
> res <- .Call("R_IndependenceTest", y, x, weights,
249
+               lec, varctrl, PACKAGE = "party")
250
> print(res)
251
[1] 1.5378968 0.1240738
252
> wmw <- wilcox.test(xval ~ yf, exact = FALSE, correct = FALSE)
253
> print(wmw)
254
255
    Wilcoxon rank sum test
256
257
data:  xval by yf
258
W = 80, p-value = 0.1241
259
alternative hypothesis: true location shift is not equal to 0
260
261
> stopifnot(isequal(res[2], wmw$p.value))
262
> 
263
> xf <- factor(cut(rnorm(6000), breaks = c(-Inf, -2, 0.5, Inf)))
264
> x <- sapply(levels(xf), function(l) as.numeric(xf == l))
265
> yf <- factor(cut(rnorm(6000), breaks = c(-Inf, -0.5, 1.5, Inf)))
266
> y <- sapply(levels(yf), function(l) as.numeric(yf == l))
267
> weights <- rep(1, nrow(x))
268
> 
269
> varctrl <- new("VariableControl")
270
> varctrl@teststat <- factor("quad", levels = c("max", "quad"))
271
> print(varctrl)
272
An object of class "VariableControl"
273
Slot "teststat":
274
[1] quad
275
Levels: max quad
276
277
Slot "pvalue":
278
[1] TRUE
279
280
Slot "tol":
281
[1] 1e-10
282
283
Slot "maxpts":
284
[1] 25000
285
286
Slot "abseps":
287
[1] 1e-04
288
289
Slot "releps":
290
[1] 0
291
292
> lec <- new("LinStatExpectCovarMPinv", ncol(x), ncol(y))
293
> lec@rank <- 0
294
> lec@MPinv <- matrix(0, nrow = ncol(x) * ncol(y), ncol = ncol(x) * ncol(y))
295
> lec@svdmem <- new("svd_mem", ncol(x) * ncol(y))
296
> 
297
> res <- .Call("R_IndependenceTest", x, y, weights,  
298
+               lec, varctrl, PACKAGE = "party")
299
> print(res)
300
[1] 2.1737015 0.7038467
301
> chis <- chisq.test(table(xf, yf), correct = FALSE)
302
> print(chis)
303
304
    Pearson's Chi-squared test
305
306
data:  table(xf, yf)
307
X-squared = 2.1741, df = 4, p-value = 0.7038
308
309
> stopifnot(isequal(round(res[2],3), round(chis$p.value,3)))
310
> 
311
> 
312
> ### Multiple Variables
313
> gtctrl <- new("GlobalTestControl")
314
> tlev <- levels(gtctrl@testtype)   
315
> gtctrl@testtype <- factor("Univariate", levels = tlev)
316
> 
317
> mydata = data.frame(y = gl(2, 50), x1 = rnorm(100),
318
+                     x2 = rnorm(100), x3 = rnorm(100))
319
> inp <- initVariableFrame(mydata[,c("x1", "x2", "x3"),drop = FALSE], 
320
+     trafo = function(data) ptrafo(data, numeric_trafo = rank))
321
> resp <- initVariableFrame(mydata[,"y",drop = FALSE], trafo = NULL, response = TRUE)
322
> ls <- new("LearningSample", inputs = inp, responses = resp,
323
+           weights = rep(1, inp@nobs), nobs = nrow(mydata), 
324
+           ninputs = inp@ninputs)
325
> tm <- ctree_memory(ls)
326
> varctrl <- new("VariableControl")
327
> pvals <- .Call("R_GlobalTest", ls, ls@weights, tm, varctrl, gtctrl, PACKAGE = "party")[[2]]
328
> wpvals <- rep(0, 3)
329
> wpvals[1] <- wilcox.test(x1 ~ y, data = mydata,
330
+                          correct = FALSE, exact = FALSE)$p.value
331
> wpvals[2] <- wilcox.test(x2 ~ y, data = mydata, 
332
+                          correct = FALSE, exact = FALSE)$p.value
333
> wpvals[3] <- wilcox.test(x3 ~ y, data = mydata, 
334
+                          correct = FALSE, exact = FALSE)$p.value
335
> stopifnot(isequal(wpvals, 1 - pvals))
336
> 
337
> varctrl <- new("VariableControl")
338
> gtctrl@testtype <- factor("MonteCarlo", levels = tlev)
339
> gtctrl@nresample <- as.integer(19999)
340
> inp <- initVariableFrame(mydata[,"x1",drop = FALSE], trafo = function(data)
341
+     ptrafo(data, numeric_trafo = rank))
342
> resp <- initVariableFrame(mydata[,"y",drop = FALSE], trafo = NULL, response = TRUE)
343
> ls <- new("LearningSample", inputs = inp, responses = resp,
344
+           weights = rep(1, inp@nobs), nobs = nrow(mydata), 
345
+           ninputs = as.integer(1))
346
> pvals <- .Call("R_GlobalTest", ls, ls@weights, tm, varctrl, gtctrl, PACKAGE = "party")[[2]]
347
> stopifnot(abs((1 - pvals) - wilcox.test(x1 ~ y, data = mydata, 
348
+     exact = TRUE)$p.value) < 1e-2)
349
> 
350
> proc.time()
351
   user  system elapsed 
352
  0.964   0.048   1.012