Switch to unified view

a b/partyMod/tests/IndependenceTest-regtest.R
1
2
set.seed(290875)
3
library("party")
4
5
### get rid of the NAMESPACE
6
attach(asNamespace("party"))
7
8
### 
9
###
10
###    Regression tests for independence tests
11
###    
12
###    functions defined in file `./src/IndependenceTest.c'    
13
14
### tests for function C_IndependenceTest
15
xval <- rnorm(60)
16
x <- matrix(rank(xval), ncol = 1)
17
yf <- gl(2, 30)
18
y <- sapply(levels(yf), function(l) as.numeric(yf == l))[,1,drop = FALSE]
19
weights <- rep(1, nrow(x))
20
21
varctrl <- new("VariableControl")
22
lec <- new("LinStatExpectCovar", ncol(x), ncol(y))
23
24
res <- .Call("R_IndependenceTest", x, y, weights,  
25
              lec, varctrl, PACKAGE = "party")
26
print(res)
27
wmw <- wilcox.test(xval ~ yf, exact = FALSE, correct = FALSE)
28
print(wmw)
29
stopifnot(isequal(res[2], wmw$p.value))
30
31
xval <- rnorm(60)
32
x <- matrix(rank(xval), ncol = 1)
33
yf <- gl(3, 20)
34
y <- sapply(levels(yf), function(l) as.numeric(yf == l))
35
weights <- rep(1, nrow(x))
36
37
varctrl <- new("VariableControl")
38
varctrl@teststat <- factor("quad", levels = c("max", "quad"))
39
print(varctrl)
40
lec <- new("LinStatExpectCovarMPinv", ncol(x), ncol(y))
41
lec@rank <- 0
42
lec@MPinv <- matrix(0, nrow = ncol(x) * ncol(y), ncol = ncol(x) * ncol(y))
43
lec@svdmem <- new("svd_mem", ncol(x) * ncol(y))
44
45
res <- .Call("R_IndependenceTest", x, y, weights,  
46
              lec, varctrl, PACKAGE = "party")
47
print(res)
48
kw <- kruskal.test(xval ~ yf)
49
print(kw)
50
stopifnot(isequal(res[2], kw$p.value))
51
stopifnot(isequal(lec@rank, kw$parameter))
52
53
tmp <- x
54
x <- y
55
y <- tmp
56
varctrl <- new("VariableControl")
57
varctrl@teststat <- factor("quad", levels = c("max", "quad"))
58
print(varctrl)
59
lec <- new("LinStatExpectCovarMPinv", ncol(x), ncol(y))
60
lec@rank <- 0
61
lec@MPinv <- matrix(0, nrow = ncol(x) * ncol(y), ncol = ncol(x) * ncol(y))
62
lec@svdmem <- new("svd_mem", ncol(x) * ncol(y))
63
64
res <- .Call("R_IndependenceTest", x, y, weights,  
65
              lec, varctrl, PACKAGE = "party")
66
print(res)
67
kw <- kruskal.test(xval ~ yf)
68
print(kw)
69
stopifnot(isequal(res[2], kw$p.value))
70
71
xf <- gl(3, 2000)
72
x <- sapply(levels(xf), function(l) as.numeric(xf == l))
73
yf <- gl(3, 2000)[sample(1:6000)]
74
y <- sapply(levels(yf), function(l) as.numeric(yf == l))
75
weights <- rep(1, nrow(x))
76
77
varctrl <- new("VariableControl")
78
varctrl@teststat <- factor("quad", levels = c("max", "quad"))
79
print(varctrl)
80
lec <- new("LinStatExpectCovarMPinv", ncol(x), ncol(y))
81
lec@rank <- 0
82
lec@MPinv <- matrix(0, nrow = ncol(x) * ncol(y), ncol = ncol(x) * ncol(y))
83
lec@svdmem <- new("svd_mem", ncol(x) * ncol(y))
84
85
res <- .Call("R_IndependenceTest", x, y, weights,  
86
              lec, varctrl, PACKAGE = "party")
87
print(res)
88
chis <- chisq.test(table(xf, yf), correct = FALSE)
89
print(chis)
90
stopifnot(isequal(round(res[2],3), round(chis$p.value,3)))
91
92
### unbalanced data
93
xval <- rnorm(60)
94
x <- matrix(rank(xval), ncol = 1)
95
yf <- factor(rnorm(60) > 1)
96
y <- sapply(levels(yf), function(l) as.numeric(yf == l)) #[,1,drop = FALSE]
97
weights <- rep(1, nrow(x))
98
99
varctrl <- new("VariableControl")
100
lec <- new("LinStatExpectCovar", ncol(x), ncol(y))
101
102
res <- .Call("R_IndependenceTest", x, y, weights,
103
              lec, varctrl, PACKAGE = "party")
104
print(res)
105
wmw <- wilcox.test(xval ~ yf, exact = FALSE, correct = FALSE)
106
print(wmw)
107
stopifnot(isequal(res[2], wmw$p.value))
108
109
varctrl <- new("VariableControl")
110
lec <- new("LinStatExpectCovar", ncol(y), ncol(x))
111
112
res <- .Call("R_IndependenceTest", y, x, weights,
113
              lec, varctrl, PACKAGE = "party")
114
print(res)
115
wmw <- wilcox.test(xval ~ yf, exact = FALSE, correct = FALSE)
116
print(wmw)
117
stopifnot(isequal(res[2], wmw$p.value))
118
119
xf <- factor(cut(rnorm(6000), breaks = c(-Inf, -2, 0.5, Inf)))
120
x <- sapply(levels(xf), function(l) as.numeric(xf == l))
121
yf <- factor(cut(rnorm(6000), breaks = c(-Inf, -0.5, 1.5, Inf)))
122
y <- sapply(levels(yf), function(l) as.numeric(yf == l))
123
weights <- rep(1, nrow(x))
124
125
varctrl <- new("VariableControl")
126
varctrl@teststat <- factor("quad", levels = c("max", "quad"))
127
print(varctrl)
128
lec <- new("LinStatExpectCovarMPinv", ncol(x), ncol(y))
129
lec@rank <- 0
130
lec@MPinv <- matrix(0, nrow = ncol(x) * ncol(y), ncol = ncol(x) * ncol(y))
131
lec@svdmem <- new("svd_mem", ncol(x) * ncol(y))
132
133
res <- .Call("R_IndependenceTest", x, y, weights,  
134
              lec, varctrl, PACKAGE = "party")
135
print(res)
136
chis <- chisq.test(table(xf, yf), correct = FALSE)
137
print(chis)
138
stopifnot(isequal(round(res[2],3), round(chis$p.value,3)))
139
140
141
### Multiple Variables
142
gtctrl <- new("GlobalTestControl")
143
tlev <- levels(gtctrl@testtype)   
144
gtctrl@testtype <- factor("Univariate", levels = tlev)
145
146
mydata = data.frame(y = gl(2, 50), x1 = rnorm(100),
147
                    x2 = rnorm(100), x3 = rnorm(100))
148
inp <- initVariableFrame(mydata[,c("x1", "x2", "x3"),drop = FALSE], 
149
    trafo = function(data) ptrafo(data, numeric_trafo = rank))
150
resp <- initVariableFrame(mydata[,"y",drop = FALSE], trafo = NULL, response = TRUE)
151
ls <- new("LearningSample", inputs = inp, responses = resp,
152
          weights = rep(1, inp@nobs), nobs = nrow(mydata), 
153
          ninputs = inp@ninputs)
154
tm <- ctree_memory(ls)
155
varctrl <- new("VariableControl")
156
pvals <- .Call("R_GlobalTest", ls, ls@weights, tm, varctrl, gtctrl, PACKAGE = "party")[[2]]
157
wpvals <- rep(0, 3)
158
wpvals[1] <- wilcox.test(x1 ~ y, data = mydata,
159
                         correct = FALSE, exact = FALSE)$p.value
160
wpvals[2] <- wilcox.test(x2 ~ y, data = mydata, 
161
                         correct = FALSE, exact = FALSE)$p.value
162
wpvals[3] <- wilcox.test(x3 ~ y, data = mydata, 
163
                         correct = FALSE, exact = FALSE)$p.value
164
stopifnot(isequal(wpvals, 1 - pvals))
165
166
varctrl <- new("VariableControl")
167
gtctrl@testtype <- factor("MonteCarlo", levels = tlev)
168
gtctrl@nresample <- as.integer(19999)
169
inp <- initVariableFrame(mydata[,"x1",drop = FALSE], trafo = function(data)
170
    ptrafo(data, numeric_trafo = rank))
171
resp <- initVariableFrame(mydata[,"y",drop = FALSE], trafo = NULL, response = TRUE)
172
ls <- new("LearningSample", inputs = inp, responses = resp,
173
          weights = rep(1, inp@nobs), nobs = nrow(mydata), 
174
          ninputs = as.integer(1))
175
pvals <- .Call("R_GlobalTest", ls, ls@weights, tm, varctrl, gtctrl, PACKAGE = "party")[[2]]
176
stopifnot(abs((1 - pvals) - wilcox.test(x1 ~ y, data = mydata, 
177
    exact = TRUE)$p.value) < 1e-2)