|
a |
|
b/partyMod/tests/IndependenceTest-regtest.R |
|
|
1 |
|
|
|
2 |
set.seed(290875) |
|
|
3 |
library("party") |
|
|
4 |
|
|
|
5 |
### get rid of the NAMESPACE |
|
|
6 |
attach(asNamespace("party")) |
|
|
7 |
|
|
|
8 |
### |
|
|
9 |
### |
|
|
10 |
### Regression tests for independence tests |
|
|
11 |
### |
|
|
12 |
### functions defined in file `./src/IndependenceTest.c' |
|
|
13 |
|
|
|
14 |
### tests for function C_IndependenceTest |
|
|
15 |
xval <- rnorm(60) |
|
|
16 |
x <- matrix(rank(xval), ncol = 1) |
|
|
17 |
yf <- gl(2, 30) |
|
|
18 |
y <- sapply(levels(yf), function(l) as.numeric(yf == l))[,1,drop = FALSE] |
|
|
19 |
weights <- rep(1, nrow(x)) |
|
|
20 |
|
|
|
21 |
varctrl <- new("VariableControl") |
|
|
22 |
lec <- new("LinStatExpectCovar", ncol(x), ncol(y)) |
|
|
23 |
|
|
|
24 |
res <- .Call("R_IndependenceTest", x, y, weights, |
|
|
25 |
lec, varctrl, PACKAGE = "party") |
|
|
26 |
print(res) |
|
|
27 |
wmw <- wilcox.test(xval ~ yf, exact = FALSE, correct = FALSE) |
|
|
28 |
print(wmw) |
|
|
29 |
stopifnot(isequal(res[2], wmw$p.value)) |
|
|
30 |
|
|
|
31 |
xval <- rnorm(60) |
|
|
32 |
x <- matrix(rank(xval), ncol = 1) |
|
|
33 |
yf <- gl(3, 20) |
|
|
34 |
y <- sapply(levels(yf), function(l) as.numeric(yf == l)) |
|
|
35 |
weights <- rep(1, nrow(x)) |
|
|
36 |
|
|
|
37 |
varctrl <- new("VariableControl") |
|
|
38 |
varctrl@teststat <- factor("quad", levels = c("max", "quad")) |
|
|
39 |
print(varctrl) |
|
|
40 |
lec <- new("LinStatExpectCovarMPinv", ncol(x), ncol(y)) |
|
|
41 |
lec@rank <- 0 |
|
|
42 |
lec@MPinv <- matrix(0, nrow = ncol(x) * ncol(y), ncol = ncol(x) * ncol(y)) |
|
|
43 |
lec@svdmem <- new("svd_mem", ncol(x) * ncol(y)) |
|
|
44 |
|
|
|
45 |
res <- .Call("R_IndependenceTest", x, y, weights, |
|
|
46 |
lec, varctrl, PACKAGE = "party") |
|
|
47 |
print(res) |
|
|
48 |
kw <- kruskal.test(xval ~ yf) |
|
|
49 |
print(kw) |
|
|
50 |
stopifnot(isequal(res[2], kw$p.value)) |
|
|
51 |
stopifnot(isequal(lec@rank, kw$parameter)) |
|
|
52 |
|
|
|
53 |
tmp <- x |
|
|
54 |
x <- y |
|
|
55 |
y <- tmp |
|
|
56 |
varctrl <- new("VariableControl") |
|
|
57 |
varctrl@teststat <- factor("quad", levels = c("max", "quad")) |
|
|
58 |
print(varctrl) |
|
|
59 |
lec <- new("LinStatExpectCovarMPinv", ncol(x), ncol(y)) |
|
|
60 |
lec@rank <- 0 |
|
|
61 |
lec@MPinv <- matrix(0, nrow = ncol(x) * ncol(y), ncol = ncol(x) * ncol(y)) |
|
|
62 |
lec@svdmem <- new("svd_mem", ncol(x) * ncol(y)) |
|
|
63 |
|
|
|
64 |
res <- .Call("R_IndependenceTest", x, y, weights, |
|
|
65 |
lec, varctrl, PACKAGE = "party") |
|
|
66 |
print(res) |
|
|
67 |
kw <- kruskal.test(xval ~ yf) |
|
|
68 |
print(kw) |
|
|
69 |
stopifnot(isequal(res[2], kw$p.value)) |
|
|
70 |
|
|
|
71 |
xf <- gl(3, 2000) |
|
|
72 |
x <- sapply(levels(xf), function(l) as.numeric(xf == l)) |
|
|
73 |
yf <- gl(3, 2000)[sample(1:6000)] |
|
|
74 |
y <- sapply(levels(yf), function(l) as.numeric(yf == l)) |
|
|
75 |
weights <- rep(1, nrow(x)) |
|
|
76 |
|
|
|
77 |
varctrl <- new("VariableControl") |
|
|
78 |
varctrl@teststat <- factor("quad", levels = c("max", "quad")) |
|
|
79 |
print(varctrl) |
|
|
80 |
lec <- new("LinStatExpectCovarMPinv", ncol(x), ncol(y)) |
|
|
81 |
lec@rank <- 0 |
|
|
82 |
lec@MPinv <- matrix(0, nrow = ncol(x) * ncol(y), ncol = ncol(x) * ncol(y)) |
|
|
83 |
lec@svdmem <- new("svd_mem", ncol(x) * ncol(y)) |
|
|
84 |
|
|
|
85 |
res <- .Call("R_IndependenceTest", x, y, weights, |
|
|
86 |
lec, varctrl, PACKAGE = "party") |
|
|
87 |
print(res) |
|
|
88 |
chis <- chisq.test(table(xf, yf), correct = FALSE) |
|
|
89 |
print(chis) |
|
|
90 |
stopifnot(isequal(round(res[2],3), round(chis$p.value,3))) |
|
|
91 |
|
|
|
92 |
### unbalanced data |
|
|
93 |
xval <- rnorm(60) |
|
|
94 |
x <- matrix(rank(xval), ncol = 1) |
|
|
95 |
yf <- factor(rnorm(60) > 1) |
|
|
96 |
y <- sapply(levels(yf), function(l) as.numeric(yf == l)) #[,1,drop = FALSE] |
|
|
97 |
weights <- rep(1, nrow(x)) |
|
|
98 |
|
|
|
99 |
varctrl <- new("VariableControl") |
|
|
100 |
lec <- new("LinStatExpectCovar", ncol(x), ncol(y)) |
|
|
101 |
|
|
|
102 |
res <- .Call("R_IndependenceTest", x, y, weights, |
|
|
103 |
lec, varctrl, PACKAGE = "party") |
|
|
104 |
print(res) |
|
|
105 |
wmw <- wilcox.test(xval ~ yf, exact = FALSE, correct = FALSE) |
|
|
106 |
print(wmw) |
|
|
107 |
stopifnot(isequal(res[2], wmw$p.value)) |
|
|
108 |
|
|
|
109 |
varctrl <- new("VariableControl") |
|
|
110 |
lec <- new("LinStatExpectCovar", ncol(y), ncol(x)) |
|
|
111 |
|
|
|
112 |
res <- .Call("R_IndependenceTest", y, x, weights, |
|
|
113 |
lec, varctrl, PACKAGE = "party") |
|
|
114 |
print(res) |
|
|
115 |
wmw <- wilcox.test(xval ~ yf, exact = FALSE, correct = FALSE) |
|
|
116 |
print(wmw) |
|
|
117 |
stopifnot(isequal(res[2], wmw$p.value)) |
|
|
118 |
|
|
|
119 |
xf <- factor(cut(rnorm(6000), breaks = c(-Inf, -2, 0.5, Inf))) |
|
|
120 |
x <- sapply(levels(xf), function(l) as.numeric(xf == l)) |
|
|
121 |
yf <- factor(cut(rnorm(6000), breaks = c(-Inf, -0.5, 1.5, Inf))) |
|
|
122 |
y <- sapply(levels(yf), function(l) as.numeric(yf == l)) |
|
|
123 |
weights <- rep(1, nrow(x)) |
|
|
124 |
|
|
|
125 |
varctrl <- new("VariableControl") |
|
|
126 |
varctrl@teststat <- factor("quad", levels = c("max", "quad")) |
|
|
127 |
print(varctrl) |
|
|
128 |
lec <- new("LinStatExpectCovarMPinv", ncol(x), ncol(y)) |
|
|
129 |
lec@rank <- 0 |
|
|
130 |
lec@MPinv <- matrix(0, nrow = ncol(x) * ncol(y), ncol = ncol(x) * ncol(y)) |
|
|
131 |
lec@svdmem <- new("svd_mem", ncol(x) * ncol(y)) |
|
|
132 |
|
|
|
133 |
res <- .Call("R_IndependenceTest", x, y, weights, |
|
|
134 |
lec, varctrl, PACKAGE = "party") |
|
|
135 |
print(res) |
|
|
136 |
chis <- chisq.test(table(xf, yf), correct = FALSE) |
|
|
137 |
print(chis) |
|
|
138 |
stopifnot(isequal(round(res[2],3), round(chis$p.value,3))) |
|
|
139 |
|
|
|
140 |
|
|
|
141 |
### Multiple Variables |
|
|
142 |
gtctrl <- new("GlobalTestControl") |
|
|
143 |
tlev <- levels(gtctrl@testtype) |
|
|
144 |
gtctrl@testtype <- factor("Univariate", levels = tlev) |
|
|
145 |
|
|
|
146 |
mydata = data.frame(y = gl(2, 50), x1 = rnorm(100), |
|
|
147 |
x2 = rnorm(100), x3 = rnorm(100)) |
|
|
148 |
inp <- initVariableFrame(mydata[,c("x1", "x2", "x3"),drop = FALSE], |
|
|
149 |
trafo = function(data) ptrafo(data, numeric_trafo = rank)) |
|
|
150 |
resp <- initVariableFrame(mydata[,"y",drop = FALSE], trafo = NULL, response = TRUE) |
|
|
151 |
ls <- new("LearningSample", inputs = inp, responses = resp, |
|
|
152 |
weights = rep(1, inp@nobs), nobs = nrow(mydata), |
|
|
153 |
ninputs = inp@ninputs) |
|
|
154 |
tm <- ctree_memory(ls) |
|
|
155 |
varctrl <- new("VariableControl") |
|
|
156 |
pvals <- .Call("R_GlobalTest", ls, ls@weights, tm, varctrl, gtctrl, PACKAGE = "party")[[2]] |
|
|
157 |
wpvals <- rep(0, 3) |
|
|
158 |
wpvals[1] <- wilcox.test(x1 ~ y, data = mydata, |
|
|
159 |
correct = FALSE, exact = FALSE)$p.value |
|
|
160 |
wpvals[2] <- wilcox.test(x2 ~ y, data = mydata, |
|
|
161 |
correct = FALSE, exact = FALSE)$p.value |
|
|
162 |
wpvals[3] <- wilcox.test(x3 ~ y, data = mydata, |
|
|
163 |
correct = FALSE, exact = FALSE)$p.value |
|
|
164 |
stopifnot(isequal(wpvals, 1 - pvals)) |
|
|
165 |
|
|
|
166 |
varctrl <- new("VariableControl") |
|
|
167 |
gtctrl@testtype <- factor("MonteCarlo", levels = tlev) |
|
|
168 |
gtctrl@nresample <- as.integer(19999) |
|
|
169 |
inp <- initVariableFrame(mydata[,"x1",drop = FALSE], trafo = function(data) |
|
|
170 |
ptrafo(data, numeric_trafo = rank)) |
|
|
171 |
resp <- initVariableFrame(mydata[,"y",drop = FALSE], trafo = NULL, response = TRUE) |
|
|
172 |
ls <- new("LearningSample", inputs = inp, responses = resp, |
|
|
173 |
weights = rep(1, inp@nobs), nobs = nrow(mydata), |
|
|
174 |
ninputs = as.integer(1)) |
|
|
175 |
pvals <- .Call("R_GlobalTest", ls, ls@weights, tm, varctrl, gtctrl, PACKAGE = "party")[[2]] |
|
|
176 |
stopifnot(abs((1 - pvals) - wilcox.test(x1 ~ y, data = mydata, |
|
|
177 |
exact = TRUE)$p.value) < 1e-2) |