[fbf06f]: / partyMod / tests / IndependenceTest-regtest.R

Download this file

178 lines (151 with data), 6.2 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
set.seed(290875)
library("party")
### get rid of the NAMESPACE
attach(asNamespace("party"))
###
###
### Regression tests for independence tests
###
### functions defined in file `./src/IndependenceTest.c'
### tests for function C_IndependenceTest
xval <- rnorm(60)
x <- matrix(rank(xval), ncol = 1)
yf <- gl(2, 30)
y <- sapply(levels(yf), function(l) as.numeric(yf == l))[,1,drop = FALSE]
weights <- rep(1, nrow(x))
varctrl <- new("VariableControl")
lec <- new("LinStatExpectCovar", ncol(x), ncol(y))
res <- .Call("R_IndependenceTest", x, y, weights,
lec, varctrl, PACKAGE = "party")
print(res)
wmw <- wilcox.test(xval ~ yf, exact = FALSE, correct = FALSE)
print(wmw)
stopifnot(isequal(res[2], wmw$p.value))
xval <- rnorm(60)
x <- matrix(rank(xval), ncol = 1)
yf <- gl(3, 20)
y <- sapply(levels(yf), function(l) as.numeric(yf == l))
weights <- rep(1, nrow(x))
varctrl <- new("VariableControl")
varctrl@teststat <- factor("quad", levels = c("max", "quad"))
print(varctrl)
lec <- new("LinStatExpectCovarMPinv", ncol(x), ncol(y))
lec@rank <- 0
lec@MPinv <- matrix(0, nrow = ncol(x) * ncol(y), ncol = ncol(x) * ncol(y))
lec@svdmem <- new("svd_mem", ncol(x) * ncol(y))
res <- .Call("R_IndependenceTest", x, y, weights,
lec, varctrl, PACKAGE = "party")
print(res)
kw <- kruskal.test(xval ~ yf)
print(kw)
stopifnot(isequal(res[2], kw$p.value))
stopifnot(isequal(lec@rank, kw$parameter))
tmp <- x
x <- y
y <- tmp
varctrl <- new("VariableControl")
varctrl@teststat <- factor("quad", levels = c("max", "quad"))
print(varctrl)
lec <- new("LinStatExpectCovarMPinv", ncol(x), ncol(y))
lec@rank <- 0
lec@MPinv <- matrix(0, nrow = ncol(x) * ncol(y), ncol = ncol(x) * ncol(y))
lec@svdmem <- new("svd_mem", ncol(x) * ncol(y))
res <- .Call("R_IndependenceTest", x, y, weights,
lec, varctrl, PACKAGE = "party")
print(res)
kw <- kruskal.test(xval ~ yf)
print(kw)
stopifnot(isequal(res[2], kw$p.value))
xf <- gl(3, 2000)
x <- sapply(levels(xf), function(l) as.numeric(xf == l))
yf <- gl(3, 2000)[sample(1:6000)]
y <- sapply(levels(yf), function(l) as.numeric(yf == l))
weights <- rep(1, nrow(x))
varctrl <- new("VariableControl")
varctrl@teststat <- factor("quad", levels = c("max", "quad"))
print(varctrl)
lec <- new("LinStatExpectCovarMPinv", ncol(x), ncol(y))
lec@rank <- 0
lec@MPinv <- matrix(0, nrow = ncol(x) * ncol(y), ncol = ncol(x) * ncol(y))
lec@svdmem <- new("svd_mem", ncol(x) * ncol(y))
res <- .Call("R_IndependenceTest", x, y, weights,
lec, varctrl, PACKAGE = "party")
print(res)
chis <- chisq.test(table(xf, yf), correct = FALSE)
print(chis)
stopifnot(isequal(round(res[2],3), round(chis$p.value,3)))
### unbalanced data
xval <- rnorm(60)
x <- matrix(rank(xval), ncol = 1)
yf <- factor(rnorm(60) > 1)
y <- sapply(levels(yf), function(l) as.numeric(yf == l)) #[,1,drop = FALSE]
weights <- rep(1, nrow(x))
varctrl <- new("VariableControl")
lec <- new("LinStatExpectCovar", ncol(x), ncol(y))
res <- .Call("R_IndependenceTest", x, y, weights,
lec, varctrl, PACKAGE = "party")
print(res)
wmw <- wilcox.test(xval ~ yf, exact = FALSE, correct = FALSE)
print(wmw)
stopifnot(isequal(res[2], wmw$p.value))
varctrl <- new("VariableControl")
lec <- new("LinStatExpectCovar", ncol(y), ncol(x))
res <- .Call("R_IndependenceTest", y, x, weights,
lec, varctrl, PACKAGE = "party")
print(res)
wmw <- wilcox.test(xval ~ yf, exact = FALSE, correct = FALSE)
print(wmw)
stopifnot(isequal(res[2], wmw$p.value))
xf <- factor(cut(rnorm(6000), breaks = c(-Inf, -2, 0.5, Inf)))
x <- sapply(levels(xf), function(l) as.numeric(xf == l))
yf <- factor(cut(rnorm(6000), breaks = c(-Inf, -0.5, 1.5, Inf)))
y <- sapply(levels(yf), function(l) as.numeric(yf == l))
weights <- rep(1, nrow(x))
varctrl <- new("VariableControl")
varctrl@teststat <- factor("quad", levels = c("max", "quad"))
print(varctrl)
lec <- new("LinStatExpectCovarMPinv", ncol(x), ncol(y))
lec@rank <- 0
lec@MPinv <- matrix(0, nrow = ncol(x) * ncol(y), ncol = ncol(x) * ncol(y))
lec@svdmem <- new("svd_mem", ncol(x) * ncol(y))
res <- .Call("R_IndependenceTest", x, y, weights,
lec, varctrl, PACKAGE = "party")
print(res)
chis <- chisq.test(table(xf, yf), correct = FALSE)
print(chis)
stopifnot(isequal(round(res[2],3), round(chis$p.value,3)))
### Multiple Variables
gtctrl <- new("GlobalTestControl")
tlev <- levels(gtctrl@testtype)
gtctrl@testtype <- factor("Univariate", levels = tlev)
mydata = data.frame(y = gl(2, 50), x1 = rnorm(100),
x2 = rnorm(100), x3 = rnorm(100))
inp <- initVariableFrame(mydata[,c("x1", "x2", "x3"),drop = FALSE],
trafo = function(data) ptrafo(data, numeric_trafo = rank))
resp <- initVariableFrame(mydata[,"y",drop = FALSE], trafo = NULL, response = TRUE)
ls <- new("LearningSample", inputs = inp, responses = resp,
weights = rep(1, inp@nobs), nobs = nrow(mydata),
ninputs = inp@ninputs)
tm <- ctree_memory(ls)
varctrl <- new("VariableControl")
pvals <- .Call("R_GlobalTest", ls, ls@weights, tm, varctrl, gtctrl, PACKAGE = "party")[[2]]
wpvals <- rep(0, 3)
wpvals[1] <- wilcox.test(x1 ~ y, data = mydata,
correct = FALSE, exact = FALSE)$p.value
wpvals[2] <- wilcox.test(x2 ~ y, data = mydata,
correct = FALSE, exact = FALSE)$p.value
wpvals[3] <- wilcox.test(x3 ~ y, data = mydata,
correct = FALSE, exact = FALSE)$p.value
stopifnot(isequal(wpvals, 1 - pvals))
varctrl <- new("VariableControl")
gtctrl@testtype <- factor("MonteCarlo", levels = tlev)
gtctrl@nresample <- as.integer(19999)
inp <- initVariableFrame(mydata[,"x1",drop = FALSE], trafo = function(data)
ptrafo(data, numeric_trafo = rank))
resp <- initVariableFrame(mydata[,"y",drop = FALSE], trafo = NULL, response = TRUE)
ls <- new("LearningSample", inputs = inp, responses = resp,
weights = rep(1, inp@nobs), nobs = nrow(mydata),
ninputs = as.integer(1))
pvals <- .Call("R_GlobalTest", ls, ls@weights, tm, varctrl, gtctrl, PACKAGE = "party")[[2]]
stopifnot(abs((1 - pvals) - wilcox.test(x1 ~ y, data = mydata,
exact = TRUE)$p.value) < 1e-2)