Switch to unified view

a b/partyMod/demo/strucchange-perm.R
1
###################
2
## Preliminaries ##
3
###################
4
5
## packages
6
library("coin")
7
library("strucchange")
8
library("lattice")
9
10
## random seed
11
rseed <- 20061103
12
13
## theme for lattice/trellis graphics
14
trellis.par.set(theme = canonical.theme(color = FALSE))
15
16
17
######################
18
## Boston homicides ##
19
######################
20
21
## time series plot
22
data("BostonHomicide", package = "strucchange")
23
hom_month <- zoo(as.vector(BostonHomicide$homicides), as.yearmon(as.vector(time(BostonHomicide$homicides))))
24
hom_year <- aggregate(hom_month, function(x) as.numeric(floor(x)), mean)
25
plot(hom_month, col = grey(0.7), lwd = 2, ylab = "Number of homicides", xlab = "Time")
26
lines(hom_year, type = "b")
27
28
## monthly data
29
hom_month <- data.frame(
30
  log_homicides = log(coredata(hom_month) + 0.5),
31
  time = as.numeric(time(hom_month)))
32
33
## asymptotic unconditional test
34
sctest(gefp(log_homicides ~ 1, data = hom_month), functional = supLM(0.1))
35
36
## asymptotic conditional test
37
set.seed(rseed)
38
maxstat_test(log_homicides ~ time, data = hom_month, minprob = 0.1)
39
40
## approximate conditional test
41
set.seed(rseed)
42
maxstat_test(log_homicides ~ time, data = hom_month, minprob = 0.1, distribution = approximate(9999))
43
44
## annual data
45
hom_year <- data.frame(
46
  homicides = coredata(hom_year),
47
  time = time(hom_year))
48
49
## asymptotic unconditional test
50
sctest(gefp(homicides ~ 1, data = hom_year), functional = supLM(0.1))
51
52
## asymptotic conditional test
53
set.seed(rseed)
54
maxstat_test(homicides ~ time, data = hom_year, minprob = 0.1)
55
56
## approximate conditional test
57
set.seed(rseed)
58
maxstat_test(homicides ~ time, data = hom_year, minprob = 0.1, distribution = approximate(9999))
59
## note that the manuscript computes the exact conditional p-value
60
## by exhaustive search (rather than approximating via 10,000 simulations)
61
62
63
###########################
64
## Hiring discrimination ##
65
###########################
66
67
## data
68
hire <- cbind(c(2, 0, 0, 0, 5, 14), c(427, 86, 104, 180, 111, 59))
69
hire <- data.frame(
70
  resp = factor(unlist(sapply(1:nrow(hire), function(i) rep(c("female", "male"), hire[i,])))),
71
  time = as.numeric(rep(1991:1996, rowSums(hire))))
72
73
## visualization
74
set.seed(rseed)
75
maxstat_test(resp ~ time, data = hire, distribution = approximate(9999), minprob = 0.05)
76
77
78
################
79
## CO2 reflux ##
80
################
81
82
## data
83
data("CWD", package = "coin")
84
lwood <- reshape(CWD, varying = paste("sample", c(2:4,6:8), sep = ""),
85
  timevar = "tree", direction = "long", v.names =  "sample")
86
lwood$tree <- factor(lwood$tree)
87
88
## visualization
89
print(xyplot(sample ~ time | tree, data = lwood, type = "b",
90
  scales = list(y = list(relation = "free")),
91
  layout = c(3, 2), xlab = "Time", ylab = expression(paste(CO[2], " reflux")),
92
  as.table = TRUE))
93
94
## test
95
(cwd_mt <- maxstat_test(sample2 + sample3 + sample4 + sample6 + sample7 + sample8 ~ trend, 
96
  data = CWD, distribution = approximate(1e5)))
97
98
## maximally selected statistics with 5% critical value
99
cwd_st <- statistic(cwd_mt, "standardized")
100
cwd_st <- data.frame(cwd_st, time = CWD$time[1] + CWD$trend[2:11])
101
cwd_st <- reshape(cwd_st, varying = paste("sample", c(2:4,6:8), sep = ""),
102
  timevar = "tree", direction = "long", v.names = "sample")
103
cwd_st$tree <- factor(cwd_st$tree)
104
cwd_q <- qperm(cwd_mt, 0.95)
105
print(xyplot(sample ~ time | tree, data = cwd_st, type = "b",
106
  panel = function(...) {
107
    panel.xyplot(...)
108
    panel.abline(h = c(-cwd_q, cwd_q), col = 2)
109
    panel.abline(h = 0, col = "gray")
110
  },
111
  layout = c(3, 2), xlab = "Time", ylab = "Test statistics", ylim = c(-3.5, 3.5),
112
  as.table = TRUE))
113
114
115
##################################
116
## Dow Jones Industrial Average ##
117
##################################
118
119
## data
120
data("DJIA", package = "strucchange")
121
djia <- diff(log(DJIA)) * 100
122
djia_res <- coredata(djia - mean(djia))
123
djia_trafo <- cbind(Intercept = djia_res, Variance = djia_res^2 - mean(djia_res^2))
124
djia_time <- time(djia)
125
126
## visualization
127
plot(djia, xlab = "Time", ylab = "Dow Jones stock returns")
128
129
## test
130
set.seed(rseed)
131
djia_mt <- maxstat_test(djia_trafo ~ as.numeric(djia_time), distribution = approximate(9999), minprob = 0.1)
132
133
## maximally selected statistics
134
apply(abs(statistic(djia_mt, "standardized")), 2, max)
135
136
## critical values
137
qperm(djia_mt, 0.95)
138
139
## breakpoint
140
djia_point <- djia_time[floor(length(djia) * 0.1) + which.max(abs(statistic(djia_mt, "standardized")[,2]))]
141
142
## autocorrelation
143
djia_pre  <- window(djia, end = djia_point)
144
djia_post <- window(djia, start = djia_point + 1)
145
ar1 <- function(x, digits = 3) {
146
  x <- coredata(x)
147
  x <- x - mean(x)
148
  c(acf(x, plot = FALSE)$acf[2], acf(x^2, plot = FALSE)$acf[2])
149
}
150
ar1(djia_pre)
151
ar1(djia_post)
152
153
154
#######################
155
## Economic journals ##
156
#######################
157
158
## data
159
data("Journals", package = "AER")
160
Journals$age <- 2000 - Journals$foundingyear
161
Journals <- Journals[order(Journals$age),]
162
163
## model in root node
164
jour_lm  <- lm(log(subs) ~ log(price/citations), data = Journals)
165
166
## test in root node
167
set.seed(rseed)
168
(jour_mt <- maxstat_test(estfun(jour_lm) ~ age, data = Journals, distribution = approximate(9999), minprob = 0.1))
169
170
## test information
171
jour_critval <- qperm(jour_mt, 0.95^(1/4))
172
jour_process <- statistic(jour_mt, "standardized")
173
jour_process <- zoo(jour_process, as.numeric(sapply(strsplit(rownames(jour_process), "x <= "), tail, 1)))
174
colnames(jour_process) <- c("Intercept", "Slope")
175
jour_point <- time(jour_process)[apply(coredata(abs(jour_process)), 2, which.max)]
176
177
## visualization of test in root node
178
mypanel <- function(x, y, subscripts, groups, panel = panel.xyplot,
179
  col = 1, type = "p", pch = 20, lty = 1, lwd = 1, ...)
180
{
181
  col <- rep(as.list(col), length = nlevels(groups))
182
  type <- rep(as.list(type), length = nlevels(groups))
183
  pch <- rep(as.list(pch), length = nlevels(groups))
184
  lty <- rep(as.list(lty), length = nlevels(groups))
185
  lwd <- rep(as.list(lwd), length = nlevels(groups))
186
187
  for(g in 1:nlevels(groups)) {
188
    idx <- g == groups[subscripts]
189
    if (any(idx)) panel(x[idx], y[idx], ...,
190
      col = col[[g]], type = type[[g]], pch = pch[[g]],
191
      lty = lty[[g]], lwd = lwd[[g]])
192
    grid::grid.lines(y = grid::unit(0, "native"), gp = grid::gpar(col = "gray"))
193
    grid::grid.lines(y = grid::unit(jour_critval, "native"), gp = grid::gpar(col = 2))
194
  }
195
}
196
print(xyplot(abs(jour_process), panel = mypanel,
197
  xlab = "Time", type = "b", ylim = c(0, 6), as.table = TRUE))
198
199
200
### fit node with h(Y) = score and g(X) = maxstat_trafo
201
ytrf <- function(data) {
202
    ret <- estfun(lm(data[[1]] ~ data[[2]]))
203
    attr(ret, "assign") <- 1:2
204
    ret
205
}
206
xtrf <- function(data) trafo(data, numeric_trafo = maxstat_trafo)
207
mynode <- function(data) {
208
  vars <- c("society", "pages", "charpp", "age")
209
  sapply(vars, function(v) {
210
    f <- as.formula(paste("log(subs) + log(price/citations) ~", v))
211
    it <- independence_test(f, data = data,
212
      ytrafo = ytrf, xtrafo = xtrf, distribution = approximate(9999))
213
    c(statistic(it), 1 - (1 - pvalue(it))^length(vars))
214
  })
215
}
216
217
## fit all tree elements
218
jour_node <- factor(Journals$age <= 18, levels = c(TRUE, FALSE), labels = c("Node 2", "Node 3"))
219
jour_lm2 <- lm(log(subs) ~ log(price/citations), data = Journals[jour_node == "Node 2",])
220
jour_lm3 <- lm(log(subs) ~ log(price/citations), data = Journals[jour_node == "Node 3",])
221
222
## conduct tests in all leaves
223
set.seed(rseed)
224
jour_tree <- list(
225
  mynode(Journals),
226
  mynode(Journals[jour_node == "Node 2",]),
227
  mynode(data = Journals[jour_node == "Node 3",])
228
)
229
230
## fitted models
231
plot(log(subs) ~ log(price/citations), data = Journals,
232
  xlab = "log(price/citations)", ylab = "log(subscriptions)",
233
  pch = c(24, 21)[jour_node], bg = hcl(c(0, 240), 50, 70)[jour_node])
234
abline(coef(jour_lm2), col = hcl(  0, 80, 30), lty = 5, lwd = 1.7)
235
abline(coef(jour_lm3), col = hcl(240, 80, 30), lty = 1, lwd = 1.7)
236
legend("bottomleft", c(expression(age > 18), expression(age <= 18)),
237
  pch = c(19, 17), lty = c(1, 5), col = hcl(c(240, 0), 80, 30), bty = "n")