|
a |
|
b/partyMod/demo/strucchange-perm.R |
|
|
1 |
################### |
|
|
2 |
## Preliminaries ## |
|
|
3 |
################### |
|
|
4 |
|
|
|
5 |
## packages |
|
|
6 |
library("coin") |
|
|
7 |
library("strucchange") |
|
|
8 |
library("lattice") |
|
|
9 |
|
|
|
10 |
## random seed |
|
|
11 |
rseed <- 20061103 |
|
|
12 |
|
|
|
13 |
## theme for lattice/trellis graphics |
|
|
14 |
trellis.par.set(theme = canonical.theme(color = FALSE)) |
|
|
15 |
|
|
|
16 |
|
|
|
17 |
###################### |
|
|
18 |
## Boston homicides ## |
|
|
19 |
###################### |
|
|
20 |
|
|
|
21 |
## time series plot |
|
|
22 |
data("BostonHomicide", package = "strucchange") |
|
|
23 |
hom_month <- zoo(as.vector(BostonHomicide$homicides), as.yearmon(as.vector(time(BostonHomicide$homicides)))) |
|
|
24 |
hom_year <- aggregate(hom_month, function(x) as.numeric(floor(x)), mean) |
|
|
25 |
plot(hom_month, col = grey(0.7), lwd = 2, ylab = "Number of homicides", xlab = "Time") |
|
|
26 |
lines(hom_year, type = "b") |
|
|
27 |
|
|
|
28 |
## monthly data |
|
|
29 |
hom_month <- data.frame( |
|
|
30 |
log_homicides = log(coredata(hom_month) + 0.5), |
|
|
31 |
time = as.numeric(time(hom_month))) |
|
|
32 |
|
|
|
33 |
## asymptotic unconditional test |
|
|
34 |
sctest(gefp(log_homicides ~ 1, data = hom_month), functional = supLM(0.1)) |
|
|
35 |
|
|
|
36 |
## asymptotic conditional test |
|
|
37 |
set.seed(rseed) |
|
|
38 |
maxstat_test(log_homicides ~ time, data = hom_month, minprob = 0.1) |
|
|
39 |
|
|
|
40 |
## approximate conditional test |
|
|
41 |
set.seed(rseed) |
|
|
42 |
maxstat_test(log_homicides ~ time, data = hom_month, minprob = 0.1, distribution = approximate(9999)) |
|
|
43 |
|
|
|
44 |
## annual data |
|
|
45 |
hom_year <- data.frame( |
|
|
46 |
homicides = coredata(hom_year), |
|
|
47 |
time = time(hom_year)) |
|
|
48 |
|
|
|
49 |
## asymptotic unconditional test |
|
|
50 |
sctest(gefp(homicides ~ 1, data = hom_year), functional = supLM(0.1)) |
|
|
51 |
|
|
|
52 |
## asymptotic conditional test |
|
|
53 |
set.seed(rseed) |
|
|
54 |
maxstat_test(homicides ~ time, data = hom_year, minprob = 0.1) |
|
|
55 |
|
|
|
56 |
## approximate conditional test |
|
|
57 |
set.seed(rseed) |
|
|
58 |
maxstat_test(homicides ~ time, data = hom_year, minprob = 0.1, distribution = approximate(9999)) |
|
|
59 |
## note that the manuscript computes the exact conditional p-value |
|
|
60 |
## by exhaustive search (rather than approximating via 10,000 simulations) |
|
|
61 |
|
|
|
62 |
|
|
|
63 |
########################### |
|
|
64 |
## Hiring discrimination ## |
|
|
65 |
########################### |
|
|
66 |
|
|
|
67 |
## data |
|
|
68 |
hire <- cbind(c(2, 0, 0, 0, 5, 14), c(427, 86, 104, 180, 111, 59)) |
|
|
69 |
hire <- data.frame( |
|
|
70 |
resp = factor(unlist(sapply(1:nrow(hire), function(i) rep(c("female", "male"), hire[i,])))), |
|
|
71 |
time = as.numeric(rep(1991:1996, rowSums(hire)))) |
|
|
72 |
|
|
|
73 |
## visualization |
|
|
74 |
set.seed(rseed) |
|
|
75 |
maxstat_test(resp ~ time, data = hire, distribution = approximate(9999), minprob = 0.05) |
|
|
76 |
|
|
|
77 |
|
|
|
78 |
################ |
|
|
79 |
## CO2 reflux ## |
|
|
80 |
################ |
|
|
81 |
|
|
|
82 |
## data |
|
|
83 |
data("CWD", package = "coin") |
|
|
84 |
lwood <- reshape(CWD, varying = paste("sample", c(2:4,6:8), sep = ""), |
|
|
85 |
timevar = "tree", direction = "long", v.names = "sample") |
|
|
86 |
lwood$tree <- factor(lwood$tree) |
|
|
87 |
|
|
|
88 |
## visualization |
|
|
89 |
print(xyplot(sample ~ time | tree, data = lwood, type = "b", |
|
|
90 |
scales = list(y = list(relation = "free")), |
|
|
91 |
layout = c(3, 2), xlab = "Time", ylab = expression(paste(CO[2], " reflux")), |
|
|
92 |
as.table = TRUE)) |
|
|
93 |
|
|
|
94 |
## test |
|
|
95 |
(cwd_mt <- maxstat_test(sample2 + sample3 + sample4 + sample6 + sample7 + sample8 ~ trend, |
|
|
96 |
data = CWD, distribution = approximate(1e5))) |
|
|
97 |
|
|
|
98 |
## maximally selected statistics with 5% critical value |
|
|
99 |
cwd_st <- statistic(cwd_mt, "standardized") |
|
|
100 |
cwd_st <- data.frame(cwd_st, time = CWD$time[1] + CWD$trend[2:11]) |
|
|
101 |
cwd_st <- reshape(cwd_st, varying = paste("sample", c(2:4,6:8), sep = ""), |
|
|
102 |
timevar = "tree", direction = "long", v.names = "sample") |
|
|
103 |
cwd_st$tree <- factor(cwd_st$tree) |
|
|
104 |
cwd_q <- qperm(cwd_mt, 0.95) |
|
|
105 |
print(xyplot(sample ~ time | tree, data = cwd_st, type = "b", |
|
|
106 |
panel = function(...) { |
|
|
107 |
panel.xyplot(...) |
|
|
108 |
panel.abline(h = c(-cwd_q, cwd_q), col = 2) |
|
|
109 |
panel.abline(h = 0, col = "gray") |
|
|
110 |
}, |
|
|
111 |
layout = c(3, 2), xlab = "Time", ylab = "Test statistics", ylim = c(-3.5, 3.5), |
|
|
112 |
as.table = TRUE)) |
|
|
113 |
|
|
|
114 |
|
|
|
115 |
################################## |
|
|
116 |
## Dow Jones Industrial Average ## |
|
|
117 |
################################## |
|
|
118 |
|
|
|
119 |
## data |
|
|
120 |
data("DJIA", package = "strucchange") |
|
|
121 |
djia <- diff(log(DJIA)) * 100 |
|
|
122 |
djia_res <- coredata(djia - mean(djia)) |
|
|
123 |
djia_trafo <- cbind(Intercept = djia_res, Variance = djia_res^2 - mean(djia_res^2)) |
|
|
124 |
djia_time <- time(djia) |
|
|
125 |
|
|
|
126 |
## visualization |
|
|
127 |
plot(djia, xlab = "Time", ylab = "Dow Jones stock returns") |
|
|
128 |
|
|
|
129 |
## test |
|
|
130 |
set.seed(rseed) |
|
|
131 |
djia_mt <- maxstat_test(djia_trafo ~ as.numeric(djia_time), distribution = approximate(9999), minprob = 0.1) |
|
|
132 |
|
|
|
133 |
## maximally selected statistics |
|
|
134 |
apply(abs(statistic(djia_mt, "standardized")), 2, max) |
|
|
135 |
|
|
|
136 |
## critical values |
|
|
137 |
qperm(djia_mt, 0.95) |
|
|
138 |
|
|
|
139 |
## breakpoint |
|
|
140 |
djia_point <- djia_time[floor(length(djia) * 0.1) + which.max(abs(statistic(djia_mt, "standardized")[,2]))] |
|
|
141 |
|
|
|
142 |
## autocorrelation |
|
|
143 |
djia_pre <- window(djia, end = djia_point) |
|
|
144 |
djia_post <- window(djia, start = djia_point + 1) |
|
|
145 |
ar1 <- function(x, digits = 3) { |
|
|
146 |
x <- coredata(x) |
|
|
147 |
x <- x - mean(x) |
|
|
148 |
c(acf(x, plot = FALSE)$acf[2], acf(x^2, plot = FALSE)$acf[2]) |
|
|
149 |
} |
|
|
150 |
ar1(djia_pre) |
|
|
151 |
ar1(djia_post) |
|
|
152 |
|
|
|
153 |
|
|
|
154 |
####################### |
|
|
155 |
## Economic journals ## |
|
|
156 |
####################### |
|
|
157 |
|
|
|
158 |
## data |
|
|
159 |
data("Journals", package = "AER") |
|
|
160 |
Journals$age <- 2000 - Journals$foundingyear |
|
|
161 |
Journals <- Journals[order(Journals$age),] |
|
|
162 |
|
|
|
163 |
## model in root node |
|
|
164 |
jour_lm <- lm(log(subs) ~ log(price/citations), data = Journals) |
|
|
165 |
|
|
|
166 |
## test in root node |
|
|
167 |
set.seed(rseed) |
|
|
168 |
(jour_mt <- maxstat_test(estfun(jour_lm) ~ age, data = Journals, distribution = approximate(9999), minprob = 0.1)) |
|
|
169 |
|
|
|
170 |
## test information |
|
|
171 |
jour_critval <- qperm(jour_mt, 0.95^(1/4)) |
|
|
172 |
jour_process <- statistic(jour_mt, "standardized") |
|
|
173 |
jour_process <- zoo(jour_process, as.numeric(sapply(strsplit(rownames(jour_process), "x <= "), tail, 1))) |
|
|
174 |
colnames(jour_process) <- c("Intercept", "Slope") |
|
|
175 |
jour_point <- time(jour_process)[apply(coredata(abs(jour_process)), 2, which.max)] |
|
|
176 |
|
|
|
177 |
## visualization of test in root node |
|
|
178 |
mypanel <- function(x, y, subscripts, groups, panel = panel.xyplot, |
|
|
179 |
col = 1, type = "p", pch = 20, lty = 1, lwd = 1, ...) |
|
|
180 |
{ |
|
|
181 |
col <- rep(as.list(col), length = nlevels(groups)) |
|
|
182 |
type <- rep(as.list(type), length = nlevels(groups)) |
|
|
183 |
pch <- rep(as.list(pch), length = nlevels(groups)) |
|
|
184 |
lty <- rep(as.list(lty), length = nlevels(groups)) |
|
|
185 |
lwd <- rep(as.list(lwd), length = nlevels(groups)) |
|
|
186 |
|
|
|
187 |
for(g in 1:nlevels(groups)) { |
|
|
188 |
idx <- g == groups[subscripts] |
|
|
189 |
if (any(idx)) panel(x[idx], y[idx], ..., |
|
|
190 |
col = col[[g]], type = type[[g]], pch = pch[[g]], |
|
|
191 |
lty = lty[[g]], lwd = lwd[[g]]) |
|
|
192 |
grid::grid.lines(y = grid::unit(0, "native"), gp = grid::gpar(col = "gray")) |
|
|
193 |
grid::grid.lines(y = grid::unit(jour_critval, "native"), gp = grid::gpar(col = 2)) |
|
|
194 |
} |
|
|
195 |
} |
|
|
196 |
print(xyplot(abs(jour_process), panel = mypanel, |
|
|
197 |
xlab = "Time", type = "b", ylim = c(0, 6), as.table = TRUE)) |
|
|
198 |
|
|
|
199 |
|
|
|
200 |
### fit node with h(Y) = score and g(X) = maxstat_trafo |
|
|
201 |
ytrf <- function(data) { |
|
|
202 |
ret <- estfun(lm(data[[1]] ~ data[[2]])) |
|
|
203 |
attr(ret, "assign") <- 1:2 |
|
|
204 |
ret |
|
|
205 |
} |
|
|
206 |
xtrf <- function(data) trafo(data, numeric_trafo = maxstat_trafo) |
|
|
207 |
mynode <- function(data) { |
|
|
208 |
vars <- c("society", "pages", "charpp", "age") |
|
|
209 |
sapply(vars, function(v) { |
|
|
210 |
f <- as.formula(paste("log(subs) + log(price/citations) ~", v)) |
|
|
211 |
it <- independence_test(f, data = data, |
|
|
212 |
ytrafo = ytrf, xtrafo = xtrf, distribution = approximate(9999)) |
|
|
213 |
c(statistic(it), 1 - (1 - pvalue(it))^length(vars)) |
|
|
214 |
}) |
|
|
215 |
} |
|
|
216 |
|
|
|
217 |
## fit all tree elements |
|
|
218 |
jour_node <- factor(Journals$age <= 18, levels = c(TRUE, FALSE), labels = c("Node 2", "Node 3")) |
|
|
219 |
jour_lm2 <- lm(log(subs) ~ log(price/citations), data = Journals[jour_node == "Node 2",]) |
|
|
220 |
jour_lm3 <- lm(log(subs) ~ log(price/citations), data = Journals[jour_node == "Node 3",]) |
|
|
221 |
|
|
|
222 |
## conduct tests in all leaves |
|
|
223 |
set.seed(rseed) |
|
|
224 |
jour_tree <- list( |
|
|
225 |
mynode(Journals), |
|
|
226 |
mynode(Journals[jour_node == "Node 2",]), |
|
|
227 |
mynode(data = Journals[jour_node == "Node 3",]) |
|
|
228 |
) |
|
|
229 |
|
|
|
230 |
## fitted models |
|
|
231 |
plot(log(subs) ~ log(price/citations), data = Journals, |
|
|
232 |
xlab = "log(price/citations)", ylab = "log(subscriptions)", |
|
|
233 |
pch = c(24, 21)[jour_node], bg = hcl(c(0, 240), 50, 70)[jour_node]) |
|
|
234 |
abline(coef(jour_lm2), col = hcl( 0, 80, 30), lty = 5, lwd = 1.7) |
|
|
235 |
abline(coef(jour_lm3), col = hcl(240, 80, 30), lty = 1, lwd = 1.7) |
|
|
236 |
legend("bottomleft", c(expression(age > 18), expression(age <= 18)), |
|
|
237 |
pch = c(19, 17), lty = c(1, 5), col = hcl(c(240, 0), 80, 30), bty = "n") |