atlantis / Git / Diff of /partyMod/demo/strucchange-perm.R

Models:
DanielG/
atlantis
Downloads: 1
Diff of /partyMod/demo/strucchange-perm.R [000000] .. [fbf06f]
Switch to side-by-side view

--- a
+++ b/partyMod/demo/strucchange-perm.R
@@ -0,0 +1,237 @@
+###################
+## Preliminaries ##
+###################
+
+## packages
+library("coin")
+library("strucchange")
+library("lattice")
+
+## random seed
+rseed <- 20061103
+
+## theme for lattice/trellis graphics
+trellis.par.set(theme = canonical.theme(color = FALSE))
+
+
+######################
+## Boston homicides ##
+######################
+
+## time series plot
+data("BostonHomicide", package = "strucchange")
+hom_month <- zoo(as.vector(BostonHomicide$homicides), as.yearmon(as.vector(time(BostonHomicide$homicides))))
+hom_year <- aggregate(hom_month, function(x) as.numeric(floor(x)), mean)
+plot(hom_month, col = grey(0.7), lwd = 2, ylab = "Number of homicides", xlab = "Time")
+lines(hom_year, type = "b")
+
+## monthly data
+hom_month <- data.frame(
+  log_homicides = log(coredata(hom_month) + 0.5),
+  time = as.numeric(time(hom_month)))
+
+## asymptotic unconditional test
+sctest(gefp(log_homicides ~ 1, data = hom_month), functional = supLM(0.1))
+
+## asymptotic conditional test
+set.seed(rseed)
+maxstat_test(log_homicides ~ time, data = hom_month, minprob = 0.1)
+
+## approximate conditional test
+set.seed(rseed)
+maxstat_test(log_homicides ~ time, data = hom_month, minprob = 0.1, distribution = approximate(9999))
+
+## annual data
+hom_year <- data.frame(
+  homicides = coredata(hom_year),
+  time = time(hom_year))
+
+## asymptotic unconditional test
+sctest(gefp(homicides ~ 1, data = hom_year), functional = supLM(0.1))
+
+## asymptotic conditional test
+set.seed(rseed)
+maxstat_test(homicides ~ time, data = hom_year, minprob = 0.1)
+
+## approximate conditional test
+set.seed(rseed)
+maxstat_test(homicides ~ time, data = hom_year, minprob = 0.1, distribution = approximate(9999))
+## note that the manuscript computes the exact conditional p-value
+## by exhaustive search (rather than approximating via 10,000 simulations)
+
+
+###########################
+## Hiring discrimination ##
+###########################
+
+## data
+hire <- cbind(c(2, 0, 0, 0, 5, 14), c(427, 86, 104, 180, 111, 59))
+hire <- data.frame(
+  resp = factor(unlist(sapply(1:nrow(hire), function(i) rep(c("female", "male"), hire[i,])))),
+  time = as.numeric(rep(1991:1996, rowSums(hire))))
+
+## visualization
+set.seed(rseed)
+maxstat_test(resp ~ time, data = hire, distribution = approximate(9999), minprob = 0.05)
+
+
+################
+## CO2 reflux ##
+################
+
+## data
+data("CWD", package = "coin")
+lwood <- reshape(CWD, varying = paste("sample", c(2:4,6:8), sep = ""),
+  timevar = "tree", direction = "long", v.names =  "sample")
+lwood$tree <- factor(lwood$tree)
+
+## visualization
+print(xyplot(sample ~ time | tree, data = lwood, type = "b",
+  scales = list(y = list(relation = "free")),
+  layout = c(3, 2), xlab = "Time", ylab = expression(paste(CO[2], " reflux")),
+  as.table = TRUE))
+
+## test
+(cwd_mt <- maxstat_test(sample2 + sample3 + sample4 + sample6 + sample7 + sample8 ~ trend, 
+  data = CWD, distribution = approximate(1e5)))
+
+## maximally selected statistics with 5% critical value
+cwd_st <- statistic(cwd_mt, "standardized")
+cwd_st <- data.frame(cwd_st, time = CWD$time[1] + CWD$trend[2:11])
+cwd_st <- reshape(cwd_st, varying = paste("sample", c(2:4,6:8), sep = ""),
+  timevar = "tree", direction = "long", v.names = "sample")
+cwd_st$tree <- factor(cwd_st$tree)
+cwd_q <- qperm(cwd_mt, 0.95)
+print(xyplot(sample ~ time | tree, data = cwd_st, type = "b",
+  panel = function(...) {
+    panel.xyplot(...)
+    panel.abline(h = c(-cwd_q, cwd_q), col = 2)
+    panel.abline(h = 0, col = "gray")
+  },
+  layout = c(3, 2), xlab = "Time", ylab = "Test statistics", ylim = c(-3.5, 3.5),
+  as.table = TRUE))
+
+
+##################################
+## Dow Jones Industrial Average ##
+##################################
+
+## data
+data("DJIA", package = "strucchange")
+djia <- diff(log(DJIA)) * 100
+djia_res <- coredata(djia - mean(djia))
+djia_trafo <- cbind(Intercept = djia_res, Variance = djia_res^2 - mean(djia_res^2))
+djia_time <- time(djia)
+
+## visualization
+plot(djia, xlab = "Time", ylab = "Dow Jones stock returns")
+
+## test
+set.seed(rseed)
+djia_mt <- maxstat_test(djia_trafo ~ as.numeric(djia_time), distribution = approximate(9999), minprob = 0.1)
+
+## maximally selected statistics
+apply(abs(statistic(djia_mt, "standardized")), 2, max)
+
+## critical values
+qperm(djia_mt, 0.95)
+
+## breakpoint
+djia_point <- djia_time[floor(length(djia) * 0.1) + which.max(abs(statistic(djia_mt, "standardized")[,2]))]
+
+## autocorrelation
+djia_pre  <- window(djia, end = djia_point)
+djia_post <- window(djia, start = djia_point + 1)
+ar1 <- function(x, digits = 3) {
+  x <- coredata(x)
+  x <- x - mean(x)
+  c(acf(x, plot = FALSE)$acf[2], acf(x^2, plot = FALSE)$acf[2])
+}
+ar1(djia_pre)
+ar1(djia_post)
+
+
+#######################
+## Economic journals ##
+#######################
+
+## data
+data("Journals", package = "AER")
+Journals$age <- 2000 - Journals$foundingyear
+Journals <- Journals[order(Journals$age),]
+
+## model in root node
+jour_lm  <- lm(log(subs) ~ log(price/citations), data = Journals)
+
+## test in root node
+set.seed(rseed)
+(jour_mt <- maxstat_test(estfun(jour_lm) ~ age, data = Journals, distribution = approximate(9999), minprob = 0.1))
+
+## test information
+jour_critval <- qperm(jour_mt, 0.95^(1/4))
+jour_process <- statistic(jour_mt, "standardized")
+jour_process <- zoo(jour_process, as.numeric(sapply(strsplit(rownames(jour_process), "x <= "), tail, 1)))
+colnames(jour_process) <- c("Intercept", "Slope")
+jour_point <- time(jour_process)[apply(coredata(abs(jour_process)), 2, which.max)]
+
+## visualization of test in root node
+mypanel <- function(x, y, subscripts, groups, panel = panel.xyplot,
+  col = 1, type = "p", pch = 20, lty = 1, lwd = 1, ...)
+{
+  col <- rep(as.list(col), length = nlevels(groups))
+  type <- rep(as.list(type), length = nlevels(groups))
+  pch <- rep(as.list(pch), length = nlevels(groups))
+  lty <- rep(as.list(lty), length = nlevels(groups))
+  lwd <- rep(as.list(lwd), length = nlevels(groups))
+
+  for(g in 1:nlevels(groups)) {
+    idx <- g == groups[subscripts]
+    if (any(idx)) panel(x[idx], y[idx], ...,
+      col = col[[g]], type = type[[g]], pch = pch[[g]],
+      lty = lty[[g]], lwd = lwd[[g]])
+    grid::grid.lines(y = grid::unit(0, "native"), gp = grid::gpar(col = "gray"))
+    grid::grid.lines(y = grid::unit(jour_critval, "native"), gp = grid::gpar(col = 2))
+  }
+}
+print(xyplot(abs(jour_process), panel = mypanel,
+  xlab = "Time", type = "b", ylim = c(0, 6), as.table = TRUE))
+
+
+### fit node with h(Y) = score and g(X) = maxstat_trafo
+ytrf <- function(data) {
+    ret <- estfun(lm(data[[1]] ~ data[[2]]))
+    attr(ret, "assign") <- 1:2
+    ret
+}
+xtrf <- function(data) trafo(data, numeric_trafo = maxstat_trafo)
+mynode <- function(data) {
+  vars <- c("society", "pages", "charpp", "age")
+  sapply(vars, function(v) {
+    f <- as.formula(paste("log(subs) + log(price/citations) ~", v))
+    it <- independence_test(f, data = data,
+      ytrafo = ytrf, xtrafo = xtrf, distribution = approximate(9999))
+    c(statistic(it), 1 - (1 - pvalue(it))^length(vars))
+  })
+}
+
+## fit all tree elements
+jour_node <- factor(Journals$age <= 18, levels = c(TRUE, FALSE), labels = c("Node 2", "Node 3"))
+jour_lm2 <- lm(log(subs) ~ log(price/citations), data = Journals[jour_node == "Node 2",])
+jour_lm3 <- lm(log(subs) ~ log(price/citations), data = Journals[jour_node == "Node 3",])
+
+## conduct tests in all leaves
+set.seed(rseed)
+jour_tree <- list(
+  mynode(Journals),
+  mynode(Journals[jour_node == "Node 2",]),
+  mynode(data = Journals[jour_node == "Node 3",])
+)
+
+## fitted models
+plot(log(subs) ~ log(price/citations), data = Journals,
+  xlab = "log(price/citations)", ylab = "log(subscriptions)",
+  pch = c(24, 21)[jour_node], bg = hcl(c(0, 240), 50, 70)[jour_node])
+abline(coef(jour_lm2), col = hcl(  0, 80, 30), lty = 5, lwd = 1.7)
+abline(coef(jour_lm3), col = hcl(240, 80, 30), lty = 1, lwd = 1.7)
+legend("bottomleft", c(expression(age > 18), expression(age <= 18)),
+  pch = c(19, 17), lty = c(1, 5), col = hcl(c(240, 0), 80, 30), bty = "n")