outbreaker2 / Git / Diff of /R/outbreaker_chains

Models:
MarcoTheBlack/
outbreaker2
Downloads: 1
Diff of /R/outbreaker_chains_methods.R [000000] .. [dfe06d]
Switch to side-by-side view

--- a
+++ b/R/outbreaker_chains_methods.R
@@ -0,0 +1,486 @@
+#' Basic methods for processing outbreaker results
+#'
+#' Several methods are defined for instances of the class
+#' \code{outbreaker_chains}, returned by \code{\link{outbreaker}}, including:
+#' \code{print}, \code{plot}, \code{summary}
+#'
+#' @rdname outbreaker_chains
+#'
+#' @aliases outbreaker_chains print.outbreaker_chains plot.outbreaker_chains
+#'   summary.outbreaker_chains
+#'
+#' @author Thibaut Jombart (\email{thibautjombart@@gmail.com}).
+#'
+#' @param x an \code{outbreaker_chains} object as returned by \code{outbreaker}.
+#' @param n_row the number of rows to display in head and tail; defaults to 3.
+#' @param n_col the number of columns to display; defaults to 8.
+#' @param ... further arguments to be passed to other methods.
+#'
+#' @export
+#' @importFrom utils head tail
+#'
+print.outbreaker_chains <- function(x, n_row = 3, n_col = 8, ...) {
+  cat("\n\n ///// outbreaker results ///\n")
+  cat("\nclass: ", class(x))
+  cat("\ndimensions", nrow(x), "rows, ", ncol(x), "columns")
+
+  ## process names of variables not shown
+  if (ncol(x) > n_col) {
+    ori_names <- names(x)
+    x <- x[, seq_len(min(n_col, ncol(x)))]
+
+    not_shown <- setdiff(ori_names, names(x))
+
+    alpha_txt <- paste(not_shown[range(grep("alpha", not_shown))], collapse=" - ")
+    t_inf_txt <- paste(not_shown[range(grep("t_inf", not_shown))], collapse=" - ")
+    kappa_txt <- paste(not_shown[range(grep("kappa", not_shown))], collapse=" - ")
+
+    cat("\nancestries not shown:", alpha_txt)
+    cat("\ninfection dates not shown:", t_inf_txt)
+    cat("\nintermediate generations not shown:", kappa_txt)
+  }
+
+  ## heads and tails
+  cat("\n\n/// head //\n")
+  print(head(as.data.frame(x), n_row))
+  cat("\n...")
+  cat("\n/// tail //\n")
+  print(tail(as.data.frame(x), n_row))
+}
+
+
+
+
+
+#' @rdname outbreaker_chains
+#'
+#' @param y a character string indicating which element of an
+#'   \code{outbreaker_chains} object to plot.
+#'
+#' @param type a character string indicating the kind of plot to be used (see details).
+#' 
+#' @param group a vector of character strings indicating the parameters to display, 
+#' or "all" to display all global parameters (non node-specific parameters).
+#'
+#' @param burnin the number of iterations to be discarded as burnin.
+#'
+#' @param min_support a number between 0 and 1 indicating the minimum support of
+#' ancestries to be plotted; only used if 'type' is 'network'.
+#'
+#' @param labels a vector of length N indicating the case labels (must be
+#'   provided in the same order used for dates of symptom onset).
+#'
+## #' @param dens_all a logical indicating if the overal density computed over
+## all runs should be displayed; defaults to TRUE #' @param col the colors to be
+## used for different runs.
+#'
+#' @export
+#'
+#' @seealso See \href{http://www.repidemicsconsortium.org/outbreaker2/articles/introduction.html#graphics}{introduction vignette} for detailed examples on how to visualise \code{outbreaker_chains} objects.
+#'
+#' @details \code{type} indicates the type of graphic to plot:
+#'
+#' \itemize{
+#'
+#' \item \code{trace} to visualise MCMC traces for parameters or augmented data (plots the
+#' log-likelihood by default)
+#'
+#' \item \code{hist} to plot histograms of quantitative values
+#'
+#' \item \code{density} to plot kernel density estimations of quantitative values
+#'
+#' \item \code{alpha} to visualise the posterior frequency of ancestries
+#'
+#' \item \code{network} to visualise the transmission tree; note that
+#'  this opens up an interactive plot and requires a web browser with
+#'  Javascript enabled; the argument `min_support` is useful to select only the
+#'  most supported ancestries and avoid displaying too many links
+#'
+#' \item \code{kappa} to visualise the distributions generations between cases and their
+#' ancestor/infector
+#'
+#' }
+#'
+#' @importFrom ggplot2 ggplot geom_line geom_point geom_histogram geom_density
+#'   geom_violin aes aes_string coord_flip labs guides scale_size_area
+#'   scale_x_discrete scale_y_discrete scale_color_manual scale_fill_manual
+#'   scale_x_continuous scale_y_continuous theme_bw facet_wrap
+#'
+#' @importFrom stats reshape
+#' @importFrom grDevices xyTable
+#' @importFrom graphics plot
+#'
+plot.outbreaker_chains <- function(x, y = "post",
+                                   type = c("trace", "hist", "density",
+                                            "alpha", "t_inf", "kappa", "network"),
+                                   group = NULL, 
+                                   burnin = 0, min_support = 0.1, labels = NULL, ...) {
+
+  ## CHECKS ##
+  type <- match.arg(type)
+  if (!y %in% names(x)) {
+    stop(paste(y,"is not a column of x"))
+  }
+
+  ## THIS IS JUST TO APPEASE R CMD check
+
+  ## hopefully cran will avoid spurious warnings along the lines of "no
+  ## visible binding for global variable" when using ggplot2::aes(...)
+
+  frequency <- NULL
+
+  ## GET DATA TO PLOT ##
+  if (burnin > max(x$step)) {
+    stop("burnin exceeds the number of steps in x")
+  }
+  x <- x[x$step>burnin,,drop = FALSE]
+
+  ## check group
+  if(!is.null(group)) {
+    if(length(group) == 1L && group == "all") {
+      ##remove _[digit] vars
+      y_vars = names(x)[!grepl("(_[[:digit:]]+$)", names(x))]
+    } else if(all(group %in% names(x))) {
+      y_vars = c("step", group)
+    } else {
+      stop("grouping variables not found in outbreaker object")
+    }
+    ## get only relevant data
+    x_sub = as.data.frame(x)[,y_vars]
+    ## switch it to long format to use in ggplot
+    x_long = reshape(x_sub, 
+                     idvar = "step", 
+                     ids = x_sub$step,
+                     direction = "long", 
+                     new.row.names = NULL,
+                     timevar = "Parameters", 
+                     v.names = "y",
+                     varying = list(names(x_sub)[2:ncol(x_sub)]), 
+                     times = names(x_sub)[2:ncol(x_sub)])
+  }
+
+  ## MAKE PLOT ##
+  if (type == "trace") {
+    if (!is.null(group)) {
+      out <- ggplot(x_long) +
+        geom_line(aes_string(x = "step", y = "y")) +
+        scale_x_continuous(name = "Iteration") + 
+        scale_y_continuous(name = NULL) + 
+        facet_wrap(~ Parameters, scales = "free") 
+    } else {
+      out <- ggplot(x) +
+        geom_line(aes_string(x = "step", y = y)) +
+        labs(x = "Iteration",
+             y = y,
+             title = paste("trace:",y))
+    }
+  }
+
+  if (type == "hist") {
+    if (!is.null(group)) {
+      out <- ggplot(x_long) +
+        geom_histogram(aes_string(x = "y")) +
+        geom_point(aes_string(x = "y", y = 0),
+                   shape="|",
+                   alpha = 0.5,
+                   size = 3) +
+        scale_x_continuous(name = NULL) + 
+        scale_y_continuous(name = NULL) + 
+        facet_wrap(~Parameters, scales = "free") 
+    } else {
+      out <- ggplot(x) +
+        geom_histogram(aes_string(x = y)) +
+        geom_point(aes_string(x = y, y = 0),
+                   shape="|",
+                   alpha = 0.5,
+                   size = 3) +
+        labs(x = y,
+             title = paste("histogram:",y))
+    }
+  }
+
+  if (type == "density") {
+    if (!is.null(group)) {
+      out <- ggplot(x_long) +
+        geom_density(aes_string(x = "y")) +
+        geom_point(aes_string(x = "y", y = 0),
+                   shape="|",
+                   alpha = 0.5,
+                   size = 3) +
+        scale_x_continuous(name = NULL) + 
+        scale_y_continuous(name = NULL) + 
+        facet_wrap(~Parameters, scales = "free") 
+    } else {
+      out <- ggplot(x) +
+        geom_density(aes_string(x = y)) +
+        geom_point(aes_string(x = y, y = 0),
+                   shape="|",
+                   alpha = 0.5,
+                   size = 3) +
+        labs(x = y,
+             title = paste("density:",y))
+    }
+  }
+
+  if (type == "alpha") {
+    alpha <- as.matrix(x[,grep("alpha", names(x))])
+    colnames(alpha) <- seq_len(ncol(alpha))
+    from <- as.vector(alpha)
+    to <- as.vector(col(alpha))
+    from[is.na(from)] <- 0
+    out_dat <- data.frame(xyTable(from,to))
+    names(out_dat) <- c("from", "to", "frequency")
+    ## Calculate proportion among ancestries
+    get_prop <- function(i) {
+        ind <- which(out_dat$to == out_dat$to[i])
+        out_dat[[3]][i]/sum(out_dat[[3]][ind])
+    }
+    ## Return labels, if provided
+    get_alpha_lab <- function(axis, labels = NULL) {
+      if(is.null(labels)) labels <- seq_len(ncol(alpha))
+      if(axis == 'x') return(labels) else
+      if(axis == 'y') return(c("Import", labels))
+    }
+    ## Return custom colors if provided
+    get_alpha_color <- function(color = NULL) {
+      if(is.null(color)) return(NULL)
+      else return(scale_color_manual(values = color))
+    }
+    ## This joining function is needed so that the '...' argument can be passed
+    ## to two functions with different arguments
+    get_lab_color <- function(labels = NULL, color = NULL) {
+      list(alpha_lab_x = get_alpha_lab('x', labels),
+           alpha_lab_y = get_alpha_lab('y', labels),
+           alpha_color = get_alpha_color(color))
+    }
+
+    tmp <- get_lab_color(labels, ...)
+    out_dat[3] <- vapply(seq_along(out_dat[[3]]), get_prop, 1)
+    out_dat$from <- factor(out_dat$from, levels = c(0, sort(unique(out_dat$to))))
+    out_dat$to <- factor(out_dat$to, levels = sort(unique(out_dat$to)))
+    out <- ggplot(out_dat) +
+      geom_point(aes(x = to, y = from, size = frequency, color = to)) +
+      scale_x_discrete(drop = FALSE, labels = tmp$alpha_lab_x) +
+      scale_y_discrete(drop = FALSE, labels = tmp$alpha_lab_y) +
+      labs(x = 'To', y = 'From', size = 'Posterior\nfrequency') +
+      tmp$alpha_color +
+      scale_size_area() +
+      guides(colour = "none")
+  }
+
+  if (type == "t_inf") {
+    get_t_inf_lab <- function(labels = NULL) {
+      N <- ncol(t_inf)
+      if(is.null(labels)) labels <- 1:N
+      return(labels)
+    }
+    ## Return custom colors if provided
+    get_t_inf_color <- function(color = NULL) {
+      if(is.null(color)) return(NULL)
+      else return(scale_fill_manual(values = color))
+    }
+    ## This joining function is needed so that the '...' argument can be passed
+    ## to two functions with different arguments
+    get_lab_color <- function(labels = NULL, color = NULL) {
+      list(t_inf_lab_x = get_t_inf_lab(labels),
+           t_inf_color = get_t_inf_color(color))
+    }
+
+    t_inf <- as.matrix(x[,grep("t_inf", names(x))])
+    tmp <- get_lab_color(labels, ...)
+    dates <- as.vector(t_inf)
+    cases <- as.vector(col(t_inf))
+    out_dat <- data.frame(cases = factor(cases), dates = dates)
+    out <- ggplot(out_dat) +
+      geom_violin(aes(x = cases, y = dates, fill = cases)) +
+      coord_flip() + 
+      guides(fill = "none") +
+      labs(y = 'Infection time', x = NULL) +
+      tmp$t_inf_color +
+      scale_x_discrete(labels = tmp$t_inf_lab)
+  }
+
+  if (type == "kappa") {
+    get_kappa_lab <- function(labels = NULL) {
+      N <- ncol(kappa)
+      if(is.null(labels)) labels <- 1:N
+      return(labels)
+    }
+    kappa <- as.matrix(x[,grep("kappa", names(x))])
+    generations <- as.vector(kappa)
+    cases <- as.vector(col(kappa))
+    to_keep <- !is.na(generations)
+    generations <- generations[to_keep]
+    cases <- cases[to_keep]
+    out_dat <- data.frame(xyTable(generations, cases))
+    get_prop <- function(i) {
+        ind <- which(out_dat$y == out_dat$y[i])
+        out_dat[[3]][i]/sum(out_dat[[3]][ind])
+    }
+    out_dat[3] <- vapply(seq_along(out_dat[[3]]), get_prop, 1)
+    names(out_dat) <- c("generations", "cases", "frequency")
+    out <- ggplot(out_dat) +
+      geom_point(aes(x = generations, y = as.factor(cases), size = frequency, color = factor(cases))) +
+      scale_size_area() +
+      scale_y_discrete(labels = get_kappa_lab(labels)) +
+      guides(colour = "none") +
+      labs(title = "number of generations between cases",
+           x = "number of generations to ancestor",
+           y = NULL)
+  }
+
+  if (type == "network") {
+    ## extract edge info: ancestries
+    alpha <- x[, grep("alpha",names(x)), drop = FALSE]
+    from <- unlist(alpha)
+    to <- as.vector(col(alpha))
+    N <- ncol(alpha)
+    edges <- stats::na.omit(data.frame(xyTable(from, to)))
+    edges[3] <- edges$number/nrow(alpha)
+    names(edges) <- c("from", "to", "value")
+    edges <- edges[edges$value > min_support,,drop = FALSE]
+    edges$arrows <- "to"
+    case_cols <- cases_pal(N)
+    edges$color <- case_cols[edges$from]
+
+
+    ## ## extract edge info: timing
+    ## t_inf <- x[, grep("t_inf",names(x)), drop = FALSE]
+    ## mean_time <- apply(t_inf, 2, mean)
+    ## mean_delay <- mean_time[edges$to] - mean_time[edges$from]
+    ## mean_delay[mean_delay<1] <- 1
+    ## edges$label <- paste(round(mean_delay), "days")
+
+    ## node info
+    find_nodes_size <- function(i) {
+      sum(from==i, na.rm = TRUE) / nrow(alpha)
+    }
+    get_node_lab <- function(labels = NULL) {
+      if(is.null(labels)) labels <- 1:N
+      return(labels)
+    }
+    nodes <- data.frame(id = seq_len(ncol(alpha)),
+                        label = seq_len(ncol(alpha)))
+    nodes$value <- vapply(nodes$id,
+                          find_nodes_size,
+                          numeric(1))
+    nodes$color <- case_cols
+    nodes$shape <- rep("dot", N)
+    nodes$label <- get_node_lab(labels)
+
+    smry <- summary(x, burnin = burnin)
+    is_imported <- is.na(smry$tree$from)
+    nodes$shaped[is_imported] <- "star"
+
+    ## generate graph
+    out <- visNetwork::visNetwork(nodes = nodes, edges = edges, ...)
+    out <- visNetwork::visNodes(out, shadow = list(enabled = TRUE, size = 10),
+                                color = list(highlight = "red"))
+    out <- visNetwork::visEdges(out, arrows = list(
+      to = list(enabled = TRUE, scaleFactor = 0.2)),
+      color = list(highlight = "red"))
+
+  }
+
+  return(out)
+}
+
+
+
+
+#' @rdname outbreaker_chains
+#' @param object an \code{outbreaker_chains} object as returned by \code{outbreaker}.
+#'
+#' @param method the method used to determine consensus ancestries. 'mpa'
+#'   (maximum posterior ancestry) simply returns the posterior ancestry with the
+#'   highest posterior support for each case, even if this includes
+#'   cycles. 'decycle' will return the maximum posterior ancestry, except when
+#'   cycles are detected, in which case the link in the cycle with the lowest
+#'   support is pruned and the tree recalculated.
+#'
+#' @export
+#' @importFrom stats median
+summary.outbreaker_chains <- function(object, burnin = 0, method = c("mpa", "decycle"), ...) {
+  ## check burnin ##
+  x <- object
+  if (burnin > max(x$step)) {
+    stop("burnin exceeds the number of steps in object")
+  }
+  x <- x[x$step>burnin,,drop = FALSE]
+
+
+  ## make output ##
+  out <- list()
+
+
+  ## summary for $step ##
+  interv <- ifelse(nrow(x)>2, diff(tail(x$step, 2)), NA)
+  out$step <- c(first = min(x$step),
+                last = max(x$step),
+                interval = interv,
+                n_steps = length(x$step)
+                )
+
+
+  ## summary of post, like, prior ##
+  out$post <- summary(x$post)
+  out$like <- summary(x$like)
+  out$prior <- summary(x$prior)
+
+
+  ## summary for mu ##
+  out$mu <- summary(x$mu)
+
+  ## summary for pi ##
+  out$pi <- summary(x$pi)
+
+
+  ## summary tree ##
+  out$tree <- list()
+
+  ## summary of alpha ##
+  alpha <- as.matrix(x[,grep("alpha", names(x))])
+
+  method <- match.arg(method)
+
+  if(method == 'mpa') {
+
+    ## function to get most frequent item
+    f1 <- function(x) {
+      as.integer(names(sort(table(x, exclude = NULL), decreasing = TRUE)[1]))
+    }
+    out$tree$from <- apply(alpha, 2, f1)
+    out$tree$to <- seq_len(ncol(alpha))
+
+    ## function to get frequency of most frequent item
+    f2 <- function(x) {
+      (sort(table(x), decreasing = TRUE)/length(x))[1]
+    }
+    support <- apply(alpha, 2, f2)
+
+  } else if(method == 'decycle') {
+
+    cons <- .decycle_tree(x)
+    out$tree$from <- cons$from
+    out$tree$to <- cons$to
+    support <- cons$support
+
+  }
+
+  ## summary of t_inf ##
+  t_inf <- as.matrix(x[,grep("t_inf", names(x))])
+  out$tree$time <- apply(t_inf, 2, median)
+
+  out$tree$support <- support
+
+  ## summary of kappa ##
+  kappa <- as.matrix(x[,grep("kappa", names(x))])
+  out$tree$generations <- apply(kappa, 2, median, na.rm = TRUE)
+  out$tree$generations[is.na(out$tree$from)] <- NA
+
+  ## shape tree as a data.frame
+  out$tree <- as.data.frame(out$tree)
+  rownames(out$tree) <- NULL
+
+  return(out)
+}