biotmle / Git / Diff of /R/plots.R

Models:
DanielG/
biotmle
Downloads: 3
Diff of /R/plots.R [000000] .. [efa494]
Switch to side-by-side view

--- a
+++ b/R/plots.R
@@ -0,0 +1,267 @@
+#' Plot p-values from moderated statistical tests for class biotmle
+#'
+#' Histogram of raw or FDR-adjusted p-values from the moderated t-test.
+#'
+#' @param x object of class \code{biotmle} as produced by an appropriate call
+#'  to \code{biomarkertmle}
+#' @param type character describing whether to provide a plot of unadjusted or
+#'  adjusted p-values (adjustment performed via Benjamini-Hochberg)
+#' @param ... additional arguments passed \code{plot} as necessary
+#'
+#' @importFrom ggplot2 ggplot aes geom_histogram guides guide_legend xlab ylab
+#'  ggtitle theme_bw
+#'
+#' @return object of class \code{ggplot} containing a histogram of the raw or
+#'  Benjamini-Hochberg corrected p-values (depending on user input).
+#'
+#' @export
+#'
+#' @method plot bioTMLE
+#'
+#' @examples
+#' \dontrun{
+#' library(dplyr)
+#' library(biotmleData)
+#' library(SuperLearner)
+#' library(SummarizedExperiment)
+#' data(illuminaData)
+#'
+#' colData(illuminaData) <- colData(illuminaData) %>%
+#'   data.frame() %>%
+#'   mutate(age = as.numeric(age > median(age))) %>%
+#'   DataFrame()
+#' benz_idx <- which(names(colData(illuminaData)) %in% "benzene")
+#'
+#' biomarkerTMLEout <- biomarkertmle(
+#'   se = illuminaData,
+#'   varInt = benz_idx,
+#'   bppar_type = BiocParallel::SerialParam(),
+#'   g_lib = c("SL.mean", "SL.glm"),
+#'   Q_lib = c("SL.mean", "SL.glm")
+#' )
+#'
+#' limmaTMLEout <- modtest_ic(biotmle = biomarkerTMLEout)
+#'
+#' plot(x = limmaTMLEout, type = "pvals_adj")
+#' }
+plot.bioTMLE <- function(x, ..., type = "pvals_adj") {
+  if (type == "pvals_raw") {
+    p <- ggplot2::ggplot(x@topTable, ggplot2::aes(P.Value)) +
+      ggplot2::geom_histogram(ggplot2::aes(
+        y = ..count..,
+      ), colour = "white", na.rm = TRUE, binwidth = 0.025) +
+      ggplot2::ggtitle("Histogram of raw p-values") +
+      ggplot2::xlab("magnitude of raw p-values")
+  } else if (type == "pvals_adj") {
+    p <- ggplot2::ggplot(
+      as.data.frame(x@topTable),
+      ggplot2::aes(adj.P.Val)
+    ) +
+      ggplot2::geom_histogram(ggplot2::aes(
+        y = ..count..,
+      ), colour = "white", na.rm = TRUE, binwidth = 0.025) +
+      ggplot2::ggtitle("Histogram of BH-corrected FDR p-values") +
+      ggplot2::xlab("magnitude of BH-corrected p-values")
+  }
+  p <- p +
+    ggplot2::guides(fill = ggplot2::guide_legend(title = NULL)) +
+    ggplot2::theme_bw() +
+    ggplot2::theme(legend.position = NULL)
+  return(p)
+}
+
+################################################################################
+
+#' Volcano plot for class biotmle
+#'
+#' Volcano plot of the log-changes in the target causal paramter against the
+#' log raw p-values from the moderated t-test.
+#'
+#' @param biotmle object of class \code{biotmle} as produced by an appropriate
+#'  call to \code{biomarkertmle}
+#' @param ate_bound A \code{numeric} indicating the highest magnitude of the
+#'  average treatment effect to be colored on the x-axis of the volcano plot;
+#'  this limits the observations to be considered differentially expressed to
+#'  those in a user-specified interval.
+#' @param pval_bound A \code{numeric} indicating the largest corrected p-value
+#'  to be colored on the y-axis of the volcano plot; this limits observations
+#'  considered as differentially expressed to those in a user-specified
+#'  interval.
+#'
+#' @importFrom dplyr "%>%" arrange mutate select filter
+#' @importFrom ggplot2 ggplot aes geom_point guides guide_legend xlab ylab
+#'  ggtitle theme_bw
+#' @importFrom ggsci scale_fill_gsea
+#' @importFrom stats quantile
+#' @importFrom assertthat assert_that
+#' @importFrom methods is
+#'
+#' @return object of class \code{ggplot} containing a standard volcano plot of
+#'  the log-fold change in the causal target parameter against the raw log
+#'  p-value computed from the moderated tests in \code{modtest_ic}.
+#'
+#' @export volcano_ic
+#'
+#' @examples
+#' \dontrun{
+#' library(dplyr)
+#' library(biotmleData)
+#' library(SuperLearner)
+#' library(SummarizedExperiment)
+#' data(illuminaData)
+#'
+#' colData(illuminaData) <- colData(illuminaData) %>%
+#'   data.frame() %>%
+#'   mutate(age = as.numeric(age > median(age))) %>%
+#'   DataFrame()
+#' benz_idx <- which(names(colData(illuminaData)) %in% "benzene")
+#'
+#' biomarkerTMLEout <- biomarkertmle(
+#'   se = illuminaData,
+#'   varInt = benz_idx,
+#'   bppar_type = BiocParallel::SerialParam(),
+#'   g_lib = c("SL.mean", "SL.glm"),
+#'   Q_lib = c("SL.mean", "SL.glm")
+#' )
+#'
+#' limmaTMLEout <- modtest_ic(biotmle = biomarkerTMLEout)
+#'
+#' volcano_ic(biotmle = limmaTMLEout)
+#' }
+volcano_ic <- function(biotmle, ate_bound = 1.0, pval_bound = 0.2) {
+  # check class since not a generic method
+  assertthat::assert_that(is(biotmle, "bioTMLE"))
+
+  tt_volcano <- biotmle@topTable %>%
+    dplyr::arrange(adj.P.Val) %>%
+    dplyr::mutate(
+      AveExpr = I(AveExpr),
+      logPval = -log10(P.Value),
+      color = ifelse((AveExpr > ate_bound) & (adj.P.Val < pval_bound), "1",
+        ifelse((AveExpr < -ate_bound) & (adj.P.Val < pval_bound),
+          "-1", "0"
+        )
+      )
+    ) %>%
+    dplyr::select(which(colnames(.) %in% c("AveExpr", "logPval", "color"))) %>%
+    dplyr::filter((AveExpr > stats::quantile(AveExpr, probs = 0.05)) &
+      AveExpr < stats::quantile(AveExpr, probs = 0.95))
+
+  p <- ggplot2::ggplot(tt_volcano, ggplot2::aes(x = AveExpr, y = logPval)) +
+    ggplot2::geom_point(aes(colour = color)) +
+    ggplot2::xlab("Average Treatment Effect") +
+    ggplot2::ylab("-log10(raw p-value)") +
+    ggplot2::ggtitle("Volcano Plot: Average Treatment Effect") +
+    ggsci::scale_fill_gsea() +
+    ggplot2::guides(color = ggplot2::guide_legend(title = NULL)) +
+    ggplot2::theme_bw()
+  return(p)
+}
+
+################################################################################
+
+utils::globalVariables(c(
+  "adj.P.Val", ".", "..count..", "P.Value", "color",
+  "AveExpr", "logPval"
+))
+
+#' Heatmap for class biotmle
+#'
+#' Heatmap of contributions of a select subset of biomarkers to the variable
+#' importance measure changes as assessed by influence curve-based estimation,
+#' across all subjects. The heatmap produced performs supervised clustering, as
+#' per Pollard & van der Laan (2008) <doi:10.2202/1544-6115.1404>.
+#'
+#' @param x Object of class \code{biotmle} as produced by an appropriate call
+#'  to \code{biomarkertmle}.
+#' @param design A vector giving the contrast to be displayed in the heatmap.
+#' @param FDRcutoff Cutoff to be used in controlling the False Discovery Rate.
+#' @param type A \code{character} describing whether to plot only a top number
+#'  (as defined by FDR-corrected p-value) of biomarkers or all biomarkers.
+#' @param top Number of identified biomarkers to plot in the heatmap.
+#' @param ... additional arguments passed to \code{superheat::superheat} as
+#'  necessary
+#'
+#' @importFrom dplyr "%>%" arrange filter slice
+#' @importFrom superheat superheat
+#' @importFrom assertthat assert_that
+#' @importFrom methods is
+#'
+#' @return heatmap (from \pkg{superheat}) using hierarchical clustering to plot
+#'  the changes in the variable importance measure for all subjects across a
+#'  specified top number of biomarkers.
+#'
+#' @export heatmap_ic
+#'
+#' @examples
+#' \dontrun{
+#' library(dplyr)
+#' library(biotmleData)
+#' library(SummarizedExperiment)
+#' data(illuminaData)
+#'
+#' colData(illuminaData) <- colData(illuminaData) %>%
+#'   data.frame() %>%
+#'   mutate(age = as.numeric(age > median(age))) %>%
+#'   DataFrame()
+#' benz_idx <- which(names(colData(illuminaData)) %in% "benzene")
+#'
+#' biomarkerTMLEout <- biomarkertmle(
+#'   se = illuminaData,
+#'   varInt = benz_idx,
+#'   bppar_type = BiocParallel::SerialParam(),
+#'   g_lib = c("SL.mean", "SL.glm"),
+#'   Q_lib = c("SL.mean", "SL.glm")
+#' )
+#'
+#' limmaTMLEout <- modtest_ic(biotmle = biomarkerTMLEout)
+#'
+#' heatmap_ic(x = limmaTMLEout, design = design, FDRcutoff = 0.05, top = 10)
+#' }
+heatmap_ic <- function(x, ..., design, FDRcutoff = 0.25,
+                       type = c("top", "all"), top = 25) {
+  # check class since not a generic method
+  assertthat::assert_that(is(x, "bioTMLE"))
+  type <- match.arg(type)
+
+  if (type == "top") {
+    topbiomarkersFDR <- x@topTable %>%
+      dplyr::filter(adj.P.Val < FDRcutoff) %>%
+      dplyr::arrange(adj.P.Val) %>%
+      dplyr::slice(seq_len(top))
+
+    if (nrow(topbiomarkersFDR) < top) {
+      message(paste(top, "biomarkers not found below specified FDR cutoff."))
+    }
+
+    if (any(class(x@tmleOut) %in% "EList")) {
+      biomarkerTMLEout_top <- x@tmleOut$E %>%
+        data.frame() %>%
+        dplyr::filter(rownames(x) %in% topbiomarkersFDR$ID)
+    } else {
+      biomarkerTMLEout_top <- x@tmleOut %>%
+        dplyr::filter(rownames(x) %in% topbiomarkersFDR$ID)
+    }
+    plot_title <- paste("Supervised Heatmap of Top", top, "Biomarkers")
+  } else {
+    if (any(class(x@tmleOut) %in% "EList")) {
+      biomarkerTMLEout_top <- x@tmleOut$E %>%
+        as.data.frame()
+    } else {
+      biomarkerTMLEout_top <- x@tmleOut
+    }
+    plot_title <- "Heatmap of Biomarkers with Supervised Clustering"
+  }
+
+  # group labels
+  annot <- ifelse(design == 0, "Control", "Treated")
+
+  # build supervised heatmap
+  superheat::superheat(as.matrix(biomarkerTMLEout_top),
+    grid.hline.col = "white", force.grid.hline = TRUE,
+    grid.vline.col = "white", force.grid.vline = TRUE,
+    membership.cols = annot,
+    title = plot_title,
+    ...
+  )
+}