Diff of /R/eif_moderated.R [000000] .. [efa494]

Switch to side-by-side view

--- a
+++ b/R/eif_moderated.R
@@ -0,0 +1,102 @@
+#' Moderated Statistical Tests for Influence Functions
+#'
+#' Performs variance shrinkage via application of an empirical Bayes procedure
+#' (of LIMMA) on the observed data after a transformation moving the data to
+#' influence function space, based on the average treatment effect parameter.
+#'
+#' @param biotmle \code{biotmle} object as generated by \code{biomarkertmle}
+#' @param adjust the multiple testing correction to be applied to p-values that
+#'  are generated from the moderated tests. The recommended (default) method
+#'  is that of Benjamini and Hochberg. See \link[limma]{topTable} for a list of
+#'  appropriate methods.
+#' @param pval_type The reference distribution to be used for computing the
+#'  p-value. Those based on the normal approximation tend to provide misleading
+#'  inference when working with moderately sized (finite) samples. Use of the
+#'  logistic distribution has been found to empirically improve performance in
+#'  settings where multiple hypothesis testing is a concern.
+#' @param ... Other arguments passed to \code{\link[limma]{topTable}}.
+#'
+#' @importFrom methods is
+#' @importFrom dplyr "%>%" mutate select
+#' @importFrom stats plogis p.adjust
+#' @importFrom limma lmFit eBayes topTable
+#' @importFrom tibble as_tibble rownames_to_column
+#' @importFrom assertthat assert_that
+#'
+#' @return \code{biotmle} object containing the results of applying both
+#'  \code{\link[limma]{lmFit}} and \code{\link[limma]{topTable}}.
+#'
+#' @export modtest_ic
+#'
+#' @examples
+#' library(dplyr)
+#' library(biotmleData)
+#' library(SuperLearner)
+#' library(SummarizedExperiment)
+#' data(illuminaData)
+#'
+#' colData(illuminaData) <- colData(illuminaData) %>%
+#'   data.frame() %>%
+#'   dplyr::mutate(age = as.numeric(age > median(age))) %>%
+#'   DataFrame()
+#' benz_idx <- which(names(colData(illuminaData)) %in% "benzene")
+#'
+#' biomarkerTMLEout <- biomarkertmle(
+#'   se = illuminaData[1:2, ],
+#'   varInt = benz_idx,
+#'   bppar_type = BiocParallel::SerialParam(),
+#'   g_lib = c("SL.mean", "SL.glm"),
+#'   Q_lib = c("SL.mean", "SL.glm")
+#' )
+#'
+#' limmaTMLEout <- modtest_ic(biotmle = biomarkerTMLEout)
+modtest_ic <- function(biotmle,
+                       adjust = "BH",
+                       pval_type = c("normal", "logistic"),
+                       ...) {
+  # check for input type and set argument defaults
+  assertthat::assert_that(is(biotmle, "bioTMLE"))
+  pval_type <- match.arg(pval_type)
+
+  # extract influence function estimates and number of observations
+  biomarkertmle_out <- as.matrix(biotmle@tmleOut)
+  n_obs <- nrow(colData(biotmle))
+
+  # build design matrix for forcing limma to only perform shrinkage
+  fit <- limma::lmFit(
+    object = biomarkertmle_out,
+    design = rep(1, n_obs)
+  )
+  fit <- limma::eBayes(fit = fit)
+
+  # compute inference
+  tt <- limma::topTable(
+    fit = fit,
+    coef = 1,
+    number = Inf,
+    adjust.method = adjust,
+    sort.by = "none",
+    ...
+  ) %>%
+    tibble::as_tibble(.name_repair = "minimal") %>%
+    dplyr::mutate(
+      # remove log-fold change and overwrite average expression with ATE
+      ID = biotmle@NAMES,
+      logFC = NULL,
+      AveExpr = biotmle@ateOut,
+      var_bayes = fit$s2.post / n_obs,
+    ) %>%
+    dplyr::select(ID, AveExpr, t, P.Value, adj.P.Val, B, var_bayes)
+
+  # use logistic distribution as reference for p-values by default
+  if (pval_type == "logistic") {
+    p_val <- 2 * stats::plogis(-abs(tt$t), location = 0, scale = sqrt(3) / pi)
+    p_val_adj <- stats::p.adjust(p_val, method = adjust)
+    tt$P.Value <- p_val
+    tt$adj.P.Val <- p_val_adj
+  }
+
+  # store in slot of output object
+  biotmle@topTable <- tt
+  return(biotmle)
+}