a b/R/eif_moderated.R
1
#' Moderated Statistical Tests for Influence Functions
2
#'
3
#' Performs variance shrinkage via application of an empirical Bayes procedure
4
#' (of LIMMA) on the observed data after a transformation moving the data to
5
#' influence function space, based on the average treatment effect parameter.
6
#'
7
#' @param biotmle \code{biotmle} object as generated by \code{biomarkertmle}
8
#' @param adjust the multiple testing correction to be applied to p-values that
9
#'  are generated from the moderated tests. The recommended (default) method
10
#'  is that of Benjamini and Hochberg. See \link[limma]{topTable} for a list of
11
#'  appropriate methods.
12
#' @param pval_type The reference distribution to be used for computing the
13
#'  p-value. Those based on the normal approximation tend to provide misleading
14
#'  inference when working with moderately sized (finite) samples. Use of the
15
#'  logistic distribution has been found to empirically improve performance in
16
#'  settings where multiple hypothesis testing is a concern.
17
#' @param ... Other arguments passed to \code{\link[limma]{topTable}}.
18
#'
19
#' @importFrom methods is
20
#' @importFrom dplyr "%>%" mutate select
21
#' @importFrom stats plogis p.adjust
22
#' @importFrom limma lmFit eBayes topTable
23
#' @importFrom tibble as_tibble rownames_to_column
24
#' @importFrom assertthat assert_that
25
#'
26
#' @return \code{biotmle} object containing the results of applying both
27
#'  \code{\link[limma]{lmFit}} and \code{\link[limma]{topTable}}.
28
#'
29
#' @export modtest_ic
30
#'
31
#' @examples
32
#' library(dplyr)
33
#' library(biotmleData)
34
#' library(SuperLearner)
35
#' library(SummarizedExperiment)
36
#' data(illuminaData)
37
#'
38
#' colData(illuminaData) <- colData(illuminaData) %>%
39
#'   data.frame() %>%
40
#'   dplyr::mutate(age = as.numeric(age > median(age))) %>%
41
#'   DataFrame()
42
#' benz_idx <- which(names(colData(illuminaData)) %in% "benzene")
43
#'
44
#' biomarkerTMLEout <- biomarkertmle(
45
#'   se = illuminaData[1:2, ],
46
#'   varInt = benz_idx,
47
#'   bppar_type = BiocParallel::SerialParam(),
48
#'   g_lib = c("SL.mean", "SL.glm"),
49
#'   Q_lib = c("SL.mean", "SL.glm")
50
#' )
51
#'
52
#' limmaTMLEout <- modtest_ic(biotmle = biomarkerTMLEout)
53
modtest_ic <- function(biotmle,
54
                       adjust = "BH",
55
                       pval_type = c("normal", "logistic"),
56
                       ...) {
57
  # check for input type and set argument defaults
58
  assertthat::assert_that(is(biotmle, "bioTMLE"))
59
  pval_type <- match.arg(pval_type)
60
61
  # extract influence function estimates and number of observations
62
  biomarkertmle_out <- as.matrix(biotmle@tmleOut)
63
  n_obs <- nrow(colData(biotmle))
64
65
  # build design matrix for forcing limma to only perform shrinkage
66
  fit <- limma::lmFit(
67
    object = biomarkertmle_out,
68
    design = rep(1, n_obs)
69
  )
70
  fit <- limma::eBayes(fit = fit)
71
72
  # compute inference
73
  tt <- limma::topTable(
74
    fit = fit,
75
    coef = 1,
76
    number = Inf,
77
    adjust.method = adjust,
78
    sort.by = "none",
79
    ...
80
  ) %>%
81
    tibble::as_tibble(.name_repair = "minimal") %>%
82
    dplyr::mutate(
83
      # remove log-fold change and overwrite average expression with ATE
84
      ID = biotmle@NAMES,
85
      logFC = NULL,
86
      AveExpr = biotmle@ateOut,
87
      var_bayes = fit$s2.post / n_obs,
88
    ) %>%
89
    dplyr::select(ID, AveExpr, t, P.Value, adj.P.Val, B, var_bayes)
90
91
  # use logistic distribution as reference for p-values by default
92
  if (pval_type == "logistic") {
93
    p_val <- 2 * stats::plogis(-abs(tt$t), location = 0, scale = sqrt(3) / pi)
94
    p_val_adj <- stats::p.adjust(p_val, method = adjust)
95
    tt$P.Value <- p_val
96
    tt$adj.P.Val <- p_val_adj
97
  }
98
99
  # store in slot of output object
100
  biotmle@topTable <- tt
101
  return(biotmle)
102
}