[d84c2d]: / R / eif_moderated.R

Download this file

103 lines (97 with data), 3.6 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
#' Moderated Statistical Tests for Influence Functions
#'
#' Performs variance shrinkage via application of an empirical Bayes procedure
#' (of LIMMA) on the observed data after a transformation moving the data to
#' influence function space, based on the average treatment effect parameter.
#'
#' @param biotmle \code{biotmle} object as generated by \code{biomarkertmle}
#' @param adjust the multiple testing correction to be applied to p-values that
#' are generated from the moderated tests. The recommended (default) method
#' is that of Benjamini and Hochberg. See \link[limma]{topTable} for a list of
#' appropriate methods.
#' @param pval_type The reference distribution to be used for computing the
#' p-value. Those based on the normal approximation tend to provide misleading
#' inference when working with moderately sized (finite) samples. Use of the
#' logistic distribution has been found to empirically improve performance in
#' settings where multiple hypothesis testing is a concern.
#' @param ... Other arguments passed to \code{\link[limma]{topTable}}.
#'
#' @importFrom methods is
#' @importFrom dplyr "%>%" mutate select
#' @importFrom stats plogis p.adjust
#' @importFrom limma lmFit eBayes topTable
#' @importFrom tibble as_tibble rownames_to_column
#' @importFrom assertthat assert_that
#'
#' @return \code{biotmle} object containing the results of applying both
#' \code{\link[limma]{lmFit}} and \code{\link[limma]{topTable}}.
#'
#' @export modtest_ic
#'
#' @examples
#' library(dplyr)
#' library(biotmleData)
#' library(SuperLearner)
#' library(SummarizedExperiment)
#' data(illuminaData)
#'
#' colData(illuminaData) <- colData(illuminaData) %>%
#' data.frame() %>%
#' dplyr::mutate(age = as.numeric(age > median(age))) %>%
#' DataFrame()
#' benz_idx <- which(names(colData(illuminaData)) %in% "benzene")
#'
#' biomarkerTMLEout <- biomarkertmle(
#' se = illuminaData[1:2, ],
#' varInt = benz_idx,
#' bppar_type = BiocParallel::SerialParam(),
#' g_lib = c("SL.mean", "SL.glm"),
#' Q_lib = c("SL.mean", "SL.glm")
#' )
#'
#' limmaTMLEout <- modtest_ic(biotmle = biomarkerTMLEout)
modtest_ic <- function(biotmle,
adjust = "BH",
pval_type = c("normal", "logistic"),
...) {
# check for input type and set argument defaults
assertthat::assert_that(is(biotmle, "bioTMLE"))
pval_type <- match.arg(pval_type)
# extract influence function estimates and number of observations
biomarkertmle_out <- as.matrix(biotmle@tmleOut)
n_obs <- nrow(colData(biotmle))
# build design matrix for forcing limma to only perform shrinkage
fit <- limma::lmFit(
object = biomarkertmle_out,
design = rep(1, n_obs)
)
fit <- limma::eBayes(fit = fit)
# compute inference
tt <- limma::topTable(
fit = fit,
coef = 1,
number = Inf,
adjust.method = adjust,
sort.by = "none",
...
) %>%
tibble::as_tibble(.name_repair = "minimal") %>%
dplyr::mutate(
# remove log-fold change and overwrite average expression with ATE
ID = biotmle@NAMES,
logFC = NULL,
AveExpr = biotmle@ateOut,
var_bayes = fit$s2.post / n_obs,
) %>%
dplyr::select(ID, AveExpr, t, P.Value, adj.P.Val, B, var_bayes)
# use logistic distribution as reference for p-values by default
if (pval_type == "logistic") {
p_val <- 2 * stats::plogis(-abs(tt$t), location = 0, scale = sqrt(3) / pi)
p_val_adj <- stats::p.adjust(p_val, method = adjust)
tt$P.Value <- p_val
tt$adj.P.Val <- p_val_adj
}
# store in slot of output object
biotmle@topTable <- tt
return(biotmle)
}