|
a |
|
b/R/eif_moderated.R |
|
|
1 |
#' Moderated Statistical Tests for Influence Functions |
|
|
2 |
#' |
|
|
3 |
#' Performs variance shrinkage via application of an empirical Bayes procedure |
|
|
4 |
#' (of LIMMA) on the observed data after a transformation moving the data to |
|
|
5 |
#' influence function space, based on the average treatment effect parameter. |
|
|
6 |
#' |
|
|
7 |
#' @param biotmle \code{biotmle} object as generated by \code{biomarkertmle} |
|
|
8 |
#' @param adjust the multiple testing correction to be applied to p-values that |
|
|
9 |
#' are generated from the moderated tests. The recommended (default) method |
|
|
10 |
#' is that of Benjamini and Hochberg. See \link[limma]{topTable} for a list of |
|
|
11 |
#' appropriate methods. |
|
|
12 |
#' @param pval_type The reference distribution to be used for computing the |
|
|
13 |
#' p-value. Those based on the normal approximation tend to provide misleading |
|
|
14 |
#' inference when working with moderately sized (finite) samples. Use of the |
|
|
15 |
#' logistic distribution has been found to empirically improve performance in |
|
|
16 |
#' settings where multiple hypothesis testing is a concern. |
|
|
17 |
#' @param ... Other arguments passed to \code{\link[limma]{topTable}}. |
|
|
18 |
#' |
|
|
19 |
#' @importFrom methods is |
|
|
20 |
#' @importFrom dplyr "%>%" mutate select |
|
|
21 |
#' @importFrom stats plogis p.adjust |
|
|
22 |
#' @importFrom limma lmFit eBayes topTable |
|
|
23 |
#' @importFrom tibble as_tibble rownames_to_column |
|
|
24 |
#' @importFrom assertthat assert_that |
|
|
25 |
#' |
|
|
26 |
#' @return \code{biotmle} object containing the results of applying both |
|
|
27 |
#' \code{\link[limma]{lmFit}} and \code{\link[limma]{topTable}}. |
|
|
28 |
#' |
|
|
29 |
#' @export modtest_ic |
|
|
30 |
#' |
|
|
31 |
#' @examples |
|
|
32 |
#' library(dplyr) |
|
|
33 |
#' library(biotmleData) |
|
|
34 |
#' library(SuperLearner) |
|
|
35 |
#' library(SummarizedExperiment) |
|
|
36 |
#' data(illuminaData) |
|
|
37 |
#' |
|
|
38 |
#' colData(illuminaData) <- colData(illuminaData) %>% |
|
|
39 |
#' data.frame() %>% |
|
|
40 |
#' dplyr::mutate(age = as.numeric(age > median(age))) %>% |
|
|
41 |
#' DataFrame() |
|
|
42 |
#' benz_idx <- which(names(colData(illuminaData)) %in% "benzene") |
|
|
43 |
#' |
|
|
44 |
#' biomarkerTMLEout <- biomarkertmle( |
|
|
45 |
#' se = illuminaData[1:2, ], |
|
|
46 |
#' varInt = benz_idx, |
|
|
47 |
#' bppar_type = BiocParallel::SerialParam(), |
|
|
48 |
#' g_lib = c("SL.mean", "SL.glm"), |
|
|
49 |
#' Q_lib = c("SL.mean", "SL.glm") |
|
|
50 |
#' ) |
|
|
51 |
#' |
|
|
52 |
#' limmaTMLEout <- modtest_ic(biotmle = biomarkerTMLEout) |
|
|
53 |
modtest_ic <- function(biotmle, |
|
|
54 |
adjust = "BH", |
|
|
55 |
pval_type = c("normal", "logistic"), |
|
|
56 |
...) { |
|
|
57 |
# check for input type and set argument defaults |
|
|
58 |
assertthat::assert_that(is(biotmle, "bioTMLE")) |
|
|
59 |
pval_type <- match.arg(pval_type) |
|
|
60 |
|
|
|
61 |
# extract influence function estimates and number of observations |
|
|
62 |
biomarkertmle_out <- as.matrix(biotmle@tmleOut) |
|
|
63 |
n_obs <- nrow(colData(biotmle)) |
|
|
64 |
|
|
|
65 |
# build design matrix for forcing limma to only perform shrinkage |
|
|
66 |
fit <- limma::lmFit( |
|
|
67 |
object = biomarkertmle_out, |
|
|
68 |
design = rep(1, n_obs) |
|
|
69 |
) |
|
|
70 |
fit <- limma::eBayes(fit = fit) |
|
|
71 |
|
|
|
72 |
# compute inference |
|
|
73 |
tt <- limma::topTable( |
|
|
74 |
fit = fit, |
|
|
75 |
coef = 1, |
|
|
76 |
number = Inf, |
|
|
77 |
adjust.method = adjust, |
|
|
78 |
sort.by = "none", |
|
|
79 |
... |
|
|
80 |
) %>% |
|
|
81 |
tibble::as_tibble(.name_repair = "minimal") %>% |
|
|
82 |
dplyr::mutate( |
|
|
83 |
# remove log-fold change and overwrite average expression with ATE |
|
|
84 |
ID = biotmle@NAMES, |
|
|
85 |
logFC = NULL, |
|
|
86 |
AveExpr = biotmle@ateOut, |
|
|
87 |
var_bayes = fit$s2.post / n_obs, |
|
|
88 |
) %>% |
|
|
89 |
dplyr::select(ID, AveExpr, t, P.Value, adj.P.Val, B, var_bayes) |
|
|
90 |
|
|
|
91 |
# use logistic distribution as reference for p-values by default |
|
|
92 |
if (pval_type == "logistic") { |
|
|
93 |
p_val <- 2 * stats::plogis(-abs(tt$t), location = 0, scale = sqrt(3) / pi) |
|
|
94 |
p_val_adj <- stats::p.adjust(p_val, method = adjust) |
|
|
95 |
tt$P.Value <- p_val |
|
|
96 |
tt$adj.P.Val <- p_val_adj |
|
|
97 |
} |
|
|
98 |
|
|
|
99 |
# store in slot of output object |
|
|
100 |
biotmle@topTable <- tt |
|
|
101 |
return(biotmle) |
|
|
102 |
} |