|
a |
|
b/R/enrichment_functions.R |
|
|
1 |
#' Calculate pathway enrichment for multiple omics layer. |
|
|
2 |
#' |
|
|
3 |
#' This function calculates GSEA-based enrichments scores for multiple omics |
|
|
4 |
#' layer at once. Input pathways or gene sets have to be prepared in advance by |
|
|
5 |
#' means of the function \code{\link[multiGSEA]{initOmicsDataStructure}}. The function uses pre- |
|
|
6 |
#' ranked lists for each omics layer to calculate the enrichment score. The |
|
|
7 |
#' ranking can be calculated by means of the function |
|
|
8 |
#' \link[multiGSEA]{rankFeatures}. |
|
|
9 |
#' |
|
|
10 |
#' @param pathways Nested list containing all pathway features for the |
|
|
11 |
#' respective omics layer. |
|
|
12 |
#' @param ranks Nested list containing the measured and pre-ranked features for |
|
|
13 |
#' each omics layer. |
|
|
14 |
#' @param eps This parameter sets the boundary for calculating the p value. |
|
|
15 |
#' |
|
|
16 |
#' @return Nested list containing the enrichment scores for each given pathway |
|
|
17 |
#' and omics layer. |
|
|
18 |
#' |
|
|
19 |
#' @examples |
|
|
20 |
#' |
|
|
21 |
#' # Download pathway definition and extract features |
|
|
22 |
#' pathways <- getMultiOmicsFeatures(dbs = c("kegg"), layer = c("transcriptome", "proteome")) |
|
|
23 |
#' |
|
|
24 |
#' # load omics data and calculate ranks |
|
|
25 |
#' data(transcriptome) |
|
|
26 |
#' data(proteome) |
|
|
27 |
#' ranks <- initOmicsDataStructure(c("transcriptome", "proteome")) |
|
|
28 |
#' ranks$transcriptome <- rankFeatures(transcriptome$logFC, transcriptome$pValue) |
|
|
29 |
#' names(ranks$transcriptome) <- transcriptome$Symbol |
|
|
30 |
#' ranks$proteome <- rankFeatures(proteome$logFC, proteome$pValue) |
|
|
31 |
#' names(ranks$proteome) <- proteome$Symbol |
|
|
32 |
#' |
|
|
33 |
#' ## run the enrichment |
|
|
34 |
#' multiGSEA(pathways, ranks) |
|
|
35 |
#' @importFrom fgsea fgseaMultilevel |
|
|
36 |
#' |
|
|
37 |
#' @export |
|
|
38 |
multiGSEA <- function(pathways, ranks, eps = 0) { |
|
|
39 |
# Go through all omics layer. |
|
|
40 |
es <- lapply(names(pathways), function(omics) { |
|
|
41 |
fgsea::fgseaMultilevel(pathways[[omics]], ranks[[omics]], eps = eps) |
|
|
42 |
}) |
|
|
43 |
|
|
|
44 |
names(es) <- names(pathways) |
|
|
45 |
|
|
|
46 |
return(es) |
|
|
47 |
} |
|
|
48 |
|
|
|
49 |
|
|
|
50 |
|
|
|
51 |
#' Create a reshaped data frame from multiGSEA output. |
|
|
52 |
#' |
|
|
53 |
#' This function reshapes the output from multiGSEA to get a single data frame |
|
|
54 |
#' with columns for p-values and adjusted p-values for each omics layer. Each |
|
|
55 |
#' row of the data frame represents one pathway. |
|
|
56 |
#' |
|
|
57 |
#' @param enrichmentScores Nested List of enrichment scores, calculated by |
|
|
58 |
#' multiGSEA function. |
|
|
59 |
#' @param pathwayNames List containing Pathway names. |
|
|
60 |
#' |
|
|
61 |
#' @return Data frame where rows are pathways and columns are (adjusted) |
|
|
62 |
#' p-values for each omics layer. |
|
|
63 |
#' |
|
|
64 |
#' @examples |
|
|
65 |
#' # Download pathway definition and extract features |
|
|
66 |
#' pathways <- getMultiOmicsFeatures(dbs = c("kegg"), layer = c("transcriptome", "proteome")) |
|
|
67 |
#' |
|
|
68 |
#' # load omics data and calculate ranks |
|
|
69 |
#' data(transcriptome) |
|
|
70 |
#' data(proteome) |
|
|
71 |
#' ranks <- initOmicsDataStructure(c("transcriptome", "proteome")) |
|
|
72 |
#' ranks$transcriptome <- rankFeatures(transcriptome$logFC, transcriptome$pValue) |
|
|
73 |
#' names(ranks$transcriptome) <- transcriptome$Symbol |
|
|
74 |
#' ranks$proteome <- rankFeatures(proteome$logFC, proteome$pValue) |
|
|
75 |
#' names(ranks$proteome) <- proteome$Symbol |
|
|
76 |
#' |
|
|
77 |
#' # run the enrichment |
|
|
78 |
#' es <- multiGSEA(pathways, ranks) |
|
|
79 |
#' |
|
|
80 |
#' extractPvalues( |
|
|
81 |
#' enrichmentScores = es, |
|
|
82 |
#' pathwayNames = names(pathways[[1]]) |
|
|
83 |
#' ) |
|
|
84 |
#' @export |
|
|
85 |
extractPvalues <- function(enrichmentScores, pathwayNames) { |
|
|
86 |
# Go through all the pathways |
|
|
87 |
res <- lapply(pathwayNames, function(name) { |
|
|
88 |
# Go through all the possible omics layer |
|
|
89 |
unlist(lapply(names(enrichmentScores), function(y) { |
|
|
90 |
df <- enrichmentScores[[y]][which(enrichmentScores[[y]]$pathway == name), c(2, 3)] |
|
|
91 |
if (nrow(df) == 0) { |
|
|
92 |
df <- data.frame(pval = NA, padj = NA) |
|
|
93 |
} |
|
|
94 |
names(df) <- paste0(y, "_", names(df)) |
|
|
95 |
df |
|
|
96 |
})) |
|
|
97 |
}) |
|
|
98 |
|
|
|
99 |
# Combine list elements to data frame |
|
|
100 |
# and assign pathway names as rownames |
|
|
101 |
res <- data.frame(do.call(rbind, res)) |
|
|
102 |
|
|
|
103 |
return(res) |
|
|
104 |
} |
|
|
105 |
|
|
|
106 |
|
|
|
107 |
|
|
|
108 |
#' Calculate a combined p-value for multiple omics layer. |
|
|
109 |
#' |
|
|
110 |
#' This function applies the Stouffer method, the Edgington method or the |
|
|
111 |
#' Fisher\'s combined probability test to combine p-values of independent tests |
|
|
112 |
#' that are based on the same null hypothesis. The Stouffer method can also be |
|
|
113 |
#' applied in a weighted fashion. |
|
|
114 |
#' |
|
|
115 |
#' @param df Data frame where rows represent a certain pathway or gene set and |
|
|
116 |
#' columns represent p-values derived from independent tests, e.g., different |
|
|
117 |
#' omics layer. |
|
|
118 |
#' @param method String that specifies the method to combine multiple p-values. |
|
|
119 |
#' Default: "stouffer" Options: "stouffer", "fisher", "edgington" |
|
|
120 |
#' @param col_pattern String of the pattern that specifies the columns to be |
|
|
121 |
#' combined. Default: "pval", Options: "pval", "padj" (legacy) |
|
|
122 |
#' @param weights List of weights that will be used in a weighted Stouffer |
|
|
123 |
#' method. |
|
|
124 |
#' |
|
|
125 |
#' @return Vector of length \code{nrow(df)} with combined p-values. |
|
|
126 |
#' |
|
|
127 |
#' @examples |
|
|
128 |
#' df <- cbind(runif(5), runif(5), runif(5)) |
|
|
129 |
#' colnames(df) <- c("trans.pval", "prot.pval", "meta.pval") |
|
|
130 |
#' |
|
|
131 |
#' # run the unweighted summation of z values |
|
|
132 |
#' combinePvalues(df) |
|
|
133 |
#' |
|
|
134 |
#' # run the weighted variant |
|
|
135 |
#' combinePvalues(df, weights = c(10, 5, 1)) |
|
|
136 |
#' |
|
|
137 |
#' # run the Fisher's combined probability test |
|
|
138 |
#' combinePvalues(df, method = "fisher") |
|
|
139 |
#' |
|
|
140 |
#' # run the Edgington's method |
|
|
141 |
#' combinePvalues(df, method = "edgington") |
|
|
142 |
#' @importFrom metap sumz sumlog sump |
|
|
143 |
#' |
|
|
144 |
#' @export |
|
|
145 |
combinePvalues <- function(df, method = "stouffer", col_pattern = "pval", weights = NULL) { |
|
|
146 |
method <- tolower(method) |
|
|
147 |
if (!method %in% c("stouffer", "fisher", "edgington")) { |
|
|
148 |
stop("You can chose between the 'stouffer', 'edgington', |
|
|
149 |
and 'fisher' method to combine p-values.", |
|
|
150 |
call. = FALSE |
|
|
151 |
) |
|
|
152 |
} |
|
|
153 |
|
|
|
154 |
if (!col_pattern %in% c("pval", "padj")) { |
|
|
155 |
stop("You can chose between 'pval' and 'padj' (legacy), |
|
|
156 |
to select columns for combining p-values.", |
|
|
157 |
call. = FALSE |
|
|
158 |
) |
|
|
159 |
} |
|
|
160 |
|
|
|
161 |
cols <- grep(col_pattern, colnames(df)) |
|
|
162 |
|
|
|
163 |
pvals <- apply(df[,cols], 1, function(row) { |
|
|
164 |
row <- row[!is.na(row)] |
|
|
165 |
|
|
|
166 |
if (length(row) >= 2) { |
|
|
167 |
if (method == "fisher") { |
|
|
168 |
p <- metap::sumlog(row) |
|
|
169 |
p$p |
|
|
170 |
} else if (method == "edgington") { |
|
|
171 |
p <- metap::sump(row) |
|
|
172 |
p$p |
|
|
173 |
} else { |
|
|
174 |
## sumz allows only p-values smaller than 1 |
|
|
175 |
row <- row[row > 0 & row < 1] |
|
|
176 |
|
|
|
177 |
if (length(row) >= 2) { |
|
|
178 |
if (length(weights) > 0) { |
|
|
179 |
p <- metap::sumz(row, weights = weights) |
|
|
180 |
} else { |
|
|
181 |
p <- metap::sumz(row) |
|
|
182 |
} |
|
|
183 |
p$p |
|
|
184 |
} else if (length(row == 1)) { |
|
|
185 |
row[1] |
|
|
186 |
} else { |
|
|
187 |
NA |
|
|
188 |
} |
|
|
189 |
} |
|
|
190 |
} else if (length(row) == 1) { |
|
|
191 |
row[1] |
|
|
192 |
} else { |
|
|
193 |
NA |
|
|
194 |
} |
|
|
195 |
}) |
|
|
196 |
|
|
|
197 |
return(pvals) |
|
|
198 |
} |
|
|
199 |
|