a b/R/enrichment_functions.R
1
#' Calculate pathway enrichment for multiple omics layer.
2
#'
3
#' This function calculates GSEA-based enrichments scores for multiple omics
4
#' layer at once. Input pathways or gene sets have to be prepared in advance by
5
#' means of the function \code{\link[multiGSEA]{initOmicsDataStructure}}. The function uses pre-
6
#' ranked lists for each omics layer to calculate the enrichment score. The
7
#' ranking can be calculated by means of the function
8
#' \link[multiGSEA]{rankFeatures}.
9
#'
10
#' @param pathways Nested list containing all pathway features for the
11
#'   respective omics layer.
12
#' @param ranks Nested list containing the measured and pre-ranked features for
13
#'   each omics layer.
14
#' @param eps This parameter sets the boundary for calculating the p value.
15
#'
16
#' @return Nested list containing the enrichment scores for each given pathway
17
#'   and omics layer.
18
#'
19
#' @examples
20
#'
21
#' # Download pathway definition and extract features
22
#' pathways <- getMultiOmicsFeatures(dbs = c("kegg"), layer = c("transcriptome", "proteome"))
23
#'
24
#' # load omics data and calculate ranks
25
#' data(transcriptome)
26
#' data(proteome)
27
#' ranks <- initOmicsDataStructure(c("transcriptome", "proteome"))
28
#' ranks$transcriptome <- rankFeatures(transcriptome$logFC, transcriptome$pValue)
29
#' names(ranks$transcriptome) <- transcriptome$Symbol
30
#' ranks$proteome <- rankFeatures(proteome$logFC, proteome$pValue)
31
#' names(ranks$proteome) <- proteome$Symbol
32
#'
33
#' ## run the enrichment
34
#' multiGSEA(pathways, ranks)
35
#' @importFrom fgsea fgseaMultilevel
36
#'
37
#' @export
38
multiGSEA <- function(pathways, ranks, eps = 0) {
39
  # Go through all omics layer.
40
  es <- lapply(names(pathways), function(omics) {
41
    fgsea::fgseaMultilevel(pathways[[omics]], ranks[[omics]], eps = eps)
42
  })
43
44
  names(es) <- names(pathways)
45
46
  return(es)
47
}
48
49
50
51
#' Create a reshaped data frame from multiGSEA output.
52
#'
53
#' This function reshapes the output from multiGSEA to get a single data frame
54
#' with columns for p-values and adjusted p-values for each omics layer. Each
55
#' row of the data frame represents one pathway.
56
#'
57
#' @param enrichmentScores Nested List of enrichment scores, calculated by
58
#'   multiGSEA function.
59
#' @param pathwayNames List containing Pathway names.
60
#'
61
#' @return Data frame where rows are pathways and columns are (adjusted)
62
#'   p-values for each omics layer.
63
#'
64
#' @examples
65
#' # Download pathway definition and extract features
66
#' pathways <- getMultiOmicsFeatures(dbs = c("kegg"), layer = c("transcriptome", "proteome"))
67
#'
68
#' # load omics data and calculate ranks
69
#' data(transcriptome)
70
#' data(proteome)
71
#' ranks <- initOmicsDataStructure(c("transcriptome", "proteome"))
72
#' ranks$transcriptome <- rankFeatures(transcriptome$logFC, transcriptome$pValue)
73
#' names(ranks$transcriptome) <- transcriptome$Symbol
74
#' ranks$proteome <- rankFeatures(proteome$logFC, proteome$pValue)
75
#' names(ranks$proteome) <- proteome$Symbol
76
#'
77
#' # run the enrichment
78
#' es <- multiGSEA(pathways, ranks)
79
#'
80
#' extractPvalues(
81
#'   enrichmentScores = es,
82
#'   pathwayNames = names(pathways[[1]])
83
#' )
84
#' @export
85
extractPvalues <- function(enrichmentScores, pathwayNames) {
86
  # Go through all the pathways
87
  res <- lapply(pathwayNames, function(name) {
88
    # Go through all the possible omics layer
89
    unlist(lapply(names(enrichmentScores), function(y) {
90
      df <- enrichmentScores[[y]][which(enrichmentScores[[y]]$pathway == name), c(2, 3)]
91
      if (nrow(df) == 0) {
92
        df <- data.frame(pval = NA, padj = NA)
93
      }
94
      names(df) <- paste0(y, "_", names(df))
95
      df
96
    }))
97
  })
98
99
  # Combine list elements to data frame
100
  # and assign pathway names as rownames
101
  res <- data.frame(do.call(rbind, res))
102
103
  return(res)
104
}
105
106
107
108
#' Calculate a combined p-value for multiple omics layer.
109
#'
110
#' This function applies the Stouffer method, the Edgington method or the
111
#' Fisher\'s combined probability test to combine p-values of independent tests
112
#' that are based on the same null hypothesis. The Stouffer method can also be
113
#' applied in a weighted fashion.
114
#'
115
#' @param df Data frame where rows represent a certain pathway or gene set and
116
#'   columns represent p-values derived from independent tests, e.g., different
117
#'   omics layer.
118
#' @param method String that specifies the method to combine multiple p-values.
119
#'   Default: "stouffer" Options: "stouffer", "fisher", "edgington"
120
#' @param col_pattern String of the pattern that specifies the columns to be
121
#'   combined. Default: "pval", Options: "pval", "padj" (legacy)
122
#' @param weights List of weights that will be used in a weighted Stouffer
123
#'   method.
124
#'
125
#' @return Vector of length \code{nrow(df)} with combined p-values.
126
#'
127
#' @examples
128
#' df <- cbind(runif(5), runif(5), runif(5))
129
#' colnames(df) <- c("trans.pval", "prot.pval", "meta.pval")
130
#'
131
#' # run the unweighted summation of z values
132
#' combinePvalues(df)
133
#'
134
#' # run the weighted variant
135
#' combinePvalues(df, weights = c(10, 5, 1))
136
#'
137
#' # run the Fisher's combined probability test
138
#' combinePvalues(df, method = "fisher")
139
#'
140
#' # run the Edgington's method
141
#' combinePvalues(df, method = "edgington")
142
#' @importFrom metap sumz sumlog sump
143
#'
144
#' @export
145
combinePvalues <- function(df, method = "stouffer", col_pattern = "pval", weights = NULL) {
146
  method <- tolower(method)
147
  if (!method %in% c("stouffer", "fisher", "edgington")) {
148
    stop("You can chose between the 'stouffer', 'edgington',
149
              and 'fisher' method to combine p-values.",
150
      call. = FALSE
151
    )
152
  }
153
154
  if (!col_pattern %in% c("pval", "padj")) {
155
    stop("You can chose between 'pval' and 'padj' (legacy),
156
          to select columns for combining p-values.",
157
      call. = FALSE
158
    )
159
  }
160
161
  cols <- grep(col_pattern, colnames(df))
162
163
  pvals <- apply(df[,cols], 1, function(row) {
164
    row <- row[!is.na(row)]
165
166
    if (length(row) >= 2) {
167
      if (method == "fisher") {
168
        p <- metap::sumlog(row)
169
        p$p
170
      } else if (method == "edgington") {
171
        p <- metap::sump(row)
172
        p$p
173
      } else {
174
        ## sumz allows only p-values smaller than 1
175
        row <- row[row > 0 & row < 1]
176
177
        if (length(row) >= 2) {
178
          if (length(weights) > 0) {
179
            p <- metap::sumz(row, weights = weights)
180
          } else {
181
            p <- metap::sumz(row)
182
          }
183
          p$p
184
        } else if (length(row == 1)) {
185
          row[1]
186
        } else {
187
          NA
188
        }
189
      }
190
    } else if (length(row) == 1) {
191
      row[1]
192
    } else {
193
      NA
194
    }
195
  })
196
197
  return(pvals)
198
}
199