Diff of /R/enrichment_functions.R [000000] .. [72a7c3]

Switch to side-by-side view

--- a
+++ b/R/enrichment_functions.R
@@ -0,0 +1,199 @@
+#' Calculate pathway enrichment for multiple omics layer.
+#'
+#' This function calculates GSEA-based enrichments scores for multiple omics
+#' layer at once. Input pathways or gene sets have to be prepared in advance by
+#' means of the function \code{\link[multiGSEA]{initOmicsDataStructure}}. The function uses pre-
+#' ranked lists for each omics layer to calculate the enrichment score. The
+#' ranking can be calculated by means of the function
+#' \link[multiGSEA]{rankFeatures}.
+#'
+#' @param pathways Nested list containing all pathway features for the
+#'   respective omics layer.
+#' @param ranks Nested list containing the measured and pre-ranked features for
+#'   each omics layer.
+#' @param eps This parameter sets the boundary for calculating the p value.
+#'
+#' @return Nested list containing the enrichment scores for each given pathway
+#'   and omics layer.
+#'
+#' @examples
+#'
+#' # Download pathway definition and extract features
+#' pathways <- getMultiOmicsFeatures(dbs = c("kegg"), layer = c("transcriptome", "proteome"))
+#'
+#' # load omics data and calculate ranks
+#' data(transcriptome)
+#' data(proteome)
+#' ranks <- initOmicsDataStructure(c("transcriptome", "proteome"))
+#' ranks$transcriptome <- rankFeatures(transcriptome$logFC, transcriptome$pValue)
+#' names(ranks$transcriptome) <- transcriptome$Symbol
+#' ranks$proteome <- rankFeatures(proteome$logFC, proteome$pValue)
+#' names(ranks$proteome) <- proteome$Symbol
+#'
+#' ## run the enrichment
+#' multiGSEA(pathways, ranks)
+#' @importFrom fgsea fgseaMultilevel
+#'
+#' @export
+multiGSEA <- function(pathways, ranks, eps = 0) {
+  # Go through all omics layer.
+  es <- lapply(names(pathways), function(omics) {
+    fgsea::fgseaMultilevel(pathways[[omics]], ranks[[omics]], eps = eps)
+  })
+
+  names(es) <- names(pathways)
+
+  return(es)
+}
+
+
+
+#' Create a reshaped data frame from multiGSEA output.
+#'
+#' This function reshapes the output from multiGSEA to get a single data frame
+#' with columns for p-values and adjusted p-values for each omics layer. Each
+#' row of the data frame represents one pathway.
+#'
+#' @param enrichmentScores Nested List of enrichment scores, calculated by
+#'   multiGSEA function.
+#' @param pathwayNames List containing Pathway names.
+#'
+#' @return Data frame where rows are pathways and columns are (adjusted)
+#'   p-values for each omics layer.
+#'
+#' @examples
+#' # Download pathway definition and extract features
+#' pathways <- getMultiOmicsFeatures(dbs = c("kegg"), layer = c("transcriptome", "proteome"))
+#'
+#' # load omics data and calculate ranks
+#' data(transcriptome)
+#' data(proteome)
+#' ranks <- initOmicsDataStructure(c("transcriptome", "proteome"))
+#' ranks$transcriptome <- rankFeatures(transcriptome$logFC, transcriptome$pValue)
+#' names(ranks$transcriptome) <- transcriptome$Symbol
+#' ranks$proteome <- rankFeatures(proteome$logFC, proteome$pValue)
+#' names(ranks$proteome) <- proteome$Symbol
+#'
+#' # run the enrichment
+#' es <- multiGSEA(pathways, ranks)
+#'
+#' extractPvalues(
+#'   enrichmentScores = es,
+#'   pathwayNames = names(pathways[[1]])
+#' )
+#' @export
+extractPvalues <- function(enrichmentScores, pathwayNames) {
+  # Go through all the pathways
+  res <- lapply(pathwayNames, function(name) {
+    # Go through all the possible omics layer
+    unlist(lapply(names(enrichmentScores), function(y) {
+      df <- enrichmentScores[[y]][which(enrichmentScores[[y]]$pathway == name), c(2, 3)]
+      if (nrow(df) == 0) {
+        df <- data.frame(pval = NA, padj = NA)
+      }
+      names(df) <- paste0(y, "_", names(df))
+      df
+    }))
+  })
+
+  # Combine list elements to data frame
+  # and assign pathway names as rownames
+  res <- data.frame(do.call(rbind, res))
+
+  return(res)
+}
+
+
+
+#' Calculate a combined p-value for multiple omics layer.
+#'
+#' This function applies the Stouffer method, the Edgington method or the
+#' Fisher\'s combined probability test to combine p-values of independent tests
+#' that are based on the same null hypothesis. The Stouffer method can also be
+#' applied in a weighted fashion.
+#'
+#' @param df Data frame where rows represent a certain pathway or gene set and
+#'   columns represent p-values derived from independent tests, e.g., different
+#'   omics layer.
+#' @param method String that specifies the method to combine multiple p-values.
+#'   Default: "stouffer" Options: "stouffer", "fisher", "edgington"
+#' @param col_pattern String of the pattern that specifies the columns to be
+#'   combined. Default: "pval", Options: "pval", "padj" (legacy)
+#' @param weights List of weights that will be used in a weighted Stouffer
+#'   method.
+#'
+#' @return Vector of length \code{nrow(df)} with combined p-values.
+#'
+#' @examples
+#' df <- cbind(runif(5), runif(5), runif(5))
+#' colnames(df) <- c("trans.pval", "prot.pval", "meta.pval")
+#'
+#' # run the unweighted summation of z values
+#' combinePvalues(df)
+#'
+#' # run the weighted variant
+#' combinePvalues(df, weights = c(10, 5, 1))
+#'
+#' # run the Fisher's combined probability test
+#' combinePvalues(df, method = "fisher")
+#'
+#' # run the Edgington's method
+#' combinePvalues(df, method = "edgington")
+#' @importFrom metap sumz sumlog sump
+#'
+#' @export
+combinePvalues <- function(df, method = "stouffer", col_pattern = "pval", weights = NULL) {
+  method <- tolower(method)
+  if (!method %in% c("stouffer", "fisher", "edgington")) {
+    stop("You can chose between the 'stouffer', 'edgington',
+              and 'fisher' method to combine p-values.",
+      call. = FALSE
+    )
+  }
+
+  if (!col_pattern %in% c("pval", "padj")) {
+    stop("You can chose between 'pval' and 'padj' (legacy),
+          to select columns for combining p-values.",
+      call. = FALSE
+    )
+  }
+
+  cols <- grep(col_pattern, colnames(df))
+
+  pvals <- apply(df[,cols], 1, function(row) {
+    row <- row[!is.na(row)]
+
+    if (length(row) >= 2) {
+      if (method == "fisher") {
+        p <- metap::sumlog(row)
+        p$p
+      } else if (method == "edgington") {
+        p <- metap::sump(row)
+        p$p
+      } else {
+        ## sumz allows only p-values smaller than 1
+        row <- row[row > 0 & row < 1]
+
+        if (length(row) >= 2) {
+          if (length(weights) > 0) {
+            p <- metap::sumz(row, weights = weights)
+          } else {
+            p <- metap::sumz(row)
+          }
+          p$p
+        } else if (length(row == 1)) {
+          row[1]
+        } else {
+          NA
+        }
+      }
+    } else if (length(row) == 1) {
+      row[1]
+    } else {
+      NA
+    }
+  })
+
+  return(pvals)
+}
+