multiGSEA / Git / [72a7c3] /R/enrichment

Models:
AlyssaS/
multiGSEA
Downloads: 1
[72a7c3]: / R / enrichment_functions.R
History
Download this file
200 lines (182 with data), 6.5 kB

#' Calculate pathway enrichment for multiple omics layer.
#'
#' This function calculates GSEA-based enrichments scores for multiple omics
#' layer at once. Input pathways or gene sets have to be prepared in advance by
#' means of the function \code{\link[multiGSEA]{initOmicsDataStructure}}. The function uses pre-
#' ranked lists for each omics layer to calculate the enrichment score. The
#' ranking can be calculated by means of the function
#' \link[multiGSEA]{rankFeatures}.
#'
#' @param pathways Nested list containing all pathway features for the
#'   respective omics layer.
#' @param ranks Nested list containing the measured and pre-ranked features for
#'   each omics layer.
#' @param eps This parameter sets the boundary for calculating the p value.
#'
#' @return Nested list containing the enrichment scores for each given pathway
#'   and omics layer.
#'
#' @examples
#'
#' # Download pathway definition and extract features
#' pathways <- getMultiOmicsFeatures(dbs = c("kegg"), layer = c("transcriptome", "proteome"))
#'
#' # load omics data and calculate ranks
#' data(transcriptome)
#' data(proteome)
#' ranks <- initOmicsDataStructure(c("transcriptome", "proteome"))
#' ranks$transcriptome <- rankFeatures(transcriptome$logFC, transcriptome$pValue)
#' names(ranks$transcriptome) <- transcriptome$Symbol
#' ranks$proteome <- rankFeatures(proteome$logFC, proteome$pValue)
#' names(ranks$proteome) <- proteome$Symbol
#'
#' ## run the enrichment
#' multiGSEA(pathways, ranks)
#' @importFrom fgsea fgseaMultilevel
#'
#' @export
multiGSEA <- function(pathways, ranks, eps = 0) {
  # Go through all omics layer.
  es <- lapply(names(pathways), function(omics) {
    fgsea::fgseaMultilevel(pathways[[omics]], ranks[[omics]], eps = eps)
  })

  names(es) <- names(pathways)

  return(es)
}



#' Create a reshaped data frame from multiGSEA output.
#'
#' This function reshapes the output from multiGSEA to get a single data frame
#' with columns for p-values and adjusted p-values for each omics layer. Each
#' row of the data frame represents one pathway.
#'
#' @param enrichmentScores Nested List of enrichment scores, calculated by
#'   multiGSEA function.
#' @param pathwayNames List containing Pathway names.
#'
#' @return Data frame where rows are pathways and columns are (adjusted)
#'   p-values for each omics layer.
#'
#' @examples
#' # Download pathway definition and extract features
#' pathways <- getMultiOmicsFeatures(dbs = c("kegg"), layer = c("transcriptome", "proteome"))
#'
#' # load omics data and calculate ranks
#' data(transcriptome)
#' data(proteome)
#' ranks <- initOmicsDataStructure(c("transcriptome", "proteome"))
#' ranks$transcriptome <- rankFeatures(transcriptome$logFC, transcriptome$pValue)
#' names(ranks$transcriptome) <- transcriptome$Symbol
#' ranks$proteome <- rankFeatures(proteome$logFC, proteome$pValue)
#' names(ranks$proteome) <- proteome$Symbol
#'
#' # run the enrichment
#' es <- multiGSEA(pathways, ranks)
#'
#' extractPvalues(
#'   enrichmentScores = es,
#'   pathwayNames = names(pathways[[1]])
#' )
#' @export
extractPvalues <- function(enrichmentScores, pathwayNames) {
  # Go through all the pathways
  res <- lapply(pathwayNames, function(name) {
    # Go through all the possible omics layer
    unlist(lapply(names(enrichmentScores), function(y) {
      df <- enrichmentScores[[y]][which(enrichmentScores[[y]]$pathway == name), c(2, 3)]
      if (nrow(df) == 0) {
        df <- data.frame(pval = NA, padj = NA)
      }
      names(df) <- paste0(y, "_", names(df))
      df
    }))
  })

  # Combine list elements to data frame
  # and assign pathway names as rownames
  res <- data.frame(do.call(rbind, res))

  return(res)
}



#' Calculate a combined p-value for multiple omics layer.
#'
#' This function applies the Stouffer method, the Edgington method or the
#' Fisher\'s combined probability test to combine p-values of independent tests
#' that are based on the same null hypothesis. The Stouffer method can also be
#' applied in a weighted fashion.
#'
#' @param df Data frame where rows represent a certain pathway or gene set and
#'   columns represent p-values derived from independent tests, e.g., different
#'   omics layer.
#' @param method String that specifies the method to combine multiple p-values.
#'   Default: "stouffer" Options: "stouffer", "fisher", "edgington"
#' @param col_pattern String of the pattern that specifies the columns to be
#'   combined. Default: "pval", Options: "pval", "padj" (legacy)
#' @param weights List of weights that will be used in a weighted Stouffer
#'   method.
#'
#' @return Vector of length \code{nrow(df)} with combined p-values.
#'
#' @examples
#' df <- cbind(runif(5), runif(5), runif(5))
#' colnames(df) <- c("trans.pval", "prot.pval", "meta.pval")
#'
#' # run the unweighted summation of z values
#' combinePvalues(df)
#'
#' # run the weighted variant
#' combinePvalues(df, weights = c(10, 5, 1))
#'
#' # run the Fisher's combined probability test
#' combinePvalues(df, method = "fisher")
#'
#' # run the Edgington's method
#' combinePvalues(df, method = "edgington")
#' @importFrom metap sumz sumlog sump
#'
#' @export
combinePvalues <- function(df, method = "stouffer", col_pattern = "pval", weights = NULL) {
  method <- tolower(method)
  if (!method %in% c("stouffer", "fisher", "edgington")) {
    stop("You can chose between the 'stouffer', 'edgington',
              and 'fisher' method to combine p-values.",
      call. = FALSE
    )
  }

  if (!col_pattern %in% c("pval", "padj")) {
    stop("You can chose between 'pval' and 'padj' (legacy),
          to select columns for combining p-values.",
      call. = FALSE
    )
  }

  cols <- grep(col_pattern, colnames(df))

  pvals <- apply(df[,cols], 1, function(row) {
    row <- row[!is.na(row)]

    if (length(row) >= 2) {
      if (method == "fisher") {
        p <- metap::sumlog(row)
        p$p
      } else if (method == "edgington") {
        p <- metap::sump(row)
        p$p
      } else {
        ## sumz allows only p-values smaller than 1
        row <- row[row > 0 & row < 1]

        if (length(row) >= 2) {
          if (length(weights) > 0) {
            p <- metap::sumz(row, weights = weights)
          } else {
            p <- metap::sumz(row)
          }
          p$p
        } else if (length(row == 1)) {
          row[1]
        } else {
          NA
        }
      }
    } else if (length(row) == 1) {
      row[1]
    } else {
      NA
    }
  })

  return(pvals)
}