MOVICS / Git / Diff of /R/compFGA.R

Models:
AlyssaS/
MOVICS
Downloads: 1
Diff of /R/compFGA.R [000000] .. [494cbf]
Switch to side-by-side view

--- a
+++ b/R/compFGA.R
@@ -0,0 +1,284 @@
+#' @name compFGA
+#' @title Comparison of fraction genome altered
+#' @description This function calculates Fraction Genome Altered (FGA), Fraction Genome Gained (FGG), and Fraction Genome Lost (FGL) seperately, and compares them among curent subtypes identified from multi-omics integrative clustering algorithms.
+#' @param moic.res An object returned by `getMOIC()` with one specified algorithm or `get\%algorithm_name\%` or `getConsensusMOIC()` with a list of multiple algorithms.
+#' @param segment A data frame containing segmented copy number and columns must exactly include the following elements: c('sample','chrom','start','end','value'). Column of `value` should be segments value when \code{iscopynumber = FALSE} but copy-number value when \code{iscopynumber = TRUE}. Copy-number will be converted to segments by log2(copy-number/2).
+#' @param iscopynumber A logical value to indicate if the fifth column of segment input is copy-number. If segment file derived from CNV calling provides copy number instead of segment_mean value, this argument must be switched to TRUE. FALSE by default.
+#' @param cnathreshold A numeric value to indicate the cutoff for identifying copy-number gain or loss. 0.2 by default.
+#' @param test.method A string value to indicate the method for statistical testing. Allowed values contain c('nonparametric', 'parametric'); nonparametric means two-sample wilcoxon rank sum test for two subtypes and Kruskal-Wallis rank sum test for multiple subtypes; parametric means two-sample t-test when only two subtypes are identified, and anova for multiple subtypes comparison; "nonparametric" by default.
+#' @param barcolor A string vector to indicate the mapping color for bars of FGA, FGG and FGL.
+#' @param clust.col A string vector storing colors for each subtype.
+#' @param fig.path A string value to indicate the output path for storing the barplot.
+#' @param fig.name A string value to indicate the name of the barplot.
+#' @param width A numeric value to indicate the width of barplot.
+#' @param height A numeric value to indicate the height of barplot.
+#' @return A list contains the following components:
+#'
+#'         \code{summary}           a table summarizing the measurements of FGA, FGG, and FGL per sample
+#'
+#'         \code{FGA.p.value}       a nominal p value quantifying the difference of FGA among current subtypes
+#'
+#'         \code{pairwise.FGA.test} a pairwise BH adjusted p value matrix for multiple comparisons of FGA if more than 2 subtypes were identified
+#'
+#'         \code{FGG.p.value}       a nominal p value quantifying the difference of FGG among current subtypes
+#'
+#'         \code{pairwise.FGG.test} a pairwise BH adjusted p value matrix for multiple comparisons of FGG if more than 2 subtypes were identified
+#'
+#'         \code{FGL.p.value}       a nominal p value quantifying the difference of FGL among current subtypes
+#'
+#'         \code{pairwise.FGL.test} a pairwise BH adjusted p value matrix for multiple comparisons of FGL if more than 2 subtypes were identified
+#'
+#'         \code{test.method}       a string value indicating the statistical testing method to calculate p values
+#'
+#' @export
+#' @import ggplot2
+#' @import patchwork
+#' @importFrom dplyr group_by summarize %>%
+#' @importFrom ggpubr stat_compare_means
+#' @examples # There is no example and please refer to vignette.
+#' @references Cerami E, Gao J, Dogrusoz U, et al. (2012). The cBio Cancer Genomics Portal: An Open Platform for Exploring Multidimensional Cancer Genomics Data. Cancer Discov, 2(5):401-404.
+#'
+#' Gao J, Aksoy B A, Dogrusoz U, et al. (2013). Integrative analysis of complex cancer genomics and clinical profiles using the cBioPortal. Sci Signal, 6(269):pl1-pl1.
+compFGA <- function(moic.res     = NULL,
+                    segment      = NULL,
+                    iscopynumber = FALSE,
+                    cnathreshold = 0.2,
+                    test.method  = "nonparametric",
+                    barcolor     = c("#008B8A", "#F2042C", "#21498D"),
+                    clust.col    = c("#2EC4B6","#E71D36","#FF9F1C","#BDD5EA","#FFA5AB","#011627","#023E8A","#9D4EDD"),
+                    fig.path     = getwd(),
+                    fig.name     = NULL,
+                    width        = 8,
+                    height       = 4) {
+
+  # check arguments
+  if(!all(is.element(c("sample","chrom","start","end","value"), colnames(segment)))) {
+    stop("segment data must have the following columns: sample, chrom, start, end, value.")
+  }
+
+  if(iscopynumber) {
+    segment$value <- log2(segment$value/2) # convert copy-number to segment-mean value
+  }
+
+  comsam <- intersect(moic.res$clust.res$samID, unique(segment[,1]))
+  # check data
+  if(length(comsam) == nrow(moic.res$clust.res)) {
+    message("--all samples matched.")
+  } else {
+    message(paste0("--",(nrow(moic.res$clust.res)-length(comsam))," samples mismatched from current subtypes."))
+  }
+
+  if(!is.element(test.method, c("nonparametric","parametric"))) {
+    stop("test.method can be one of nonparametric or parametric.")
+  }
+
+  clust.res <- moic.res$clust.res[comsam, , drop = FALSE]
+  segment <- segment[which(segment$sample %in% comsam),]
+  n.moic <- length(unique(clust.res$clust))
+
+  # data process
+  segment$bases <- segment$end - segment$start
+
+  # calculate FGA, FGG and FGL
+  display.progress = function (index, totalN, breakN=20) {
+    if ( index %% ceiling(totalN/breakN)  == 0  ) {
+      cat(paste(round(index*100/totalN), "% ", sep = ""))
+    }
+  }
+  std <- function(x, na.rm = TRUE) {
+    if(na.rm) {
+      x <- as.numeric(na.omit(x))
+      sd(x)/sqrt(length(x))
+    } else {sd(x)/sqrt(length(x))}
+  }
+
+  outTab <- data.frame()
+  for (i in 1:length(unique(segment$sample))) {
+    display.progress(index = i, totalN = length(unique(segment$sample)))
+
+    tmp <- segment[segment$sample == names(table(segment$sample))[i],]
+
+    if (length(tmp[abs(tmp$value) > cnathreshold,"bases"][6]) == 0) {
+      FGA = 0
+    } else {
+      FGA = sum(tmp[abs(tmp$value) > cnathreshold,"bases"]) / sum(tmp[,"bases"])
+    }
+
+    if (length(tmp[tmp$value > cnathreshold,"bases"][6]) == 0) {
+      FGG = 0
+    } else {
+      FGG = sum(tmp[tmp$value > cnathreshold,"bases"]) / sum(tmp[,"bases"])
+    }
+
+    if (length(tmp[tmp$value < (-cnathreshold),"bases"][6]) == 0) {
+      FGL = 0
+    } else {
+      FGL = sum(tmp[tmp$value < (-cnathreshold),"bases"]) / sum(tmp[,"bases"])
+    }
+
+    tmp <- data.frame(samID            = names(table(segment$sample))[i],
+                      FGA              = FGA,
+                      FGG              = FGG,
+                      FGL              = FGL,
+                      stringsAsFactors = FALSE)
+    outTab <- rbind.data.frame(outTab, tmp, stringsAsFactors = FALSE)
+  }
+  outTab$Subtype <- paste0("CS", clust.res[outTab$samID, "clust"])
+
+  # calculate mean and se
+  summaryFGA  <- outTab %>% group_by(Subtype) %>% dplyr::summarize(mean = mean(FGA, na.rm = TRUE), se = std(FGA, na.rm = TRUE))
+  summaryFGG  <- outTab %>% group_by(Subtype) %>% dplyr::summarize(mean = mean(FGG, na.rm = TRUE), se = std(FGG, na.rm = TRUE))
+  summaryFGL  <- outTab %>% group_by(Subtype) %>% dplyr::summarize(mean = mean(FGL, na.rm = TRUE), se = std(FGL, na.rm = TRUE))
+  summaryFGGL <- data.frame(rbind.data.frame(summaryFGG,summaryFGL),class = rep(c("FGG","FGL"),c(nrow(summaryFGG),nrow(summaryFGL))),stringsAsFactors = FALSE)
+
+  # statistical testing
+  # generate boxviolin plot with statistical testing
+  if(n.moic == 2 & test.method == "nonparametric") {
+    statistic <- "wilcox.test"
+    FGA.test  <- wilcox.test(outTab$FGA ~ outTab$Subtype)$p.value
+    FGG.test  <- wilcox.test(outTab$FGG ~ outTab$Subtype)$p.value
+    FGL.test  <- wilcox.test(outTab$FGL ~ outTab$Subtype)$p.value
+  }
+
+  if(n.moic == 2 & test.method == "parametric") {
+    statistic <- "t.test"
+    FGA.test  <- t.test(outTab$FGA ~ outTab$Subtype)$p.value
+    FGG.test  <- t.test(outTab$FGG ~ outTab$Subtype)$p.value
+    FGL.test  <- t.test(outTab$FGL ~ outTab$Subtype)$p.value
+  }
+
+  if(n.moic > 2 & test.method == "nonparametric") {
+    statistic <- "kruskal.test"
+    FGA.test  <- kruskal.test(outTab$FGA ~ outTab$Subtype)$p.value
+    FGG.test  <- kruskal.test(outTab$FGG ~ outTab$Subtype)$p.value
+    FGL.test  <- kruskal.test(outTab$FGL ~ outTab$Subtype)$p.value
+    pairwise.FGA.test <- pairwise.wilcox.test(outTab$FGA,outTab$Subtype,p.adjust.method = "BH")
+    pairwise.FGG.test <- pairwise.wilcox.test(outTab$FGG,outTab$Subtype,p.adjust.method = "BH")
+    pairwise.FGL.test <- pairwise.wilcox.test(outTab$FGL,outTab$Subtype,p.adjust.method = "BH")
+  }
+
+  if(n.moic > 2 & test.method == "parametric") {
+    statistic <- "anova"
+    FGA.test  <- summary(aov(outTab$FGA ~ outTab$Subtype))[[1]][["Pr(>F)"]][1]
+    FGG.test  <- summary(aov(outTab$FGG ~ outTab$Subtype))[[1]][["Pr(>F)"]][1]
+    FGL.test  <- summary(aov(outTab$FGL ~ outTab$Subtype))[[1]][["Pr(>F)"]][1]
+    pairwise.FGA.test <- pairwise.t.test(outTab$FGA,outTab$Subtype,p.adjust.method = "BH")
+    pairwise.FGG.test <- pairwise.t.test(outTab$FGG,outTab$Subtype,p.adjust.method = "BH")
+    pairwise.FGL.test <- pairwise.t.test(outTab$FGL,outTab$Subtype,p.adjust.method = "BH")
+  }
+
+  # generate barplot
+  FGA.col <- barcolor[1]
+  FGG.col <- barcolor[2]
+  FGL.col <- barcolor[3]
+
+  p1 <- ggplot(summaryFGA, aes(x = Subtype, y = mean,fill=rep("0",nrow(summaryFGA)))) +
+    geom_bar(stat = 'identity') +
+    geom_errorbar(aes(ymax = mean+se, ymin = mean-se),position = position_dodge(0.9), width = 0.15) +
+    annotate(geom="text",
+             #hjust = ifelse(n.moic %% 2 == 0, 0.5, 0),
+             x = n.moic / 2 + 0.5,
+             #y = as.numeric(summaryFGA[which.max(summaryFGA$mean),"mean"] + summaryFGA[which.max(summaryFGA$mean),"se"]),
+             y = as.numeric(summaryFGA[which.max(summaryFGA$mean),"mean"]),
+             size = 8, angle = 90, fontface = "bold",
+             #label = paste0(statistic, " p = ", formatC(FGA.test, digits = 1, format = "e")),
+             label = cut(FGA.test,c(0,0.001,0.01,0.05,0.1,1),labels = c("****","***","**","*","."))) +
+    scale_x_discrete(name = "",position = "top") +
+    theme_bw() +
+    theme(axis.line.y = element_line(size = 0.8),
+          axis.ticks.y = element_line(size = 0.2),
+          axis.text.y = element_blank(),
+          axis.title.x = element_text(vjust = -0.3,size = 12),
+          axis.text.x = element_text(size = 10, color = "black"),
+          plot.margin = unit(c(0.3, -1.7, 0.3, 0.3), "lines"),
+          legend.title = element_blank()) +
+    coord_flip() +
+    scale_fill_manual(values  = FGA.col, breaks = c("0"), labels = c("Copy number-altered genome")) +
+    scale_y_reverse(expand = c(0.01,0),
+                    name = "FGA (Fraction of Genome Altered)", position = "left")
+
+  p2 <- ggplot(summaryFGGL, aes(x = Subtype, y = ifelse(class == 'FGG',mean,-mean),fill=class)) +
+    geom_bar(stat = 'identity') +
+    geom_errorbar(data=summaryFGGL[summaryFGGL$class=='FGG',],aes(ymax = mean+se, ymin =mean-se),position = position_dodge(0.9), width = 0.15) +
+    geom_errorbar(data=summaryFGGL[summaryFGGL$class=='FGL',],aes(ymax = -mean-se, ymin =-mean+se),position = position_dodge(0.9), width = 0.15) +
+    annotate(geom="text",
+
+             x = n.moic / 2 + 0.5,
+             y = -as.numeric(summaryFGL[which.max(summaryFGL$mean),"mean"]),
+             size = 8, angle = 90, fontface = "bold",
+             label = cut(FGL.test,c(0,0.001,0.01,0.05,0.1,1),labels = c("****","***","**","*","."))) +
+    annotate(geom="text",
+             x = n.moic / 2 + 0.5,
+             y = as.numeric(summaryFGG[which.max(summaryFGG$mean),"mean"]),
+             size = 8, angle = 90, fontface = "bold",
+             label = cut(FGG.test,c(0,0.001,0.01,0.05,0.1,1),labels = c("****","***","**","*","."))) +
+    scale_x_discrete(name = "") +
+    theme_bw() +
+    theme(axis.line.y = element_line(size = 0.8),
+          axis.ticks.y = element_line(size = 0.2),
+          axis.text.y = element_blank(),
+          axis.title.x = element_text(vjust = -0.3, size = 12),
+          axis.text.x = element_text(size = 10, color = "black"),
+          plot.margin = unit(c(0.3, 0.3, 0.3, -1), "lines"),
+          legend.title = element_blank()) +
+    coord_flip() +
+    scale_fill_manual(values  = c(FGL.col, FGG.col), breaks = c("FGL","FGG"),
+                      labels = c("Copy number-lost genome","Copy number-gained genome")) +
+    scale_y_continuous(expand = c(0.01,0),
+                       name = "FGL or FGG (Fraction of Genome Lost or Gained)")
+
+  pp <- ggplot() +
+    # geom_text(data = summaryFGGL,
+    #           aes(label = Subtype, x=Subtype), y = 0.5,
+    #           size = 0.8*11/.pt, # match font size to theme
+    #           hjust = 0.5, vjust = 0.5) +
+    geom_label(data = summaryFGGL,
+               aes(label = Subtype, x = Subtype, fill = Subtype),
+               y = 0.5,
+               color = "white",
+               size = 0.9*11/.pt, # match font size to theme
+               hjust = 0.4, vjust = 0.5) +
+    scale_fill_manual(values = clust.col) +
+    theme_minimal()+
+    theme(axis.line.y =element_blank(),
+          axis.ticks.y =element_blank(),
+          axis.text.y =element_blank(),
+          axis.title.y =element_blank(),
+          axis.title.x =element_blank(),
+          plot.margin = unit(c(0.3, 0, 0.3, 0), "lines")
+    ) +
+    guides(fill = FALSE) +
+    coord_flip() +
+    scale_y_reverse()
+
+  pal <- p1 + pp + p2 +
+    plot_layout(widths = c(7,1,7), guides = 'collect') & theme(legend.position = 'top')
+
+  # save to pdf
+  if(is.null(fig.name)) {
+    outFig <- "barplot of FGA.pdf"
+  } else {
+    outFig <- paste0(fig.name,".pdf")
+  }
+  ggsave(file.path(fig.path, outFig), width = width, height = height)
+
+  # print to screen
+  print(pal)
+
+  if(n.moic > 2) {
+    return(list(summary = outTab,
+                FGA.p.value = FGA.test,
+                pairwise.FGA.test = pairwise.FGA.test,
+                FGG.p.value = FGG.test,
+                pairwise.FGG.test = pairwise.FGG.test,
+                FGL.p.value = FGL.test,
+                pairwise.FGL.test = pairwise.FGL.test,
+                test.method = statistic))
+  } else {
+    return(list(summary = outTab,
+                FGA.p.value = FGA.test,
+                FGG.p.value = FGG.test,
+                FGL.p.value = FGL.test,
+                test.method = statistic))
+  }
+}