Diff of /R/compSurv.R [000000] .. [494cbf]

Switch to side-by-side view

--- a
+++ b/R/compSurv.R
@@ -0,0 +1,232 @@
+#' @name compSurv
+#' @title Comparison of prognosis by Kaplan-Meier survival curve
+#' @description This function calculates Kaplan-meier estimator and generate survival curves with log-rank test to detect prognostic difference among identified subtypes. If more than 2 subtypes are identified, pair-wise comparisons will be performed with an additional table printed on the survival curve.
+#' @param moic.res An object returned by `getMOIC()` with one specified algorithm or `get\%algorithm_name\%` or `getConsensusMOIC()` with a list of multiple algorithms.
+#' @param surv.info A data.frame with rownames of observations and with at least two columns of `futime` for survival time and `fustat` for survival status (0: censoring; 1: event)
+#' @param clust.col A string vector storing colors for each subtype.
+#' @param p.adjust.method A string value for indicating method for adjusting p values (see \link[stats]{p.adjust}). Allowed values include one of c(`holm`, `hochberg`, `hommel`, `bonferroni`, `BH`, `BY`, `fdr`, `none`); "BH" by default.
+#' @param fig.name A string value to indicate the output path for storing the kaplan-meier curve.
+#' @param fig.path A string value to indicate the name of the kaplan-meier curve.
+#' @param convt.time A string value to indicate how to convert the survival time; value of `d` for days, `m` for months and `y` for years; "d" by default.
+#' @param surv.median.line A string value for drawing a horizontal/vertical line at median survival. Allowed values include one of c(`none`, `hv`, `h`, `v`). v: vertical, h:horizontal; "none" by default.
+#' @param xyrs.est An integer vector to estimate probability of surviving beyond a certain number (x) of years (Estimating x-year survival); NULL by default.
+#' @param surv.cut A numeric value to indicate the x-axis cutoff for showing the maximal survival time. NULL by default (show 0-maximum survival time range).
+#' @return A figure of multi-omics Kaplan-Meier curve (.pdf) and a list with the following components:
+#'
+#'         \code{fitd}       an object returned by \link[survival]{survdiff}.
+#'
+#'         \code{fid}        an object returned by \link[survival]{survfit}.
+#'
+#'         \code{xyrs.est}   x-year probability of survival and the associated lower and upper bounds of the 95% confidence interval are also displayed if argument of `xyrs.est` was set by users.
+#'
+#'         \code{overall.p}  a nominal p.value calculated by Kaplain-Meier estimator with log-rank test.
+#'
+#'         \code{pairwise.p} an object of class "pairwise.htest" which is a list containing the p values (see \link[survminer]{pairwise_survdiff}); (\emph{only returned when more than 2 subtypes are identified}).
+#' @import survival
+#' @import survminer
+#' @import ggplot2
+#' @importFrom grDevices pdf dev.off pdf.options
+#' @importFrom ggpp geom_table
+#' @importFrom tibble tibble
+#' @export
+#' @examples # There is no example and please refer to vignette.
+compSurv <- function(moic.res         = NULL,
+                     surv.info        = NULL,
+                     convt.time       = "d",
+                     surv.cut         = NULL,
+                     xyrs.est         = NULL,
+                     clust.col        = c("#2EC4B6","#E71D36","#FF9F1C","#BDD5EA","#FFA5AB","#011627","#023E8A","#9D4EDD"),
+                     p.adjust.method  = "BH",
+                     surv.median.line = "none",
+                     fig.name         = NULL,
+                     fig.path         = getwd()){
+
+  if(!all(is.element(c("futime","fustat"), colnames(surv.info)))) {
+    stop("fail to find variables of futime and fustat.")
+  }
+
+  if(!all(is.element(convt.time, c("d","m","y")))) {
+    stop("unsupported time conversion. Allowed values contain c('d', 'm', 'y').")
+  }
+
+  # get common samples
+  comsam <- intersect(rownames(surv.info),rownames(moic.res$clust.res))
+  mosurv.res <- cbind.data.frame(surv.info[comsam,c("futime","fustat")],
+                                 moic.res$clust.res[comsam, "clust", drop = FALSE])
+  message(paste0("--a total of ",length(comsam), " samples are identified."))
+
+  # remove missing data if possible
+  if(sum(c(is.na(mosurv.res$futime), is.na(mosurv.res$fustat))) > 0) {
+    message("--removed missing values.")
+    mosurv.res <- as.data.frame(na.omit(mosurv.res))
+    message(paste0("--leaving ",nrow(mosurv.res), " observations."))
+  }
+
+  # check time unit
+  if(max(mosurv.res$futime) < 365) {
+    warning("it seems the 'futime' might not in [day] unit, please make sure you provide the correct survival information.")
+  }
+
+  # create new variable of Subtype
+  mosurv.res$Subtype <- paste0("CS", mosurv.res$clust)
+  mosurv.res <- mosurv.res[order(mosurv.res$Subtype),]
+
+  # estimate x-year survival probability
+  if(!is.null(xyrs.est)) {
+    if(max(xyrs.est) * 365 < max(mosurv.res$futime)) {
+      xyrs <- summary(survfit(Surv(futime, fustat) ~ Subtype, data = mosurv.res), times = xyrs.est * 365)
+    } else {
+      stop("the maximal survival time is less than the time point you want to estimate!")
+    }
+  } else {
+    xyrs <- "[Not Available]: argument of xyrs.est was not specified."
+  }
+
+  # convert survival time
+  mosurv.res$futime <- switch(convt.time,
+                              "d" = mosurv.res$futime,
+                              "m" = round(mosurv.res$futime/30.5,4),
+                              "y" = round(mosurv.res$futime/365,4))
+
+  date.lab <- switch(convt.time,
+                     "d" = "Days",
+                     "m" = "Months",
+                     "y" = "Years")
+
+  # truncate survival time
+  if(date.lab == "Days" & max(mosurv.res$futime) > 3650) {
+    #xlim = c(0, 3650)
+    brk = 365
+  }
+  if(date.lab == "Days" & max(mosurv.res$futime) <= 3650) {
+    #xlim = c(0, max(mosurv.res$futime))
+    brk = floor(max(mosurv.res$futime)/10)
+  }
+  if(date.lab == "Months" & max(mosurv.res$futime) > 120) {
+    #xlim = c(0, 120)
+    brk = 12
+  }
+  if(date.lab == "Months" & max(mosurv.res$futime) <= 120) {
+    #xlim = c(0, max(mosurv.res$futime))
+    brk = floor(max(mosurv.res$futime)/10)
+  }
+  if(date.lab == "Years" & max(mosurv.res$futime) > 10) {
+    #xlim = c(0, 10)
+    brk = 1
+  }
+  if(date.lab == "Years" & max(mosurv.res$futime) <= 10) {
+    #xlim = c(0, max(mosurv.res$futime))
+    brk = 1
+  }
+
+  if(is.null(surv.cut)) {
+    xlim = c(0, max(mosurv.res$futime))
+  } else {
+    message(paste0("--cut survival curve up to ",surv.cut," ",tolower(date.lab)))
+    xlim = c(0, surv.cut)
+  }
+
+
+  # if(!is.null(surv.cut)) {
+  #   xlim = c(0, surv.cut)
+  #   brk = surv.cut/10
+  # } else {message("--cut survival curve up to 10 years.")}
+
+  n.moic <- length(unique(mosurv.res$Subtype))
+
+  # basic survival analysis
+  fitd <- survdiff(Surv(futime, fustat) ~ Subtype,
+                   data      = mosurv.res,
+                   na.action = na.exclude)
+  p.val <- 1 - pchisq(fitd$chisq, length(fitd$n) - 1)
+  fit <- survfit(Surv(futime, fustat)~ Subtype,
+                 data      = mosurv.res,
+                 type      = "kaplan-meier",
+                 error     = "greenwood",
+                 conf.type = "plain",
+                 na.action = na.exclude)
+
+  # hack strata for better survival curve
+  names(fit$strata) <- gsub("Subtype=", "", names(fit$strata))
+
+  # kaplan-meier curve
+  p <- suppressWarnings(ggsurvplot(fit               = fit,
+                                   conf.int          = FALSE,
+                                   risk.table        = TRUE,
+                                   risk.table.col    = "strata",
+                                   palette           = clust.col[1:n.moic],
+                                   data              = mosurv.res,
+                                   size              = 1,
+                                   xlim              = xlim,
+                                   break.time.by     = brk,
+                                   legend.title      = "",
+                                   surv.median.line  = surv.median.line,
+                                   xlab              = paste0("Time (",date.lab,")"),
+                                   ylab              = "Survival probability (%)",
+                                   risk.table.y.text = FALSE))
+
+  # make survival time as percentage
+  p$plot <- quiet(p$plot + scale_y_continuous(breaks = seq(0, 1, 0.25), labels = seq(0,100,25)))
+
+  if(n.moic > 2) {
+
+    # add nominal pvalue for log-rank test
+    p.lab <- paste0("Overall P",
+                    ifelse(p.val < 0.001, " < 0.001",
+                           paste0(" = ",round(p.val, 3))))
+
+    p$plot <- p$plot + annotate("text",
+                                x = 0, y = 0.55,
+                                hjust = 0,
+                                fontface = 4,
+                                label = p.lab)
+
+    # calculate pair-wise survival comparison
+    ps <- pairwise_survdiff(Surv(futime, fustat)~ Subtype,
+                            data            = mosurv.res,
+                            p.adjust.method = p.adjust.method)
+
+    # add pair-wise comparison table
+    # options(stringsAsFactors = FALSE)
+    addTab <- as.data.frame(as.matrix(ifelse(round(ps$p.value, 3) < 0.001, "<0.001",
+                                             round(ps$p.value, 3))))
+    addTab[is.na(addTab)] <- "-"
+    # options(stringsAsFactors = TRUE)
+
+    df <- tibble(x = 0, y = 0, tb = list(addTab))
+    p$plot <- p$plot + geom_table(data = df, aes(x = x, y = y, label = tb), table.rownames = TRUE)
+
+  } else {
+    # add nominal pvalue for log-rank test
+    p.lab <- paste0("P",
+                    ifelse(p.val < 0.001, " < 0.001",
+                           paste0(" = ",round(p.val, 3))))
+
+    p$plot <- p$plot + annotate("text",
+                                x = 0, y = 0.55,
+                                hjust = 0,
+                                fontface = 4,
+                                label = p.lab)
+  }
+
+  # output curve to pdf
+  if(!is.null(fig.name)) {
+    outFile <- file.path(fig.path,paste0(fig.name,".pdf"))
+  } else {
+    outFile <- file.path(fig.path,paste0("km_curve_",moic.res$mo.method,".pdf"))
+  }
+  pdf.options(reset = TRUE, onefile = FALSE)
+  pdf(outFile, width = 6, height = 7)
+  # ggsave(outFile, width = 6, height = 7)
+  print(p)
+  dev.off()
+
+  # output curve to screen
+  print(p)
+
+  if(n.moic > 2) {
+    return(list(fitd = fitd, fit = fit, xyrs.est = xyrs, overall.p = p.val, pairwise.p = ps))
+  } else {
+    return(list(fitd = fitd, fit = fit, xyrs.est = xyrs, overall.p = p.val))
+  }
+}