--- a +++ b/R/compSurv.R @@ -0,0 +1,232 @@ +#' @name compSurv +#' @title Comparison of prognosis by Kaplan-Meier survival curve +#' @description This function calculates Kaplan-meier estimator and generate survival curves with log-rank test to detect prognostic difference among identified subtypes. If more than 2 subtypes are identified, pair-wise comparisons will be performed with an additional table printed on the survival curve. +#' @param moic.res An object returned by `getMOIC()` with one specified algorithm or `get\%algorithm_name\%` or `getConsensusMOIC()` with a list of multiple algorithms. +#' @param surv.info A data.frame with rownames of observations and with at least two columns of `futime` for survival time and `fustat` for survival status (0: censoring; 1: event) +#' @param clust.col A string vector storing colors for each subtype. +#' @param p.adjust.method A string value for indicating method for adjusting p values (see \link[stats]{p.adjust}). Allowed values include one of c(`holm`, `hochberg`, `hommel`, `bonferroni`, `BH`, `BY`, `fdr`, `none`); "BH" by default. +#' @param fig.name A string value to indicate the output path for storing the kaplan-meier curve. +#' @param fig.path A string value to indicate the name of the kaplan-meier curve. +#' @param convt.time A string value to indicate how to convert the survival time; value of `d` for days, `m` for months and `y` for years; "d" by default. +#' @param surv.median.line A string value for drawing a horizontal/vertical line at median survival. Allowed values include one of c(`none`, `hv`, `h`, `v`). v: vertical, h:horizontal; "none" by default. +#' @param xyrs.est An integer vector to estimate probability of surviving beyond a certain number (x) of years (Estimating x-year survival); NULL by default. +#' @param surv.cut A numeric value to indicate the x-axis cutoff for showing the maximal survival time. NULL by default (show 0-maximum survival time range). +#' @return A figure of multi-omics Kaplan-Meier curve (.pdf) and a list with the following components: +#' +#' \code{fitd} an object returned by \link[survival]{survdiff}. +#' +#' \code{fid} an object returned by \link[survival]{survfit}. +#' +#' \code{xyrs.est} x-year probability of survival and the associated lower and upper bounds of the 95% confidence interval are also displayed if argument of `xyrs.est` was set by users. +#' +#' \code{overall.p} a nominal p.value calculated by Kaplain-Meier estimator with log-rank test. +#' +#' \code{pairwise.p} an object of class "pairwise.htest" which is a list containing the p values (see \link[survminer]{pairwise_survdiff}); (\emph{only returned when more than 2 subtypes are identified}). +#' @import survival +#' @import survminer +#' @import ggplot2 +#' @importFrom grDevices pdf dev.off pdf.options +#' @importFrom ggpp geom_table +#' @importFrom tibble tibble +#' @export +#' @examples # There is no example and please refer to vignette. +compSurv <- function(moic.res = NULL, + surv.info = NULL, + convt.time = "d", + surv.cut = NULL, + xyrs.est = NULL, + clust.col = c("#2EC4B6","#E71D36","#FF9F1C","#BDD5EA","#FFA5AB","#011627","#023E8A","#9D4EDD"), + p.adjust.method = "BH", + surv.median.line = "none", + fig.name = NULL, + fig.path = getwd()){ + + if(!all(is.element(c("futime","fustat"), colnames(surv.info)))) { + stop("fail to find variables of futime and fustat.") + } + + if(!all(is.element(convt.time, c("d","m","y")))) { + stop("unsupported time conversion. Allowed values contain c('d', 'm', 'y').") + } + + # get common samples + comsam <- intersect(rownames(surv.info),rownames(moic.res$clust.res)) + mosurv.res <- cbind.data.frame(surv.info[comsam,c("futime","fustat")], + moic.res$clust.res[comsam, "clust", drop = FALSE]) + message(paste0("--a total of ",length(comsam), " samples are identified.")) + + # remove missing data if possible + if(sum(c(is.na(mosurv.res$futime), is.na(mosurv.res$fustat))) > 0) { + message("--removed missing values.") + mosurv.res <- as.data.frame(na.omit(mosurv.res)) + message(paste0("--leaving ",nrow(mosurv.res), " observations.")) + } + + # check time unit + if(max(mosurv.res$futime) < 365) { + warning("it seems the 'futime' might not in [day] unit, please make sure you provide the correct survival information.") + } + + # create new variable of Subtype + mosurv.res$Subtype <- paste0("CS", mosurv.res$clust) + mosurv.res <- mosurv.res[order(mosurv.res$Subtype),] + + # estimate x-year survival probability + if(!is.null(xyrs.est)) { + if(max(xyrs.est) * 365 < max(mosurv.res$futime)) { + xyrs <- summary(survfit(Surv(futime, fustat) ~ Subtype, data = mosurv.res), times = xyrs.est * 365) + } else { + stop("the maximal survival time is less than the time point you want to estimate!") + } + } else { + xyrs <- "[Not Available]: argument of xyrs.est was not specified." + } + + # convert survival time + mosurv.res$futime <- switch(convt.time, + "d" = mosurv.res$futime, + "m" = round(mosurv.res$futime/30.5,4), + "y" = round(mosurv.res$futime/365,4)) + + date.lab <- switch(convt.time, + "d" = "Days", + "m" = "Months", + "y" = "Years") + + # truncate survival time + if(date.lab == "Days" & max(mosurv.res$futime) > 3650) { + #xlim = c(0, 3650) + brk = 365 + } + if(date.lab == "Days" & max(mosurv.res$futime) <= 3650) { + #xlim = c(0, max(mosurv.res$futime)) + brk = floor(max(mosurv.res$futime)/10) + } + if(date.lab == "Months" & max(mosurv.res$futime) > 120) { + #xlim = c(0, 120) + brk = 12 + } + if(date.lab == "Months" & max(mosurv.res$futime) <= 120) { + #xlim = c(0, max(mosurv.res$futime)) + brk = floor(max(mosurv.res$futime)/10) + } + if(date.lab == "Years" & max(mosurv.res$futime) > 10) { + #xlim = c(0, 10) + brk = 1 + } + if(date.lab == "Years" & max(mosurv.res$futime) <= 10) { + #xlim = c(0, max(mosurv.res$futime)) + brk = 1 + } + + if(is.null(surv.cut)) { + xlim = c(0, max(mosurv.res$futime)) + } else { + message(paste0("--cut survival curve up to ",surv.cut," ",tolower(date.lab))) + xlim = c(0, surv.cut) + } + + + # if(!is.null(surv.cut)) { + # xlim = c(0, surv.cut) + # brk = surv.cut/10 + # } else {message("--cut survival curve up to 10 years.")} + + n.moic <- length(unique(mosurv.res$Subtype)) + + # basic survival analysis + fitd <- survdiff(Surv(futime, fustat) ~ Subtype, + data = mosurv.res, + na.action = na.exclude) + p.val <- 1 - pchisq(fitd$chisq, length(fitd$n) - 1) + fit <- survfit(Surv(futime, fustat)~ Subtype, + data = mosurv.res, + type = "kaplan-meier", + error = "greenwood", + conf.type = "plain", + na.action = na.exclude) + + # hack strata for better survival curve + names(fit$strata) <- gsub("Subtype=", "", names(fit$strata)) + + # kaplan-meier curve + p <- suppressWarnings(ggsurvplot(fit = fit, + conf.int = FALSE, + risk.table = TRUE, + risk.table.col = "strata", + palette = clust.col[1:n.moic], + data = mosurv.res, + size = 1, + xlim = xlim, + break.time.by = brk, + legend.title = "", + surv.median.line = surv.median.line, + xlab = paste0("Time (",date.lab,")"), + ylab = "Survival probability (%)", + risk.table.y.text = FALSE)) + + # make survival time as percentage + p$plot <- quiet(p$plot + scale_y_continuous(breaks = seq(0, 1, 0.25), labels = seq(0,100,25))) + + if(n.moic > 2) { + + # add nominal pvalue for log-rank test + p.lab <- paste0("Overall P", + ifelse(p.val < 0.001, " < 0.001", + paste0(" = ",round(p.val, 3)))) + + p$plot <- p$plot + annotate("text", + x = 0, y = 0.55, + hjust = 0, + fontface = 4, + label = p.lab) + + # calculate pair-wise survival comparison + ps <- pairwise_survdiff(Surv(futime, fustat)~ Subtype, + data = mosurv.res, + p.adjust.method = p.adjust.method) + + # add pair-wise comparison table + # options(stringsAsFactors = FALSE) + addTab <- as.data.frame(as.matrix(ifelse(round(ps$p.value, 3) < 0.001, "<0.001", + round(ps$p.value, 3)))) + addTab[is.na(addTab)] <- "-" + # options(stringsAsFactors = TRUE) + + df <- tibble(x = 0, y = 0, tb = list(addTab)) + p$plot <- p$plot + geom_table(data = df, aes(x = x, y = y, label = tb), table.rownames = TRUE) + + } else { + # add nominal pvalue for log-rank test + p.lab <- paste0("P", + ifelse(p.val < 0.001, " < 0.001", + paste0(" = ",round(p.val, 3)))) + + p$plot <- p$plot + annotate("text", + x = 0, y = 0.55, + hjust = 0, + fontface = 4, + label = p.lab) + } + + # output curve to pdf + if(!is.null(fig.name)) { + outFile <- file.path(fig.path,paste0(fig.name,".pdf")) + } else { + outFile <- file.path(fig.path,paste0("km_curve_",moic.res$mo.method,".pdf")) + } + pdf.options(reset = TRUE, onefile = FALSE) + pdf(outFile, width = 6, height = 7) + # ggsave(outFile, width = 6, height = 7) + print(p) + dev.off() + + # output curve to screen + print(p) + + if(n.moic > 2) { + return(list(fitd = fitd, fit = fit, xyrs.est = xyrs, overall.p = p.val, pairwise.p = ps)) + } else { + return(list(fitd = fitd, fit = fit, xyrs.est = xyrs, overall.p = p.val)) + } +}