Diff of /R/compSurv.R [000000] .. [494cbf]

Switch to unified view

a b/R/compSurv.R
1
#' @name compSurv
2
#' @title Comparison of prognosis by Kaplan-Meier survival curve
3
#' @description This function calculates Kaplan-meier estimator and generate survival curves with log-rank test to detect prognostic difference among identified subtypes. If more than 2 subtypes are identified, pair-wise comparisons will be performed with an additional table printed on the survival curve.
4
#' @param moic.res An object returned by `getMOIC()` with one specified algorithm or `get\%algorithm_name\%` or `getConsensusMOIC()` with a list of multiple algorithms.
5
#' @param surv.info A data.frame with rownames of observations and with at least two columns of `futime` for survival time and `fustat` for survival status (0: censoring; 1: event)
6
#' @param clust.col A string vector storing colors for each subtype.
7
#' @param p.adjust.method A string value for indicating method for adjusting p values (see \link[stats]{p.adjust}). Allowed values include one of c(`holm`, `hochberg`, `hommel`, `bonferroni`, `BH`, `BY`, `fdr`, `none`); "BH" by default.
8
#' @param fig.name A string value to indicate the output path for storing the kaplan-meier curve.
9
#' @param fig.path A string value to indicate the name of the kaplan-meier curve.
10
#' @param convt.time A string value to indicate how to convert the survival time; value of `d` for days, `m` for months and `y` for years; "d" by default.
11
#' @param surv.median.line A string value for drawing a horizontal/vertical line at median survival. Allowed values include one of c(`none`, `hv`, `h`, `v`). v: vertical, h:horizontal; "none" by default.
12
#' @param xyrs.est An integer vector to estimate probability of surviving beyond a certain number (x) of years (Estimating x-year survival); NULL by default.
13
#' @param surv.cut A numeric value to indicate the x-axis cutoff for showing the maximal survival time. NULL by default (show 0-maximum survival time range).
14
#' @return A figure of multi-omics Kaplan-Meier curve (.pdf) and a list with the following components:
15
#'
16
#'         \code{fitd}       an object returned by \link[survival]{survdiff}.
17
#'
18
#'         \code{fid}        an object returned by \link[survival]{survfit}.
19
#'
20
#'         \code{xyrs.est}   x-year probability of survival and the associated lower and upper bounds of the 95% confidence interval are also displayed if argument of `xyrs.est` was set by users.
21
#'
22
#'         \code{overall.p}  a nominal p.value calculated by Kaplain-Meier estimator with log-rank test.
23
#'
24
#'         \code{pairwise.p} an object of class "pairwise.htest" which is a list containing the p values (see \link[survminer]{pairwise_survdiff}); (\emph{only returned when more than 2 subtypes are identified}).
25
#' @import survival
26
#' @import survminer
27
#' @import ggplot2
28
#' @importFrom grDevices pdf dev.off pdf.options
29
#' @importFrom ggpp geom_table
30
#' @importFrom tibble tibble
31
#' @export
32
#' @examples # There is no example and please refer to vignette.
33
compSurv <- function(moic.res         = NULL,
34
                     surv.info        = NULL,
35
                     convt.time       = "d",
36
                     surv.cut         = NULL,
37
                     xyrs.est         = NULL,
38
                     clust.col        = c("#2EC4B6","#E71D36","#FF9F1C","#BDD5EA","#FFA5AB","#011627","#023E8A","#9D4EDD"),
39
                     p.adjust.method  = "BH",
40
                     surv.median.line = "none",
41
                     fig.name         = NULL,
42
                     fig.path         = getwd()){
43
44
  if(!all(is.element(c("futime","fustat"), colnames(surv.info)))) {
45
    stop("fail to find variables of futime and fustat.")
46
  }
47
48
  if(!all(is.element(convt.time, c("d","m","y")))) {
49
    stop("unsupported time conversion. Allowed values contain c('d', 'm', 'y').")
50
  }
51
52
  # get common samples
53
  comsam <- intersect(rownames(surv.info),rownames(moic.res$clust.res))
54
  mosurv.res <- cbind.data.frame(surv.info[comsam,c("futime","fustat")],
55
                                 moic.res$clust.res[comsam, "clust", drop = FALSE])
56
  message(paste0("--a total of ",length(comsam), " samples are identified."))
57
58
  # remove missing data if possible
59
  if(sum(c(is.na(mosurv.res$futime), is.na(mosurv.res$fustat))) > 0) {
60
    message("--removed missing values.")
61
    mosurv.res <- as.data.frame(na.omit(mosurv.res))
62
    message(paste0("--leaving ",nrow(mosurv.res), " observations."))
63
  }
64
65
  # check time unit
66
  if(max(mosurv.res$futime) < 365) {
67
    warning("it seems the 'futime' might not in [day] unit, please make sure you provide the correct survival information.")
68
  }
69
70
  # create new variable of Subtype
71
  mosurv.res$Subtype <- paste0("CS", mosurv.res$clust)
72
  mosurv.res <- mosurv.res[order(mosurv.res$Subtype),]
73
74
  # estimate x-year survival probability
75
  if(!is.null(xyrs.est)) {
76
    if(max(xyrs.est) * 365 < max(mosurv.res$futime)) {
77
      xyrs <- summary(survfit(Surv(futime, fustat) ~ Subtype, data = mosurv.res), times = xyrs.est * 365)
78
    } else {
79
      stop("the maximal survival time is less than the time point you want to estimate!")
80
    }
81
  } else {
82
    xyrs <- "[Not Available]: argument of xyrs.est was not specified."
83
  }
84
85
  # convert survival time
86
  mosurv.res$futime <- switch(convt.time,
87
                              "d" = mosurv.res$futime,
88
                              "m" = round(mosurv.res$futime/30.5,4),
89
                              "y" = round(mosurv.res$futime/365,4))
90
91
  date.lab <- switch(convt.time,
92
                     "d" = "Days",
93
                     "m" = "Months",
94
                     "y" = "Years")
95
96
  # truncate survival time
97
  if(date.lab == "Days" & max(mosurv.res$futime) > 3650) {
98
    #xlim = c(0, 3650)
99
    brk = 365
100
  }
101
  if(date.lab == "Days" & max(mosurv.res$futime) <= 3650) {
102
    #xlim = c(0, max(mosurv.res$futime))
103
    brk = floor(max(mosurv.res$futime)/10)
104
  }
105
  if(date.lab == "Months" & max(mosurv.res$futime) > 120) {
106
    #xlim = c(0, 120)
107
    brk = 12
108
  }
109
  if(date.lab == "Months" & max(mosurv.res$futime) <= 120) {
110
    #xlim = c(0, max(mosurv.res$futime))
111
    brk = floor(max(mosurv.res$futime)/10)
112
  }
113
  if(date.lab == "Years" & max(mosurv.res$futime) > 10) {
114
    #xlim = c(0, 10)
115
    brk = 1
116
  }
117
  if(date.lab == "Years" & max(mosurv.res$futime) <= 10) {
118
    #xlim = c(0, max(mosurv.res$futime))
119
    brk = 1
120
  }
121
122
  if(is.null(surv.cut)) {
123
    xlim = c(0, max(mosurv.res$futime))
124
  } else {
125
    message(paste0("--cut survival curve up to ",surv.cut," ",tolower(date.lab)))
126
    xlim = c(0, surv.cut)
127
  }
128
129
130
  # if(!is.null(surv.cut)) {
131
  #   xlim = c(0, surv.cut)
132
  #   brk = surv.cut/10
133
  # } else {message("--cut survival curve up to 10 years.")}
134
135
  n.moic <- length(unique(mosurv.res$Subtype))
136
137
  # basic survival analysis
138
  fitd <- survdiff(Surv(futime, fustat) ~ Subtype,
139
                   data      = mosurv.res,
140
                   na.action = na.exclude)
141
  p.val <- 1 - pchisq(fitd$chisq, length(fitd$n) - 1)
142
  fit <- survfit(Surv(futime, fustat)~ Subtype,
143
                 data      = mosurv.res,
144
                 type      = "kaplan-meier",
145
                 error     = "greenwood",
146
                 conf.type = "plain",
147
                 na.action = na.exclude)
148
149
  # hack strata for better survival curve
150
  names(fit$strata) <- gsub("Subtype=", "", names(fit$strata))
151
152
  # kaplan-meier curve
153
  p <- suppressWarnings(ggsurvplot(fit               = fit,
154
                                   conf.int          = FALSE,
155
                                   risk.table        = TRUE,
156
                                   risk.table.col    = "strata",
157
                                   palette           = clust.col[1:n.moic],
158
                                   data              = mosurv.res,
159
                                   size              = 1,
160
                                   xlim              = xlim,
161
                                   break.time.by     = brk,
162
                                   legend.title      = "",
163
                                   surv.median.line  = surv.median.line,
164
                                   xlab              = paste0("Time (",date.lab,")"),
165
                                   ylab              = "Survival probability (%)",
166
                                   risk.table.y.text = FALSE))
167
168
  # make survival time as percentage
169
  p$plot <- quiet(p$plot + scale_y_continuous(breaks = seq(0, 1, 0.25), labels = seq(0,100,25)))
170
171
  if(n.moic > 2) {
172
173
    # add nominal pvalue for log-rank test
174
    p.lab <- paste0("Overall P",
175
                    ifelse(p.val < 0.001, " < 0.001",
176
                           paste0(" = ",round(p.val, 3))))
177
178
    p$plot <- p$plot + annotate("text",
179
                                x = 0, y = 0.55,
180
                                hjust = 0,
181
                                fontface = 4,
182
                                label = p.lab)
183
184
    # calculate pair-wise survival comparison
185
    ps <- pairwise_survdiff(Surv(futime, fustat)~ Subtype,
186
                            data            = mosurv.res,
187
                            p.adjust.method = p.adjust.method)
188
189
    # add pair-wise comparison table
190
    # options(stringsAsFactors = FALSE)
191
    addTab <- as.data.frame(as.matrix(ifelse(round(ps$p.value, 3) < 0.001, "<0.001",
192
                                             round(ps$p.value, 3))))
193
    addTab[is.na(addTab)] <- "-"
194
    # options(stringsAsFactors = TRUE)
195
196
    df <- tibble(x = 0, y = 0, tb = list(addTab))
197
    p$plot <- p$plot + geom_table(data = df, aes(x = x, y = y, label = tb), table.rownames = TRUE)
198
199
  } else {
200
    # add nominal pvalue for log-rank test
201
    p.lab <- paste0("P",
202
                    ifelse(p.val < 0.001, " < 0.001",
203
                           paste0(" = ",round(p.val, 3))))
204
205
    p$plot <- p$plot + annotate("text",
206
                                x = 0, y = 0.55,
207
                                hjust = 0,
208
                                fontface = 4,
209
                                label = p.lab)
210
  }
211
212
  # output curve to pdf
213
  if(!is.null(fig.name)) {
214
    outFile <- file.path(fig.path,paste0(fig.name,".pdf"))
215
  } else {
216
    outFile <- file.path(fig.path,paste0("km_curve_",moic.res$mo.method,".pdf"))
217
  }
218
  pdf.options(reset = TRUE, onefile = FALSE)
219
  pdf(outFile, width = 6, height = 7)
220
  # ggsave(outFile, width = 6, height = 7)
221
  print(p)
222
  dev.off()
223
224
  # output curve to screen
225
  print(p)
226
227
  if(n.moic > 2) {
228
    return(list(fitd = fitd, fit = fit, xyrs.est = xyrs, overall.p = p.val, pairwise.p = ps))
229
  } else {
230
    return(list(fitd = fitd, fit = fit, xyrs.est = xyrs, overall.p = p.val))
231
  }
232
}