|
a |
|
b/R/compSurv.R |
|
|
1 |
#' @name compSurv |
|
|
2 |
#' @title Comparison of prognosis by Kaplan-Meier survival curve |
|
|
3 |
#' @description This function calculates Kaplan-meier estimator and generate survival curves with log-rank test to detect prognostic difference among identified subtypes. If more than 2 subtypes are identified, pair-wise comparisons will be performed with an additional table printed on the survival curve. |
|
|
4 |
#' @param moic.res An object returned by `getMOIC()` with one specified algorithm or `get\%algorithm_name\%` or `getConsensusMOIC()` with a list of multiple algorithms. |
|
|
5 |
#' @param surv.info A data.frame with rownames of observations and with at least two columns of `futime` for survival time and `fustat` for survival status (0: censoring; 1: event) |
|
|
6 |
#' @param clust.col A string vector storing colors for each subtype. |
|
|
7 |
#' @param p.adjust.method A string value for indicating method for adjusting p values (see \link[stats]{p.adjust}). Allowed values include one of c(`holm`, `hochberg`, `hommel`, `bonferroni`, `BH`, `BY`, `fdr`, `none`); "BH" by default. |
|
|
8 |
#' @param fig.name A string value to indicate the output path for storing the kaplan-meier curve. |
|
|
9 |
#' @param fig.path A string value to indicate the name of the kaplan-meier curve. |
|
|
10 |
#' @param convt.time A string value to indicate how to convert the survival time; value of `d` for days, `m` for months and `y` for years; "d" by default. |
|
|
11 |
#' @param surv.median.line A string value for drawing a horizontal/vertical line at median survival. Allowed values include one of c(`none`, `hv`, `h`, `v`). v: vertical, h:horizontal; "none" by default. |
|
|
12 |
#' @param xyrs.est An integer vector to estimate probability of surviving beyond a certain number (x) of years (Estimating x-year survival); NULL by default. |
|
|
13 |
#' @param surv.cut A numeric value to indicate the x-axis cutoff for showing the maximal survival time. NULL by default (show 0-maximum survival time range). |
|
|
14 |
#' @return A figure of multi-omics Kaplan-Meier curve (.pdf) and a list with the following components: |
|
|
15 |
#' |
|
|
16 |
#' \code{fitd} an object returned by \link[survival]{survdiff}. |
|
|
17 |
#' |
|
|
18 |
#' \code{fid} an object returned by \link[survival]{survfit}. |
|
|
19 |
#' |
|
|
20 |
#' \code{xyrs.est} x-year probability of survival and the associated lower and upper bounds of the 95% confidence interval are also displayed if argument of `xyrs.est` was set by users. |
|
|
21 |
#' |
|
|
22 |
#' \code{overall.p} a nominal p.value calculated by Kaplain-Meier estimator with log-rank test. |
|
|
23 |
#' |
|
|
24 |
#' \code{pairwise.p} an object of class "pairwise.htest" which is a list containing the p values (see \link[survminer]{pairwise_survdiff}); (\emph{only returned when more than 2 subtypes are identified}). |
|
|
25 |
#' @import survival |
|
|
26 |
#' @import survminer |
|
|
27 |
#' @import ggplot2 |
|
|
28 |
#' @importFrom grDevices pdf dev.off pdf.options |
|
|
29 |
#' @importFrom ggpp geom_table |
|
|
30 |
#' @importFrom tibble tibble |
|
|
31 |
#' @export |
|
|
32 |
#' @examples # There is no example and please refer to vignette. |
|
|
33 |
compSurv <- function(moic.res = NULL, |
|
|
34 |
surv.info = NULL, |
|
|
35 |
convt.time = "d", |
|
|
36 |
surv.cut = NULL, |
|
|
37 |
xyrs.est = NULL, |
|
|
38 |
clust.col = c("#2EC4B6","#E71D36","#FF9F1C","#BDD5EA","#FFA5AB","#011627","#023E8A","#9D4EDD"), |
|
|
39 |
p.adjust.method = "BH", |
|
|
40 |
surv.median.line = "none", |
|
|
41 |
fig.name = NULL, |
|
|
42 |
fig.path = getwd()){ |
|
|
43 |
|
|
|
44 |
if(!all(is.element(c("futime","fustat"), colnames(surv.info)))) { |
|
|
45 |
stop("fail to find variables of futime and fustat.") |
|
|
46 |
} |
|
|
47 |
|
|
|
48 |
if(!all(is.element(convt.time, c("d","m","y")))) { |
|
|
49 |
stop("unsupported time conversion. Allowed values contain c('d', 'm', 'y').") |
|
|
50 |
} |
|
|
51 |
|
|
|
52 |
# get common samples |
|
|
53 |
comsam <- intersect(rownames(surv.info),rownames(moic.res$clust.res)) |
|
|
54 |
mosurv.res <- cbind.data.frame(surv.info[comsam,c("futime","fustat")], |
|
|
55 |
moic.res$clust.res[comsam, "clust", drop = FALSE]) |
|
|
56 |
message(paste0("--a total of ",length(comsam), " samples are identified.")) |
|
|
57 |
|
|
|
58 |
# remove missing data if possible |
|
|
59 |
if(sum(c(is.na(mosurv.res$futime), is.na(mosurv.res$fustat))) > 0) { |
|
|
60 |
message("--removed missing values.") |
|
|
61 |
mosurv.res <- as.data.frame(na.omit(mosurv.res)) |
|
|
62 |
message(paste0("--leaving ",nrow(mosurv.res), " observations.")) |
|
|
63 |
} |
|
|
64 |
|
|
|
65 |
# check time unit |
|
|
66 |
if(max(mosurv.res$futime) < 365) { |
|
|
67 |
warning("it seems the 'futime' might not in [day] unit, please make sure you provide the correct survival information.") |
|
|
68 |
} |
|
|
69 |
|
|
|
70 |
# create new variable of Subtype |
|
|
71 |
mosurv.res$Subtype <- paste0("CS", mosurv.res$clust) |
|
|
72 |
mosurv.res <- mosurv.res[order(mosurv.res$Subtype),] |
|
|
73 |
|
|
|
74 |
# estimate x-year survival probability |
|
|
75 |
if(!is.null(xyrs.est)) { |
|
|
76 |
if(max(xyrs.est) * 365 < max(mosurv.res$futime)) { |
|
|
77 |
xyrs <- summary(survfit(Surv(futime, fustat) ~ Subtype, data = mosurv.res), times = xyrs.est * 365) |
|
|
78 |
} else { |
|
|
79 |
stop("the maximal survival time is less than the time point you want to estimate!") |
|
|
80 |
} |
|
|
81 |
} else { |
|
|
82 |
xyrs <- "[Not Available]: argument of xyrs.est was not specified." |
|
|
83 |
} |
|
|
84 |
|
|
|
85 |
# convert survival time |
|
|
86 |
mosurv.res$futime <- switch(convt.time, |
|
|
87 |
"d" = mosurv.res$futime, |
|
|
88 |
"m" = round(mosurv.res$futime/30.5,4), |
|
|
89 |
"y" = round(mosurv.res$futime/365,4)) |
|
|
90 |
|
|
|
91 |
date.lab <- switch(convt.time, |
|
|
92 |
"d" = "Days", |
|
|
93 |
"m" = "Months", |
|
|
94 |
"y" = "Years") |
|
|
95 |
|
|
|
96 |
# truncate survival time |
|
|
97 |
if(date.lab == "Days" & max(mosurv.res$futime) > 3650) { |
|
|
98 |
#xlim = c(0, 3650) |
|
|
99 |
brk = 365 |
|
|
100 |
} |
|
|
101 |
if(date.lab == "Days" & max(mosurv.res$futime) <= 3650) { |
|
|
102 |
#xlim = c(0, max(mosurv.res$futime)) |
|
|
103 |
brk = floor(max(mosurv.res$futime)/10) |
|
|
104 |
} |
|
|
105 |
if(date.lab == "Months" & max(mosurv.res$futime) > 120) { |
|
|
106 |
#xlim = c(0, 120) |
|
|
107 |
brk = 12 |
|
|
108 |
} |
|
|
109 |
if(date.lab == "Months" & max(mosurv.res$futime) <= 120) { |
|
|
110 |
#xlim = c(0, max(mosurv.res$futime)) |
|
|
111 |
brk = floor(max(mosurv.res$futime)/10) |
|
|
112 |
} |
|
|
113 |
if(date.lab == "Years" & max(mosurv.res$futime) > 10) { |
|
|
114 |
#xlim = c(0, 10) |
|
|
115 |
brk = 1 |
|
|
116 |
} |
|
|
117 |
if(date.lab == "Years" & max(mosurv.res$futime) <= 10) { |
|
|
118 |
#xlim = c(0, max(mosurv.res$futime)) |
|
|
119 |
brk = 1 |
|
|
120 |
} |
|
|
121 |
|
|
|
122 |
if(is.null(surv.cut)) { |
|
|
123 |
xlim = c(0, max(mosurv.res$futime)) |
|
|
124 |
} else { |
|
|
125 |
message(paste0("--cut survival curve up to ",surv.cut," ",tolower(date.lab))) |
|
|
126 |
xlim = c(0, surv.cut) |
|
|
127 |
} |
|
|
128 |
|
|
|
129 |
|
|
|
130 |
# if(!is.null(surv.cut)) { |
|
|
131 |
# xlim = c(0, surv.cut) |
|
|
132 |
# brk = surv.cut/10 |
|
|
133 |
# } else {message("--cut survival curve up to 10 years.")} |
|
|
134 |
|
|
|
135 |
n.moic <- length(unique(mosurv.res$Subtype)) |
|
|
136 |
|
|
|
137 |
# basic survival analysis |
|
|
138 |
fitd <- survdiff(Surv(futime, fustat) ~ Subtype, |
|
|
139 |
data = mosurv.res, |
|
|
140 |
na.action = na.exclude) |
|
|
141 |
p.val <- 1 - pchisq(fitd$chisq, length(fitd$n) - 1) |
|
|
142 |
fit <- survfit(Surv(futime, fustat)~ Subtype, |
|
|
143 |
data = mosurv.res, |
|
|
144 |
type = "kaplan-meier", |
|
|
145 |
error = "greenwood", |
|
|
146 |
conf.type = "plain", |
|
|
147 |
na.action = na.exclude) |
|
|
148 |
|
|
|
149 |
# hack strata for better survival curve |
|
|
150 |
names(fit$strata) <- gsub("Subtype=", "", names(fit$strata)) |
|
|
151 |
|
|
|
152 |
# kaplan-meier curve |
|
|
153 |
p <- suppressWarnings(ggsurvplot(fit = fit, |
|
|
154 |
conf.int = FALSE, |
|
|
155 |
risk.table = TRUE, |
|
|
156 |
risk.table.col = "strata", |
|
|
157 |
palette = clust.col[1:n.moic], |
|
|
158 |
data = mosurv.res, |
|
|
159 |
size = 1, |
|
|
160 |
xlim = xlim, |
|
|
161 |
break.time.by = brk, |
|
|
162 |
legend.title = "", |
|
|
163 |
surv.median.line = surv.median.line, |
|
|
164 |
xlab = paste0("Time (",date.lab,")"), |
|
|
165 |
ylab = "Survival probability (%)", |
|
|
166 |
risk.table.y.text = FALSE)) |
|
|
167 |
|
|
|
168 |
# make survival time as percentage |
|
|
169 |
p$plot <- quiet(p$plot + scale_y_continuous(breaks = seq(0, 1, 0.25), labels = seq(0,100,25))) |
|
|
170 |
|
|
|
171 |
if(n.moic > 2) { |
|
|
172 |
|
|
|
173 |
# add nominal pvalue for log-rank test |
|
|
174 |
p.lab <- paste0("Overall P", |
|
|
175 |
ifelse(p.val < 0.001, " < 0.001", |
|
|
176 |
paste0(" = ",round(p.val, 3)))) |
|
|
177 |
|
|
|
178 |
p$plot <- p$plot + annotate("text", |
|
|
179 |
x = 0, y = 0.55, |
|
|
180 |
hjust = 0, |
|
|
181 |
fontface = 4, |
|
|
182 |
label = p.lab) |
|
|
183 |
|
|
|
184 |
# calculate pair-wise survival comparison |
|
|
185 |
ps <- pairwise_survdiff(Surv(futime, fustat)~ Subtype, |
|
|
186 |
data = mosurv.res, |
|
|
187 |
p.adjust.method = p.adjust.method) |
|
|
188 |
|
|
|
189 |
# add pair-wise comparison table |
|
|
190 |
# options(stringsAsFactors = FALSE) |
|
|
191 |
addTab <- as.data.frame(as.matrix(ifelse(round(ps$p.value, 3) < 0.001, "<0.001", |
|
|
192 |
round(ps$p.value, 3)))) |
|
|
193 |
addTab[is.na(addTab)] <- "-" |
|
|
194 |
# options(stringsAsFactors = TRUE) |
|
|
195 |
|
|
|
196 |
df <- tibble(x = 0, y = 0, tb = list(addTab)) |
|
|
197 |
p$plot <- p$plot + geom_table(data = df, aes(x = x, y = y, label = tb), table.rownames = TRUE) |
|
|
198 |
|
|
|
199 |
} else { |
|
|
200 |
# add nominal pvalue for log-rank test |
|
|
201 |
p.lab <- paste0("P", |
|
|
202 |
ifelse(p.val < 0.001, " < 0.001", |
|
|
203 |
paste0(" = ",round(p.val, 3)))) |
|
|
204 |
|
|
|
205 |
p$plot <- p$plot + annotate("text", |
|
|
206 |
x = 0, y = 0.55, |
|
|
207 |
hjust = 0, |
|
|
208 |
fontface = 4, |
|
|
209 |
label = p.lab) |
|
|
210 |
} |
|
|
211 |
|
|
|
212 |
# output curve to pdf |
|
|
213 |
if(!is.null(fig.name)) { |
|
|
214 |
outFile <- file.path(fig.path,paste0(fig.name,".pdf")) |
|
|
215 |
} else { |
|
|
216 |
outFile <- file.path(fig.path,paste0("km_curve_",moic.res$mo.method,".pdf")) |
|
|
217 |
} |
|
|
218 |
pdf.options(reset = TRUE, onefile = FALSE) |
|
|
219 |
pdf(outFile, width = 6, height = 7) |
|
|
220 |
# ggsave(outFile, width = 6, height = 7) |
|
|
221 |
print(p) |
|
|
222 |
dev.off() |
|
|
223 |
|
|
|
224 |
# output curve to screen |
|
|
225 |
print(p) |
|
|
226 |
|
|
|
227 |
if(n.moic > 2) { |
|
|
228 |
return(list(fitd = fitd, fit = fit, xyrs.est = xyrs, overall.p = p.val, pairwise.p = ps)) |
|
|
229 |
} else { |
|
|
230 |
return(list(fitd = fitd, fit = fit, xyrs.est = xyrs, overall.p = p.val)) |
|
|
231 |
} |
|
|
232 |
} |