|
a |
|
b/QuRNom_construction.Rmd |
|
|
1 |
--- |
|
|
2 |
title: "Nomogram" |
|
|
3 |
author: "Pranjal" |
|
|
4 |
date: "2/21/2019" |
|
|
5 |
output: html_document |
|
|
6 |
--- |
|
|
7 |
|
|
|
8 |
```{r setup, include=FALSE} |
|
|
9 |
knitr::opts_chunk$set(echo = TRUE) |
|
|
10 |
``` |
|
|
11 |
## Initial Setup and Package Loads in R |
|
|
12 |
|
|
|
13 |
Packages used for the analysis. |
|
|
14 |
```{r initial_setup, cache=FALSE, message = FALSE, warning = FALSE} |
|
|
15 |
library(glmnet);library(survival);library(survminer);library(readxl);library(ggplot2); library(GGally) |
|
|
16 |
library(knitr); library(rmdformats); library(magrittr) |
|
|
17 |
library(skimr); library(Hmisc); library(Epi); library(vcd) |
|
|
18 |
library(tidyverse) |
|
|
19 |
|
|
|
20 |
## Global options |
|
|
21 |
|
|
|
22 |
options(max.print="75") |
|
|
23 |
opts_chunk$set(comment=NA, |
|
|
24 |
message=FALSE, |
|
|
25 |
warning=FALSE) |
|
|
26 |
opts_knit$set(width=75) |
|
|
27 |
|
|
|
28 |
|
|
|
29 |
skimr::skim_with(numeric = list(hist = NULL), |
|
|
30 |
integer = list(hist = NULL)) |
|
|
31 |
``` |
|
|
32 |
|
|
|
33 |
## Loading the Raw Data into R |
|
|
34 |
|
|
|
35 |
Loading raw dataset into R. |
|
|
36 |
|
|
|
37 |
Training Data from CCF with minimum and max. survival time. |
|
|
38 |
```{r train_set} |
|
|
39 |
train <- read.csv("dataset_train.csv") |
|
|
40 |
#train <- na.omit(train1) |
|
|
41 |
|
|
|
42 |
``` |
|
|
43 |
|
|
|
44 |
|
|
|
45 |
```{r} |
|
|
46 |
train$group1 <- ifelse(train$time>=60,'long','short') |
|
|
47 |
``` |
|
|
48 |
```{r} |
|
|
49 |
colSums(is.na(train)) |
|
|
50 |
``` |
|
|
51 |
|
|
|
52 |
```{r} |
|
|
53 |
complete_train <- train[complete.cases(train), ] |
|
|
54 |
``` |
|
|
55 |
|
|
|
56 |
```{r} |
|
|
57 |
train2 <- with(complete_train, subset(complete_train, !complete_train$Cases %in% complete_train$Cases[therapy == 1])) |
|
|
58 |
|
|
|
59 |
``` |
|
|
60 |
|
|
|
61 |
```{r} |
|
|
62 |
#train1 <- train2[complete.cases(train2), ] |
|
|
63 |
train1 <- complete_train |
|
|
64 |
``` |
|
|
65 |
```{r} |
|
|
66 |
res.cox <- coxph(Surv(time, status) ~ QuRiS + stage, data = train1) |
|
|
67 |
summary(res.cox) |
|
|
68 |
``` |
|
|
69 |
|
|
|
70 |
|
|
|
71 |
```{r} |
|
|
72 |
library("rms") |
|
|
73 |
library("survey") |
|
|
74 |
library("SvyNom") |
|
|
75 |
|
|
|
76 |
dd <- datadist(train1) |
|
|
77 |
options(datadist = "dd") |
|
|
78 |
|
|
|
79 |
dstr2 <- svydesign(id = ~1, strata = ~group1 , data = train1) |
|
|
80 |
|
|
|
81 |
mynom <- svycox.nomogram(.design = dstr2, .model = |
|
|
82 |
Surv(time, status) ~ QuRiS + stage, .data = train1, pred.at = 60, fun.lab = "5-Year DFS") |
|
|
83 |
|
|
|
84 |
``` |
|
|
85 |
|
|
|
86 |
```{r} |
|
|
87 |
plot(mynom$nomog) |
|
|
88 |
``` |
|
|
89 |
|
|
|
90 |
```{r} |
|
|
91 |
svycox.calibrate(mynom) |
|
|
92 |
``` |
|
|
93 |
|
|
|
94 |
|
|
|
95 |
```{r} |
|
|
96 |
mynom[["nomog"]] |
|
|
97 |
``` |
|
|
98 |
|
|
|
99 |
```{r} |
|
|
100 |
ppp <- mynom$nomog$`3-Year DFS` |
|
|
101 |
``` |
|
|
102 |
```{r} |
|
|
103 |
threshold2 <- ppp$x[6] |
|
|
104 |
threshold3 <- ppp$x[4] |
|
|
105 |
threshold4 <- ppp$x[1] |
|
|
106 |
threshold2 |
|
|
107 |
threshold3 |
|
|
108 |
threshold4 |
|
|
109 |
``` |
|
|
110 |
|
|
|
111 |
```{r} |
|
|
112 |
varnames <- c("stage", "QuRiS") |
|
|
113 |
``` |
|
|
114 |
|
|
|
115 |
|
|
|
116 |
```{r} |
|
|
117 |
kk1 <- complete_train[,varnames] |
|
|
118 |
two <- mynom$nomog$QuRiS$points |
|
|
119 |
sig <- mynom$nomog$QuRiS$QuRiS |
|
|
120 |
four <- mynom$nomog$stage$points |
|
|
121 |
``` |
|
|
122 |
|
|
|
123 |
```{r} |
|
|
124 |
for (i in 1:(length(two)-1)){ |
|
|
125 |
kk1$Rec1 <- ifelse(kk1$QuRiS > sig[i], two[i+1],kk1$Rec1) |
|
|
126 |
} |
|
|
127 |
``` |
|
|
128 |
|
|
|
129 |
```{r} |
|
|
130 |
kk1$stage <- as.character(kk1$stage) |
|
|
131 |
kk1$stage[kk1$stage == "stage1"] <- four[1] |
|
|
132 |
kk1$stage[kk1$stage == "stage2"] <- four[2] |
|
|
133 |
``` |
|
|
134 |
|
|
|
135 |
|
|
|
136 |
```{r} |
|
|
137 |
varnames <- c("stage", "Rec1") |
|
|
138 |
MAIN <- kk1[, varnames] |
|
|
139 |
store <- data.matrix(MAIN) |
|
|
140 |
POINTS <- rowSums(store) |
|
|
141 |
``` |
|
|
142 |
```{r} |
|
|
143 |
complete_train <- cbind(complete_train, POINTS) |
|
|
144 |
``` |
|
|
145 |
|
|
|
146 |
|
|
|
147 |
```{r} |
|
|
148 |
|
|
|
149 |
complete_train$Condition <- 2 |
|
|
150 |
complete_train$Condition[complete_train$POINTS <= threshold3 & complete_train$POINTS > threshold2] <- 3 |
|
|
151 |
complete_train$Condition[complete_train$POINTS <= threshold4 & complete_train$POINTS > threshold3] <- 4 |
|
|
152 |
complete_train$Condition[complete_train$POINTS > threshold4] <- 5 |
|
|
153 |
|
|
|
154 |
``` |
|
|
155 |
|
|
|
156 |
```{r} |
|
|
157 |
x.sub <- subset(complete_train, Condition == 5) |
|
|
158 |
dim(x.sub) |
|
|
159 |
aggregate(x.sub[,], list(x.sub$therapy), mean)$time |
|
|
160 |
``` |
|
|
161 |
|
|
|
162 |
```{r} |
|
|
163 |
fit2 <- survfit(Surv(time, status) ~ therapy, data = x.sub) |
|
|
164 |
|
|
|
165 |
|
|
|
166 |
ggsurvplot( |
|
|
167 |
fit2, # survfit object with calculated statistics. |
|
|
168 |
data = x.sub, # data used to fit survival curves. |
|
|
169 |
size = 1.2, |
|
|
170 |
# palette = c("#000099", "#FFFF00", "#CC0000"), |
|
|
171 |
risk.table = TRUE, # show risk table. |
|
|
172 |
pval = TRUE, # show p-value of log-rank test. |
|
|
173 |
xlab = "Time in months" # customize X axis label. |
|
|
174 |
|
|
|
175 |
) |
|
|
176 |
``` |
|
|
177 |
|
|
|
178 |
|
|
|
179 |
```{r} |
|
|
180 |
variables <- c("therapy") |
|
|
181 |
univ_formulas <- sapply(variables, |
|
|
182 |
function(x) as.formula(paste('Surv(time, status)~', x))) |
|
|
183 |
|
|
|
184 |
univ_models <- lapply(univ_formulas, function(x){coxph(x, data = x.sub)}) |
|
|
185 |
# Extract data |
|
|
186 |
univ_results <- lapply(univ_models, |
|
|
187 |
function(x){ |
|
|
188 |
x <- summary(x) |
|
|
189 |
p.value<-signif(x$wald["pvalue"], digits=2) |
|
|
190 |
wald.test<-signif(x$wald["test"], digits=2) |
|
|
191 |
beta<-signif(x$coef[1], digits=2);#coeficient beta |
|
|
192 |
HR <-signif(x$coef[2], digits=2);#exp(beta) |
|
|
193 |
HR.confint.lower <- signif(x$conf.int[,"lower .95"], 2) |
|
|
194 |
HR.confint.upper <- signif(x$conf.int[,"upper .95"],2) |
|
|
195 |
HR <- paste0(HR, " (", |
|
|
196 |
HR.confint.lower, "-", HR.confint.upper, ")") |
|
|
197 |
res<-c(beta, HR, wald.test, p.value) |
|
|
198 |
names(res)<-c("beta", "HR (95% CI for HR)", "wald.test", |
|
|
199 |
"p.value") |
|
|
200 |
return(res) |
|
|
201 |
#return(exp(cbind(coef(x),confint(x)))) |
|
|
202 |
}) |
|
|
203 |
res <- t(as.data.frame(univ_results, check.names = FALSE)) |
|
|
204 |
as.data.frame(res) |
|
|
205 |
``` |
|
|
206 |
|
|
|
207 |
|
|
|
208 |
|
|
|
209 |
```{r} |
|
|
210 |
x.sub <- subset(complete_train, Condition == 4) |
|
|
211 |
dim(x.sub) |
|
|
212 |
aggregate(x.sub[,], list(x.sub$therapy), mean)$time |
|
|
213 |
``` |
|
|
214 |
|
|
|
215 |
```{r} |
|
|
216 |
variables <- c("therapy") |
|
|
217 |
univ_formulas <- sapply(variables, |
|
|
218 |
function(x) as.formula(paste('Surv(time, status)~', x))) |
|
|
219 |
|
|
|
220 |
univ_models <- lapply(univ_formulas, function(x){coxph(x, data = x.sub)}) |
|
|
221 |
# Extract data |
|
|
222 |
univ_results <- lapply(univ_models, |
|
|
223 |
function(x){ |
|
|
224 |
x <- summary(x) |
|
|
225 |
p.value<-signif(x$wald["pvalue"], digits=2) |
|
|
226 |
wald.test<-signif(x$wald["test"], digits=2) |
|
|
227 |
beta<-signif(x$coef[1], digits=2);#coeficient beta |
|
|
228 |
HR <-signif(x$coef[2], digits=2);#exp(beta) |
|
|
229 |
HR.confint.lower <- signif(x$conf.int[,"lower .95"], 2) |
|
|
230 |
HR.confint.upper <- signif(x$conf.int[,"upper .95"],2) |
|
|
231 |
HR <- paste0(HR, " (", |
|
|
232 |
HR.confint.lower, "-", HR.confint.upper, ")") |
|
|
233 |
res<-c(beta, HR, wald.test, p.value) |
|
|
234 |
names(res)<-c("beta", "HR (95% CI for HR)", "wald.test", |
|
|
235 |
"p.value") |
|
|
236 |
return(res) |
|
|
237 |
#return(exp(cbind(coef(x),confint(x)))) |
|
|
238 |
}) |
|
|
239 |
res <- t(as.data.frame(univ_results, check.names = FALSE)) |
|
|
240 |
as.data.frame(res) |
|
|
241 |
``` |
|
|
242 |
|
|
|
243 |
```{r} |
|
|
244 |
x.sub <- subset(complete_train, Condition == 3) |
|
|
245 |
dim(x.sub) |
|
|
246 |
aggregate(x.sub[,], list(x.sub$therapy), mean)$time |
|
|
247 |
``` |
|
|
248 |
|
|
|
249 |
```{r} |
|
|
250 |
variables <- c("therapy") |
|
|
251 |
univ_formulas <- sapply(variables, |
|
|
252 |
function(x) as.formula(paste('Surv(time, status)~', x))) |
|
|
253 |
|
|
|
254 |
univ_models <- lapply(univ_formulas, function(x){coxph(x, data = x.sub)}) |
|
|
255 |
# Extract data |
|
|
256 |
univ_results <- lapply(univ_models, |
|
|
257 |
function(x){ |
|
|
258 |
x <- summary(x) |
|
|
259 |
p.value<-signif(x$wald["pvalue"], digits=2) |
|
|
260 |
wald.test<-signif(x$wald["test"], digits=2) |
|
|
261 |
beta<-signif(x$coef[1], digits=2);#coeficient beta |
|
|
262 |
HR <-signif(x$coef[2], digits=2);#exp(beta) |
|
|
263 |
HR.confint.lower <- signif(x$conf.int[,"lower .95"], 2) |
|
|
264 |
HR.confint.upper <- signif(x$conf.int[,"upper .95"],2) |
|
|
265 |
HR <- paste0(HR, " (", |
|
|
266 |
HR.confint.lower, "-", HR.confint.upper, ")") |
|
|
267 |
res<-c(beta, HR, wald.test, p.value) |
|
|
268 |
names(res)<-c("beta", "HR (95% CI for HR)", "wald.test", |
|
|
269 |
"p.value") |
|
|
270 |
return(res) |
|
|
271 |
#return(exp(cbind(coef(x),confint(x)))) |
|
|
272 |
}) |
|
|
273 |
res <- t(as.data.frame(univ_results, check.names = FALSE)) |
|
|
274 |
as.data.frame(res) |
|
|
275 |
``` |
|
|
276 |
|
|
|
277 |
```{r} |
|
|
278 |
x.sub <- subset(complete_train, Condition == 2) |
|
|
279 |
dim(x.sub) |
|
|
280 |
aggregate(x.sub[,], list(x.sub$therapy), mean)$time |
|
|
281 |
``` |
|
|
282 |
|
|
|
283 |
|
|
|
284 |
```{r} |
|
|
285 |
library(forestplot) |
|
|
286 |
# Cochrane data from the 'rmeta'-package |
|
|
287 |
cochrane_from_rmeta <- |
|
|
288 |
structure(list( |
|
|
289 |
mean = c(NA, NA, 0.13, 1.1, 2.3, 1.5), |
|
|
290 |
lower = c(NA, NA, 0.99, 0.1, 0.5,0.33), |
|
|
291 |
upper = c(NA, NA, 0.004, 8.4, 10,6.5)), |
|
|
292 |
.Names = c("mean", "lower", "upper"), |
|
|
293 |
row.names = c(NA, -11L), |
|
|
294 |
class = "data.frame") |
|
|
295 |
|
|
|
296 |
tabletext<-cbind( |
|
|
297 |
# c("", "Study", "Surv.Prob < 0.20", "0.20 < Surv.Prob <0.40", "0.40 < Surv.Prob < 0.60", "0.60 < Surv.Prob < 0.80", "0.80 < Surv. Prob."), |
|
|
298 |
c("", "Estimated Survival-Benefit","<20%",">20% & <40%", ">40% & <20%", ">70%"), |
|
|
299 |
c("Treatment", "Chemo", "37.11","43.97", "20.34", "38.54"), |
|
|
300 |
c("Treatment","Non-Chemo","11.94","28.08","47.94","43.57"), |
|
|
301 |
c("", "P-Value", "<0.01", "0.98", "0.29", "0.62")) |
|
|
302 |
|
|
|
303 |
forestplot(tabletext, |
|
|
304 |
cochrane_from_rmeta$mean, |
|
|
305 |
cochrane_from_rmeta$lower, |
|
|
306 |
cochrane_from_rmeta$upper,new_page = TRUE, |
|
|
307 |
txt_gp = fpTxtGp(label = gpar(fontfamily = "Arial"), cex = 0.8), |
|
|
308 |
hrzl_lines = gpar(col="#444444"), |
|
|
309 |
graph.pos = 3, |
|
|
310 |
colgap=unit(c(2),"mm"), |
|
|
311 |
lineheight = unit(c(15),"mm"), |
|
|
312 |
# cochrane_from_rmeta,new_page = TRUE, |
|
|
313 |
is.summary=c(TRUE,TRUE,rep(FALSE,4)), |
|
|
314 |
align = c("c", "c", "c", "c"), |
|
|
315 |
clip=c(0.0004,10), |
|
|
316 |
xlog=TRUE, |
|
|
317 |
|
|
|
318 |
col=fpColors(box="red",line="darkred", hrz_lines = "#444444"), |
|
|
319 |
vertices=FALSE, |
|
|
320 |
fn.ci_norm=c("fpDrawDiamondCI"), |
|
|
321 |
boxsize=0.2, |
|
|
322 |
cex.axis=8, |
|
|
323 |
xlab="Hazard Ratios") |
|
|
324 |
|
|
|
325 |
x <- unit(0.5, 'npc'); y <- unit(.96, 'npc') |
|
|
326 |
grid.text('D3:Disease Free Survival', x, y, gp = gpar(fontsize=15, font=2)) |
|
|
327 |
|
|
|
328 |
# Fix manually the position of the HR numbers |
|
|
329 |
#xhr <- unit(0.68, 'npc'); yhr <- unit(0.65, 'npc') |
|
|
330 |
#grid.text('0.09', xhr, yhr, gp = gpar(fontsize=10, font=1)) |
|
|
331 |
|
|
|
332 |
#xh2r <- unit(0.75, 'npc'); yh2r <- unit(0.52, 'npc') |
|
|
333 |
#grid.text('0.19', xh2r, yh2r, gp = #gpar(fontsize=10, font=1)) |
|
|
334 |
|
|
|
335 |
#xl2r <- unit(0.80, 'npc'); yl2r <- unit(0.38, 'npc') |
|
|
336 |
#grid.text('0.57', xl2r, yl2r, gp = gpar(fontsize=10, font=1)) |
|
|
337 |
|
|
|
338 |
#xlr <- unit(0.82, 'npc'); ylr <- unit(0.25, 'npc') |
|
|
339 |
#grid.text('1.9', xlr, ylr, gp = gpar(fontsize=10, font=1)) |
|
|
340 |
|
|
|
341 |
#xl2r <- unit(0.92, 'npc'); yl2r <- unit(0.25, 'npc') |
|
|
342 |
#grid.text('13', xl2r, yl2r, gp = gpar(fontsize=10, font=1)) |
|
|
343 |
|
|
|
344 |
``` |