a b/QuRNom_construction.Rmd
1
---
2
title: "Nomogram"
3
author: "Pranjal"
4
date: "2/21/2019"
5
output: html_document
6
---
7
8
```{r setup, include=FALSE}
9
knitr::opts_chunk$set(echo = TRUE)
10
```
11
## Initial Setup and Package Loads in R 
12
13
Packages used for the analysis.
14
```{r initial_setup, cache=FALSE, message = FALSE, warning = FALSE}
15
library(glmnet);library(survival);library(survminer);library(readxl);library(ggplot2); library(GGally)
16
library(knitr); library(rmdformats); library(magrittr)
17
library(skimr); library(Hmisc); library(Epi); library(vcd)
18
library(tidyverse) 
19
20
## Global options
21
22
options(max.print="75")
23
opts_chunk$set(comment=NA,
24
               message=FALSE,
25
               warning=FALSE)
26
opts_knit$set(width=75)
27
28
29
skimr::skim_with(numeric = list(hist = NULL),
30
                 integer = list(hist = NULL))
31
```
32
33
## Loading the Raw Data into R 
34
35
Loading raw dataset into R.
36
37
Training Data from CCF with minimum and max. survival time.
38
```{r train_set}
39
train <- read.csv("dataset_train.csv")
40
#train <- na.omit(train1)
41
42
```
43
44
45
```{r}
46
train$group1 <- ifelse(train$time>=60,'long','short')
47
```
48
```{r}
49
colSums(is.na(train))
50
```
51
52
```{r}
53
complete_train <- train[complete.cases(train), ]
54
```
55
56
```{r}
57
train2 <-  with(complete_train, subset(complete_train, !complete_train$Cases %in% complete_train$Cases[therapy == 1]))
58
59
```
60
61
```{r}
62
#train1 <- train2[complete.cases(train2), ]
63
train1 <- complete_train
64
```
65
```{r}
66
res.cox <-  coxph(Surv(time, status) ~ QuRiS +  stage, data = train1)
67
summary(res.cox)
68
```
69
70
71
```{r}
72
library("rms")
73
library("survey")
74
library("SvyNom")
75
76
dd <- datadist(train1)
77
options(datadist = "dd")
78
79
dstr2 <- svydesign(id = ~1, strata = ~group1 , data = train1)
80
81
mynom <- svycox.nomogram(.design = dstr2, .model =
82
Surv(time, status) ~  QuRiS + stage, .data = train1, pred.at = 60, fun.lab = "5-Year DFS")  
83
84
```
85
86
```{r}
87
plot(mynom$nomog)
88
```
89
90
```{r}
91
svycox.calibrate(mynom)
92
```
93
94
95
```{r}
96
mynom[["nomog"]]
97
```
98
99
```{r}
100
ppp <- mynom$nomog$`3-Year DFS`
101
```
102
```{r}
103
threshold2 <- ppp$x[6]
104
threshold3 <- ppp$x[4]
105
threshold4 <- ppp$x[1]
106
threshold2
107
threshold3
108
threshold4
109
```
110
111
```{r}
112
varnames <- c("stage", "QuRiS")
113
```
114
115
116
```{r}
117
kk1 <- complete_train[,varnames]
118
two <- mynom$nomog$QuRiS$points
119
sig <- mynom$nomog$QuRiS$QuRiS
120
four <- mynom$nomog$stage$points
121
```
122
123
```{r}
124
for (i in 1:(length(two)-1)){
125
kk1$Rec1 <- ifelse(kk1$QuRiS > sig[i], two[i+1],kk1$Rec1)
126
}
127
```
128
129
```{r}
130
kk1$stage <- as.character(kk1$stage)
131
kk1$stage[kk1$stage == "stage1"] <- four[1]
132
kk1$stage[kk1$stage == "stage2"] <- four[2]
133
```
134
135
136
```{r}
137
varnames <- c("stage", "Rec1")
138
MAIN <- kk1[, varnames]
139
store <- data.matrix(MAIN)
140
POINTS <- rowSums(store)
141
```
142
```{r}
143
complete_train <- cbind(complete_train, POINTS)
144
```
145
146
147
```{r}
148
149
complete_train$Condition <- 2
150
complete_train$Condition[complete_train$POINTS <= threshold3 & complete_train$POINTS > threshold2] <- 3
151
complete_train$Condition[complete_train$POINTS <= threshold4 & complete_train$POINTS > threshold3] <- 4
152
complete_train$Condition[complete_train$POINTS > threshold4] <- 5
153
154
```
155
156
```{r}
157
x.sub <- subset(complete_train, Condition == 5)
158
dim(x.sub)
159
aggregate(x.sub[,], list(x.sub$therapy), mean)$time
160
```
161
162
```{r}
163
fit2 <- survfit(Surv(time, status) ~ therapy, data = x.sub)
164
165
166
ggsurvplot(
167
   fit2,                     # survfit object with calculated statistics.
168
   data = x.sub,             # data used to fit survival curves.
169
   size = 1.2,
170
 #  palette = c("#000099",  "#FFFF00", "#CC0000"),
171
   risk.table = TRUE,       # show risk table.
172
   pval = TRUE,             # show p-value of log-rank test.
173
   xlab = "Time in months"   # customize X axis label.
174
175
)
176
```
177
178
179
```{r}
180
variables <- c("therapy")
181
univ_formulas <- sapply(variables,
182
                        function(x) as.formula(paste('Surv(time, status)~', x)))
183
184
univ_models <- lapply(univ_formulas, function(x){coxph(x, data = x.sub)})
185
# Extract data 
186
univ_results <- lapply(univ_models,
187
                       function(x){ 
188
                         x <- summary(x)
189
                         p.value<-signif(x$wald["pvalue"], digits=2)
190
                         wald.test<-signif(x$wald["test"], digits=2)
191
                         beta<-signif(x$coef[1], digits=2);#coeficient beta
192
                         HR <-signif(x$coef[2], digits=2);#exp(beta)
193
                         HR.confint.lower <- signif(x$conf.int[,"lower .95"], 2)
194
                         HR.confint.upper <- signif(x$conf.int[,"upper .95"],2)
195
                         HR <- paste0(HR, " (", 
196
                                      HR.confint.lower, "-", HR.confint.upper, ")")
197
                         res<-c(beta, HR, wald.test, p.value)
198
                         names(res)<-c("beta", "HR (95% CI for HR)", "wald.test", 
199
                                       "p.value")
200
                         return(res)
201
                         #return(exp(cbind(coef(x),confint(x))))
202
                       })
203
res <- t(as.data.frame(univ_results, check.names = FALSE))
204
as.data.frame(res)
205
```
206
207
208
209
```{r}
210
x.sub <- subset(complete_train, Condition == 4)
211
dim(x.sub)
212
aggregate(x.sub[,], list(x.sub$therapy), mean)$time
213
```
214
215
```{r}
216
variables <- c("therapy")
217
univ_formulas <- sapply(variables,
218
                        function(x) as.formula(paste('Surv(time, status)~', x)))
219
220
univ_models <- lapply(univ_formulas, function(x){coxph(x, data = x.sub)})
221
# Extract data 
222
univ_results <- lapply(univ_models,
223
                       function(x){ 
224
                         x <- summary(x)
225
                         p.value<-signif(x$wald["pvalue"], digits=2)
226
                         wald.test<-signif(x$wald["test"], digits=2)
227
                         beta<-signif(x$coef[1], digits=2);#coeficient beta
228
                         HR <-signif(x$coef[2], digits=2);#exp(beta)
229
                         HR.confint.lower <- signif(x$conf.int[,"lower .95"], 2)
230
                         HR.confint.upper <- signif(x$conf.int[,"upper .95"],2)
231
                         HR <- paste0(HR, " (", 
232
                                      HR.confint.lower, "-", HR.confint.upper, ")")
233
                         res<-c(beta, HR, wald.test, p.value)
234
                         names(res)<-c("beta", "HR (95% CI for HR)", "wald.test", 
235
                                       "p.value")
236
                         return(res)
237
                         #return(exp(cbind(coef(x),confint(x))))
238
                       })
239
res <- t(as.data.frame(univ_results, check.names = FALSE))
240
as.data.frame(res)
241
```
242
243
```{r}
244
x.sub <- subset(complete_train, Condition == 3)
245
dim(x.sub)
246
aggregate(x.sub[,], list(x.sub$therapy), mean)$time
247
```
248
249
```{r}
250
variables <- c("therapy")
251
univ_formulas <- sapply(variables,
252
                        function(x) as.formula(paste('Surv(time, status)~', x)))
253
254
univ_models <- lapply(univ_formulas, function(x){coxph(x, data = x.sub)})
255
# Extract data 
256
univ_results <- lapply(univ_models,
257
                       function(x){ 
258
                         x <- summary(x)
259
                         p.value<-signif(x$wald["pvalue"], digits=2)
260
                         wald.test<-signif(x$wald["test"], digits=2)
261
                         beta<-signif(x$coef[1], digits=2);#coeficient beta
262
                         HR <-signif(x$coef[2], digits=2);#exp(beta)
263
                         HR.confint.lower <- signif(x$conf.int[,"lower .95"], 2)
264
                         HR.confint.upper <- signif(x$conf.int[,"upper .95"],2)
265
                         HR <- paste0(HR, " (", 
266
                                      HR.confint.lower, "-", HR.confint.upper, ")")
267
                         res<-c(beta, HR, wald.test, p.value)
268
                         names(res)<-c("beta", "HR (95% CI for HR)", "wald.test", 
269
                                       "p.value")
270
                         return(res)
271
                         #return(exp(cbind(coef(x),confint(x))))
272
                       })
273
res <- t(as.data.frame(univ_results, check.names = FALSE))
274
as.data.frame(res)
275
```
276
277
```{r}
278
x.sub <- subset(complete_train, Condition == 2)
279
dim(x.sub)
280
aggregate(x.sub[,], list(x.sub$therapy), mean)$time
281
```
282
283
284
```{r}
285
library(forestplot)
286
# Cochrane data from the 'rmeta'-package
287
cochrane_from_rmeta <- 
288
  structure(list(
289
    mean  = c(NA, NA, 0.13, 1.1, 2.3, 1.5),
290
    lower = c(NA, NA, 0.99, 0.1, 0.5,0.33),
291
    upper = c(NA, NA, 0.004, 8.4, 10,6.5)),
292
    .Names = c("mean", "lower", "upper"), 
293
    row.names = c(NA, -11L), 
294
    class = "data.frame")
295
296
tabletext<-cbind(
297
 # c("", "Study", "Surv.Prob < 0.20", "0.20 < Surv.Prob <0.40",  "0.40 < Surv.Prob < 0.60", "0.60 < Surv.Prob < 0.80", "0.80 < Surv. Prob."),
298
  c("", "Estimated Survival-Benefit","<20%",">20% & <40%", ">40% & <20%", ">70%"),
299
  c("Treatment", "Chemo", "37.11","43.97", "20.34", "38.54"),
300
  c("Treatment","Non-Chemo","11.94","28.08","47.94","43.57"),
301
  c("", "P-Value", "<0.01", "0.98", "0.29", "0.62"))
302
303
forestplot(tabletext,
304
                   cochrane_from_rmeta$mean,
305
                   cochrane_from_rmeta$lower,
306
                   cochrane_from_rmeta$upper,new_page = TRUE,
307
           txt_gp = fpTxtGp(label = gpar(fontfamily = "Arial"), cex = 0.8),
308
           hrzl_lines = gpar(col="#444444"),
309
           graph.pos = 3,
310
          colgap=unit(c(2),"mm"),
311
          lineheight = unit(c(15),"mm"),
312
      #     cochrane_from_rmeta,new_page = TRUE,
313
           is.summary=c(TRUE,TRUE,rep(FALSE,4)),
314
           align = c("c", "c", "c", "c"),
315
           clip=c(0.0004,10), 
316
           xlog=TRUE,
317
      
318
           col=fpColors(box="red",line="darkred", hrz_lines = "#444444"),
319
           vertices=FALSE,
320
          fn.ci_norm=c("fpDrawDiamondCI"),
321
                 boxsize=0.2,
322
           cex.axis=8,
323
          xlab="Hazard Ratios")
324
325
x <- unit(0.5, 'npc'); y <- unit(.96, 'npc')
326
grid.text('D3:Disease Free Survival', x, y, gp = gpar(fontsize=15, font=2))
327
328
# Fix manually the position of the HR numbers
329
#xhr <- unit(0.68, 'npc'); yhr <- unit(0.65, 'npc')
330
#grid.text('0.09', xhr, yhr, gp = gpar(fontsize=10, font=1))
331
332
#xh2r <- unit(0.75, 'npc'); yh2r <- unit(0.52, 'npc')
333
#grid.text('0.19', xh2r, yh2r, gp = #gpar(fontsize=10, font=1))
334
335
#xl2r <- unit(0.80, 'npc'); yl2r <- unit(0.38, 'npc')
336
#grid.text('0.57', xl2r, yl2r, gp = gpar(fontsize=10, font=1))
337
338
#xlr <- unit(0.82, 'npc'); ylr <- unit(0.25, 'npc')
339
#grid.text('1.9', xlr, ylr, gp = gpar(fontsize=10, font=1))
340
341
#xl2r <- unit(0.92, 'npc'); yl2r <- unit(0.25, 'npc')
342
#grid.text('13', xl2r, yl2r, gp = gpar(fontsize=10, font=1))
343
344
```