|
a |
|
b/Scripts/PredictIncoming.R |
|
|
1 |
############################################ |
|
|
2 |
### PREDICTING INCOMING PATIENT SURVIVAL ### |
|
|
3 |
############################################ |
|
|
4 |
|
|
|
5 |
### Method |
|
|
6 |
|
|
|
7 |
## INPUT: |
|
|
8 |
# 1) Set of mutated genes : All genes not inputed in string will be considered as wild type (from App). |
|
|
9 |
# 2) LASSO runs : All refited (forced beta parameters) cox models |
|
|
10 |
# 3) Set of covariates : All covariates available can be edited and added to the model |
|
|
11 |
# 4) The character vector list of genes available (return colnames of data in GetResults function) |
|
|
12 |
# 5) Refitted model with clinical covariates |
|
|
13 |
|
|
|
14 |
## METHOD : |
|
|
15 |
# Iterate through all the runs, using the coefficients found in the LASSO models generate a predicted |
|
|
16 |
# risk score for the incoming patient. Use the predicted risk score in a refitted coxph model with the |
|
|
17 |
# given clinical covariates. |
|
|
18 |
|
|
|
19 |
## OUTPUT : |
|
|
20 |
# 1) Survival curve for the patients inputed |
|
|
21 |
# 2) Summary Table |
|
|
22 |
|
|
|
23 |
### TEST |
|
|
24 |
mutGenes <- c("alk","STK11","TP53") |
|
|
25 |
clinical <- c("Age") |
|
|
26 |
ClinRefit <- FirstRun$ClinRefit |
|
|
27 |
time.type <- "Months" |
|
|
28 |
MD <- 12 |
|
|
29 |
LassoFits <- as.matrix(FirstRun$LassoFits) |
|
|
30 |
RiskScore <- FirstRun$average.risk |
|
|
31 |
means.train <- means.train |
|
|
32 |
|
|
|
33 |
predictIncomingPatient <- function(mutGenes,clinical,ClinRefit,time.type,MD,LassoFits,RiskScore,means.train){ |
|
|
34 |
|
|
|
35 |
#if(length(mutGenes) == 0){stop("INPUT ERROR : Enter at least one gene name")} |
|
|
36 |
#library(plotly) |
|
|
37 |
### Step 1 : Build the dataset ### |
|
|
38 |
## Gene mutation data |
|
|
39 |
# create frame |
|
|
40 |
mut <- as.data.frame(matrix(0L,nrow=1,ncol=ncol(LassoFits))) |
|
|
41 |
colnames(mut) <- colnames(LassoFits) |
|
|
42 |
rownames(mut) <- c("Patient") |
|
|
43 |
|
|
|
44 |
if(length(mutGenes) > 0){ |
|
|
45 |
mutGenes <- gsub(" ","",mutGenes)} |
|
|
46 |
|
|
|
47 |
# Check available genes |
|
|
48 |
if(anyNA(match(mutGenes,colnames(mut)))){stop(paste("The mutated gene you have inputed :", |
|
|
49 |
mutGenes[which(is.na((match(mutGenes,colnames(mut)))))], |
|
|
50 |
"is not available at the moment") )} |
|
|
51 |
mut[,match(mutGenes,colnames(mut))] <- 1 |
|
|
52 |
|
|
|
53 |
## Mutation data complete |
|
|
54 |
|
|
|
55 |
## Clinical |
|
|
56 |
### NEED A WHOLE PART FROM THE APP HERE TRANSFORMING TO VALUES IN THE DATA ### |
|
|
57 |
|
|
|
58 |
## Create frame |
|
|
59 |
avClinNames <- names(ClinRefit$coefficients)[-length(names(ClinRefit$coefficients))] |
|
|
60 |
clin <- as.data.frame(matrix(0L,nrow=1,ncol=length(avClinNames))) |
|
|
61 |
colnames(clin) <- avClinNames |
|
|
62 |
rownames(clin) <- "Patient" |
|
|
63 |
|
|
|
64 |
# match input from the app |
|
|
65 |
clin[,match(clinical,colnames(clin))] <- 1 |
|
|
66 |
|
|
|
67 |
### Part 2 : Get the predicted risk score |
|
|
68 |
LassoFits <- as.matrix(LassoFits) |
|
|
69 |
mut <- as.vector(mut) |
|
|
70 |
|
|
|
71 |
all.pred <- lapply(1:nrow(LassoFits),function(x){ |
|
|
72 |
|
|
|
73 |
### Subset to the coefs of that cv ### |
|
|
74 |
coefs <- LassoFits[x,LassoFits[x,] != 0] |
|
|
75 |
new.temp <- select(mut,names(coefs)) |
|
|
76 |
## substract mean mutation rate of TRAINING SET !!!### |
|
|
77 |
new.x <- new.temp - rep(means.train[[x]][match(names(coefs),names(means.train[[x]]))], each = nrow(new.temp)) |
|
|
78 |
cal.risk.test <- drop(as.matrix(new.x) %*% coefs) |
|
|
79 |
}) |
|
|
80 |
all.pred <- unlist(all.pred) #do.call("cbind",all.pred) |
|
|
81 |
Risk <- mean(all.pred) #apply(all.pred,1,mean) |
|
|
82 |
ori.risk.range <- range(RiskScore) |
|
|
83 |
RiskScore <- rescale(Risk, to = c(0, 10), from = ori.risk.range) |
|
|
84 |
# RiskScore.new <- mean(LassoFits%*%t(mut)) |
|
|
85 |
# RiskScoreRange <- range(RiskScore) |
|
|
86 |
# RiskScore <- rescale(RiskScore.new, to = c(0, 10), from = RiskScoreRange) |
|
|
87 |
# RiskScore <- RiskScore[length(RiskScore)] |
|
|
88 |
### part 3 : Refit with the given refitted clinical variable |
|
|
89 |
clin <- as.data.frame(cbind(clin,RiskScore)) |
|
|
90 |
|
|
|
91 |
### FIRST RUN |
|
|
92 |
|
|
|
93 |
|
|
|
94 |
### GET CI ### |
|
|
95 |
survival.probs <- as.data.frame(matrix(nrow=(nrow(clin)+3),ncol=15)) |
|
|
96 |
rownames(survival.probs) <- c(rownames(clin),"Lower","Upper","Time") |
|
|
97 |
surv.temp <- survfit(ClinRefit, newdata = clin) |
|
|
98 |
for(i in 1:ncol(survival.probs)){ |
|
|
99 |
survival.probs[,i] <- c(as.numeric(summary(surv.temp, times = (i*3-3))$surv), |
|
|
100 |
round(summary(surv.temp, times = (i*3-3))$lower,digits=2), |
|
|
101 |
round(summary(surv.temp, times = (i*3-3))$upper,digits=2), |
|
|
102 |
i*3-3) |
|
|
103 |
} |
|
|
104 |
|
|
|
105 |
a <- list( |
|
|
106 |
autotick = FALSE, |
|
|
107 |
dtick = 6, |
|
|
108 |
tickcolor = toRGB("black") |
|
|
109 |
) |
|
|
110 |
|
|
|
111 |
t.survival.probs <- as.data.frame(t(survival.probs)) |
|
|
112 |
y <- list( |
|
|
113 |
title = "Survival Probability" |
|
|
114 |
) |
|
|
115 |
IndSurvKM <- plot_ly(t.survival.probs, x = ~Time, y = ~Patient, name = 'Estimated Survival', type = 'scatter', |
|
|
116 |
mode = 'lines+markers',hoverinfo="hovertext",#color = ~Patient |
|
|
117 |
hovertext = ~paste("Genetic Risk Score :",round(RiskScore,digits=3)) |
|
|
118 |
) %>% layout(yaxis = y,xaxis = ~a) %>% |
|
|
119 |
layout(shapes=list(type='line', x0= 3*MD, x1= 3*MD, y0=0, |
|
|
120 |
y1=1, line=list(dash='dot', width=1,color = "red")), |
|
|
121 |
xaxis = list(title = paste0("Time (",time.type,")"), showgrid = TRUE)) %>% |
|
|
122 |
add_ribbons(data = t.survival.probs, |
|
|
123 |
ymin = ~Lower, |
|
|
124 |
ymax = ~Upper, |
|
|
125 |
line = list(color = 'rgba(7, 164, 181, 0.05)'), |
|
|
126 |
fillcolor = 'rgba(7, 164, 181, 0.2)', |
|
|
127 |
name = "Confidence Interval") |
|
|
128 |
|
|
|
129 |
|
|
|
130 |
### MAKE TABLE SUMMARY |
|
|
131 |
survivalSummary <- as.data.frame(matrix(nrow=1,ncol=5)) |
|
|
132 |
rownames(survivalSummary) <- "New patient" |
|
|
133 |
colnames(survivalSummary) <- c("MedianOS","95%CI","1Ysurvival","3Ysurvival","Predicted genetic risk") |
|
|
134 |
# for each group find closest value to median |
|
|
135 |
if(time.type == "Months"){YR1 <- 1*12;YR3 <- 3*12} |
|
|
136 |
if(time.type == "Days"){YR1 <- 1*365;YR3 <- 3*365} |
|
|
137 |
Fit <- surv.temp |
|
|
138 |
#Fit <- survfit(ClinRefit, newdata = clin)#, se.fit = T, conf.int = "log-log") |
|
|
139 |
med.index <- which.min(abs(Fit$surv-0.5)) |
|
|
140 |
YR1.index <- which.min(abs(Fit$time-YR1)) |
|
|
141 |
YR3.index <- which.min(abs(Fit$time-YR3)) |
|
|
142 |
survivalSummary[1,] <- c(round(Fit$time[med.index],digits=2),paste0("(",round(Fit$time[which.min(abs(Fit$lower-0.5))],digits=2),",", |
|
|
143 |
round(Fit$time[which.min(abs(Fit$upper-0.5))],digits=2),")"), |
|
|
144 |
round(Fit$surv[YR1.index],digits=2),round(Fit$surv[YR3.index],digits=2), |
|
|
145 |
round(RiskScore,digits=3)) |
|
|
146 |
return(list("IndSurvKM"=IndSurvKM,"IndPredTable"=survivalSummary,"RiskScore"=RiskScore)) |
|
|
147 |
#} |
|
|
148 |
} |
|
|
149 |
|
|
|
150 |
|
|
|
151 |
|