Diff of /Scripts/PredictIncoming.R [000000] .. [c46e49]

Switch to unified view

a b/Scripts/PredictIncoming.R
1
############################################
2
### PREDICTING INCOMING PATIENT SURVIVAL ###
3
############################################
4
5
### Method
6
7
## INPUT:
8
# 1) Set of mutated genes : All genes not inputed in string will be considered as wild type (from App).
9
# 2) LASSO runs : All refited (forced beta parameters) cox models
10
# 3) Set of covariates : All covariates available can be edited and added to the model
11
# 4) The character vector list of genes available (return colnames of data in GetResults function)
12
# 5) Refitted model with clinical covariates
13
14
## METHOD :
15
# Iterate through all the runs, using the coefficients found in the LASSO models generate a predicted 
16
# risk score for the incoming patient. Use the predicted risk score in a refitted coxph model with the 
17
# given clinical covariates.
18
19
## OUTPUT :
20
# 1) Survival curve for the patients inputed
21
# 2) Summary Table
22
23
### TEST
24
mutGenes <- c("alk","STK11","TP53")
25
clinical <- c("Age")
26
ClinRefit <- FirstRun$ClinRefit
27
time.type <- "Months"
28
MD <- 12
29
LassoFits <- as.matrix(FirstRun$LassoFits)
30
RiskScore <- FirstRun$average.risk
31
means.train <- means.train
32
33
predictIncomingPatient <- function(mutGenes,clinical,ClinRefit,time.type,MD,LassoFits,RiskScore,means.train){
34
  
35
  #if(length(mutGenes) == 0){stop("INPUT ERROR : Enter at least one gene name")}
36
  #library(plotly)
37
  ### Step 1 : Build the dataset ###
38
  ## Gene mutation data
39
  # create frame 
40
  mut <- as.data.frame(matrix(0L,nrow=1,ncol=ncol(LassoFits)))
41
  colnames(mut) <- colnames(LassoFits)
42
  rownames(mut) <- c("Patient")
43
  
44
  if(length(mutGenes) > 0){
45
    mutGenes <- gsub(" ","",mutGenes)}
46
  
47
  # Check available genes
48
  if(anyNA(match(mutGenes,colnames(mut)))){stop(paste("The mutated gene you have inputed :",
49
                                                      mutGenes[which(is.na((match(mutGenes,colnames(mut)))))],
50
                                                      "is not available at the moment") )}
51
  mut[,match(mutGenes,colnames(mut))] <- 1
52
  
53
  ## Mutation data complete
54
  
55
  ## Clinical 
56
  ### NEED A WHOLE PART FROM THE APP HERE TRANSFORMING TO VALUES IN THE DATA ###
57
  
58
  ## Create frame
59
  avClinNames <- names(ClinRefit$coefficients)[-length(names(ClinRefit$coefficients))]
60
  clin <- as.data.frame(matrix(0L,nrow=1,ncol=length(avClinNames)))
61
  colnames(clin) <- avClinNames 
62
  rownames(clin) <- "Patient"
63
  
64
  # match input from the app
65
  clin[,match(clinical,colnames(clin))] <- 1
66
  
67
  ### Part 2 : Get the predicted risk score
68
  LassoFits <- as.matrix(LassoFits)
69
  mut <- as.vector(mut)
70
  
71
  all.pred <- lapply(1:nrow(LassoFits),function(x){
72
    
73
    ### Subset to the coefs of that cv ###
74
    coefs <- LassoFits[x,LassoFits[x,] != 0]
75
    new.temp <- select(mut,names(coefs))
76
    ## substract mean mutation rate of TRAINING SET !!!###
77
    new.x <- new.temp - rep(means.train[[x]][match(names(coefs),names(means.train[[x]]))], each = nrow(new.temp))
78
    cal.risk.test <- drop(as.matrix(new.x) %*% coefs)
79
  })
80
  all.pred <- unlist(all.pred) #do.call("cbind",all.pred)
81
  Risk <- mean(all.pred) #apply(all.pred,1,mean)
82
  ori.risk.range <- range(RiskScore)
83
  RiskScore <- rescale(Risk, to = c(0, 10), from = ori.risk.range)
84
  # RiskScore.new <- mean(LassoFits%*%t(mut)) 
85
  # RiskScoreRange <- range(RiskScore)
86
  # RiskScore <- rescale(RiskScore.new, to = c(0, 10), from = RiskScoreRange)
87
  # RiskScore <- RiskScore[length(RiskScore)]
88
  ### part 3 : Refit with the given refitted clinical variable
89
  clin <- as.data.frame(cbind(clin,RiskScore))
90
  
91
  ### FIRST RUN
92
  
93
  
94
  ### GET CI ###
95
  survival.probs <- as.data.frame(matrix(nrow=(nrow(clin)+3),ncol=15))
96
  rownames(survival.probs) <- c(rownames(clin),"Lower","Upper","Time")
97
  surv.temp <- survfit(ClinRefit, newdata = clin)
98
  for(i in 1:ncol(survival.probs)){
99
    survival.probs[,i] <- c(as.numeric(summary(surv.temp, times = (i*3-3))$surv),
100
                            round(summary(surv.temp, times = (i*3-3))$lower,digits=2),
101
                            round(summary(surv.temp, times = (i*3-3))$upper,digits=2),
102
                            i*3-3)
103
  }
104
  
105
  a <- list(
106
    autotick = FALSE,
107
    dtick = 6,
108
    tickcolor = toRGB("black")
109
  )
110
  
111
  t.survival.probs <- as.data.frame(t(survival.probs))
112
  y <- list(
113
    title = "Survival Probability"
114
  )
115
  IndSurvKM <- plot_ly(t.survival.probs, x = ~Time, y = ~Patient, name = 'Estimated Survival', type = 'scatter',
116
                       mode = 'lines+markers',hoverinfo="hovertext",#color = ~Patient
117
                       hovertext = ~paste("Genetic Risk Score :",round(RiskScore,digits=3))
118
  ) %>% layout(yaxis = y,xaxis = ~a) %>%
119
    layout(shapes=list(type='line', x0= 3*MD, x1= 3*MD, y0=0,
120
                       y1=1, line=list(dash='dot', width=1,color = "red")),
121
           xaxis = list(title = paste0("Time (",time.type,")"), showgrid = TRUE)) %>%
122
    add_ribbons(data = t.survival.probs,
123
                ymin = ~Lower,
124
                ymax = ~Upper,
125
                line = list(color = 'rgba(7, 164, 181, 0.05)'),
126
                fillcolor = 'rgba(7, 164, 181, 0.2)',
127
                name = "Confidence Interval")
128
  
129
  
130
  ### MAKE TABLE SUMMARY
131
  survivalSummary <- as.data.frame(matrix(nrow=1,ncol=5))
132
  rownames(survivalSummary) <- "New patient"
133
  colnames(survivalSummary) <- c("MedianOS","95%CI","1Ysurvival","3Ysurvival","Predicted genetic risk")
134
  # for each group find closest value to median 
135
  if(time.type == "Months"){YR1 <- 1*12;YR3 <- 3*12}
136
  if(time.type == "Days"){YR1 <- 1*365;YR3 <- 3*365}
137
  Fit <- surv.temp 
138
  #Fit <- survfit(ClinRefit, newdata = clin)#, se.fit = T, conf.int = "log-log")
139
  med.index <- which.min(abs(Fit$surv-0.5))
140
  YR1.index <- which.min(abs(Fit$time-YR1))
141
  YR3.index <- which.min(abs(Fit$time-YR3))
142
  survivalSummary[1,] <- c(round(Fit$time[med.index],digits=2),paste0("(",round(Fit$time[which.min(abs(Fit$lower-0.5))],digits=2),",",
143
                                                                      round(Fit$time[which.min(abs(Fit$upper-0.5))],digits=2),")"),
144
                           round(Fit$surv[YR1.index],digits=2),round(Fit$surv[YR3.index],digits=2),
145
                           round(RiskScore,digits=3))
146
  return(list("IndSurvKM"=IndSurvKM,"IndPredTable"=survivalSummary,"RiskScore"=RiskScore))
147
  #}
148
}
149
150
151