|
a |
|
b/function/cross_validation.R |
|
|
1 |
## cross validation |
|
|
2 |
|
|
|
3 |
runCV = function(i,Tstart,predict_times,data,id){ |
|
|
4 |
library(dplyr) |
|
|
5 |
trainingData <- data[which(data$PatientID %in% id[i,1]),] |
|
|
6 |
testingData <- data[which(data$PatientID %in% id[-i,1]),] |
|
|
7 |
trainingData.id <- trainingData[!duplicated(trainingData$PatientID), ] |
|
|
8 |
testingData.id <- testingData[!duplicated(testingData$PatientID), ] |
|
|
9 |
CV_results <- function(trainingData, testingData.id, predict_times){ |
|
|
10 |
test <- try({ |
|
|
11 |
################## |
|
|
12 |
# Joint Modeling # |
|
|
13 |
################## |
|
|
14 |
library("JMbayes") |
|
|
15 |
library("splines") |
|
|
16 |
library("survminer") |
|
|
17 |
library(survival) |
|
|
18 |
library(dplyr) |
|
|
19 |
lmeFit1 <- lme( logAF ~ ns(TestDate,2),data=trainingData,random = ~ ns(TestDate,2) | PatientID, |
|
|
20 |
control = lmeControl(opt = "optim",msMaxIter =10000)) |
|
|
21 |
coxFit1 <- coxph(Surv(DFS,DFS_status)~T_stage+TP53,data=trainingData.id,x = TRUE) |
|
|
22 |
|
|
|
23 |
# origin |
|
|
24 |
jointFit1 = try(jointModelBayes(lmeFit1, coxFit1, timeVar = "TestDate",n.iter = 20000L),TRUE) |
|
|
25 |
|
|
|
26 |
# value + slope |
|
|
27 |
dForm <- list(fixed = ~ 0 + dns(TestDate, 2), random = ~ 0 + dns(TestDate, 2), |
|
|
28 |
indFixed = 2:3, indRandom = 2:3) |
|
|
29 |
jointFit2 <- try(jointModelBayes(lmeFit1, coxFit1, timeVar = "TestDate", |
|
|
30 |
param = "td-both", extraForm = dForm),T) |
|
|
31 |
# value + cumulative |
|
|
32 |
iForm <- list(fixed = ~ 0 + TestDate + ins(TestDate, 2), random = ~ 0 + TestDate + ins(TestDate, 2), |
|
|
33 |
indFixed = 1:3, indRandom = 1:3) |
|
|
34 |
|
|
|
35 |
jointFit3 <-try(jointModelBayes(lmeFit1, coxFit1, timeVar = "TestDate", |
|
|
36 |
param = "td-extra", extraForm = iForm) ,T) |
|
|
37 |
###################################### |
|
|
38 |
JM_Models <- list("JM_value" = jointFit1, |
|
|
39 |
"JM_value-slope" = jointFit2, |
|
|
40 |
"JM_cumulative" = jointFit3 |
|
|
41 |
) |
|
|
42 |
combos <- expand.grid("model_name" = names(JM_Models), "time" = predict_times) |
|
|
43 |
## test |
|
|
44 |
# AUC |
|
|
45 |
# time = 12 |
|
|
46 |
# jm = jointFit3 |
|
|
47 |
auc_fun <- function (time) { |
|
|
48 |
auc_objs <- mapply(function(jm,newdata=testingData,Tstart=Tstart,Thoriz =time){ |
|
|
49 |
res = try(aucJM(jm,newdata=testingData,Tstart=Tstart,Thoriz =time,idVar ="PatientID" ,simulate=T,M=100),T) |
|
|
50 |
if (class(res)=="try-error") res = list('auc'=NA) |
|
|
51 |
return(res) |
|
|
52 |
}, JM_Models, |
|
|
53 |
MoreArgs = list(newdata=testingData,Tstart=Tstart,Thoriz =time), |
|
|
54 |
SIMPLIFY = FALSE |
|
|
55 |
) |
|
|
56 |
sapply(auc_objs, "[[", "auc") |
|
|
57 |
} |
|
|
58 |
JM_AUCs <- sapply(predict_times, auc_fun) |
|
|
59 |
# PE |
|
|
60 |
pe_fun <- function (time) { |
|
|
61 |
pe_objs <- mapply(function(jm,newdata=testingData,Tstart=Tstart,Thoriz =time){ |
|
|
62 |
res = try(prederrJM(jm,newdata=testingData,Tstart=Tstart,Thoriz =time,idVar ="PatientID", lossFun = "square" ,simulate=T,M=100),T) |
|
|
63 |
if (class(res)=="try-error") res = list('prederr'=NA) |
|
|
64 |
return(res) |
|
|
65 |
}, JM_Models, MoreArgs = list(newdata=testingData,Tstart=Tstart,Thoriz =time), SIMPLIFY = FALSE) |
|
|
66 |
sapply(pe_objs, "[[", "prederr") |
|
|
67 |
} |
|
|
68 |
JM_PEs <- sapply(predict_times, pe_fun) |
|
|
69 |
## training |
|
|
70 |
auc_fun_tr <- function (time) { |
|
|
71 |
auc_objs <- mapply(function(jm,newdata=trainingData,Tstart=Tstart,Thoriz =time){ |
|
|
72 |
res = try(aucJM(jm,newdata=trainingData,Tstart=Tstart,Thoriz =time,idVar ="PatientID" ,simulate=T,M=100),T) |
|
|
73 |
if (class(res)=="try-error") res = list('auc'=NA) |
|
|
74 |
return(res) |
|
|
75 |
}, JM_Models, |
|
|
76 |
MoreArgs = list(newdata=trainingData,Tstart=Tstart,Thoriz =time), SIMPLIFY = FALSE) |
|
|
77 |
sapply(auc_objs, "[[", "auc") |
|
|
78 |
} |
|
|
79 |
JM_AUCs_tr <- sapply(predict_times, auc_fun_tr) |
|
|
80 |
# PE |
|
|
81 |
pe_fun_tr <- function (time) { |
|
|
82 |
pe_objs <- mapply(function(jm,newdata=trainingData,Tstart=Tstart,Thoriz =time){ |
|
|
83 |
res = try(prederrJM(jm,newdata=trainingData,Tstart=Tstart,Thoriz =time,idVar ="PatientID", lossFun = "square" ,simulate=T,M=100),T) |
|
|
84 |
if (class(res)=="try-error") res = list('prederr'=NA) |
|
|
85 |
return(res) |
|
|
86 |
}, JM_Models, MoreArgs = list(newdata=trainingData,Tstart=Tstart,Thoriz =time), SIMPLIFY = FALSE) |
|
|
87 |
sapply(pe_objs, "[[", "prederr") |
|
|
88 |
} |
|
|
89 |
JM_PEs_tr <- sapply(predict_times, pe_fun_tr) |
|
|
90 |
############### |
|
|
91 |
# cox # |
|
|
92 |
############### |
|
|
93 |
dataLM <- JMbayes:::dataLM |
|
|
94 |
LM_models_fun <- function () { |
|
|
95 |
# Landmark data sets |
|
|
96 |
D1 = dataLM(trainingData, Tstart, idVar ="PatientID" ,respVar = "Test_status", timeVar = "TestDate", |
|
|
97 |
evTimeVar = "DFS") |
|
|
98 |
CoxLM1 <- coxph(Surv(DFS, DFS_status) ~ Test_status+T_stage+TP53, |
|
|
99 |
data = D1) |
|
|
100 |
CoxLM2 <- coxph(Surv(DFS, DFS_status) ~ P2+T_stage+TP53, |
|
|
101 |
data = D1) |
|
|
102 |
list("LM_bi" = CoxLM1, |
|
|
103 |
LM_P2 = CoxLM2 |
|
|
104 |
) |
|
|
105 |
} |
|
|
106 |
###################################################################################### |
|
|
107 |
# Calculate Performance Measures |
|
|
108 |
|
|
|
109 |
## test |
|
|
110 |
# AUC |
|
|
111 |
LM_models <- LM_models_fun() |
|
|
112 |
|
|
|
113 |
auc_fun <- function (time) { |
|
|
114 |
auc_objs <- mapply(function(jm,newdata0,Tstart0,Thoriz0){ |
|
|
115 |
if ("P2" %in% names(jm$assign)){ |
|
|
116 |
newdata0 = subset(newdata0,!is.na(P2)) |
|
|
117 |
} |
|
|
118 |
res = try(aucJM(jm,newdata=newdata0,Tstart=Tstart0,Thoriz =Thoriz0, |
|
|
119 |
idVar ="PatientID" ,respVar = "Test_status", |
|
|
120 |
timeVar = "TestDate",evTimeVar = "DFS"),TRUE) |
|
|
121 |
if (class(res)=="try-error"){res=list('auc'=NA)} |
|
|
122 |
return(res) |
|
|
123 |
}, |
|
|
124 |
LM_models, |
|
|
125 |
MoreArgs = list(newdata0=testingData,Tstart0=Tstart,Thoriz0 =time), SIMPLIFY = FALSE) |
|
|
126 |
sapply(auc_objs, "[[", "auc") |
|
|
127 |
} |
|
|
128 |
LM_AUCs <- sapply(predict_times, auc_fun) |
|
|
129 |
# PE |
|
|
130 |
pe_fun <- function (time) { |
|
|
131 |
pe_objs <- mapply(function(jm,newdata0,Tstart0,Thoriz0){ |
|
|
132 |
if ("P2" %in% names(jm$assign)){ |
|
|
133 |
newdata0 = subset(newdata0,!is.na(P2)) |
|
|
134 |
} |
|
|
135 |
res = try(prederrJM(jm,newdata=newdata0,Tstart=Tstart0,Thoriz =Thoriz0, |
|
|
136 |
idVar ="PatientID" ,respVar = "Test_status", |
|
|
137 |
timeVar = "TestDate",evTimeVar = "DFS", |
|
|
138 |
lossFun = "square"),TRUE) |
|
|
139 |
if (class(res)=="try-error"){res=list('prederr'=NA)} |
|
|
140 |
return(res) |
|
|
141 |
}, |
|
|
142 |
LM_models, |
|
|
143 |
MoreArgs = list(newdata0=testingData,Tstart0=Tstart,Thoriz0 =time), SIMPLIFY = FALSE) |
|
|
144 |
sapply(pe_objs, "[[", "prederr") |
|
|
145 |
} |
|
|
146 |
LM_PEs <- sapply(predict_times, pe_fun) |
|
|
147 |
|
|
|
148 |
## training |
|
|
149 |
# AUC |
|
|
150 |
auc_fun_tr <- function (time) { |
|
|
151 |
auc_objs <- mapply(function(jm,newdata0,Tstart0,Thoriz0){ |
|
|
152 |
if ("P2" %in% names(jm$assign)){ |
|
|
153 |
newdata0 = subset(newdata0,!is.na(P2)) |
|
|
154 |
} |
|
|
155 |
res = try(aucJM(jm,newdata=newdata0,Tstart=Tstart0,Thoriz =Thoriz0, |
|
|
156 |
idVar ="PatientID" ,respVar = "Test_status", |
|
|
157 |
timeVar = "TestDate",evTimeVar = "DFS"),TRUE) |
|
|
158 |
if (class(res)=="try-error"){res=list('auc'=NA)} |
|
|
159 |
return(res) |
|
|
160 |
}, |
|
|
161 |
LM_models, |
|
|
162 |
MoreArgs = list(newdata0=trainingData,Tstart0=Tstart,Thoriz0 =time), SIMPLIFY = FALSE) |
|
|
163 |
sapply(auc_objs, "[[", "auc") |
|
|
164 |
} |
|
|
165 |
LM_AUCs_tr <- sapply(predict_times, auc_fun_tr) |
|
|
166 |
# PE |
|
|
167 |
pe_fun_tr <- function (time) { |
|
|
168 |
pe_objs <- mapply(function(jm,newdata0,Tstart0,Thoriz0){ |
|
|
169 |
if ("P2" %in% names(jm$assign)){ |
|
|
170 |
newdata0 = subset(newdata0,!is.na(P2)) |
|
|
171 |
} |
|
|
172 |
res = try(prederrJM(jm,newdata=newdata0,Tstart=Tstart0,Thoriz =Thoriz0, |
|
|
173 |
idVar ="PatientID" ,respVar = "Test_status", |
|
|
174 |
timeVar = "TestDate",evTimeVar = "DFS", |
|
|
175 |
lossFun = "square"),TRUE) |
|
|
176 |
if (class(res)=="try-error"){res=list('prederr'=NA)} |
|
|
177 |
return(res) |
|
|
178 |
}, |
|
|
179 |
LM_models, |
|
|
180 |
MoreArgs = list(newdata0=trainingData,Tstart0=Tstart,Thoriz0 =time), SIMPLIFY = FALSE) |
|
|
181 |
sapply(pe_objs, "[[", "prederr") |
|
|
182 |
} |
|
|
183 |
LM_PEs_tr <- sapply(predict_times, pe_fun_tr) |
|
|
184 |
###################################################################################### |
|
|
185 |
|
|
|
186 |
# Collect results |
|
|
187 |
colnames(JM_AUCs) = colnames(JM_PEs) = colnames(LM_AUCs) = colnames(LM_PEs) = predict_times |
|
|
188 |
colnames(JM_AUCs_tr) = colnames(JM_PEs_tr) = colnames(LM_AUCs_tr) = colnames(LM_PEs_tr) = predict_times |
|
|
189 |
|
|
|
190 |
JM = rbind(as.data.frame(JM_AUCs)%>%tibble::rownames_to_column(var="model2")%>%mutate(variable="AUC",type="testing"), |
|
|
191 |
as.data.frame(JM_PEs)%>%tibble::rownames_to_column(var="model2")%>%mutate(variable="PE",type="testing"), |
|
|
192 |
as.data.frame(JM_AUCs_tr)%>%tibble::rownames_to_column(var="model2")%>%mutate(variable="AUC",type="training"), |
|
|
193 |
as.data.frame(JM_PEs_tr)%>%tibble::rownames_to_column(var="model2")%>%mutate(variable="PE",type="training") |
|
|
194 |
) |
|
|
195 |
cox = rbind(as.data.frame(LM_AUCs)%>%tibble::rownames_to_column(var="model2")%>%mutate(variable="AUC",type="testing"), |
|
|
196 |
as.data.frame(LM_PEs)%>%tibble::rownames_to_column(var="model2")%>%mutate(variable="PE",type="testing"), |
|
|
197 |
as.data.frame(LM_AUCs_tr)%>%tibble::rownames_to_column(var="model2")%>%mutate(variable="AUC",type="training"), |
|
|
198 |
as.data.frame(LM_PEs_tr)%>%tibble::rownames_to_column(var="model2")%>%mutate(variable="PE",type="training") |
|
|
199 |
) |
|
|
200 |
results_accuracy = rbind(JM%>%mutate(model1="JM"),cox%>%mutate(model1="cox")) |
|
|
201 |
results_accuracy |
|
|
202 |
},TRUE) |
|
|
203 |
if (!inherits(test, "try-error")) { |
|
|
204 |
return(test) |
|
|
205 |
} else { |
|
|
206 |
JM_AUCs = matrix(nrow = 3,ncol=length(predict_times), |
|
|
207 |
dimnames = list(c("JM_value","JM_value-slope","JM_cumulative"),predict_times)) |
|
|
208 |
JM_PEs = matrix(nrow = 3,ncol=length(predict_times), |
|
|
209 |
dimnames = list(c("JM_value","JM_value-slope","JM_cumulative"),predict_times)) |
|
|
210 |
LM_AUCs = matrix(nrow = 2,ncol=length(predict_times), |
|
|
211 |
dimnames = list(c("LM_bi","LM_P2"),predict_times)) |
|
|
212 |
LM_PEs = matrix(nrow = 2,ncol=length(predict_times), |
|
|
213 |
dimnames = list(c("LM_bi","LM_P2"),predict_times)) |
|
|
214 |
JM_AUCs_tr = matrix(nrow = 3,ncol=length(predict_times), |
|
|
215 |
dimnames = list(c("JM_value","JM_value-slope","JM_cumulative"),predict_times)) |
|
|
216 |
JM_PEs_tr = matrix(nrow = 3,ncol=length(predict_times), |
|
|
217 |
dimnames = list(c("JM_value","JM_value-slope","JM_cumulative"),predict_times)) |
|
|
218 |
LM_AUCs_tr = matrix(nrow = 2,ncol=length(predict_times), |
|
|
219 |
dimnames = list(c("LM_bi","LM_P2"),predict_times)) |
|
|
220 |
LM_PEs_tr = matrix(nrow = 2,ncol=length(predict_times), |
|
|
221 |
dimnames = list(c("LM_bi","LM_P2"),predict_times)) |
|
|
222 |
JM = rbind(as.data.frame(JM_AUCs)%>%tibble::rownames_to_column(var="model2")%>%mutate(variable="AUC",type="testing"), |
|
|
223 |
as.data.frame(JM_PEs)%>%tibble::rownames_to_column(var="model2")%>%mutate(variable="PE",type="testing"), |
|
|
224 |
as.data.frame(JM_AUCs_tr)%>%tibble::rownames_to_column(var="model2")%>%mutate(variable="AUC",type="training"), |
|
|
225 |
as.data.frame(JM_PEs_tr)%>%tibble::rownames_to_column(var="model2")%>%mutate(variable="PE",type="training") |
|
|
226 |
) |
|
|
227 |
cox = rbind(as.data.frame(LM_AUCs)%>%tibble::rownames_to_column(var="model2")%>%mutate(variable="AUC",type="testing"), |
|
|
228 |
as.data.frame(LM_PEs)%>%tibble::rownames_to_column(var="model2")%>%mutate(variable="PE",type="testing"), |
|
|
229 |
as.data.frame(LM_AUCs_tr)%>%tibble::rownames_to_column(var="model2")%>%mutate(variable="AUC",type="training"), |
|
|
230 |
as.data.frame(LM_PEs_tr)%>%tibble::rownames_to_column(var="model2")%>%mutate(variable="PE",type="training") |
|
|
231 |
) |
|
|
232 |
results_accuracy = rbind(JM%>%mutate(model1="JM"),cox%>%mutate(model1="cox")) |
|
|
233 |
results_accuracy |
|
|
234 |
} |
|
|
235 |
} |
|
|
236 |
CV_results(trainingData,testingData,predict_times) |
|
|
237 |
} |
|
|
238 |
|
|
|
239 |
|
|
|
240 |
##### test ######### |
|
|
241 |
# i= splits$Fold1.Rep02 |
|
|
242 |
# data = mydata |
|
|
243 |
# id = mydata.id |
|
|
244 |
# runCV(i,Tstart,predict_times,mydata,mydata.id) |
|
|
245 |
|