|
a |
|
b/R/fromDataInternal.R |
|
|
1 |
#This file contains the private functions |
|
|
2 |
#for the predict from data part of the package |
|
|
3 |
#apart from those used within the simulate function |
|
|
4 |
#see fromDataSimInternal.R. For functions called by |
|
|
5 |
#AddTimeColumn see timeInternal.R |
|
|
6 |
|
|
|
7 |
##'@include common.R simQOutput.R timeInternal.R |
|
|
8 |
NULL |
|
|
9 |
|
|
|
10 |
|
|
|
11 |
# Deal with subjects who have time = 0 or NA when creating |
|
|
12 |
# the EventData object |
|
|
13 |
# |
|
|
14 |
# @param remove.0.time logical if TRUE then all subjects with time = NA or 0 are removed from the |
|
|
15 |
# data set and not included in the object. If FALSE then they are included in the simulation |
|
|
16 |
# (but not in the model fitting) |
|
|
17 |
# @param subject.data A data frame to be included in the subject.data slot |
|
|
18 |
# of the EventData object |
|
|
19 |
# @return subject.data with appropriate subjects removed/time set to 0 |
|
|
20 |
warnNAtime <- function(remove.0.time,subject.data){ |
|
|
21 |
|
|
|
22 |
if(!is.logical(remove.0.time) || length(remove.0.time) > 1){ |
|
|
23 |
stop("Invalid remove.0.time argument") |
|
|
24 |
} |
|
|
25 |
|
|
|
26 |
index <- which(subject.data$time==0 | is.na(subject.data$time)) |
|
|
27 |
if(length(index)==0){ |
|
|
28 |
return(subject.data) |
|
|
29 |
} |
|
|
30 |
|
|
|
31 |
|
|
|
32 |
if(remove.0.time){ |
|
|
33 |
warning(paste("Subjects",paste(subject.data[index,]$subject,collapse=", "),"have time=NA or 0", |
|
|
34 |
"and have been removed from the data set.")) |
|
|
35 |
subject.data <- subject.data[!1:nrow(subject.data) %in% index,] |
|
|
36 |
} |
|
|
37 |
else{ |
|
|
38 |
warning(paste("Subjects", paste(subject.data[index,]$subject, collapse=", "), |
|
|
39 |
"have time=NA or 0, their time has been set to 0 and", |
|
|
40 |
"they will not be used when fitting the model but will have event times simulated", |
|
|
41 |
"when performing the predictions unless withdrawn is set to 1.")) |
|
|
42 |
subject.data$time[is.na(subject.data$time)] <- 0 |
|
|
43 |
} |
|
|
44 |
|
|
|
45 |
subject.data |
|
|
46 |
} |
|
|
47 |
|
|
|
48 |
# Get the multiplicative factor to convert units to |
|
|
49 |
# days for the KM-plot |
|
|
50 |
# @param units "Days", "Months" or "Years" |
|
|
51 |
# @param daysinyear The number of days in a year (e.g. 365 or 365.25) |
|
|
52 |
# @return 1, daysinyear/12 or daysinyear respectively |
|
|
53 |
GetScaleForKM <- function(units,daysinyear){ |
|
|
54 |
if(!units %in% c("Days","Months","Years")){ |
|
|
55 |
stop("Invalid units argument") |
|
|
56 |
} |
|
|
57 |
|
|
|
58 |
xscale <- 1 |
|
|
59 |
if(units == "Months") xscale <- daysinyear/12 |
|
|
60 |
if(units == "Years") xscale <- daysinyear |
|
|
61 |
|
|
|
62 |
return(xscale) |
|
|
63 |
} |
|
|
64 |
|
|
|
65 |
# Function which inputs a vector of dates/strings |
|
|
66 |
# and outputs a vector of dates |
|
|
67 |
# |
|
|
68 |
# If S3 dates are input they are returned unchanged. |
|
|
69 |
# If character strings are input then they must all |
|
|
70 |
# have the same format either "YYYY-MM-DD", "DD/MM/YYYY" |
|
|
71 |
# or "DD Month YYYY" |
|
|
72 |
# |
|
|
73 |
# @param vec The vector of characters |
|
|
74 |
# @return a vector of Dates. |
|
|
75 |
# If conversion fails an error is thrown |
|
|
76 |
FixDates <- function(vec){ |
|
|
77 |
|
|
|
78 |
if(all(is.na(vec))) return(as.Date(vec)) |
|
|
79 |
if(class(vec)=="Date") return(vec) |
|
|
80 |
|
|
|
81 |
vec[vec==""] <- NA |
|
|
82 |
formats <- c("%d/%m/%Y","%d %b %Y","%Y-%m-%d") |
|
|
83 |
|
|
|
84 |
ans <- lapply(formats,function(x){ |
|
|
85 |
as.Date(vec,format=x) |
|
|
86 |
}) |
|
|
87 |
|
|
|
88 |
which_date <- unlist(lapply(1:3,function(x){ |
|
|
89 |
sum(is.na(ans[[x]]))==sum(is.na(vec)) |
|
|
90 |
})) |
|
|
91 |
|
|
|
92 |
if(sum(which_date)!= 1){ |
|
|
93 |
stop("Error reading date format it should be of the form |
|
|
94 |
YYYY-MM-DD, DD/MM/YYYY or DD Month YYYY ") |
|
|
95 |
} |
|
|
96 |
|
|
|
97 |
ans <- ans[[which(which_date,TRUE)]] |
|
|
98 |
years <- as.character((ans),format="%Y") |
|
|
99 |
years <- years[!is.na(years)] |
|
|
100 |
|
|
|
101 |
if(any(as.numeric(years) < 1000) ){ |
|
|
102 |
stop("All years must have 4 digits, e.g. 01/01/2015 not 01/01/15") |
|
|
103 |
} |
|
|
104 |
|
|
|
105 |
return(ans) |
|
|
106 |
} |
|
|
107 |
|
|
|
108 |
|
|
|
109 |
# Set the censored.at.follow.up column for EventData@@subject.data |
|
|
110 |
# @param subject.data An EventData@@subject.data data frame without the |
|
|
111 |
# censored.at.follow.up column |
|
|
112 |
# @param followup The follow period (in days) |
|
|
113 |
# @return subject.data with additional censored.at.follow.up column |
|
|
114 |
# Warnings will be output if dates have to be changed due to invalid data |
|
|
115 |
# for example if event occurs after followup period |
|
|
116 |
IncludeFollowUp <- function(subject.data,followup){ |
|
|
117 |
|
|
|
118 |
subject.data$censored.at.follow.up <- as.numeric(subject.data$time > followup) |
|
|
119 |
subject.data$time <-ifelse(subject.data$censored.at.follow.up==1,followup,subject.data$time) |
|
|
120 |
|
|
|
121 |
|
|
|
122 |
resetToFollowUp <- function(data,idx,warning.msg){ |
|
|
123 |
if(length(idx)>0){ |
|
|
124 |
data$withdrawn[idx] <- 0 |
|
|
125 |
data$has.event[idx] <- 0 |
|
|
126 |
data$event.type[idx] <- NA |
|
|
127 |
warning(paste("Subjects",paste(data[idx,"subject"],collapse=", "),warning.msg)) |
|
|
128 |
} |
|
|
129 |
data |
|
|
130 |
} |
|
|
131 |
|
|
|
132 |
subject.data <- resetToFollowUp(data=subject.data, |
|
|
133 |
idx=which(subject.data$withdrawn==1 & subject.data$censored.at.follow.up==1), |
|
|
134 |
warning.msg="have withdrawn dates after followup period and so have been censored at followup") |
|
|
135 |
|
|
|
136 |
resetToFollowUp(data=subject.data, |
|
|
137 |
idx=which(subject.data$has.event==1 & subject.data$censored.at.follow.up==1), |
|
|
138 |
warning.msg="had event after followup period and so have been censored at followup instead of having an event") |
|
|
139 |
|
|
|
140 |
} |
|
|
141 |
|
|
|
142 |
|
|
|
143 |
# Create an empty data frame for |
|
|
144 |
# event.pred.data or time.pred.data |
|
|
145 |
# slots of the \code{WeibullResults} class |
|
|
146 |
# @return This empty data frame |
|
|
147 |
EmptyPredictionDF <- function(){ |
|
|
148 |
|
|
|
149 |
data.frame(time=as.Date(character()), |
|
|
150 |
event=numeric(), |
|
|
151 |
CI_low=numeric(), |
|
|
152 |
CI_high=numeric(), |
|
|
153 |
daysatrisk=numeric()) |
|
|
154 |
} |
|
|
155 |
|
|
|
156 |
|
|
|
157 |
# Create the text for summarizing a \code{FromDataResults object} |
|
|
158 |
# @param object A \code{FromDataResults} object |
|
|
159 |
# @param round.method See \code{eventPrediction:::myRound.Date} |
|
|
160 |
# @param text.width numeric The width of the text to be returned. See \code{base::strwrap} |
|
|
161 |
# @param show.predictions Logical if TRUE then include the time.pred.data and |
|
|
162 |
# event.pred.data information in the text |
|
|
163 |
# @param show.at.risk Output the median number of at risk years |
|
|
164 |
# @return A string (which can be output using cat) |
|
|
165 |
getFromDataResultsSummaryText <- function(object,round.method="None",text.width=60,show.predictions=TRUE,show.at.risk=TRUE){ |
|
|
166 |
|
|
|
167 |
daysinyear <- standarddaysinyear() |
|
|
168 |
|
|
|
169 |
data <- object@event.data |
|
|
170 |
indat <- data@subject.data |
|
|
171 |
indat$event.date <- ifelse(indat$has.event, LastDate(indat),NA) |
|
|
172 |
|
|
|
173 |
N.subjects <- length(indat$rand.date) #subjects in data set |
|
|
174 |
|
|
|
175 |
title <- "" |
|
|
176 |
if(N.subjects!=0){ |
|
|
177 |
last.date <- if(any(!is.na(indat$event.date))) as.Date(max(indat$event.date, na.rm=TRUE),origin="1970-01-01") |
|
|
178 |
else "NA" |
|
|
179 |
title <- paste0(N.subjects, " patients recruited where last patient in on ", max(indat$rand.date)) |
|
|
180 |
|
|
|
181 |
if(class(last.date)=="Date"){ |
|
|
182 |
title <- paste0(title, " and last event observed at ", last.date, |
|
|
183 |
" (",sum( indat$has.event, na.rm=TRUE), " events). ") |
|
|
184 |
} |
|
|
185 |
else{ |
|
|
186 |
title <- paste0(title," and no events observed. ") |
|
|
187 |
} |
|
|
188 |
} |
|
|
189 |
|
|
|
190 |
|
|
|
191 |
if(object@Naccrual > 0){ |
|
|
192 |
title <- paste0(title, |
|
|
193 |
" Out of ", N.subjects+object@Naccrual, " patients ", object@Naccrual, |
|
|
194 |
" were simulated using ", object@accrualGenerator@text," ") |
|
|
195 |
} |
|
|
196 |
|
|
|
197 |
|
|
|
198 |
title <- paste(title, "Using ", object@simParams@type ," survival model. ",sep="") |
|
|
199 |
|
|
|
200 |
NA.note <- "" |
|
|
201 |
|
|
|
202 |
if(nrow(object@event.pred.data)!=0 && show.predictions){ |
|
|
203 |
df <- object@event.pred.data |
|
|
204 |
ans <- lapply(1:nrow(df),function(x){ |
|
|
205 |
target <- myRound.Date(c(df[x,"CI_low"],df[x,"time"],df[x,"CI_high"]),round.method) |
|
|
206 |
|
|
|
207 |
retVal <- paste0("The time at which ", df[x,"event"]," events have occurred is predicted to be ", |
|
|
208 |
target[2], " [", target[1], ", ", target[3], "]" ) |
|
|
209 |
|
|
|
210 |
if(show.at.risk){ |
|
|
211 |
retVal <- paste(retVal," with an expected ",round(df[x,"daysatrisk"]/daysinyear,2)," years of subjects at risk at this time",sep="") |
|
|
212 |
} |
|
|
213 |
paste(retVal,". ",sep="") |
|
|
214 |
}) |
|
|
215 |
|
|
|
216 |
if(any(object@event.pred.data$time==as.Date(Inf,origin="1970-01-01"))){ |
|
|
217 |
NA.note <- paste("An expected time of NA implies that,", |
|
|
218 |
"in fewer than 50% of simulations the given number of events occurred.") |
|
|
219 |
}else if(any(object@event.pred.data$CI_high==as.Date(Inf,origin="1970-01-01"))){ |
|
|
220 |
NA.note <- paste("A CI value of NA implies that, due to subject dropout, ", |
|
|
221 |
"in fewer than ",100*(1-object@limit), |
|
|
222 |
"% of the simulations the given number of events occurred.", sep="") |
|
|
223 |
} |
|
|
224 |
|
|
|
225 |
title <- paste(title,paste(ans,collapse=""),sep="") |
|
|
226 |
} |
|
|
227 |
|
|
|
228 |
if(nrow(object@time.pred.data)!=0 && show.predictions){ |
|
|
229 |
df <- object@time.pred.data |
|
|
230 |
ans <- lapply(1:nrow(df),function(x){ |
|
|
231 |
retVal <- paste0("On ", df[x,"time"]," the predicted number of events is ", |
|
|
232 |
df[x,"event"], " [", df[x,"CI_low"], ", ", df[x,"CI_high"], "]" ) |
|
|
233 |
|
|
|
234 |
if(show.at.risk){ |
|
|
235 |
retVal <- paste(retVal," with an expected ",round(df[x,"daysatrisk"]/daysinyear,2)," years of subjects at risk",sep="") |
|
|
236 |
} |
|
|
237 |
paste(retVal,". ",sep="") |
|
|
238 |
}) |
|
|
239 |
|
|
|
240 |
title <- paste(title,paste(ans,collapse=""),sep="") |
|
|
241 |
} |
|
|
242 |
|
|
|
243 |
title <- paste(title,NA.note,sep="") |
|
|
244 |
|
|
|
245 |
return(AddLineBreaks(title,text.width=text.width)) |
|
|
246 |
} |
|
|
247 |
|
|
|
248 |
|
|
|
249 |
# Round dates |
|
|
250 |
# |
|
|
251 |
# @param mydates a vector of 3 dates, the |
|
|
252 |
# lower CI value, the median and the upper CI value |
|
|
253 |
# @param round.method character, if "toMonths" then |
|
|
254 |
# the following procedure is applied: |
|
|
255 |
# lower CI: take the month of the date 15 days earlier \cr |
|
|
256 |
# median: take the month of the given date \cr |
|
|
257 |
# upper CI: take the month of the date 15 days later \cr |
|
|
258 |
# otherwise the dates are unchanged |
|
|
259 |
# @return The rounded dates |
|
|
260 |
myRound.Date <- function(mydates,round.method){ |
|
|
261 |
|
|
|
262 |
if(round.method=="toMonths"){ |
|
|
263 |
c(format( mydates[1] - 15, format="%b %Y" ), |
|
|
264 |
format( mydates[2], format="%b %Y"), |
|
|
265 |
format( mydates[3] + 15, format="%b %Y" )) |
|
|
266 |
} |
|
|
267 |
else{ |
|
|
268 |
mydates |
|
|
269 |
} |
|
|
270 |
} |
|
|
271 |
|
|
|
272 |
|
|
|
273 |
# Calculate the last date on study for subjects |
|
|
274 |
# |
|
|
275 |
# Code: \code{ifelse(time==0,rand.date,time+rand.date-1)} |
|
|
276 |
# |
|
|
277 |
# @param time A vector of times on study |
|
|
278 |
# @param rand.date A vector of randomization dates |
|
|
279 |
# @return A vector of dates |
|
|
280 |
internal.Date <- function(time,rand.date){ |
|
|
281 |
as.Date(ifelse(time==0,rand.date,time+rand.date-1),origin="1970-01-01") |
|
|
282 |
} |
|
|
283 |
|
|
|
284 |
# Calculate the last date on study for subjects |
|
|
285 |
# |
|
|
286 |
# The last date is the date of censoring or date of withdrawal |
|
|
287 |
# or date of event for subjects |
|
|
288 |
# @param indat A data frame e.g. EventData@@subject.data |
|
|
289 |
# @return A vector of dates |
|
|
290 |
LastDate <- function(indat){ |
|
|
291 |
internal.Date(indat$time,indat$rand.date) |
|
|
292 |
} |
|
|
293 |
|
|
|
294 |
|
|
|
295 |
|
|
|
296 |
# Create a data frame which contains only subjects |
|
|
297 |
# who have been censored before a given date |
|
|
298 |
# @param data A data frame e.g. EventData@@subject.data |
|
|
299 |
# @param censor.date The given censored cutoff date |
|
|
300 |
# @return The subset of \code{data} which is censored (i.e. does not |
|
|
301 |
# have an event and is not withdrawn) before the given date |
|
|
302 |
GetLaggedSubjects <- function(data,censor.date){ |
|
|
303 |
data <- data[LastDate(data) < censor.date,] |
|
|
304 |
data[data$has.event==0 & data$withdrawn==0 & data$censored.at.follow.up==0,] |
|
|
305 |
} |
|
|
306 |
|
|
|
307 |
|
|
|
308 |
|
|
|
309 |
# Function to derive the time on study for subjects given |
|
|
310 |
# columns such as dth.date, event.date etc. |
|
|
311 |
# |
|
|
312 |
# See the vignette for the logic used in calculating the time. |
|
|
313 |
# Note the changing of times so they are <= followup period is |
|
|
314 |
# carried out in a subsequent function |
|
|
315 |
# |
|
|
316 |
# see timeInternal.R |
|
|
317 |
# |
|
|
318 |
# @param data A data frame |
|
|
319 |
# @param rand.date string, the column name of randomization dates |
|
|
320 |
# @param has.event string, the column name of whether a subject had an event |
|
|
321 |
# (1) or not (0) |
|
|
322 |
# @param withdrawn string, the column name of whether a subject has withdrawn |
|
|
323 |
# (1) or not (0) |
|
|
324 |
# @param subject, the column name of the subject ID |
|
|
325 |
# @param time.list A list of column names used to calculate the time on study for example |
|
|
326 |
# \code{time=list(last.date="lastDate",dth.date="dthDate", prog.date="progDate", |
|
|
327 |
# withdrawn.date="withdrawnDate")} |
|
|
328 |
# @return A vector containing time on study |
|
|
329 |
AddTimeColumn <- function(data,rand.date,has.event,withdrawn,subject,time.list){ |
|
|
330 |
|
|
|
331 |
allowed.colnames <- c("dth.date","event.date","prog.date","withdrawn.date","last.date") |
|
|
332 |
validate.time.list.arguments(data,rand.date,has.event,withdrawn,time.list,allowed.colnames) |
|
|
333 |
|
|
|
334 |
#create the date objects |
|
|
335 |
for(x in allowed.colnames){ |
|
|
336 |
if(!is.null(time.list[[x]])) data[,x] <- FixDates(data[,time.list[[x]]]) |
|
|
337 |
} |
|
|
338 |
|
|
|
339 |
#check prog is <= dth if they exist |
|
|
340 |
if("dth.date" %in% names(time.list) && "prog.date" %in% names(time.list)){ |
|
|
341 |
idx <- which(!is.na(data[,"dth.date"]) & |
|
|
342 |
!is.na(data[,"prog.date"]) & |
|
|
343 |
data[,"dth.date"] < data[,"prog.date"]) |
|
|
344 |
if(length(idx)>0){ |
|
|
345 |
warning(paste("Subjects",paste(data[idx,subject] ,collapse=", ") , |
|
|
346 |
"have progression date after death date. This is invalid and should be fixed")) |
|
|
347 |
} |
|
|
348 |
} |
|
|
349 |
|
|
|
350 |
#This vector will be populated with subjects time on study and returned |
|
|
351 |
ans <- rep(NA,nrow(data)) |
|
|
352 |
|
|
|
353 |
#first deal with subjects who are censored |
|
|
354 |
are.censored <- which(!data[,has.event] & !data[,withdrawn]) |
|
|
355 |
ans <- Time.Deal.With.Censored(ans,data,rand.date,are.censored,time.list,allowed.colnames[1:4],subject) |
|
|
356 |
|
|
|
357 |
|
|
|
358 |
#next deal with subjects who have event |
|
|
359 |
subs.had.event <- which(data[,has.event]==1) |
|
|
360 |
ans <- Time.Deal.With.Had.Event(ans,data,rand.date,subs.had.event,time.list,allowed.colnames[1:3],subject,has.event) |
|
|
361 |
|
|
|
362 |
#finally subjects who have withdrawn |
|
|
363 |
has.withdrawn <- which(data[,withdrawn]==1 & data[,has.event]==0) |
|
|
364 |
ans <- Time.Deal.With.Withdrawn(ans,data,rand.date,has.withdrawn,time.list,withdrawn,subject,has.event) |
|
|
365 |
|
|
|
366 |
|
|
|
367 |
#output warning if subject has withdranw date but have not been withdrawn |
|
|
368 |
if(!is.null(time.list[["withdrawn.date"]])){ |
|
|
369 |
r <- which(any(!is.na(data[,"withdrawn.date"]) & data[,withdrawn]==0)) |
|
|
370 |
if(any(r)){ |
|
|
371 |
warning(paste("Subjects",paste(data[r,subject],collapse=", "), "have not withdrawn yet have a withdrawn date.", |
|
|
372 |
"The withdrawn dates are ignored")) |
|
|
373 |
} |
|
|
374 |
} |
|
|
375 |
|
|
|
376 |
#if we have not been able to calculate a time then output |
|
|
377 |
#an error if had event or warning if censored/withdrawn |
|
|
378 |
if(any(is.na(ans))){ |
|
|
379 |
r <- which(is.na(ans)) |
|
|
380 |
if(any(data[r,has.event]==1)){ |
|
|
381 |
s <- intersect(which(data[,has.event]==1), r) |
|
|
382 |
stop(paste("Subjects",paste(data[s,subject],collapse=", "), "have an event but no time on study can be calculated.", |
|
|
383 |
"Is there any missing data in your data set?")) |
|
|
384 |
} |
|
|
385 |
|
|
|
386 |
warning(paste("Subjects",paste(data[r,subject],collapse=", "), |
|
|
387 |
"are censored/withdrawn subject(s) and no time on study can be calculated.", |
|
|
388 |
"For these subjects the time on study is set to 0. Is there any missing data", |
|
|
389 |
"in your data set?")) |
|
|
390 |
} |
|
|
391 |
|
|
|
392 |
return(as.numeric(ans)) |
|
|
393 |
|
|
|
394 |
} |
|
|
395 |
|
|
|
396 |
|
|
|
397 |
# Maximum Likelihood estimate of uniform |
|
|
398 |
# accrual parameter k |
|
|
399 |
# |
|
|
400 |
# @param B recruitment period |
|
|
401 |
# @param s.i a vector of recruitment |
|
|
402 |
# times (numeric, time since start of recruitment period) |
|
|
403 |
# @return estimate of k = 1/(logB- mean(log(s.i))) |
|
|
404 |
MLestimateK <- function(B,s.i){ |
|
|
405 |
if(any(s.i<=0) || B <= 0 || any(s.i > B)) stop("Invalid arguments in MLestimateK") |
|
|
406 |
|
|
|
407 |
s <- mean(log(s.i)) |
|
|
408 |
return(1/(log(B)-s)) |
|
|
409 |
} |
|
|
410 |
|
|
|
411 |
|
|
|
412 |
# Calculate the average recruitment rate (subjects/day) |
|
|
413 |
# @param N number of subjects recruited |
|
|
414 |
# @param first.date the date of the first recruitment (numeric) |
|
|
415 |
# @param last.date the date of the last recruitment (numeric) |
|
|
416 |
# @return The average recruitment |
|
|
417 |
average.rec <- function(N,first.date,last.date){ |
|
|
418 |
N/(last.date-first.date+1) |
|
|
419 |
} |
|
|
420 |
|
|
|
421 |
|