[f2e496]: / R / fromDataInternal.R

Download this file

422 lines (335 with data), 15.3 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
#This file contains the private functions
#for the predict from data part of the package
#apart from those used within the simulate function
#see fromDataSimInternal.R. For functions called by
#AddTimeColumn see timeInternal.R
##'@include common.R simQOutput.R timeInternal.R
NULL
# Deal with subjects who have time = 0 or NA when creating
# the EventData object
#
# @param remove.0.time logical if TRUE then all subjects with time = NA or 0 are removed from the
# data set and not included in the object. If FALSE then they are included in the simulation
# (but not in the model fitting)
# @param subject.data A data frame to be included in the subject.data slot
# of the EventData object
# @return subject.data with appropriate subjects removed/time set to 0
warnNAtime <- function(remove.0.time,subject.data){
if(!is.logical(remove.0.time) || length(remove.0.time) > 1){
stop("Invalid remove.0.time argument")
}
index <- which(subject.data$time==0 | is.na(subject.data$time))
if(length(index)==0){
return(subject.data)
}
if(remove.0.time){
warning(paste("Subjects",paste(subject.data[index,]$subject,collapse=", "),"have time=NA or 0",
"and have been removed from the data set."))
subject.data <- subject.data[!1:nrow(subject.data) %in% index,]
}
else{
warning(paste("Subjects", paste(subject.data[index,]$subject, collapse=", "),
"have time=NA or 0, their time has been set to 0 and",
"they will not be used when fitting the model but will have event times simulated",
"when performing the predictions unless withdrawn is set to 1."))
subject.data$time[is.na(subject.data$time)] <- 0
}
subject.data
}
# Get the multiplicative factor to convert units to
# days for the KM-plot
# @param units "Days", "Months" or "Years"
# @param daysinyear The number of days in a year (e.g. 365 or 365.25)
# @return 1, daysinyear/12 or daysinyear respectively
GetScaleForKM <- function(units,daysinyear){
if(!units %in% c("Days","Months","Years")){
stop("Invalid units argument")
}
xscale <- 1
if(units == "Months") xscale <- daysinyear/12
if(units == "Years") xscale <- daysinyear
return(xscale)
}
# Function which inputs a vector of dates/strings
# and outputs a vector of dates
#
# If S3 dates are input they are returned unchanged.
# If character strings are input then they must all
# have the same format either "YYYY-MM-DD", "DD/MM/YYYY"
# or "DD Month YYYY"
#
# @param vec The vector of characters
# @return a vector of Dates.
# If conversion fails an error is thrown
FixDates <- function(vec){
if(all(is.na(vec))) return(as.Date(vec))
if(class(vec)=="Date") return(vec)
vec[vec==""] <- NA
formats <- c("%d/%m/%Y","%d %b %Y","%Y-%m-%d")
ans <- lapply(formats,function(x){
as.Date(vec,format=x)
})
which_date <- unlist(lapply(1:3,function(x){
sum(is.na(ans[[x]]))==sum(is.na(vec))
}))
if(sum(which_date)!= 1){
stop("Error reading date format it should be of the form
YYYY-MM-DD, DD/MM/YYYY or DD Month YYYY ")
}
ans <- ans[[which(which_date,TRUE)]]
years <- as.character((ans),format="%Y")
years <- years[!is.na(years)]
if(any(as.numeric(years) < 1000) ){
stop("All years must have 4 digits, e.g. 01/01/2015 not 01/01/15")
}
return(ans)
}
# Set the censored.at.follow.up column for EventData@@subject.data
# @param subject.data An EventData@@subject.data data frame without the
# censored.at.follow.up column
# @param followup The follow period (in days)
# @return subject.data with additional censored.at.follow.up column
# Warnings will be output if dates have to be changed due to invalid data
# for example if event occurs after followup period
IncludeFollowUp <- function(subject.data,followup){
subject.data$censored.at.follow.up <- as.numeric(subject.data$time > followup)
subject.data$time <-ifelse(subject.data$censored.at.follow.up==1,followup,subject.data$time)
resetToFollowUp <- function(data,idx,warning.msg){
if(length(idx)>0){
data$withdrawn[idx] <- 0
data$has.event[idx] <- 0
data$event.type[idx] <- NA
warning(paste("Subjects",paste(data[idx,"subject"],collapse=", "),warning.msg))
}
data
}
subject.data <- resetToFollowUp(data=subject.data,
idx=which(subject.data$withdrawn==1 & subject.data$censored.at.follow.up==1),
warning.msg="have withdrawn dates after followup period and so have been censored at followup")
resetToFollowUp(data=subject.data,
idx=which(subject.data$has.event==1 & subject.data$censored.at.follow.up==1),
warning.msg="had event after followup period and so have been censored at followup instead of having an event")
}
# Create an empty data frame for
# event.pred.data or time.pred.data
# slots of the \code{WeibullResults} class
# @return This empty data frame
EmptyPredictionDF <- function(){
data.frame(time=as.Date(character()),
event=numeric(),
CI_low=numeric(),
CI_high=numeric(),
daysatrisk=numeric())
}
# Create the text for summarizing a \code{FromDataResults object}
# @param object A \code{FromDataResults} object
# @param round.method See \code{eventPrediction:::myRound.Date}
# @param text.width numeric The width of the text to be returned. See \code{base::strwrap}
# @param show.predictions Logical if TRUE then include the time.pred.data and
# event.pred.data information in the text
# @param show.at.risk Output the median number of at risk years
# @return A string (which can be output using cat)
getFromDataResultsSummaryText <- function(object,round.method="None",text.width=60,show.predictions=TRUE,show.at.risk=TRUE){
daysinyear <- standarddaysinyear()
data <- object@event.data
indat <- data@subject.data
indat$event.date <- ifelse(indat$has.event, LastDate(indat),NA)
N.subjects <- length(indat$rand.date) #subjects in data set
title <- ""
if(N.subjects!=0){
last.date <- if(any(!is.na(indat$event.date))) as.Date(max(indat$event.date, na.rm=TRUE),origin="1970-01-01")
else "NA"
title <- paste0(N.subjects, " patients recruited where last patient in on ", max(indat$rand.date))
if(class(last.date)=="Date"){
title <- paste0(title, " and last event observed at ", last.date,
" (",sum( indat$has.event, na.rm=TRUE), " events). ")
}
else{
title <- paste0(title," and no events observed. ")
}
}
if(object@Naccrual > 0){
title <- paste0(title,
" Out of ", N.subjects+object@Naccrual, " patients ", object@Naccrual,
" were simulated using ", object@accrualGenerator@text," ")
}
title <- paste(title, "Using ", object@simParams@type ," survival model. ",sep="")
NA.note <- ""
if(nrow(object@event.pred.data)!=0 && show.predictions){
df <- object@event.pred.data
ans <- lapply(1:nrow(df),function(x){
target <- myRound.Date(c(df[x,"CI_low"],df[x,"time"],df[x,"CI_high"]),round.method)
retVal <- paste0("The time at which ", df[x,"event"]," events have occurred is predicted to be ",
target[2], " [", target[1], ", ", target[3], "]" )
if(show.at.risk){
retVal <- paste(retVal," with an expected ",round(df[x,"daysatrisk"]/daysinyear,2)," years of subjects at risk at this time",sep="")
}
paste(retVal,". ",sep="")
})
if(any(object@event.pred.data$time==as.Date(Inf,origin="1970-01-01"))){
NA.note <- paste("An expected time of NA implies that,",
"in fewer than 50% of simulations the given number of events occurred.")
}else if(any(object@event.pred.data$CI_high==as.Date(Inf,origin="1970-01-01"))){
NA.note <- paste("A CI value of NA implies that, due to subject dropout, ",
"in fewer than ",100*(1-object@limit),
"% of the simulations the given number of events occurred.", sep="")
}
title <- paste(title,paste(ans,collapse=""),sep="")
}
if(nrow(object@time.pred.data)!=0 && show.predictions){
df <- object@time.pred.data
ans <- lapply(1:nrow(df),function(x){
retVal <- paste0("On ", df[x,"time"]," the predicted number of events is ",
df[x,"event"], " [", df[x,"CI_low"], ", ", df[x,"CI_high"], "]" )
if(show.at.risk){
retVal <- paste(retVal," with an expected ",round(df[x,"daysatrisk"]/daysinyear,2)," years of subjects at risk",sep="")
}
paste(retVal,". ",sep="")
})
title <- paste(title,paste(ans,collapse=""),sep="")
}
title <- paste(title,NA.note,sep="")
return(AddLineBreaks(title,text.width=text.width))
}
# Round dates
#
# @param mydates a vector of 3 dates, the
# lower CI value, the median and the upper CI value
# @param round.method character, if "toMonths" then
# the following procedure is applied:
# lower CI: take the month of the date 15 days earlier \cr
# median: take the month of the given date \cr
# upper CI: take the month of the date 15 days later \cr
# otherwise the dates are unchanged
# @return The rounded dates
myRound.Date <- function(mydates,round.method){
if(round.method=="toMonths"){
c(format( mydates[1] - 15, format="%b %Y" ),
format( mydates[2], format="%b %Y"),
format( mydates[3] + 15, format="%b %Y" ))
}
else{
mydates
}
}
# Calculate the last date on study for subjects
#
# Code: \code{ifelse(time==0,rand.date,time+rand.date-1)}
#
# @param time A vector of times on study
# @param rand.date A vector of randomization dates
# @return A vector of dates
internal.Date <- function(time,rand.date){
as.Date(ifelse(time==0,rand.date,time+rand.date-1),origin="1970-01-01")
}
# Calculate the last date on study for subjects
#
# The last date is the date of censoring or date of withdrawal
# or date of event for subjects
# @param indat A data frame e.g. EventData@@subject.data
# @return A vector of dates
LastDate <- function(indat){
internal.Date(indat$time,indat$rand.date)
}
# Create a data frame which contains only subjects
# who have been censored before a given date
# @param data A data frame e.g. EventData@@subject.data
# @param censor.date The given censored cutoff date
# @return The subset of \code{data} which is censored (i.e. does not
# have an event and is not withdrawn) before the given date
GetLaggedSubjects <- function(data,censor.date){
data <- data[LastDate(data) < censor.date,]
data[data$has.event==0 & data$withdrawn==0 & data$censored.at.follow.up==0,]
}
# Function to derive the time on study for subjects given
# columns such as dth.date, event.date etc.
#
# See the vignette for the logic used in calculating the time.
# Note the changing of times so they are <= followup period is
# carried out in a subsequent function
#
# see timeInternal.R
#
# @param data A data frame
# @param rand.date string, the column name of randomization dates
# @param has.event string, the column name of whether a subject had an event
# (1) or not (0)
# @param withdrawn string, the column name of whether a subject has withdrawn
# (1) or not (0)
# @param subject, the column name of the subject ID
# @param time.list A list of column names used to calculate the time on study for example
# \code{time=list(last.date="lastDate",dth.date="dthDate", prog.date="progDate",
# withdrawn.date="withdrawnDate")}
# @return A vector containing time on study
AddTimeColumn <- function(data,rand.date,has.event,withdrawn,subject,time.list){
allowed.colnames <- c("dth.date","event.date","prog.date","withdrawn.date","last.date")
validate.time.list.arguments(data,rand.date,has.event,withdrawn,time.list,allowed.colnames)
#create the date objects
for(x in allowed.colnames){
if(!is.null(time.list[[x]])) data[,x] <- FixDates(data[,time.list[[x]]])
}
#check prog is <= dth if they exist
if("dth.date" %in% names(time.list) && "prog.date" %in% names(time.list)){
idx <- which(!is.na(data[,"dth.date"]) &
!is.na(data[,"prog.date"]) &
data[,"dth.date"] < data[,"prog.date"])
if(length(idx)>0){
warning(paste("Subjects",paste(data[idx,subject] ,collapse=", ") ,
"have progression date after death date. This is invalid and should be fixed"))
}
}
#This vector will be populated with subjects time on study and returned
ans <- rep(NA,nrow(data))
#first deal with subjects who are censored
are.censored <- which(!data[,has.event] & !data[,withdrawn])
ans <- Time.Deal.With.Censored(ans,data,rand.date,are.censored,time.list,allowed.colnames[1:4],subject)
#next deal with subjects who have event
subs.had.event <- which(data[,has.event]==1)
ans <- Time.Deal.With.Had.Event(ans,data,rand.date,subs.had.event,time.list,allowed.colnames[1:3],subject,has.event)
#finally subjects who have withdrawn
has.withdrawn <- which(data[,withdrawn]==1 & data[,has.event]==0)
ans <- Time.Deal.With.Withdrawn(ans,data,rand.date,has.withdrawn,time.list,withdrawn,subject,has.event)
#output warning if subject has withdranw date but have not been withdrawn
if(!is.null(time.list[["withdrawn.date"]])){
r <- which(any(!is.na(data[,"withdrawn.date"]) & data[,withdrawn]==0))
if(any(r)){
warning(paste("Subjects",paste(data[r,subject],collapse=", "), "have not withdrawn yet have a withdrawn date.",
"The withdrawn dates are ignored"))
}
}
#if we have not been able to calculate a time then output
#an error if had event or warning if censored/withdrawn
if(any(is.na(ans))){
r <- which(is.na(ans))
if(any(data[r,has.event]==1)){
s <- intersect(which(data[,has.event]==1), r)
stop(paste("Subjects",paste(data[s,subject],collapse=", "), "have an event but no time on study can be calculated.",
"Is there any missing data in your data set?"))
}
warning(paste("Subjects",paste(data[r,subject],collapse=", "),
"are censored/withdrawn subject(s) and no time on study can be calculated.",
"For these subjects the time on study is set to 0. Is there any missing data",
"in your data set?"))
}
return(as.numeric(ans))
}
# Maximum Likelihood estimate of uniform
# accrual parameter k
#
# @param B recruitment period
# @param s.i a vector of recruitment
# times (numeric, time since start of recruitment period)
# @return estimate of k = 1/(logB- mean(log(s.i)))
MLestimateK <- function(B,s.i){
if(any(s.i<=0) || B <= 0 || any(s.i > B)) stop("Invalid arguments in MLestimateK")
s <- mean(log(s.i))
return(1/(log(B)-s))
}
# Calculate the average recruitment rate (subjects/day)
# @param N number of subjects recruited
# @param first.date the date of the first recruitment (numeric)
# @param last.date the date of the last recruitment (numeric)
# @return The average recruitment
average.rec <- function(N,first.date,last.date){
N/(last.date-first.date+1)
}