a b/R/fromDataInternal.R
1
#This file contains the private functions
2
#for the predict from data part of the package
3
#apart from those used within the simulate function
4
#see fromDataSimInternal.R. For functions called by 
5
#AddTimeColumn see timeInternal.R
6
7
##'@include common.R simQOutput.R timeInternal.R
8
NULL
9
10
11
# Deal with subjects who have time = 0 or NA when creating 
12
# the EventData object
13
# 
14
# @param remove.0.time logical if TRUE then all subjects with time = NA or 0 are removed from the
15
# data set and not included in the object. If FALSE then they are included in the simulation 
16
# (but not in the model fitting)
17
# @param subject.data A data frame to be included in the subject.data slot 
18
# of the EventData object
19
# @return subject.data with appropriate subjects removed/time set to 0
20
warnNAtime <- function(remove.0.time,subject.data){
21
  
22
  if(!is.logical(remove.0.time) || length(remove.0.time) > 1){
23
    stop("Invalid remove.0.time argument")
24
  }
25
  
26
  index <- which(subject.data$time==0 | is.na(subject.data$time))
27
  if(length(index)==0){
28
    return(subject.data)
29
  }
30
  
31
  
32
  if(remove.0.time){
33
    warning(paste("Subjects",paste(subject.data[index,]$subject,collapse=", "),"have time=NA or 0",
34
                  "and have been removed from the data set."))
35
    subject.data <- subject.data[!1:nrow(subject.data) %in% index,] 
36
  }
37
  else{
38
    warning(paste("Subjects", paste(subject.data[index,]$subject, collapse=", "),
39
                  "have time=NA or 0, their time has been set to 0 and",
40
                  "they will not be used when fitting the model but will have event times simulated",
41
                  "when performing the predictions unless withdrawn is set to 1."))
42
    subject.data$time[is.na(subject.data$time)] <- 0
43
  }
44
  
45
  subject.data
46
}
47
48
# Get the multiplicative factor to convert units to
49
# days for the KM-plot
50
# @param units "Days", "Months" or "Years"
51
# @param daysinyear The number of days in a year (e.g. 365 or 365.25)
52
# @return 1, daysinyear/12 or daysinyear respectively
53
GetScaleForKM <- function(units,daysinyear){
54
  if(!units %in% c("Days","Months","Years")){
55
    stop("Invalid units argument")
56
  }
57
  
58
  xscale <- 1
59
  if(units == "Months") xscale <- daysinyear/12
60
  if(units == "Years") xscale <- daysinyear
61
  
62
  return(xscale)
63
}
64
65
# Function which inputs a vector of dates/strings
66
# and outputs a vector of dates
67
# 
68
# If S3 dates are input they are returned unchanged.
69
# If character strings are input then they must all
70
# have the same format either "YYYY-MM-DD", "DD/MM/YYYY"
71
# or "DD Month YYYY"
72
# 
73
# @param vec The vector of characters
74
# @return a vector of Dates.
75
# If conversion fails an error is thrown
76
FixDates <- function(vec){
77
     
78
   if(all(is.na(vec))) return(as.Date(vec))
79
   if(class(vec)=="Date") return(vec)
80
   
81
   vec[vec==""] <- NA
82
   formats <- c("%d/%m/%Y","%d %b %Y","%Y-%m-%d")
83
       
84
   ans <- lapply(formats,function(x){
85
     as.Date(vec,format=x)
86
   })
87
   
88
   which_date <- unlist(lapply(1:3,function(x){
89
     sum(is.na(ans[[x]]))==sum(is.na(vec))
90
   }))
91
   
92
   if(sum(which_date)!= 1){
93
     stop("Error reading date format it should be of the form
94
        YYYY-MM-DD, DD/MM/YYYY or DD Month YYYY ") 
95
   }
96
   
97
  ans <- ans[[which(which_date,TRUE)]]
98
  years <- as.character((ans),format="%Y") 
99
  years <- years[!is.na(years)]
100
   
101
  if(any(as.numeric(years) < 1000) ){
102
    stop("All years must have 4 digits, e.g. 01/01/2015 not 01/01/15")
103
  } 
104
   
105
  return(ans) 
106
}
107
108
109
# Set the censored.at.follow.up column for EventData@@subject.data
110
# @param subject.data An EventData@@subject.data data frame without the
111
# censored.at.follow.up column
112
# @param followup The follow period (in days)
113
# @return subject.data with additional censored.at.follow.up column
114
# Warnings will be output if dates have to be changed due to invalid data 
115
# for example if event occurs after followup period 
116
IncludeFollowUp <- function(subject.data,followup){
117
  
118
  subject.data$censored.at.follow.up <- as.numeric(subject.data$time > followup)
119
  subject.data$time <-ifelse(subject.data$censored.at.follow.up==1,followup,subject.data$time)
120
  
121
  
122
  resetToFollowUp <- function(data,idx,warning.msg){
123
    if(length(idx)>0){
124
      data$withdrawn[idx] <- 0
125
      data$has.event[idx] <- 0
126
      data$event.type[idx] <- NA
127
      warning(paste("Subjects",paste(data[idx,"subject"],collapse=", "),warning.msg))
128
    }
129
    data
130
  }
131
  
132
  subject.data <- resetToFollowUp(data=subject.data,
133
                   idx=which(subject.data$withdrawn==1 & subject.data$censored.at.follow.up==1),
134
                   warning.msg="have withdrawn dates after followup period and so have been censored at followup")
135
  
136
  resetToFollowUp(data=subject.data,
137
                 idx=which(subject.data$has.event==1 & subject.data$censored.at.follow.up==1),
138
                 warning.msg="had event after followup period and so have been censored at followup instead of having an event")
139
140
}
141
142
143
# Create an empty data frame for 
144
# event.pred.data or time.pred.data
145
# slots of the \code{WeibullResults} class
146
# @return This empty data frame
147
EmptyPredictionDF <- function(){
148
  
149
  data.frame(time=as.Date(character()),
150
                 event=numeric(),
151
                 CI_low=numeric(),
152
                 CI_high=numeric(),
153
                 daysatrisk=numeric())
154
}
155
156
157
# Create the text for summarizing a \code{FromDataResults object}
158
# @param object A \code{FromDataResults} object
159
# @param round.method See \code{eventPrediction:::myRound.Date}
160
# @param text.width numeric The width of the text to be returned. See \code{base::strwrap}
161
# @param show.predictions Logical if TRUE then include the time.pred.data and
162
# event.pred.data information in the text 
163
# @param show.at.risk Output the median number of at risk years
164
# @return A string (which can be output using cat)
165
getFromDataResultsSummaryText <- function(object,round.method="None",text.width=60,show.predictions=TRUE,show.at.risk=TRUE){
166
  
167
  daysinyear <- standarddaysinyear()
168
  
169
  data <- object@event.data
170
  indat <- data@subject.data
171
  indat$event.date <- ifelse(indat$has.event, LastDate(indat),NA)
172
  
173
  N.subjects <- length(indat$rand.date) #subjects in data set
174
  
175
  title <- ""
176
  if(N.subjects!=0){
177
    last.date <- if(any(!is.na(indat$event.date))) as.Date(max(indat$event.date, na.rm=TRUE),origin="1970-01-01")
178
                 else "NA"
179
    title <- paste0(N.subjects, " patients recruited where last patient in on ", max(indat$rand.date))
180
    
181
    if(class(last.date)=="Date"){
182
      title <- paste0(title, " and last event observed at ", last.date,
183
              " (",sum( indat$has.event, na.rm=TRUE), " events). ")
184
    }
185
    else{
186
      title <- paste0(title," and no events observed. ")
187
    }
188
  }
189
    
190
    
191
  if(object@Naccrual > 0){
192
      title <- paste0(title, 
193
                      " Out of ", N.subjects+object@Naccrual, " patients ",  object@Naccrual, 
194
                      " were simulated using ", object@accrualGenerator@text," ")
195
  }
196
  
197
  
198
  title <- paste(title, "Using ", object@simParams@type ," survival model. ",sep="")
199
  
200
  NA.note <- ""
201
  
202
  if(nrow(object@event.pred.data)!=0 && show.predictions){
203
    df <- object@event.pred.data 
204
    ans <- lapply(1:nrow(df),function(x){
205
      target <- myRound.Date(c(df[x,"CI_low"],df[x,"time"],df[x,"CI_high"]),round.method)
206
      
207
      retVal <- paste0("The time at which ", df[x,"event"]," events have occurred is predicted to be ",
208
             target[2], " [", target[1], ", ", target[3], "]" )
209
    
210
      if(show.at.risk){
211
        retVal <- paste(retVal," with an expected ",round(df[x,"daysatrisk"]/daysinyear,2)," years of subjects at risk at this time",sep="")
212
      }  
213
      paste(retVal,". ",sep="")
214
    }) 
215
  
216
    if(any(object@event.pred.data$time==as.Date(Inf,origin="1970-01-01"))){
217
      NA.note <- paste("An expected time of NA implies that,",
218
                       "in fewer than 50% of simulations the given number of events occurred.")
219
    }else if(any(object@event.pred.data$CI_high==as.Date(Inf,origin="1970-01-01"))){
220
      NA.note <- paste("A CI value of NA implies that, due to subject dropout, ",
221
                       "in fewer than ",100*(1-object@limit),
222
                        "% of the simulations the given number of events occurred.", sep="")
223
    }
224
    
225
    title <- paste(title,paste(ans,collapse=""),sep="")
226
  }
227
  
228
  if(nrow(object@time.pred.data)!=0 && show.predictions){
229
    df <- object@time.pred.data 
230
    ans <- lapply(1:nrow(df),function(x){
231
      retVal <- paste0("On ", df[x,"time"]," the predicted number of events is ",
232
             df[x,"event"], " [", df[x,"CI_low"], ", ", df[x,"CI_high"], "]" )
233
    
234
      if(show.at.risk){
235
        retVal <- paste(retVal," with an expected ",round(df[x,"daysatrisk"]/daysinyear,2)," years of subjects at risk",sep="")
236
      }  
237
      paste(retVal,". ",sep="")  
238
    }) 
239
    
240
    title <- paste(title,paste(ans,collapse=""),sep="")
241
  }
242
  
243
  title <- paste(title,NA.note,sep="")
244
  
245
  return(AddLineBreaks(title,text.width=text.width))
246
}
247
248
249
# Round dates
250
# 
251
# @param mydates a vector of 3 dates, the
252
# lower CI value, the median and the upper CI value
253
# @param round.method character, if "toMonths" then 
254
# the following procedure is applied:
255
# lower CI: take the month of the date 15 days earlier \cr
256
# median: take the month of the given date \cr
257
# upper CI: take the month of the date 15 days later \cr 
258
# otherwise the dates are unchanged
259
# @return The rounded dates
260
myRound.Date <- function(mydates,round.method){
261
 
262
  if(round.method=="toMonths"){
263
     c(format( mydates[1] - 15, format="%b %Y" ),
264
       format( mydates[2], format="%b %Y"),
265
       format( mydates[3] + 15, format="%b %Y" ))
266
  }
267
  else{
268
    mydates
269
  }
270
}
271
272
273
# Calculate the last date on study for subjects
274
# 
275
# Code: \code{ifelse(time==0,rand.date,time+rand.date-1)}
276
# 
277
# @param time A vector of times on study
278
# @param rand.date A vector of randomization dates
279
# @return A vector of dates
280
internal.Date <- function(time,rand.date){
281
   as.Date(ifelse(time==0,rand.date,time+rand.date-1),origin="1970-01-01")
282
}
283
284
# Calculate the last date on study for subjects
285
#
286
# The last date is the date of censoring or date of withdrawal
287
# or date of event for subjects
288
# @param indat  A data frame e.g. EventData@@subject.data
289
# @return A vector of dates
290
LastDate <- function(indat){
291
  internal.Date(indat$time,indat$rand.date)
292
}
293
294
295
296
# Create a data frame which contains only subjects
297
# who have been censored before a given date
298
# @param data A data frame e.g. EventData@@subject.data
299
# @param censor.date The given censored cutoff date
300
# @return The subset of \code{data} which is censored (i.e. does not
301
# have an event and is not withdrawn) before the given date
302
GetLaggedSubjects <- function(data,censor.date){
303
  data <- data[LastDate(data) < censor.date,]
304
  data[data$has.event==0 & data$withdrawn==0 & data$censored.at.follow.up==0,]
305
}
306
307
308
309
# Function to derive the time on study for subjects given 
310
# columns such as dth.date, event.date etc.
311
# 
312
# See the vignette for the logic used in calculating the time. 
313
# Note the changing of times so they are <= followup period is
314
# carried out in a subsequent function
315
# 
316
# see timeInternal.R
317
#
318
# @param data A data frame 
319
# @param rand.date string, the column name of randomization dates
320
# @param has.event string, the column name of whether a subject had an event
321
# (1) or not (0) 
322
# @param withdrawn string, the column name of whether a subject has withdrawn
323
# (1) or not (0)  
324
# @param subject, the column name of the subject ID
325
# @param time.list A list of column names used to calculate the time on study for example
326
# \code{time=list(last.date="lastDate",dth.date="dthDate", prog.date="progDate",
327
# withdrawn.date="withdrawnDate")}
328
# @return A vector containing time on study
329
AddTimeColumn <- function(data,rand.date,has.event,withdrawn,subject,time.list){
330
  
331
  allowed.colnames <- c("dth.date","event.date","prog.date","withdrawn.date","last.date")
332
  validate.time.list.arguments(data,rand.date,has.event,withdrawn,time.list,allowed.colnames)
333
  
334
  #create the date objects
335
  for(x in allowed.colnames){
336
    if(!is.null(time.list[[x]])) data[,x] <- FixDates(data[,time.list[[x]]]) 
337
  }
338
  
339
  #check prog is <= dth if they exist
340
  if("dth.date" %in% names(time.list) && "prog.date" %in% names(time.list)){
341
    idx <- which(!is.na(data[,"dth.date"]) & 
342
                 !is.na(data[,"prog.date"]) & 
343
                   data[,"dth.date"] < data[,"prog.date"])
344
    if(length(idx)>0){
345
      warning(paste("Subjects",paste(data[idx,subject] ,collapse=", ")  ,
346
                    "have progression date after death date. This is invalid and should be fixed"))
347
    }
348
  }
349
  
350
  #This vector will be populated with subjects time on study and returned 
351
  ans <- rep(NA,nrow(data))
352
  
353
  #first deal with subjects who are censored
354
  are.censored <- which(!data[,has.event] & !data[,withdrawn])
355
  ans <- Time.Deal.With.Censored(ans,data,rand.date,are.censored,time.list,allowed.colnames[1:4],subject)
356
  
357
  
358
  #next deal with subjects who have event
359
  subs.had.event <- which(data[,has.event]==1)
360
  ans <- Time.Deal.With.Had.Event(ans,data,rand.date,subs.had.event,time.list,allowed.colnames[1:3],subject,has.event)
361
  
362
  #finally subjects who have withdrawn
363
  has.withdrawn <- which(data[,withdrawn]==1 & data[,has.event]==0)
364
  ans <- Time.Deal.With.Withdrawn(ans,data,rand.date,has.withdrawn,time.list,withdrawn,subject,has.event)
365
  
366
    
367
  #output warning if subject has withdranw date but have not been withdrawn
368
  if(!is.null(time.list[["withdrawn.date"]])){
369
    r <- which(any(!is.na(data[,"withdrawn.date"]) & data[,withdrawn]==0))
370
    if(any(r)){
371
      warning(paste("Subjects",paste(data[r,subject],collapse=", "), "have not withdrawn yet have a withdrawn date.",
372
              "The withdrawn dates are ignored"))
373
    }
374
  }
375
376
  #if we have not been able to calculate a time then output
377
  #an error if had event or warning if censored/withdrawn
378
  if(any(is.na(ans))){
379
    r <- which(is.na(ans))
380
    if(any(data[r,has.event]==1)){
381
      s <- intersect(which(data[,has.event]==1), r)
382
      stop(paste("Subjects",paste(data[s,subject],collapse=", "), "have an event but no time on study can be calculated.",
383
                 "Is there any missing data in your data set?"))
384
    }
385
    
386
    warning(paste("Subjects",paste(data[r,subject],collapse=", "),
387
                  "are censored/withdrawn subject(s) and no time on study can be calculated.",
388
                  "For these subjects the time on study is set to 0. Is there any missing data",
389
                  "in your data set?"))
390
  }  
391
  
392
  return(as.numeric(ans))
393
      
394
} 
395
396
397
# Maximum Likelihood estimate of uniform
398
# accrual parameter k
399
# 
400
# @param B recruitment period
401
# @param s.i a vector of recruitment
402
# times (numeric, time since start of recruitment period)
403
# @return estimate of k = 1/(logB- mean(log(s.i)))
404
MLestimateK <- function(B,s.i){
405
  if(any(s.i<=0) || B <= 0 || any(s.i > B)) stop("Invalid arguments in MLestimateK")
406
  
407
  s <- mean(log(s.i))
408
  return(1/(log(B)-s))
409
}
410
411
412
# Calculate the average recruitment rate (subjects/day)
413
# @param N number of subjects recruited
414
# @param first.date the date of the first recruitment (numeric)
415
# @param last.date the date of the last recruitment (numeric)
416
# @return The average recruitment
417
average.rec <- function(N,first.date,last.date){ 
418
  N/(last.date-first.date+1)
419
}
420
421