Diff of /R/eventData.R [000000] .. [f2e496]

Switch to unified view

a b/R/eventData.R
1
# This file contains the public functions associated with eventData object
2
# in the predict from data part of the package. Note the estimateAccrualParameter
3
# function can be found in the accrual.R file.
4
5
##' @include fromDataSimParam.R eventPrediction_package.R
6
NULL
7
8
##' @import survival methods
9
NULL
10
11
##' @importFrom  mvtnorm rmvnorm
12
NULL
13
14
##' @importFrom  scales date_format
15
NULL
16
17
# Function that will check the validity of EventData object
18
# when constructed
19
# @param object EventData object
20
checkEventData <- function(object){
21
  errors <- ""
22
  
23
  required.colnames <- c("has.event", "rand.date", "withdrawn", "subject", "time","censored.at.follow.up")
24
  
25
  data <- object@subject.data
26
  
27
  if(nrow(data)>0){
28
  
29
    if(any(!data$has.event %in% c(0,1))) errors <- paste(errors,"has.event must be 1 or 0" )
30
    if(any(!data$withdrawn %in% c(0,1))) errors <- paste(errors,"withdrawn must be 1 or 0" )
31
    if(any(!data$censored.at.follow.up %in% c(0,1))) errors <- paste(errors,"censored.at.follow.up must be 1 or 0" )
32
33
    if(any(!required.colnames %in% colnames(data))) errors <- paste(errors,"invalid column names")
34
  
35
    
36
    if(class(data$rand.date)!="Date") errors <- paste(errors,"Invalid rand.date")
37
    if(class(data$time)!="numeric") {
38
      errors <- paste(errors,"Times must be numeric")
39
    }
40
    else{
41
      if(all(data$time==0)) errors <- paste(errors,"time value incorrect all are 0!")  
42
      if(any(data$time < 0 | is.na(data$time))) {
43
        errors <- paste(errors,"subjects cannot have non-positive time on trial. Please check subjects ",
44
                      paste(data[data$time <= 0 | is.na(data$time),]$subject,collapse=", "))
45
      
46
      }
47
    }
48
  
49
    if(any(duplicated(data$subject))) errors <- paste(errors,"subject ID must be unique")
50
  
51
    if(all(data$has.event==0)){
52
      warning("No events have occurred - a model cannot be fit")
53
    }
54
    if(all(data$has.event==1)) errors <- paste(errors,"all events have occurred!")
55
  
56
    idx <- which(data$has.event==1& data$withdrawn==1)
57
    if(length(idx)){
58
      errors <- paste(errors,"subjects cannot be both withdrawn and have an event. Please check subjects ",
59
                    paste(data$subject[idx],collapse=", "))
60
    }
61
  
62
  }
63
  
64
  if(!is.numeric(object@followup)||length(object@followup)!= 1|| object@followup <= 0){
65
    errors <- paste(errors,"Invalid followup argument")
66
  }
67
  
68
  if(errors == "") TRUE else errors
69
}
70
  
71
##' Class representing data to use for new predictions
72
##' @slot subject.data a data frame with 6 columns
73
##' "subject", "rand.date", "has.event", "withdrawn", "censored.at.follow.up" and "time" and "site" and "event.type"
74
##' see vignette and \code{EventData} for further details. 
75
##' @slot followup A numeric value for the fixed follow up period a subject
76
##' is followed. This is in days. If there is no fixed followup period then 
77
##' Inf should be used
78
##' @export
79
setClass("EventData", 
80
          slots= list(subject.data = "data.frame",
81
                      followup="numeric"),
82
          validity = checkEventData)
83
84
85
##' Constructor for event data object
86
##' 
87
##' All dates must be in one of the following formats:
88
##' YYYY-MM-DD, DD/MM/YY or DD Month YYYY
89
##'  
90
##' 
91
##' @param data A data frame 
92
##' @param subject string, the column name of subject identifiers
93
##' @param rand.date string, the column name of randomization dates
94
##' @param has.event string, the column name of whether a subject had an event
95
##' (1) or not (0) 
96
##' @param withdrawn string, the column name of whether a subject has withdrawn
97
##' (1) or not (0)  
98
##' @param time Either a string, the column name of time each subject has been on the study
99
##' or a list with elements with (some of) the following named elements.
100
##' \code{last.date}, \code{event.date}, \code{prog.date}, \code{dth.date} and \code{withdrawn.date}
101
##' In this case the package will attempt to derive the time column 
102
##' See the vignette and then eventPrediction:::AddTimeColumn for further details
103
##' @param site optional column for subject site
104
##' @param event.type optional column for the event type (e.g. unstable angina) if not included then `Had Event' will be used 
105
##' @param remove.0.time logical, if TRUE then all subjects with time = NA or 0 are removed from the
106
##' data set and not included in the object. If FALSE then they are included in the simulation (but not in the model fitting)
107
##' @param followup A numeric value for the fixed follow up period a subject
108
##' is followed. This is in days. If there is no fixed followup period then 
109
##' Inf should be used
110
##' @return An \code{EventData} object
111
##' @export
112
EventData <- function(data,subject,rand.date,has.event,withdrawn,time,site=NULL,event.type=NULL,remove.0.time=FALSE,followup=Inf){
113
  
114
  #set time argument
115
  arg <- if(!is.list(time))  time else NULL
116
   
117
  #check columns are in data frame 
118
  for(x in c(subject,rand.date,has.event,withdrawn,arg)){
119
    if(!is.null(x) && !x %in% colnames(data)){
120
      stop(paste("Column name",x,"not found in data frame"))
121
    }
122
  }
123
  
124
  if(!is.numeric(followup) || length(followup)> 1 || followup <= 0){
125
    stop("Invalid followup argument")
126
  }
127
  
128
  #validate the input columns, deriving time from other columns if needed
129
  data[,rand.date] <- if(nrow(data)>0) FixDates(data[,rand.date]) else data[,rand.date]
130
  time <- if(!is.list(time))  data[,time] else AddTimeColumn(data,rand.date,has.event,withdrawn,subject,time)
131
  site <- if(is.null(site)) rep(NA,nrow(data)) else data[,site] 
132
  
133
  if(is.null(event.type)) 
134
    event.type <- ifelse(data[,has.event],"Has Event",factor(NA))
135
  else{
136
    event.type <- factor(data[,event.type])
137
    if("" %in% levels(event.type)){
138
      event.type[event.type==""] <- factor(NA)
139
      event.type <- droplevels(event.type)
140
    }
141
  }
142
  
143
  #unvalidated data frame
144
  subject.data <- data.frame(
145
    subject=data[,subject],
146
    rand.date=data[,rand.date],
147
    time=time,
148
    has.event=data[,has.event],
149
    withdrawn=data[,withdrawn],
150
    site=site,
151
    event.type=event.type
152
  )
153
  
154
     
155
  keep <- which(as.character(subject.data$subject) > "" & !is.na(subject.data$rand.date))
156
  if(length(keep) != nrow(subject.data)){
157
    warning(paste("Subjects",paste(subject.data[setdiff(1:nrow(subject.data),keep),subject],collapse=", "),
158
                  "have been removed due to invalid rand.date or missing subject ID."))
159
  }
160
  
161
  subject.data <- subject.data[keep,]
162
  
163
  subject.data <- warnNAtime(remove.0.time,subject.data)
164
  
165
  subject.data <- IncludeFollowUp(subject.data,followup)
166
  
167
  #finally run some validation checks which change the input data and output warnings
168
  validation.checks <- function(subject.data,idx,warning.message,fn){
169
    if(length(idx)>0){
170
      warning(paste("Subjects",paste(subject.data[idx,subject],collapse=", "),warning.message))
171
      subject.data <- fn(subject.data,idx)
172
    }
173
    subject.data
174
  }
175
  
176
  subject.data <- validation.checks(subject.data,
177
                    idx=which(subject.data$has.event==1 & is.na(subject.data$event.type)),
178
                    warning.message="have an event and no event type, their event type is set to 'Had Event'",
179
                    fn=function(subject.data,idx){
180
                      if(!"Had Event" %in% levels(subject.data$event.type)){
181
                        levels(subject.data$event.type) <- c(levels(subject.data$event.type),"Had Event")
182
                      }
183
                      subject.data$event.type[idx] <- "Had Event"
184
                      return(subject.data)
185
                    })
186
  
187
  subject.data <- validation.checks(subject.data,
188
                    idx=which(subject.data$has.event!=1 & !is.na(subject.data$event.type)),
189
                    warning.message="have an event type but did not have an event",
190
                    fn=function(subject.data,idx){
191
                      subject.data$event.type[idx] <- factor(NA)
192
                      subject.data$event.type <- droplevels(subject.data$event.type)
193
                      return(subject.data)
194
                    })
195
  
196
  subject.data <- validation.checks(subject.data,
197
                    idx=which(subject.data$has.event==1& subject.data$withdrawn==1),
198
                    warning.message="have an event and are withdrawn, assuming they have an event at the given time",
199
                    fn=function(subject.data,idx){
200
                      subject.data$withdrawn[idx] <- 0
201
                      return(subject.data)
202
                    })
203
204
  #further validation occurs in the validity function of the class 
205
  return(new("EventData", subject.data = subject.data, followup=followup))
206
}
207
208
209
210
##' @name show
211
##' @rdname show-methods
212
##' @aliases show,EventData-method
213
##' @export
214
setMethod("show",
215
          "EventData",
216
    function(object) {
217
         cat("EventData, use object@subject.data$param to access individual columns:\n")
218
         cat(str(object@subject.data))
219
     })
220
221
222
##' @name summary
223
##' @rdname summary-methods
224
##' @aliases summary,EventData-method
225
##' @export
226
setMethod("summary","EventData",
227
  function(object){
228
    df <- object@subject.data 
229
    daysinyear <- standarddaysinyear()
230
    
231
    if(nrow(df)>0){
232
    
233
      cat(paste("Number of subjects:",nrow(df),"\n"))
234
      cat(paste("Number of events:",sum(df$has.event==1),"\n"))
235
    
236
      if(sum(df$has.event==1) != 0 && length(levels(df$event.type)) != 1){
237
        tab <- table(df$event.type)
238
        lapply(names(tab),function(y){cat(paste(" Of type ",y,": ",tab[y],"\n",sep=""))})
239
      }
240
    
241
      cat(paste("Number of withdrawn:", sum(df$withdrawn==1),"\n"))
242
      cat(paste("First subject randomized:",as.Date(min(df$rand.date),origin="1970-01-01"),"\n"))
243
      cat(paste("Last subject randomized:",as.Date(max(df$rand.date),origin="1970-01-01"),"\n"))
244
    
245
      if(sum(object@subject.data$has.event)>0){    
246
        cat(paste("First Event:",as.Date(min(LastDate(df[df$has.event==1,]),na.rm=TRUE),origin="1970-01-01"),"\n"))
247
        cat(paste("Last Event:",as.Date(max(LastDate(df[df$has.event==1,]),na.rm=TRUE),origin="1970-01-01"),"\n"))
248
      }
249
      
250
      av <- average.rec(nrow(df),as.numeric(min(df$rand.date)),as.numeric(max(df$rand.date))) 
251
      cat(paste("Average recruitment (subjects/day):",roundForceOutputZeros(av,2),"\n"))
252
    
253
      if(!is.infinite(object@followup)){
254
        cat(paste("Subjects followed for ",round(object@followup), " days (",
255
                round(object@followup/daysinyear,2)," years)\n",sep=""))
256
      
257
        cat(paste("Number of subjects censored at end of follow up period:",
258
                  sum(object@subject.data$censored.at.follow.up),"\n"))
259
      
260
      }
261
    }
262
    else{
263
      cat("Empty data frame!\n")
264
    }
265
})
266
267
268
269
##' Fit a survival model to an EventData object
270
##' @rdname fit-methods
271
##' @name fit
272
##' @param object an EventData object
273
##' @param ... Additional arguments to the function
274
##' @param dist character, either weibull or loglogistic, which model
275
##' is to be fit 
276
##' @return an EventModel object
277
##' @export
278
if (!isGeneric("fit")){
279
  setGeneric("fit", function(object,...) standardGeneric("fit"))
280
}
281
282
283
284
##' @name fit
285
##' @rdname fit-methods
286
##' @aliases fit,EventData-method
287
##' @export
288
setMethod("fit","EventData",function(object,dist="weibull"){
289
  
290
  if(!dist %in% c("weibull","loglogistic")){
291
    stop("dist must be weibull or loglogistic")
292
  }
293
  
294
  if(nrow(object@subject.data)==0)stop("Empty data frame!") 
295
  if(sum(object@subject.data$has.event)==0){
296
    stop("Cannot fit a model to a dataset with no events")
297
  }
298
  
299
  #subjects with time = 0 are set to NA for the model fitting
300
  #so they are ignored
301
  indat <- object@subject.data
302
  indat$time <- ifelse(indat$time==0,NA,indat$time)
303
  
304
  model<-survreg(Surv(time, has.event) ~ 1, data=indat, dist=dist, y = TRUE)
305
  
306
  new("EventModel",model=model,event.data=object,
307
      simParams=FromDataParam(object=model,type=dist))
308
})
309
310
311
312
##' @rdname plot-methods
313
##' @name plot
314
##' @aliases plot,EventData,missing-method
315
##' @export
316
setMethod( "plot",
317
  signature( x="EventData", y="missing" ),
318
  function(x, xlab="log(t)", ylab="log(-log(S(t)))", main="", ...) {
319
    
320
    if(nrow(x@subject.data)==0)stop("Empty data frame!")
321
    if(sum(x@subject.data$has.event)==0){
322
      stop("Cannot fit a model to a dataset with no events")
323
    }
324
    
325
    model <- survfit(Surv(time, has.event) ~ 1, data=x@subject.data,...)
326
      
327
    res <- data.frame(t=model$time, s=model$surv)
328
    res <- res[res$t>0 & res$s>0 & res$s<1,]
329
    
330
    df <- data.frame(x=log(res$t),y=log(-log(res$s)))
331
    
332
    p <- ggplot(df, aes_string(x="x", y="y")) + geom_point() +
333
      stat_smooth(method="lm", se=FALSE, color="red", size=1 ) +
334
      xlab(xlab) + ylab(ylab) +
335
      ggtitle(main) + theme_bw() +
336
      theme(panel.grid.minor = element_line(colour="gray", size=0.5))
337
    p
338
  }
339
)
340
341
342
343
##' Method to create new \code{EventData} object only including recruitment times
344
##' @param object The  object from which to create the new \code{EventData} object
345
##' @rdname OnlyUseRecTimes-methods
346
##' @return An \code{EventData} object with time=0 and no events or withdrawn subjects
347
##' @name OnlyUseRecTimes
348
##' @export
349
setGeneric("OnlyUseRecTimes",function(object) standardGeneric("OnlyUseRecTimes"))
350
351
##' @rdname OnlyUseRecTimes-methods
352
##' @name OnlyUseRecTimes
353
##' @aliases OnlyUseRecTimes,EventData-method
354
##' @export
355
setMethod("OnlyUseRecTimes","EventData",function(object){
356
  object@subject.data$withdrawn <- 0
357
  object@subject.data$has.event <- 0
358
  object@subject.data$censored.at.follow.up <- 0
359
  object@subject.data$time <- 0
360
  object@subject.data$event.type <- factor(NA)
361
  object@subject.data$event.type <- droplevels(object@subject.data$event.type)
362
  object
363
})
364
365
366
##' Cut the Event Data
367
##' 
368
##' Create a new EventData from an existing one which shows how the
369
##' data would have looked on a given date (assuming no lag in reporting of 
370
##' events and if necessary subjects have been censored at cutoff point).
371
##' If subject was censored before the cut date then their censored date remains
372
##' unchanged
373
##' @param object The EventData object
374
##' @param date The date to cut the data at
375
##' @rdname CutData-methods
376
##' @name CutData
377
##' @return \code{EventData} object with the data censored at the appropriate point
378
##' @export
379
setGeneric("CutData",function(object,date) standardGeneric("CutData"))
380
381
382
##' @rdname CutData-methods
383
##' @name CutData
384
##' @aliases CutData,EventData-method
385
##' @export
386
setMethod("CutData","EventData",function(object,date){
387
388
  cut.date <- FixDates(date)
389
  
390
  subject.data <- object@subject.data
391
  
392
  #only include subjects who have been randomized by this time
393
  subject.data <- subject.data[subject.data$rand.date<=cut.date,]
394
  if(nrow(subject.data)==0){
395
    stop("Cut date before first subject randomization")
396
  }
397
  
398
  censored.times <- cut.date - subject.data$rand.date + 1
399
  idx <- which(censored.times < subject.data$time)
400
  
401
  if(length(idx)==0&&nrow(subject.data)==nrow(object@subject.data)){
402
    warning("Cut date is too late (no ongoing subjects at this time) and so has no effect")
403
  }
404
  
405
  
406
  
407
  subject.data$time <- pmin(censored.times,subject.data$time)
408
  subject.data$has.event[idx] <- 0
409
  subject.data$event.type[idx] <- factor(NA)
410
  subject.data$event.type <- droplevels(subject.data$event.type)
411
  subject.data$withdrawn[idx] <- 0
412
  
413
  EventData(
414
    data=subject.data,
415
    subject="subject",
416
    rand.date="rand.date",
417
    has.event="has.event",
418
    withdrawn="withdrawn",
419
    time="time",
420
    site="site",
421
    event.type="event.type",
422
    followup=object@followup
423
  )
424
  
425
})
426
427
##' This function calculates the event type for pfs data
428
##' 
429
##' This function is called before creating the EventData object 
430
##' @param has.event A vector or whether subjects had event 
431
##' @param prog.date A vector of dates (or strings in the standard eventPrediction allowed formats)
432
##' of progression.date or blank if unknown 
433
##' @param dth.date A vector of dates (or strings in the standard eventPrediction allowed formats)
434
##' of death date or blank if unknown
435
##' @return A vector with NA if subject does not have event, 
436
##' "Death" if dth.date <= prog.date or prog.date blank,
437
##' "Progression (not death)" if prog.date < dth.date or dth.date blank
438
##' "Progression (unknown if death)" if both prog.date and dth.date are blank
439
##' @export
440
CalculateProgEventTypes <- function(has.event,prog.date,dth.date){
441
  #called before creating EventDate object so extra validation needed  
442
  
443
  if(any(!has.event %in% c(0,1))){
444
    stop("Invalid has.event argument")
445
  }
446
  
447
  prog.date <- FixDates(prog.date)
448
  dth.date <- FixDates(dth.date)
449
  
450
  if(!(length(prog.date)==length(dth.date) && length(prog.date) ==length(has.event))){
451
    stop("Invalid lengths of arguments")
452
  }
453
  
454
  unlist(lapply(seq_along(has.event),function(x){
455
    if(!has.event[x]) return(NA)
456
    
457
    dth <- dth.date[x]
458
    prog <- prog.date[x]
459
    
460
    if(!is.na(prog) && !is.na(dth)){
461
      if(dth<=prog) return("Death")  
462
      return("Progression (not death)")
463
    }  
464
    
465
    
466
    #just dth return dth
467
    if(!is.na(dth)) return("Death")
468
    
469
    #just prog return prog
470
    if(!is.na(prog)) return("Progression (not death)")
471
    
472
    #Otherwise are relying on lastDate so do not know which
473
    return("Progression (unknown if death)")
474
    
475
  }))
476
  
477
  
478
}
479
480
481
##' Create an \code{EventData} object with no subjects
482
##' 
483
##' @param followup The time subjects are followed until censoring  
484
##' @export
485
EmptyEventData <- function(followup=Inf){
486
  
487
  if(length(followup) > 1 || !is.numeric(followup) || followup < 0){
488
    stop("Invalid followup argument")
489
  }
490
  
491
  d <- data.frame(subject=character(0),
492
                  randDate=numeric(0),
493
                  has.event=numeric(0),
494
                  withdrawn=numeric(0),
495
                  time=numeric(0))
496
  
497
  #convert to an empty EventData object
498
   EventData(data=d,
499
             subject="subject",
500
             rand.date="randDate",
501
             has.event="has.event",
502
             withdrawn="withdrawn",
503
             time="time",
504
             followup=followup)
505
}
506
507
##' Calculate the number of days at risk
508
##' 
509
##' For \code{EventData} object this is the number of days
510
##' at risk of the input data. For \code{SingleSimDetails} object
511
##' this is the median numberof days at risk in the simulation set at the given time
512
##' 
513
##' @param object The object to calculate the days at risk  
514
##' @param ... Additional arguments passed to the function
515
##' @param times A vector of dates for calculating cutting the simulated date at in order to
516
##' calculate the number of days at risk by this point.
517
##' @name CalculateDaysAtRisk
518
##' @rdname CalculateDaysAtRisk-methods
519
##' @export
520
setGeneric("CalculateDaysAtRisk",function(object,...)standardGeneric("CalculateDaysAtRisk"))
521
522
523
##' @rdname CalculateDaysAtRisk-methods
524
##' @name CalculateDaysAtRisk
525
##' @aliases CalculateDaysAtRisk,EventData-method
526
##' @export
527
setMethod("CalculateDaysAtRisk","EventData",
528
          function(object){
529
            return(sum(object@subject.data$time))            
530
          }
531
)
532
533
##' Makes a barplot with the number of events over time
534
##' 
535
##' @param object The object to create the event versus time plot 
536
##' @param ... Additional arguments passed to the function
537
##' @param timeunit The resoloution on the x-axis (month, weeks or quarter)
538
##' @name EventVsTimePlot
539
##' @rdname EventVsTimePlot-methods
540
##' @export
541
setGeneric("EventVsTimePlot",function(object,timeunit="Months",...)standardGeneric("EventVsTimePlot"))
542
543
544
##' @rdname EventVsTimePlot-methods
545
##' @name EventVsTimePlot
546
##' @aliases EventVsTimePlot,EventData-method
547
##' @export
548
setMethod("EventVsTimePlot","EventData",
549
          function( object, timeunit="month" ){
550
            my.data <- object@subject.data
551
            my.data$lastdate <- LastDate( my.data )
552
            my.data$has.event <- as.integer( my.data$has.event )
553
            my.data$mytimeunit <- as.Date( cut( my.data$lastdate, 
554
                                                breaks = timeunit ) )
555
            
556
            mybreaks <- if( timeunit == "quarter" ) "3 months" else "1 month" 
557
            
558
            ggplot( data = my.data, aes_string( x="mytimeunit", y="has.event" )) +
559
              stat_summary( fun.y = sum, geom = "bar") + 
560
              scale_x_date(
561
                date_labels = "%Y-%b",
562
                date_breaks = mybreaks ) + 
563
              theme_bw() + 
564
              theme( axis.text.x = element_text(angle = 45, hjust = 1) ) +
565
              xlab( "" ) +
566
              ylab( "Observed events" )
567
          }
568
)
569