[f2e496]: / R / eventData.R

Download this file

570 lines (452 with data), 20.4 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
# This file contains the public functions associated with eventData object
# in the predict from data part of the package. Note the estimateAccrualParameter
# function can be found in the accrual.R file.
##' @include fromDataSimParam.R eventPrediction_package.R
NULL
##' @import survival methods
NULL
##' @importFrom mvtnorm rmvnorm
NULL
##' @importFrom scales date_format
NULL
# Function that will check the validity of EventData object
# when constructed
# @param object EventData object
checkEventData <- function(object){
errors <- ""
required.colnames <- c("has.event", "rand.date", "withdrawn", "subject", "time","censored.at.follow.up")
data <- object@subject.data
if(nrow(data)>0){
if(any(!data$has.event %in% c(0,1))) errors <- paste(errors,"has.event must be 1 or 0" )
if(any(!data$withdrawn %in% c(0,1))) errors <- paste(errors,"withdrawn must be 1 or 0" )
if(any(!data$censored.at.follow.up %in% c(0,1))) errors <- paste(errors,"censored.at.follow.up must be 1 or 0" )
if(any(!required.colnames %in% colnames(data))) errors <- paste(errors,"invalid column names")
if(class(data$rand.date)!="Date") errors <- paste(errors,"Invalid rand.date")
if(class(data$time)!="numeric") {
errors <- paste(errors,"Times must be numeric")
}
else{
if(all(data$time==0)) errors <- paste(errors,"time value incorrect all are 0!")
if(any(data$time < 0 | is.na(data$time))) {
errors <- paste(errors,"subjects cannot have non-positive time on trial. Please check subjects ",
paste(data[data$time <= 0 | is.na(data$time),]$subject,collapse=", "))
}
}
if(any(duplicated(data$subject))) errors <- paste(errors,"subject ID must be unique")
if(all(data$has.event==0)){
warning("No events have occurred - a model cannot be fit")
}
if(all(data$has.event==1)) errors <- paste(errors,"all events have occurred!")
idx <- which(data$has.event==1& data$withdrawn==1)
if(length(idx)){
errors <- paste(errors,"subjects cannot be both withdrawn and have an event. Please check subjects ",
paste(data$subject[idx],collapse=", "))
}
}
if(!is.numeric(object@followup)||length(object@followup)!= 1|| object@followup <= 0){
errors <- paste(errors,"Invalid followup argument")
}
if(errors == "") TRUE else errors
}
##' Class representing data to use for new predictions
##' @slot subject.data a data frame with 6 columns
##' "subject", "rand.date", "has.event", "withdrawn", "censored.at.follow.up" and "time" and "site" and "event.type"
##' see vignette and \code{EventData} for further details.
##' @slot followup A numeric value for the fixed follow up period a subject
##' is followed. This is in days. If there is no fixed followup period then
##' Inf should be used
##' @export
setClass("EventData",
slots= list(subject.data = "data.frame",
followup="numeric"),
validity = checkEventData)
##' Constructor for event data object
##'
##' All dates must be in one of the following formats:
##' YYYY-MM-DD, DD/MM/YY or DD Month YYYY
##'
##'
##' @param data A data frame
##' @param subject string, the column name of subject identifiers
##' @param rand.date string, the column name of randomization dates
##' @param has.event string, the column name of whether a subject had an event
##' (1) or not (0)
##' @param withdrawn string, the column name of whether a subject has withdrawn
##' (1) or not (0)
##' @param time Either a string, the column name of time each subject has been on the study
##' or a list with elements with (some of) the following named elements.
##' \code{last.date}, \code{event.date}, \code{prog.date}, \code{dth.date} and \code{withdrawn.date}
##' In this case the package will attempt to derive the time column
##' See the vignette and then eventPrediction:::AddTimeColumn for further details
##' @param site optional column for subject site
##' @param event.type optional column for the event type (e.g. unstable angina) if not included then `Had Event' will be used
##' @param remove.0.time logical, if TRUE then all subjects with time = NA or 0 are removed from the
##' data set and not included in the object. If FALSE then they are included in the simulation (but not in the model fitting)
##' @param followup A numeric value for the fixed follow up period a subject
##' is followed. This is in days. If there is no fixed followup period then
##' Inf should be used
##' @return An \code{EventData} object
##' @export
EventData <- function(data,subject,rand.date,has.event,withdrawn,time,site=NULL,event.type=NULL,remove.0.time=FALSE,followup=Inf){
#set time argument
arg <- if(!is.list(time)) time else NULL
#check columns are in data frame
for(x in c(subject,rand.date,has.event,withdrawn,arg)){
if(!is.null(x) && !x %in% colnames(data)){
stop(paste("Column name",x,"not found in data frame"))
}
}
if(!is.numeric(followup) || length(followup)> 1 || followup <= 0){
stop("Invalid followup argument")
}
#validate the input columns, deriving time from other columns if needed
data[,rand.date] <- if(nrow(data)>0) FixDates(data[,rand.date]) else data[,rand.date]
time <- if(!is.list(time)) data[,time] else AddTimeColumn(data,rand.date,has.event,withdrawn,subject,time)
site <- if(is.null(site)) rep(NA,nrow(data)) else data[,site]
if(is.null(event.type))
event.type <- ifelse(data[,has.event],"Has Event",factor(NA))
else{
event.type <- factor(data[,event.type])
if("" %in% levels(event.type)){
event.type[event.type==""] <- factor(NA)
event.type <- droplevels(event.type)
}
}
#unvalidated data frame
subject.data <- data.frame(
subject=data[,subject],
rand.date=data[,rand.date],
time=time,
has.event=data[,has.event],
withdrawn=data[,withdrawn],
site=site,
event.type=event.type
)
keep <- which(as.character(subject.data$subject) > "" & !is.na(subject.data$rand.date))
if(length(keep) != nrow(subject.data)){
warning(paste("Subjects",paste(subject.data[setdiff(1:nrow(subject.data),keep),subject],collapse=", "),
"have been removed due to invalid rand.date or missing subject ID."))
}
subject.data <- subject.data[keep,]
subject.data <- warnNAtime(remove.0.time,subject.data)
subject.data <- IncludeFollowUp(subject.data,followup)
#finally run some validation checks which change the input data and output warnings
validation.checks <- function(subject.data,idx,warning.message,fn){
if(length(idx)>0){
warning(paste("Subjects",paste(subject.data[idx,subject],collapse=", "),warning.message))
subject.data <- fn(subject.data,idx)
}
subject.data
}
subject.data <- validation.checks(subject.data,
idx=which(subject.data$has.event==1 & is.na(subject.data$event.type)),
warning.message="have an event and no event type, their event type is set to 'Had Event'",
fn=function(subject.data,idx){
if(!"Had Event" %in% levels(subject.data$event.type)){
levels(subject.data$event.type) <- c(levels(subject.data$event.type),"Had Event")
}
subject.data$event.type[idx] <- "Had Event"
return(subject.data)
})
subject.data <- validation.checks(subject.data,
idx=which(subject.data$has.event!=1 & !is.na(subject.data$event.type)),
warning.message="have an event type but did not have an event",
fn=function(subject.data,idx){
subject.data$event.type[idx] <- factor(NA)
subject.data$event.type <- droplevels(subject.data$event.type)
return(subject.data)
})
subject.data <- validation.checks(subject.data,
idx=which(subject.data$has.event==1& subject.data$withdrawn==1),
warning.message="have an event and are withdrawn, assuming they have an event at the given time",
fn=function(subject.data,idx){
subject.data$withdrawn[idx] <- 0
return(subject.data)
})
#further validation occurs in the validity function of the class
return(new("EventData", subject.data = subject.data, followup=followup))
}
##' @name show
##' @rdname show-methods
##' @aliases show,EventData-method
##' @export
setMethod("show",
"EventData",
function(object) {
cat("EventData, use object@subject.data$param to access individual columns:\n")
cat(str(object@subject.data))
})
##' @name summary
##' @rdname summary-methods
##' @aliases summary,EventData-method
##' @export
setMethod("summary","EventData",
function(object){
df <- object@subject.data
daysinyear <- standarddaysinyear()
if(nrow(df)>0){
cat(paste("Number of subjects:",nrow(df),"\n"))
cat(paste("Number of events:",sum(df$has.event==1),"\n"))
if(sum(df$has.event==1) != 0 && length(levels(df$event.type)) != 1){
tab <- table(df$event.type)
lapply(names(tab),function(y){cat(paste(" Of type ",y,": ",tab[y],"\n",sep=""))})
}
cat(paste("Number of withdrawn:", sum(df$withdrawn==1),"\n"))
cat(paste("First subject randomized:",as.Date(min(df$rand.date),origin="1970-01-01"),"\n"))
cat(paste("Last subject randomized:",as.Date(max(df$rand.date),origin="1970-01-01"),"\n"))
if(sum(object@subject.data$has.event)>0){
cat(paste("First Event:",as.Date(min(LastDate(df[df$has.event==1,]),na.rm=TRUE),origin="1970-01-01"),"\n"))
cat(paste("Last Event:",as.Date(max(LastDate(df[df$has.event==1,]),na.rm=TRUE),origin="1970-01-01"),"\n"))
}
av <- average.rec(nrow(df),as.numeric(min(df$rand.date)),as.numeric(max(df$rand.date)))
cat(paste("Average recruitment (subjects/day):",roundForceOutputZeros(av,2),"\n"))
if(!is.infinite(object@followup)){
cat(paste("Subjects followed for ",round(object@followup), " days (",
round(object@followup/daysinyear,2)," years)\n",sep=""))
cat(paste("Number of subjects censored at end of follow up period:",
sum(object@subject.data$censored.at.follow.up),"\n"))
}
}
else{
cat("Empty data frame!\n")
}
})
##' Fit a survival model to an EventData object
##' @rdname fit-methods
##' @name fit
##' @param object an EventData object
##' @param ... Additional arguments to the function
##' @param dist character, either weibull or loglogistic, which model
##' is to be fit
##' @return an EventModel object
##' @export
if (!isGeneric("fit")){
setGeneric("fit", function(object,...) standardGeneric("fit"))
}
##' @name fit
##' @rdname fit-methods
##' @aliases fit,EventData-method
##' @export
setMethod("fit","EventData",function(object,dist="weibull"){
if(!dist %in% c("weibull","loglogistic")){
stop("dist must be weibull or loglogistic")
}
if(nrow(object@subject.data)==0)stop("Empty data frame!")
if(sum(object@subject.data$has.event)==0){
stop("Cannot fit a model to a dataset with no events")
}
#subjects with time = 0 are set to NA for the model fitting
#so they are ignored
indat <- object@subject.data
indat$time <- ifelse(indat$time==0,NA,indat$time)
model<-survreg(Surv(time, has.event) ~ 1, data=indat, dist=dist, y = TRUE)
new("EventModel",model=model,event.data=object,
simParams=FromDataParam(object=model,type=dist))
})
##' @rdname plot-methods
##' @name plot
##' @aliases plot,EventData,missing-method
##' @export
setMethod( "plot",
signature( x="EventData", y="missing" ),
function(x, xlab="log(t)", ylab="log(-log(S(t)))", main="", ...) {
if(nrow(x@subject.data)==0)stop("Empty data frame!")
if(sum(x@subject.data$has.event)==0){
stop("Cannot fit a model to a dataset with no events")
}
model <- survfit(Surv(time, has.event) ~ 1, data=x@subject.data,...)
res <- data.frame(t=model$time, s=model$surv)
res <- res[res$t>0 & res$s>0 & res$s<1,]
df <- data.frame(x=log(res$t),y=log(-log(res$s)))
p <- ggplot(df, aes_string(x="x", y="y")) + geom_point() +
stat_smooth(method="lm", se=FALSE, color="red", size=1 ) +
xlab(xlab) + ylab(ylab) +
ggtitle(main) + theme_bw() +
theme(panel.grid.minor = element_line(colour="gray", size=0.5))
p
}
)
##' Method to create new \code{EventData} object only including recruitment times
##' @param object The object from which to create the new \code{EventData} object
##' @rdname OnlyUseRecTimes-methods
##' @return An \code{EventData} object with time=0 and no events or withdrawn subjects
##' @name OnlyUseRecTimes
##' @export
setGeneric("OnlyUseRecTimes",function(object) standardGeneric("OnlyUseRecTimes"))
##' @rdname OnlyUseRecTimes-methods
##' @name OnlyUseRecTimes
##' @aliases OnlyUseRecTimes,EventData-method
##' @export
setMethod("OnlyUseRecTimes","EventData",function(object){
object@subject.data$withdrawn <- 0
object@subject.data$has.event <- 0
object@subject.data$censored.at.follow.up <- 0
object@subject.data$time <- 0
object@subject.data$event.type <- factor(NA)
object@subject.data$event.type <- droplevels(object@subject.data$event.type)
object
})
##' Cut the Event Data
##'
##' Create a new EventData from an existing one which shows how the
##' data would have looked on a given date (assuming no lag in reporting of
##' events and if necessary subjects have been censored at cutoff point).
##' If subject was censored before the cut date then their censored date remains
##' unchanged
##' @param object The EventData object
##' @param date The date to cut the data at
##' @rdname CutData-methods
##' @name CutData
##' @return \code{EventData} object with the data censored at the appropriate point
##' @export
setGeneric("CutData",function(object,date) standardGeneric("CutData"))
##' @rdname CutData-methods
##' @name CutData
##' @aliases CutData,EventData-method
##' @export
setMethod("CutData","EventData",function(object,date){
cut.date <- FixDates(date)
subject.data <- object@subject.data
#only include subjects who have been randomized by this time
subject.data <- subject.data[subject.data$rand.date<=cut.date,]
if(nrow(subject.data)==0){
stop("Cut date before first subject randomization")
}
censored.times <- cut.date - subject.data$rand.date + 1
idx <- which(censored.times < subject.data$time)
if(length(idx)==0&&nrow(subject.data)==nrow(object@subject.data)){
warning("Cut date is too late (no ongoing subjects at this time) and so has no effect")
}
subject.data$time <- pmin(censored.times,subject.data$time)
subject.data$has.event[idx] <- 0
subject.data$event.type[idx] <- factor(NA)
subject.data$event.type <- droplevels(subject.data$event.type)
subject.data$withdrawn[idx] <- 0
EventData(
data=subject.data,
subject="subject",
rand.date="rand.date",
has.event="has.event",
withdrawn="withdrawn",
time="time",
site="site",
event.type="event.type",
followup=object@followup
)
})
##' This function calculates the event type for pfs data
##'
##' This function is called before creating the EventData object
##' @param has.event A vector or whether subjects had event
##' @param prog.date A vector of dates (or strings in the standard eventPrediction allowed formats)
##' of progression.date or blank if unknown
##' @param dth.date A vector of dates (or strings in the standard eventPrediction allowed formats)
##' of death date or blank if unknown
##' @return A vector with NA if subject does not have event,
##' "Death" if dth.date <= prog.date or prog.date blank,
##' "Progression (not death)" if prog.date < dth.date or dth.date blank
##' "Progression (unknown if death)" if both prog.date and dth.date are blank
##' @export
CalculateProgEventTypes <- function(has.event,prog.date,dth.date){
#called before creating EventDate object so extra validation needed
if(any(!has.event %in% c(0,1))){
stop("Invalid has.event argument")
}
prog.date <- FixDates(prog.date)
dth.date <- FixDates(dth.date)
if(!(length(prog.date)==length(dth.date) && length(prog.date) ==length(has.event))){
stop("Invalid lengths of arguments")
}
unlist(lapply(seq_along(has.event),function(x){
if(!has.event[x]) return(NA)
dth <- dth.date[x]
prog <- prog.date[x]
if(!is.na(prog) && !is.na(dth)){
if(dth<=prog) return("Death")
return("Progression (not death)")
}
#just dth return dth
if(!is.na(dth)) return("Death")
#just prog return prog
if(!is.na(prog)) return("Progression (not death)")
#Otherwise are relying on lastDate so do not know which
return("Progression (unknown if death)")
}))
}
##' Create an \code{EventData} object with no subjects
##'
##' @param followup The time subjects are followed until censoring
##' @export
EmptyEventData <- function(followup=Inf){
if(length(followup) > 1 || !is.numeric(followup) || followup < 0){
stop("Invalid followup argument")
}
d <- data.frame(subject=character(0),
randDate=numeric(0),
has.event=numeric(0),
withdrawn=numeric(0),
time=numeric(0))
#convert to an empty EventData object
EventData(data=d,
subject="subject",
rand.date="randDate",
has.event="has.event",
withdrawn="withdrawn",
time="time",
followup=followup)
}
##' Calculate the number of days at risk
##'
##' For \code{EventData} object this is the number of days
##' at risk of the input data. For \code{SingleSimDetails} object
##' this is the median numberof days at risk in the simulation set at the given time
##'
##' @param object The object to calculate the days at risk
##' @param ... Additional arguments passed to the function
##' @param times A vector of dates for calculating cutting the simulated date at in order to
##' calculate the number of days at risk by this point.
##' @name CalculateDaysAtRisk
##' @rdname CalculateDaysAtRisk-methods
##' @export
setGeneric("CalculateDaysAtRisk",function(object,...)standardGeneric("CalculateDaysAtRisk"))
##' @rdname CalculateDaysAtRisk-methods
##' @name CalculateDaysAtRisk
##' @aliases CalculateDaysAtRisk,EventData-method
##' @export
setMethod("CalculateDaysAtRisk","EventData",
function(object){
return(sum(object@subject.data$time))
}
)
##' Makes a barplot with the number of events over time
##'
##' @param object The object to create the event versus time plot
##' @param ... Additional arguments passed to the function
##' @param timeunit The resoloution on the x-axis (month, weeks or quarter)
##' @name EventVsTimePlot
##' @rdname EventVsTimePlot-methods
##' @export
setGeneric("EventVsTimePlot",function(object,timeunit="Months",...)standardGeneric("EventVsTimePlot"))
##' @rdname EventVsTimePlot-methods
##' @name EventVsTimePlot
##' @aliases EventVsTimePlot,EventData-method
##' @export
setMethod("EventVsTimePlot","EventData",
function( object, timeunit="month" ){
my.data <- object@subject.data
my.data$lastdate <- LastDate( my.data )
my.data$has.event <- as.integer( my.data$has.event )
my.data$mytimeunit <- as.Date( cut( my.data$lastdate,
breaks = timeunit ) )
mybreaks <- if( timeunit == "quarter" ) "3 months" else "1 month"
ggplot( data = my.data, aes_string( x="mytimeunit", y="has.event" )) +
stat_summary( fun.y = sum, geom = "bar") +
scale_x_date(
date_labels = "%Y-%b",
date_breaks = mybreaks ) +
theme_bw() +
theme( axis.text.x = element_text(angle = 45, hjust = 1) ) +
xlab( "" ) +
ylab( "Observed events" )
}
)