Diff of /R/eventData.R [000000] .. [f2e496]

Switch to side-by-side view

--- a
+++ b/R/eventData.R
@@ -0,0 +1,569 @@
+# This file contains the public functions associated with eventData object
+# in the predict from data part of the package. Note the estimateAccrualParameter
+# function can be found in the accrual.R file.
+
+##' @include fromDataSimParam.R eventPrediction_package.R
+NULL
+
+##' @import survival methods
+NULL
+
+##' @importFrom  mvtnorm rmvnorm
+NULL
+
+##' @importFrom  scales date_format
+NULL
+
+# Function that will check the validity of EventData object
+# when constructed
+# @param object EventData object
+checkEventData <- function(object){
+  errors <- ""
+  
+  required.colnames <- c("has.event", "rand.date", "withdrawn", "subject", "time","censored.at.follow.up")
+  
+  data <- object@subject.data
+  
+  if(nrow(data)>0){
+  
+    if(any(!data$has.event %in% c(0,1))) errors <- paste(errors,"has.event must be 1 or 0" )
+    if(any(!data$withdrawn %in% c(0,1))) errors <- paste(errors,"withdrawn must be 1 or 0" )
+    if(any(!data$censored.at.follow.up %in% c(0,1))) errors <- paste(errors,"censored.at.follow.up must be 1 or 0" )
+
+    if(any(!required.colnames %in% colnames(data))) errors <- paste(errors,"invalid column names")
+  
+    
+    if(class(data$rand.date)!="Date") errors <- paste(errors,"Invalid rand.date")
+    if(class(data$time)!="numeric") {
+      errors <- paste(errors,"Times must be numeric")
+    }
+    else{
+      if(all(data$time==0)) errors <- paste(errors,"time value incorrect all are 0!")  
+      if(any(data$time < 0 | is.na(data$time))) {
+        errors <- paste(errors,"subjects cannot have non-positive time on trial. Please check subjects ",
+                      paste(data[data$time <= 0 | is.na(data$time),]$subject,collapse=", "))
+      
+      }
+    }
+  
+    if(any(duplicated(data$subject))) errors <- paste(errors,"subject ID must be unique")
+  
+    if(all(data$has.event==0)){
+      warning("No events have occurred - a model cannot be fit")
+    }
+    if(all(data$has.event==1)) errors <- paste(errors,"all events have occurred!")
+  
+    idx <- which(data$has.event==1& data$withdrawn==1)
+    if(length(idx)){
+      errors <- paste(errors,"subjects cannot be both withdrawn and have an event. Please check subjects ",
+                    paste(data$subject[idx],collapse=", "))
+    }
+  
+  }
+  
+  if(!is.numeric(object@followup)||length(object@followup)!= 1|| object@followup <= 0){
+    errors <- paste(errors,"Invalid followup argument")
+  }
+  
+  if(errors == "") TRUE else errors
+}
+  
+##' Class representing data to use for new predictions
+##' @slot subject.data a data frame with 6 columns
+##' "subject", "rand.date", "has.event", "withdrawn", "censored.at.follow.up" and "time" and "site" and "event.type"
+##' see vignette and \code{EventData} for further details. 
+##' @slot followup A numeric value for the fixed follow up period a subject
+##' is followed. This is in days. If there is no fixed followup period then 
+##' Inf should be used
+##' @export
+setClass("EventData", 
+          slots= list(subject.data = "data.frame",
+                      followup="numeric"),
+          validity = checkEventData)
+
+
+##' Constructor for event data object
+##' 
+##' All dates must be in one of the following formats:
+##' YYYY-MM-DD, DD/MM/YY or DD Month YYYY
+##'  
+##' 
+##' @param data A data frame 
+##' @param subject string, the column name of subject identifiers
+##' @param rand.date string, the column name of randomization dates
+##' @param has.event string, the column name of whether a subject had an event
+##' (1) or not (0) 
+##' @param withdrawn string, the column name of whether a subject has withdrawn
+##' (1) or not (0)  
+##' @param time Either a string, the column name of time each subject has been on the study
+##' or a list with elements with (some of) the following named elements.
+##' \code{last.date}, \code{event.date}, \code{prog.date}, \code{dth.date} and \code{withdrawn.date}
+##' In this case the package will attempt to derive the time column 
+##' See the vignette and then eventPrediction:::AddTimeColumn for further details
+##' @param site optional column for subject site
+##' @param event.type optional column for the event type (e.g. unstable angina) if not included then `Had Event' will be used 
+##' @param remove.0.time logical, if TRUE then all subjects with time = NA or 0 are removed from the
+##' data set and not included in the object. If FALSE then they are included in the simulation (but not in the model fitting)
+##' @param followup A numeric value for the fixed follow up period a subject
+##' is followed. This is in days. If there is no fixed followup period then 
+##' Inf should be used
+##' @return An \code{EventData} object
+##' @export
+EventData <- function(data,subject,rand.date,has.event,withdrawn,time,site=NULL,event.type=NULL,remove.0.time=FALSE,followup=Inf){
+  
+  #set time argument
+  arg <- if(!is.list(time))  time else NULL
+   
+  #check columns are in data frame 
+  for(x in c(subject,rand.date,has.event,withdrawn,arg)){
+    if(!is.null(x) && !x %in% colnames(data)){
+      stop(paste("Column name",x,"not found in data frame"))
+    }
+  }
+  
+  if(!is.numeric(followup) || length(followup)> 1 || followup <= 0){
+    stop("Invalid followup argument")
+  }
+  
+  #validate the input columns, deriving time from other columns if needed
+  data[,rand.date] <- if(nrow(data)>0) FixDates(data[,rand.date]) else data[,rand.date]
+  time <- if(!is.list(time))  data[,time] else AddTimeColumn(data,rand.date,has.event,withdrawn,subject,time)
+  site <- if(is.null(site)) rep(NA,nrow(data)) else data[,site] 
+  
+  if(is.null(event.type)) 
+    event.type <- ifelse(data[,has.event],"Has Event",factor(NA))
+  else{
+    event.type <- factor(data[,event.type])
+    if("" %in% levels(event.type)){
+      event.type[event.type==""] <- factor(NA)
+      event.type <- droplevels(event.type)
+    }
+  }
+  
+  #unvalidated data frame
+  subject.data <- data.frame(
+    subject=data[,subject],
+    rand.date=data[,rand.date],
+    time=time,
+    has.event=data[,has.event],
+    withdrawn=data[,withdrawn],
+    site=site,
+    event.type=event.type
+  )
+  
+     
+  keep <- which(as.character(subject.data$subject) > "" & !is.na(subject.data$rand.date))
+  if(length(keep) != nrow(subject.data)){
+    warning(paste("Subjects",paste(subject.data[setdiff(1:nrow(subject.data),keep),subject],collapse=", "),
+                  "have been removed due to invalid rand.date or missing subject ID."))
+  }
+  
+  subject.data <- subject.data[keep,]
+  
+  subject.data <- warnNAtime(remove.0.time,subject.data)
+  
+  subject.data <- IncludeFollowUp(subject.data,followup)
+  
+  #finally run some validation checks which change the input data and output warnings
+  validation.checks <- function(subject.data,idx,warning.message,fn){
+    if(length(idx)>0){
+      warning(paste("Subjects",paste(subject.data[idx,subject],collapse=", "),warning.message))
+      subject.data <- fn(subject.data,idx)
+    }
+    subject.data
+  }
+  
+  subject.data <- validation.checks(subject.data,
+                    idx=which(subject.data$has.event==1 & is.na(subject.data$event.type)),
+                    warning.message="have an event and no event type, their event type is set to 'Had Event'",
+                    fn=function(subject.data,idx){
+                      if(!"Had Event" %in% levels(subject.data$event.type)){
+                        levels(subject.data$event.type) <- c(levels(subject.data$event.type),"Had Event")
+                      }
+                      subject.data$event.type[idx] <- "Had Event"
+                      return(subject.data)
+                    })
+  
+  subject.data <- validation.checks(subject.data,
+                    idx=which(subject.data$has.event!=1 & !is.na(subject.data$event.type)),
+                    warning.message="have an event type but did not have an event",
+                    fn=function(subject.data,idx){
+                      subject.data$event.type[idx] <- factor(NA)
+                      subject.data$event.type <- droplevels(subject.data$event.type)
+                      return(subject.data)
+                    })
+  
+  subject.data <- validation.checks(subject.data,
+                    idx=which(subject.data$has.event==1& subject.data$withdrawn==1),
+                    warning.message="have an event and are withdrawn, assuming they have an event at the given time",
+                    fn=function(subject.data,idx){
+                      subject.data$withdrawn[idx] <- 0
+                      return(subject.data)
+                    })
+
+  #further validation occurs in the validity function of the class 
+  return(new("EventData", subject.data = subject.data, followup=followup))
+}
+
+
+
+##' @name show
+##' @rdname show-methods
+##' @aliases show,EventData-method
+##' @export
+setMethod("show",
+          "EventData",
+    function(object) {
+         cat("EventData, use object@subject.data$param to access individual columns:\n")
+         cat(str(object@subject.data))
+     })
+
+
+##' @name summary
+##' @rdname summary-methods
+##' @aliases summary,EventData-method
+##' @export
+setMethod("summary","EventData",
+  function(object){
+    df <- object@subject.data 
+    daysinyear <- standarddaysinyear()
+    
+    if(nrow(df)>0){
+    
+      cat(paste("Number of subjects:",nrow(df),"\n"))
+      cat(paste("Number of events:",sum(df$has.event==1),"\n"))
+    
+      if(sum(df$has.event==1) != 0 && length(levels(df$event.type)) != 1){
+        tab <- table(df$event.type)
+        lapply(names(tab),function(y){cat(paste(" Of type ",y,": ",tab[y],"\n",sep=""))})
+      }
+    
+      cat(paste("Number of withdrawn:", sum(df$withdrawn==1),"\n"))
+      cat(paste("First subject randomized:",as.Date(min(df$rand.date),origin="1970-01-01"),"\n"))
+      cat(paste("Last subject randomized:",as.Date(max(df$rand.date),origin="1970-01-01"),"\n"))
+    
+      if(sum(object@subject.data$has.event)>0){    
+        cat(paste("First Event:",as.Date(min(LastDate(df[df$has.event==1,]),na.rm=TRUE),origin="1970-01-01"),"\n"))
+        cat(paste("Last Event:",as.Date(max(LastDate(df[df$has.event==1,]),na.rm=TRUE),origin="1970-01-01"),"\n"))
+      }
+      
+      av <- average.rec(nrow(df),as.numeric(min(df$rand.date)),as.numeric(max(df$rand.date))) 
+      cat(paste("Average recruitment (subjects/day):",roundForceOutputZeros(av,2),"\n"))
+    
+      if(!is.infinite(object@followup)){
+        cat(paste("Subjects followed for ",round(object@followup), " days (",
+                round(object@followup/daysinyear,2)," years)\n",sep=""))
+      
+        cat(paste("Number of subjects censored at end of follow up period:",
+                  sum(object@subject.data$censored.at.follow.up),"\n"))
+      
+      }
+    }
+    else{
+      cat("Empty data frame!\n")
+    }
+})
+
+
+
+##' Fit a survival model to an EventData object
+##' @rdname fit-methods
+##' @name fit
+##' @param object an EventData object
+##' @param ... Additional arguments to the function
+##' @param dist character, either weibull or loglogistic, which model
+##' is to be fit 
+##' @return an EventModel object
+##' @export
+if (!isGeneric("fit")){
+  setGeneric("fit", function(object,...) standardGeneric("fit"))
+}
+
+
+
+##' @name fit
+##' @rdname fit-methods
+##' @aliases fit,EventData-method
+##' @export
+setMethod("fit","EventData",function(object,dist="weibull"){
+  
+  if(!dist %in% c("weibull","loglogistic")){
+    stop("dist must be weibull or loglogistic")
+  }
+  
+  if(nrow(object@subject.data)==0)stop("Empty data frame!") 
+  if(sum(object@subject.data$has.event)==0){
+    stop("Cannot fit a model to a dataset with no events")
+  }
+  
+  #subjects with time = 0 are set to NA for the model fitting
+  #so they are ignored
+  indat <- object@subject.data
+  indat$time <- ifelse(indat$time==0,NA,indat$time)
+  
+  model<-survreg(Surv(time, has.event) ~ 1, data=indat, dist=dist, y = TRUE)
+  
+  new("EventModel",model=model,event.data=object,
+      simParams=FromDataParam(object=model,type=dist))
+})
+
+
+
+##' @rdname plot-methods
+##' @name plot
+##' @aliases plot,EventData,missing-method
+##' @export
+setMethod( "plot",
+  signature( x="EventData", y="missing" ),
+  function(x, xlab="log(t)", ylab="log(-log(S(t)))", main="", ...) {
+    
+    if(nrow(x@subject.data)==0)stop("Empty data frame!")
+    if(sum(x@subject.data$has.event)==0){
+      stop("Cannot fit a model to a dataset with no events")
+    }
+    
+    model <- survfit(Surv(time, has.event) ~ 1, data=x@subject.data,...)
+      
+    res <- data.frame(t=model$time, s=model$surv)
+    res <- res[res$t>0 & res$s>0 & res$s<1,]
+    
+    df <- data.frame(x=log(res$t),y=log(-log(res$s)))
+    
+    p <- ggplot(df, aes_string(x="x", y="y")) + geom_point() +
+      stat_smooth(method="lm", se=FALSE, color="red", size=1 ) +
+      xlab(xlab) + ylab(ylab) +
+      ggtitle(main) + theme_bw() +
+      theme(panel.grid.minor = element_line(colour="gray", size=0.5))
+    p
+  }
+)
+
+
+
+##' Method to create new \code{EventData} object only including recruitment times
+##' @param object The  object from which to create the new \code{EventData} object
+##' @rdname OnlyUseRecTimes-methods
+##' @return An \code{EventData} object with time=0 and no events or withdrawn subjects
+##' @name OnlyUseRecTimes
+##' @export
+setGeneric("OnlyUseRecTimes",function(object) standardGeneric("OnlyUseRecTimes"))
+
+##' @rdname OnlyUseRecTimes-methods
+##' @name OnlyUseRecTimes
+##' @aliases OnlyUseRecTimes,EventData-method
+##' @export
+setMethod("OnlyUseRecTimes","EventData",function(object){
+  object@subject.data$withdrawn <- 0
+  object@subject.data$has.event <- 0
+  object@subject.data$censored.at.follow.up <- 0
+  object@subject.data$time <- 0
+  object@subject.data$event.type <- factor(NA)
+  object@subject.data$event.type <- droplevels(object@subject.data$event.type)
+  object
+})
+
+
+##' Cut the Event Data
+##' 
+##' Create a new EventData from an existing one which shows how the
+##' data would have looked on a given date (assuming no lag in reporting of 
+##' events and if necessary subjects have been censored at cutoff point).
+##' If subject was censored before the cut date then their censored date remains
+##' unchanged
+##' @param object The EventData object
+##' @param date The date to cut the data at
+##' @rdname CutData-methods
+##' @name CutData
+##' @return \code{EventData} object with the data censored at the appropriate point
+##' @export
+setGeneric("CutData",function(object,date) standardGeneric("CutData"))
+
+
+##' @rdname CutData-methods
+##' @name CutData
+##' @aliases CutData,EventData-method
+##' @export
+setMethod("CutData","EventData",function(object,date){
+
+  cut.date <- FixDates(date)
+  
+  subject.data <- object@subject.data
+  
+  #only include subjects who have been randomized by this time
+  subject.data <- subject.data[subject.data$rand.date<=cut.date,]
+  if(nrow(subject.data)==0){
+    stop("Cut date before first subject randomization")
+  }
+  
+  censored.times <- cut.date - subject.data$rand.date + 1
+  idx <- which(censored.times < subject.data$time)
+  
+  if(length(idx)==0&&nrow(subject.data)==nrow(object@subject.data)){
+    warning("Cut date is too late (no ongoing subjects at this time) and so has no effect")
+  }
+  
+  
+  
+  subject.data$time <- pmin(censored.times,subject.data$time)
+  subject.data$has.event[idx] <- 0
+  subject.data$event.type[idx] <- factor(NA)
+  subject.data$event.type <- droplevels(subject.data$event.type)
+  subject.data$withdrawn[idx] <- 0
+  
+  EventData(
+    data=subject.data,
+    subject="subject",
+    rand.date="rand.date",
+    has.event="has.event",
+    withdrawn="withdrawn",
+    time="time",
+    site="site",
+    event.type="event.type",
+    followup=object@followup
+  )
+  
+})
+
+##' This function calculates the event type for pfs data
+##' 
+##' This function is called before creating the EventData object 
+##' @param has.event A vector or whether subjects had event 
+##' @param prog.date A vector of dates (or strings in the standard eventPrediction allowed formats)
+##' of progression.date or blank if unknown 
+##' @param dth.date A vector of dates (or strings in the standard eventPrediction allowed formats)
+##' of death date or blank if unknown
+##' @return A vector with NA if subject does not have event, 
+##' "Death" if dth.date <= prog.date or prog.date blank,
+##' "Progression (not death)" if prog.date < dth.date or dth.date blank
+##' "Progression (unknown if death)" if both prog.date and dth.date are blank
+##' @export
+CalculateProgEventTypes <- function(has.event,prog.date,dth.date){
+  #called before creating EventDate object so extra validation needed  
+  
+  if(any(!has.event %in% c(0,1))){
+    stop("Invalid has.event argument")
+  }
+  
+  prog.date <- FixDates(prog.date)
+  dth.date <- FixDates(dth.date)
+  
+  if(!(length(prog.date)==length(dth.date) && length(prog.date) ==length(has.event))){
+    stop("Invalid lengths of arguments")
+  }
+  
+  unlist(lapply(seq_along(has.event),function(x){
+    if(!has.event[x]) return(NA)
+    
+    dth <- dth.date[x]
+    prog <- prog.date[x]
+    
+    if(!is.na(prog) && !is.na(dth)){
+      if(dth<=prog) return("Death")  
+      return("Progression (not death)")
+    }  
+    
+    
+    #just dth return dth
+    if(!is.na(dth)) return("Death")
+    
+    #just prog return prog
+    if(!is.na(prog)) return("Progression (not death)")
+    
+    #Otherwise are relying on lastDate so do not know which
+    return("Progression (unknown if death)")
+    
+  }))
+  
+  
+}
+
+
+##' Create an \code{EventData} object with no subjects
+##' 
+##' @param followup The time subjects are followed until censoring  
+##' @export
+EmptyEventData <- function(followup=Inf){
+  
+  if(length(followup) > 1 || !is.numeric(followup) || followup < 0){
+    stop("Invalid followup argument")
+  }
+  
+  d <- data.frame(subject=character(0),
+                  randDate=numeric(0),
+                  has.event=numeric(0),
+                  withdrawn=numeric(0),
+                  time=numeric(0))
+  
+  #convert to an empty EventData object
+   EventData(data=d,
+             subject="subject",
+             rand.date="randDate",
+             has.event="has.event",
+             withdrawn="withdrawn",
+             time="time",
+             followup=followup)
+}
+
+##' Calculate the number of days at risk
+##' 
+##' For \code{EventData} object this is the number of days
+##' at risk of the input data. For \code{SingleSimDetails} object
+##' this is the median numberof days at risk in the simulation set at the given time
+##' 
+##' @param object The object to calculate the days at risk  
+##' @param ... Additional arguments passed to the function
+##' @param times A vector of dates for calculating cutting the simulated date at in order to
+##' calculate the number of days at risk by this point.
+##' @name CalculateDaysAtRisk
+##' @rdname CalculateDaysAtRisk-methods
+##' @export
+setGeneric("CalculateDaysAtRisk",function(object,...)standardGeneric("CalculateDaysAtRisk"))
+
+
+##' @rdname CalculateDaysAtRisk-methods
+##' @name CalculateDaysAtRisk
+##' @aliases CalculateDaysAtRisk,EventData-method
+##' @export
+setMethod("CalculateDaysAtRisk","EventData",
+          function(object){
+            return(sum(object@subject.data$time))            
+          }
+)
+
+##' Makes a barplot with the number of events over time
+##' 
+##' @param object The object to create the event versus time plot 
+##' @param ... Additional arguments passed to the function
+##' @param timeunit The resoloution on the x-axis (month, weeks or quarter)
+##' @name EventVsTimePlot
+##' @rdname EventVsTimePlot-methods
+##' @export
+setGeneric("EventVsTimePlot",function(object,timeunit="Months",...)standardGeneric("EventVsTimePlot"))
+
+
+##' @rdname EventVsTimePlot-methods
+##' @name EventVsTimePlot
+##' @aliases EventVsTimePlot,EventData-method
+##' @export
+setMethod("EventVsTimePlot","EventData",
+          function( object, timeunit="month" ){
+            my.data <- object@subject.data
+            my.data$lastdate <- LastDate( my.data )
+            my.data$has.event <- as.integer( my.data$has.event )
+            my.data$mytimeunit <- as.Date( cut( my.data$lastdate, 
+                                                breaks = timeunit ) )
+            
+            mybreaks <- if( timeunit == "quarter" ) "3 months" else "1 month" 
+            
+            ggplot( data = my.data, aes_string( x="mytimeunit", y="has.event" )) +
+              stat_summary( fun.y = sum, geom = "bar") + 
+              scale_x_date(
+                date_labels = "%Y-%b",
+                date_breaks = mybreaks ) + 
+              theme_bw() + 
+              theme( axis.text.x = element_text(angle = 45, hjust = 1) ) +
+              xlab( "" ) +
+              ylab( "Observed events" )
+          }
+)
+