--- a +++ b/R/eventData.R @@ -0,0 +1,569 @@ +# This file contains the public functions associated with eventData object +# in the predict from data part of the package. Note the estimateAccrualParameter +# function can be found in the accrual.R file. + +##' @include fromDataSimParam.R eventPrediction_package.R +NULL + +##' @import survival methods +NULL + +##' @importFrom mvtnorm rmvnorm +NULL + +##' @importFrom scales date_format +NULL + +# Function that will check the validity of EventData object +# when constructed +# @param object EventData object +checkEventData <- function(object){ + errors <- "" + + required.colnames <- c("has.event", "rand.date", "withdrawn", "subject", "time","censored.at.follow.up") + + data <- object@subject.data + + if(nrow(data)>0){ + + if(any(!data$has.event %in% c(0,1))) errors <- paste(errors,"has.event must be 1 or 0" ) + if(any(!data$withdrawn %in% c(0,1))) errors <- paste(errors,"withdrawn must be 1 or 0" ) + if(any(!data$censored.at.follow.up %in% c(0,1))) errors <- paste(errors,"censored.at.follow.up must be 1 or 0" ) + + if(any(!required.colnames %in% colnames(data))) errors <- paste(errors,"invalid column names") + + + if(class(data$rand.date)!="Date") errors <- paste(errors,"Invalid rand.date") + if(class(data$time)!="numeric") { + errors <- paste(errors,"Times must be numeric") + } + else{ + if(all(data$time==0)) errors <- paste(errors,"time value incorrect all are 0!") + if(any(data$time < 0 | is.na(data$time))) { + errors <- paste(errors,"subjects cannot have non-positive time on trial. Please check subjects ", + paste(data[data$time <= 0 | is.na(data$time),]$subject,collapse=", ")) + + } + } + + if(any(duplicated(data$subject))) errors <- paste(errors,"subject ID must be unique") + + if(all(data$has.event==0)){ + warning("No events have occurred - a model cannot be fit") + } + if(all(data$has.event==1)) errors <- paste(errors,"all events have occurred!") + + idx <- which(data$has.event==1& data$withdrawn==1) + if(length(idx)){ + errors <- paste(errors,"subjects cannot be both withdrawn and have an event. Please check subjects ", + paste(data$subject[idx],collapse=", ")) + } + + } + + if(!is.numeric(object@followup)||length(object@followup)!= 1|| object@followup <= 0){ + errors <- paste(errors,"Invalid followup argument") + } + + if(errors == "") TRUE else errors +} + +##' Class representing data to use for new predictions +##' @slot subject.data a data frame with 6 columns +##' "subject", "rand.date", "has.event", "withdrawn", "censored.at.follow.up" and "time" and "site" and "event.type" +##' see vignette and \code{EventData} for further details. +##' @slot followup A numeric value for the fixed follow up period a subject +##' is followed. This is in days. If there is no fixed followup period then +##' Inf should be used +##' @export +setClass("EventData", + slots= list(subject.data = "data.frame", + followup="numeric"), + validity = checkEventData) + + +##' Constructor for event data object +##' +##' All dates must be in one of the following formats: +##' YYYY-MM-DD, DD/MM/YY or DD Month YYYY +##' +##' +##' @param data A data frame +##' @param subject string, the column name of subject identifiers +##' @param rand.date string, the column name of randomization dates +##' @param has.event string, the column name of whether a subject had an event +##' (1) or not (0) +##' @param withdrawn string, the column name of whether a subject has withdrawn +##' (1) or not (0) +##' @param time Either a string, the column name of time each subject has been on the study +##' or a list with elements with (some of) the following named elements. +##' \code{last.date}, \code{event.date}, \code{prog.date}, \code{dth.date} and \code{withdrawn.date} +##' In this case the package will attempt to derive the time column +##' See the vignette and then eventPrediction:::AddTimeColumn for further details +##' @param site optional column for subject site +##' @param event.type optional column for the event type (e.g. unstable angina) if not included then `Had Event' will be used +##' @param remove.0.time logical, if TRUE then all subjects with time = NA or 0 are removed from the +##' data set and not included in the object. If FALSE then they are included in the simulation (but not in the model fitting) +##' @param followup A numeric value for the fixed follow up period a subject +##' is followed. This is in days. If there is no fixed followup period then +##' Inf should be used +##' @return An \code{EventData} object +##' @export +EventData <- function(data,subject,rand.date,has.event,withdrawn,time,site=NULL,event.type=NULL,remove.0.time=FALSE,followup=Inf){ + + #set time argument + arg <- if(!is.list(time)) time else NULL + + #check columns are in data frame + for(x in c(subject,rand.date,has.event,withdrawn,arg)){ + if(!is.null(x) && !x %in% colnames(data)){ + stop(paste("Column name",x,"not found in data frame")) + } + } + + if(!is.numeric(followup) || length(followup)> 1 || followup <= 0){ + stop("Invalid followup argument") + } + + #validate the input columns, deriving time from other columns if needed + data[,rand.date] <- if(nrow(data)>0) FixDates(data[,rand.date]) else data[,rand.date] + time <- if(!is.list(time)) data[,time] else AddTimeColumn(data,rand.date,has.event,withdrawn,subject,time) + site <- if(is.null(site)) rep(NA,nrow(data)) else data[,site] + + if(is.null(event.type)) + event.type <- ifelse(data[,has.event],"Has Event",factor(NA)) + else{ + event.type <- factor(data[,event.type]) + if("" %in% levels(event.type)){ + event.type[event.type==""] <- factor(NA) + event.type <- droplevels(event.type) + } + } + + #unvalidated data frame + subject.data <- data.frame( + subject=data[,subject], + rand.date=data[,rand.date], + time=time, + has.event=data[,has.event], + withdrawn=data[,withdrawn], + site=site, + event.type=event.type + ) + + + keep <- which(as.character(subject.data$subject) > "" & !is.na(subject.data$rand.date)) + if(length(keep) != nrow(subject.data)){ + warning(paste("Subjects",paste(subject.data[setdiff(1:nrow(subject.data),keep),subject],collapse=", "), + "have been removed due to invalid rand.date or missing subject ID.")) + } + + subject.data <- subject.data[keep,] + + subject.data <- warnNAtime(remove.0.time,subject.data) + + subject.data <- IncludeFollowUp(subject.data,followup) + + #finally run some validation checks which change the input data and output warnings + validation.checks <- function(subject.data,idx,warning.message,fn){ + if(length(idx)>0){ + warning(paste("Subjects",paste(subject.data[idx,subject],collapse=", "),warning.message)) + subject.data <- fn(subject.data,idx) + } + subject.data + } + + subject.data <- validation.checks(subject.data, + idx=which(subject.data$has.event==1 & is.na(subject.data$event.type)), + warning.message="have an event and no event type, their event type is set to 'Had Event'", + fn=function(subject.data,idx){ + if(!"Had Event" %in% levels(subject.data$event.type)){ + levels(subject.data$event.type) <- c(levels(subject.data$event.type),"Had Event") + } + subject.data$event.type[idx] <- "Had Event" + return(subject.data) + }) + + subject.data <- validation.checks(subject.data, + idx=which(subject.data$has.event!=1 & !is.na(subject.data$event.type)), + warning.message="have an event type but did not have an event", + fn=function(subject.data,idx){ + subject.data$event.type[idx] <- factor(NA) + subject.data$event.type <- droplevels(subject.data$event.type) + return(subject.data) + }) + + subject.data <- validation.checks(subject.data, + idx=which(subject.data$has.event==1& subject.data$withdrawn==1), + warning.message="have an event and are withdrawn, assuming they have an event at the given time", + fn=function(subject.data,idx){ + subject.data$withdrawn[idx] <- 0 + return(subject.data) + }) + + #further validation occurs in the validity function of the class + return(new("EventData", subject.data = subject.data, followup=followup)) +} + + + +##' @name show +##' @rdname show-methods +##' @aliases show,EventData-method +##' @export +setMethod("show", + "EventData", + function(object) { + cat("EventData, use object@subject.data$param to access individual columns:\n") + cat(str(object@subject.data)) + }) + + +##' @name summary +##' @rdname summary-methods +##' @aliases summary,EventData-method +##' @export +setMethod("summary","EventData", + function(object){ + df <- object@subject.data + daysinyear <- standarddaysinyear() + + if(nrow(df)>0){ + + cat(paste("Number of subjects:",nrow(df),"\n")) + cat(paste("Number of events:",sum(df$has.event==1),"\n")) + + if(sum(df$has.event==1) != 0 && length(levels(df$event.type)) != 1){ + tab <- table(df$event.type) + lapply(names(tab),function(y){cat(paste(" Of type ",y,": ",tab[y],"\n",sep=""))}) + } + + cat(paste("Number of withdrawn:", sum(df$withdrawn==1),"\n")) + cat(paste("First subject randomized:",as.Date(min(df$rand.date),origin="1970-01-01"),"\n")) + cat(paste("Last subject randomized:",as.Date(max(df$rand.date),origin="1970-01-01"),"\n")) + + if(sum(object@subject.data$has.event)>0){ + cat(paste("First Event:",as.Date(min(LastDate(df[df$has.event==1,]),na.rm=TRUE),origin="1970-01-01"),"\n")) + cat(paste("Last Event:",as.Date(max(LastDate(df[df$has.event==1,]),na.rm=TRUE),origin="1970-01-01"),"\n")) + } + + av <- average.rec(nrow(df),as.numeric(min(df$rand.date)),as.numeric(max(df$rand.date))) + cat(paste("Average recruitment (subjects/day):",roundForceOutputZeros(av,2),"\n")) + + if(!is.infinite(object@followup)){ + cat(paste("Subjects followed for ",round(object@followup), " days (", + round(object@followup/daysinyear,2)," years)\n",sep="")) + + cat(paste("Number of subjects censored at end of follow up period:", + sum(object@subject.data$censored.at.follow.up),"\n")) + + } + } + else{ + cat("Empty data frame!\n") + } +}) + + + +##' Fit a survival model to an EventData object +##' @rdname fit-methods +##' @name fit +##' @param object an EventData object +##' @param ... Additional arguments to the function +##' @param dist character, either weibull or loglogistic, which model +##' is to be fit +##' @return an EventModel object +##' @export +if (!isGeneric("fit")){ + setGeneric("fit", function(object,...) standardGeneric("fit")) +} + + + +##' @name fit +##' @rdname fit-methods +##' @aliases fit,EventData-method +##' @export +setMethod("fit","EventData",function(object,dist="weibull"){ + + if(!dist %in% c("weibull","loglogistic")){ + stop("dist must be weibull or loglogistic") + } + + if(nrow(object@subject.data)==0)stop("Empty data frame!") + if(sum(object@subject.data$has.event)==0){ + stop("Cannot fit a model to a dataset with no events") + } + + #subjects with time = 0 are set to NA for the model fitting + #so they are ignored + indat <- object@subject.data + indat$time <- ifelse(indat$time==0,NA,indat$time) + + model<-survreg(Surv(time, has.event) ~ 1, data=indat, dist=dist, y = TRUE) + + new("EventModel",model=model,event.data=object, + simParams=FromDataParam(object=model,type=dist)) +}) + + + +##' @rdname plot-methods +##' @name plot +##' @aliases plot,EventData,missing-method +##' @export +setMethod( "plot", + signature( x="EventData", y="missing" ), + function(x, xlab="log(t)", ylab="log(-log(S(t)))", main="", ...) { + + if(nrow(x@subject.data)==0)stop("Empty data frame!") + if(sum(x@subject.data$has.event)==0){ + stop("Cannot fit a model to a dataset with no events") + } + + model <- survfit(Surv(time, has.event) ~ 1, data=x@subject.data,...) + + res <- data.frame(t=model$time, s=model$surv) + res <- res[res$t>0 & res$s>0 & res$s<1,] + + df <- data.frame(x=log(res$t),y=log(-log(res$s))) + + p <- ggplot(df, aes_string(x="x", y="y")) + geom_point() + + stat_smooth(method="lm", se=FALSE, color="red", size=1 ) + + xlab(xlab) + ylab(ylab) + + ggtitle(main) + theme_bw() + + theme(panel.grid.minor = element_line(colour="gray", size=0.5)) + p + } +) + + + +##' Method to create new \code{EventData} object only including recruitment times +##' @param object The object from which to create the new \code{EventData} object +##' @rdname OnlyUseRecTimes-methods +##' @return An \code{EventData} object with time=0 and no events or withdrawn subjects +##' @name OnlyUseRecTimes +##' @export +setGeneric("OnlyUseRecTimes",function(object) standardGeneric("OnlyUseRecTimes")) + +##' @rdname OnlyUseRecTimes-methods +##' @name OnlyUseRecTimes +##' @aliases OnlyUseRecTimes,EventData-method +##' @export +setMethod("OnlyUseRecTimes","EventData",function(object){ + object@subject.data$withdrawn <- 0 + object@subject.data$has.event <- 0 + object@subject.data$censored.at.follow.up <- 0 + object@subject.data$time <- 0 + object@subject.data$event.type <- factor(NA) + object@subject.data$event.type <- droplevels(object@subject.data$event.type) + object +}) + + +##' Cut the Event Data +##' +##' Create a new EventData from an existing one which shows how the +##' data would have looked on a given date (assuming no lag in reporting of +##' events and if necessary subjects have been censored at cutoff point). +##' If subject was censored before the cut date then their censored date remains +##' unchanged +##' @param object The EventData object +##' @param date The date to cut the data at +##' @rdname CutData-methods +##' @name CutData +##' @return \code{EventData} object with the data censored at the appropriate point +##' @export +setGeneric("CutData",function(object,date) standardGeneric("CutData")) + + +##' @rdname CutData-methods +##' @name CutData +##' @aliases CutData,EventData-method +##' @export +setMethod("CutData","EventData",function(object,date){ + + cut.date <- FixDates(date) + + subject.data <- object@subject.data + + #only include subjects who have been randomized by this time + subject.data <- subject.data[subject.data$rand.date<=cut.date,] + if(nrow(subject.data)==0){ + stop("Cut date before first subject randomization") + } + + censored.times <- cut.date - subject.data$rand.date + 1 + idx <- which(censored.times < subject.data$time) + + if(length(idx)==0&&nrow(subject.data)==nrow(object@subject.data)){ + warning("Cut date is too late (no ongoing subjects at this time) and so has no effect") + } + + + + subject.data$time <- pmin(censored.times,subject.data$time) + subject.data$has.event[idx] <- 0 + subject.data$event.type[idx] <- factor(NA) + subject.data$event.type <- droplevels(subject.data$event.type) + subject.data$withdrawn[idx] <- 0 + + EventData( + data=subject.data, + subject="subject", + rand.date="rand.date", + has.event="has.event", + withdrawn="withdrawn", + time="time", + site="site", + event.type="event.type", + followup=object@followup + ) + +}) + +##' This function calculates the event type for pfs data +##' +##' This function is called before creating the EventData object +##' @param has.event A vector or whether subjects had event +##' @param prog.date A vector of dates (or strings in the standard eventPrediction allowed formats) +##' of progression.date or blank if unknown +##' @param dth.date A vector of dates (or strings in the standard eventPrediction allowed formats) +##' of death date or blank if unknown +##' @return A vector with NA if subject does not have event, +##' "Death" if dth.date <= prog.date or prog.date blank, +##' "Progression (not death)" if prog.date < dth.date or dth.date blank +##' "Progression (unknown if death)" if both prog.date and dth.date are blank +##' @export +CalculateProgEventTypes <- function(has.event,prog.date,dth.date){ + #called before creating EventDate object so extra validation needed + + if(any(!has.event %in% c(0,1))){ + stop("Invalid has.event argument") + } + + prog.date <- FixDates(prog.date) + dth.date <- FixDates(dth.date) + + if(!(length(prog.date)==length(dth.date) && length(prog.date) ==length(has.event))){ + stop("Invalid lengths of arguments") + } + + unlist(lapply(seq_along(has.event),function(x){ + if(!has.event[x]) return(NA) + + dth <- dth.date[x] + prog <- prog.date[x] + + if(!is.na(prog) && !is.na(dth)){ + if(dth<=prog) return("Death") + return("Progression (not death)") + } + + + #just dth return dth + if(!is.na(dth)) return("Death") + + #just prog return prog + if(!is.na(prog)) return("Progression (not death)") + + #Otherwise are relying on lastDate so do not know which + return("Progression (unknown if death)") + + })) + + +} + + +##' Create an \code{EventData} object with no subjects +##' +##' @param followup The time subjects are followed until censoring +##' @export +EmptyEventData <- function(followup=Inf){ + + if(length(followup) > 1 || !is.numeric(followup) || followup < 0){ + stop("Invalid followup argument") + } + + d <- data.frame(subject=character(0), + randDate=numeric(0), + has.event=numeric(0), + withdrawn=numeric(0), + time=numeric(0)) + + #convert to an empty EventData object + EventData(data=d, + subject="subject", + rand.date="randDate", + has.event="has.event", + withdrawn="withdrawn", + time="time", + followup=followup) +} + +##' Calculate the number of days at risk +##' +##' For \code{EventData} object this is the number of days +##' at risk of the input data. For \code{SingleSimDetails} object +##' this is the median numberof days at risk in the simulation set at the given time +##' +##' @param object The object to calculate the days at risk +##' @param ... Additional arguments passed to the function +##' @param times A vector of dates for calculating cutting the simulated date at in order to +##' calculate the number of days at risk by this point. +##' @name CalculateDaysAtRisk +##' @rdname CalculateDaysAtRisk-methods +##' @export +setGeneric("CalculateDaysAtRisk",function(object,...)standardGeneric("CalculateDaysAtRisk")) + + +##' @rdname CalculateDaysAtRisk-methods +##' @name CalculateDaysAtRisk +##' @aliases CalculateDaysAtRisk,EventData-method +##' @export +setMethod("CalculateDaysAtRisk","EventData", + function(object){ + return(sum(object@subject.data$time)) + } +) + +##' Makes a barplot with the number of events over time +##' +##' @param object The object to create the event versus time plot +##' @param ... Additional arguments passed to the function +##' @param timeunit The resoloution on the x-axis (month, weeks or quarter) +##' @name EventVsTimePlot +##' @rdname EventVsTimePlot-methods +##' @export +setGeneric("EventVsTimePlot",function(object,timeunit="Months",...)standardGeneric("EventVsTimePlot")) + + +##' @rdname EventVsTimePlot-methods +##' @name EventVsTimePlot +##' @aliases EventVsTimePlot,EventData-method +##' @export +setMethod("EventVsTimePlot","EventData", + function( object, timeunit="month" ){ + my.data <- object@subject.data + my.data$lastdate <- LastDate( my.data ) + my.data$has.event <- as.integer( my.data$has.event ) + my.data$mytimeunit <- as.Date( cut( my.data$lastdate, + breaks = timeunit ) ) + + mybreaks <- if( timeunit == "quarter" ) "3 months" else "1 month" + + ggplot( data = my.data, aes_string( x="mytimeunit", y="has.event" )) + + stat_summary( fun.y = sum, geom = "bar") + + scale_x_date( + date_labels = "%Y-%b", + date_breaks = mybreaks ) + + theme_bw() + + theme( axis.text.x = element_text(angle = 45, hjust = 1) ) + + xlab( "" ) + + ylab( "Observed events" ) + } +) +