Diff of /R/fromDataInternal.R [000000] .. [f2e496]

Switch to side-by-side view

--- a
+++ b/R/fromDataInternal.R
@@ -0,0 +1,421 @@
+#This file contains the private functions
+#for the predict from data part of the package
+#apart from those used within the simulate function
+#see fromDataSimInternal.R. For functions called by 
+#AddTimeColumn see timeInternal.R
+
+##'@include common.R simQOutput.R timeInternal.R
+NULL
+
+
+# Deal with subjects who have time = 0 or NA when creating 
+# the EventData object
+# 
+# @param remove.0.time logical if TRUE then all subjects with time = NA or 0 are removed from the
+# data set and not included in the object. If FALSE then they are included in the simulation 
+# (but not in the model fitting)
+# @param subject.data A data frame to be included in the subject.data slot 
+# of the EventData object
+# @return subject.data with appropriate subjects removed/time set to 0
+warnNAtime <- function(remove.0.time,subject.data){
+  
+  if(!is.logical(remove.0.time) || length(remove.0.time) > 1){
+    stop("Invalid remove.0.time argument")
+  }
+  
+  index <- which(subject.data$time==0 | is.na(subject.data$time))
+  if(length(index)==0){
+    return(subject.data)
+  }
+  
+  
+  if(remove.0.time){
+    warning(paste("Subjects",paste(subject.data[index,]$subject,collapse=", "),"have time=NA or 0",
+                  "and have been removed from the data set."))
+    subject.data <- subject.data[!1:nrow(subject.data) %in% index,] 
+  }
+  else{
+    warning(paste("Subjects", paste(subject.data[index,]$subject, collapse=", "),
+                  "have time=NA or 0, their time has been set to 0 and",
+                  "they will not be used when fitting the model but will have event times simulated",
+                  "when performing the predictions unless withdrawn is set to 1."))
+    subject.data$time[is.na(subject.data$time)] <- 0
+  }
+  
+  subject.data
+}
+
+# Get the multiplicative factor to convert units to
+# days for the KM-plot
+# @param units "Days", "Months" or "Years"
+# @param daysinyear The number of days in a year (e.g. 365 or 365.25)
+# @return 1, daysinyear/12 or daysinyear respectively
+GetScaleForKM <- function(units,daysinyear){
+  if(!units %in% c("Days","Months","Years")){
+    stop("Invalid units argument")
+  }
+  
+  xscale <- 1
+  if(units == "Months") xscale <- daysinyear/12
+  if(units == "Years") xscale <- daysinyear
+  
+  return(xscale)
+}
+
+# Function which inputs a vector of dates/strings
+# and outputs a vector of dates
+# 
+# If S3 dates are input they are returned unchanged.
+# If character strings are input then they must all
+# have the same format either "YYYY-MM-DD", "DD/MM/YYYY"
+# or "DD Month YYYY"
+# 
+# @param vec The vector of characters
+# @return a vector of Dates.
+# If conversion fails an error is thrown
+FixDates <- function(vec){
+     
+   if(all(is.na(vec))) return(as.Date(vec))
+   if(class(vec)=="Date") return(vec)
+   
+   vec[vec==""] <- NA
+   formats <- c("%d/%m/%Y","%d %b %Y","%Y-%m-%d")
+       
+   ans <- lapply(formats,function(x){
+     as.Date(vec,format=x)
+   })
+   
+   which_date <- unlist(lapply(1:3,function(x){
+     sum(is.na(ans[[x]]))==sum(is.na(vec))
+   }))
+   
+   if(sum(which_date)!= 1){
+     stop("Error reading date format it should be of the form
+        YYYY-MM-DD, DD/MM/YYYY or DD Month YYYY ") 
+   }
+   
+  ans <- ans[[which(which_date,TRUE)]]
+  years <- as.character((ans),format="%Y") 
+  years <- years[!is.na(years)]
+   
+  if(any(as.numeric(years) < 1000) ){
+    stop("All years must have 4 digits, e.g. 01/01/2015 not 01/01/15")
+  } 
+   
+  return(ans) 
+}
+
+
+# Set the censored.at.follow.up column for EventData@@subject.data
+# @param subject.data An EventData@@subject.data data frame without the
+# censored.at.follow.up column
+# @param followup The follow period (in days)
+# @return subject.data with additional censored.at.follow.up column
+# Warnings will be output if dates have to be changed due to invalid data 
+# for example if event occurs after followup period 
+IncludeFollowUp <- function(subject.data,followup){
+  
+  subject.data$censored.at.follow.up <- as.numeric(subject.data$time > followup)
+  subject.data$time <-ifelse(subject.data$censored.at.follow.up==1,followup,subject.data$time)
+  
+  
+  resetToFollowUp <- function(data,idx,warning.msg){
+    if(length(idx)>0){
+      data$withdrawn[idx] <- 0
+      data$has.event[idx] <- 0
+      data$event.type[idx] <- NA
+      warning(paste("Subjects",paste(data[idx,"subject"],collapse=", "),warning.msg))
+    }
+    data
+  }
+  
+  subject.data <- resetToFollowUp(data=subject.data,
+                   idx=which(subject.data$withdrawn==1 & subject.data$censored.at.follow.up==1),
+                   warning.msg="have withdrawn dates after followup period and so have been censored at followup")
+  
+  resetToFollowUp(data=subject.data,
+                 idx=which(subject.data$has.event==1 & subject.data$censored.at.follow.up==1),
+                 warning.msg="had event after followup period and so have been censored at followup instead of having an event")
+
+}
+
+
+# Create an empty data frame for 
+# event.pred.data or time.pred.data
+# slots of the \code{WeibullResults} class
+# @return This empty data frame
+EmptyPredictionDF <- function(){
+  
+  data.frame(time=as.Date(character()),
+                 event=numeric(),
+                 CI_low=numeric(),
+                 CI_high=numeric(),
+                 daysatrisk=numeric())
+}
+
+
+# Create the text for summarizing a \code{FromDataResults object}
+# @param object A \code{FromDataResults} object
+# @param round.method See \code{eventPrediction:::myRound.Date}
+# @param text.width numeric The width of the text to be returned. See \code{base::strwrap}
+# @param show.predictions Logical if TRUE then include the time.pred.data and
+# event.pred.data information in the text 
+# @param show.at.risk Output the median number of at risk years
+# @return A string (which can be output using cat)
+getFromDataResultsSummaryText <- function(object,round.method="None",text.width=60,show.predictions=TRUE,show.at.risk=TRUE){
+  
+  daysinyear <- standarddaysinyear()
+  
+  data <- object@event.data
+  indat <- data@subject.data
+  indat$event.date <- ifelse(indat$has.event, LastDate(indat),NA)
+  
+  N.subjects <- length(indat$rand.date) #subjects in data set
+  
+  title <- ""
+  if(N.subjects!=0){
+    last.date <- if(any(!is.na(indat$event.date))) as.Date(max(indat$event.date, na.rm=TRUE),origin="1970-01-01")
+                 else "NA"
+    title <- paste0(N.subjects, " patients recruited where last patient in on ", max(indat$rand.date))
+    
+    if(class(last.date)=="Date"){
+      title <- paste0(title, " and last event observed at ", last.date,
+              " (",sum( indat$has.event, na.rm=TRUE), " events). ")
+    }
+    else{
+      title <- paste0(title," and no events observed. ")
+    }
+  }
+    
+    
+  if(object@Naccrual > 0){
+      title <- paste0(title, 
+                      " Out of ", N.subjects+object@Naccrual, " patients ",  object@Naccrual, 
+                      " were simulated using ", object@accrualGenerator@text," ")
+  }
+  
+  
+  title <- paste(title, "Using ", object@simParams@type ," survival model. ",sep="")
+  
+  NA.note <- ""
+  
+  if(nrow(object@event.pred.data)!=0 && show.predictions){
+    df <- object@event.pred.data 
+    ans <- lapply(1:nrow(df),function(x){
+      target <- myRound.Date(c(df[x,"CI_low"],df[x,"time"],df[x,"CI_high"]),round.method)
+      
+      retVal <- paste0("The time at which ", df[x,"event"]," events have occurred is predicted to be ",
+             target[2], " [", target[1], ", ", target[3], "]" )
+    
+      if(show.at.risk){
+        retVal <- paste(retVal," with an expected ",round(df[x,"daysatrisk"]/daysinyear,2)," years of subjects at risk at this time",sep="")
+      }  
+      paste(retVal,". ",sep="")
+    }) 
+  
+    if(any(object@event.pred.data$time==as.Date(Inf,origin="1970-01-01"))){
+      NA.note <- paste("An expected time of NA implies that,",
+                       "in fewer than 50% of simulations the given number of events occurred.")
+    }else if(any(object@event.pred.data$CI_high==as.Date(Inf,origin="1970-01-01"))){
+      NA.note <- paste("A CI value of NA implies that, due to subject dropout, ",
+                       "in fewer than ",100*(1-object@limit),
+                        "% of the simulations the given number of events occurred.", sep="")
+    }
+    
+    title <- paste(title,paste(ans,collapse=""),sep="")
+  }
+  
+  if(nrow(object@time.pred.data)!=0 && show.predictions){
+    df <- object@time.pred.data 
+    ans <- lapply(1:nrow(df),function(x){
+      retVal <- paste0("On ", df[x,"time"]," the predicted number of events is ",
+             df[x,"event"], " [", df[x,"CI_low"], ", ", df[x,"CI_high"], "]" )
+    
+      if(show.at.risk){
+        retVal <- paste(retVal," with an expected ",round(df[x,"daysatrisk"]/daysinyear,2)," years of subjects at risk",sep="")
+      }  
+      paste(retVal,". ",sep="")  
+    }) 
+    
+    title <- paste(title,paste(ans,collapse=""),sep="")
+  }
+  
+  title <- paste(title,NA.note,sep="")
+  
+  return(AddLineBreaks(title,text.width=text.width))
+}
+
+
+# Round dates
+# 
+# @param mydates a vector of 3 dates, the
+# lower CI value, the median and the upper CI value
+# @param round.method character, if "toMonths" then 
+# the following procedure is applied:
+# lower CI: take the month of the date 15 days earlier \cr
+# median: take the month of the given date \cr
+# upper CI: take the month of the date 15 days later \cr 
+# otherwise the dates are unchanged
+# @return The rounded dates
+myRound.Date <- function(mydates,round.method){
+ 
+  if(round.method=="toMonths"){
+     c(format( mydates[1] - 15, format="%b %Y" ),
+       format( mydates[2], format="%b %Y"),
+       format( mydates[3] + 15, format="%b %Y" ))
+  }
+  else{
+    mydates
+  }
+}
+
+
+# Calculate the last date on study for subjects
+# 
+# Code: \code{ifelse(time==0,rand.date,time+rand.date-1)}
+# 
+# @param time A vector of times on study
+# @param rand.date A vector of randomization dates
+# @return A vector of dates
+internal.Date <- function(time,rand.date){
+   as.Date(ifelse(time==0,rand.date,time+rand.date-1),origin="1970-01-01")
+}
+
+# Calculate the last date on study for subjects
+#
+# The last date is the date of censoring or date of withdrawal
+# or date of event for subjects
+# @param indat  A data frame e.g. EventData@@subject.data
+# @return A vector of dates
+LastDate <- function(indat){
+  internal.Date(indat$time,indat$rand.date)
+}
+
+
+
+# Create a data frame which contains only subjects
+# who have been censored before a given date
+# @param data A data frame e.g. EventData@@subject.data
+# @param censor.date The given censored cutoff date
+# @return The subset of \code{data} which is censored (i.e. does not
+# have an event and is not withdrawn) before the given date
+GetLaggedSubjects <- function(data,censor.date){
+  data <- data[LastDate(data) < censor.date,]
+  data[data$has.event==0 & data$withdrawn==0 & data$censored.at.follow.up==0,]
+}
+
+
+
+# Function to derive the time on study for subjects given 
+# columns such as dth.date, event.date etc.
+# 
+# See the vignette for the logic used in calculating the time. 
+# Note the changing of times so they are <= followup period is
+# carried out in a subsequent function
+# 
+# see timeInternal.R
+#
+# @param data A data frame 
+# @param rand.date string, the column name of randomization dates
+# @param has.event string, the column name of whether a subject had an event
+# (1) or not (0) 
+# @param withdrawn string, the column name of whether a subject has withdrawn
+# (1) or not (0)  
+# @param subject, the column name of the subject ID
+# @param time.list A list of column names used to calculate the time on study for example
+# \code{time=list(last.date="lastDate",dth.date="dthDate", prog.date="progDate",
+# withdrawn.date="withdrawnDate")}
+# @return A vector containing time on study
+AddTimeColumn <- function(data,rand.date,has.event,withdrawn,subject,time.list){
+  
+  allowed.colnames <- c("dth.date","event.date","prog.date","withdrawn.date","last.date")
+  validate.time.list.arguments(data,rand.date,has.event,withdrawn,time.list,allowed.colnames)
+  
+  #create the date objects
+  for(x in allowed.colnames){
+    if(!is.null(time.list[[x]])) data[,x] <- FixDates(data[,time.list[[x]]]) 
+  }
+  
+  #check prog is <= dth if they exist
+  if("dth.date" %in% names(time.list) && "prog.date" %in% names(time.list)){
+    idx <- which(!is.na(data[,"dth.date"]) & 
+                 !is.na(data[,"prog.date"]) & 
+                   data[,"dth.date"] < data[,"prog.date"])
+    if(length(idx)>0){
+      warning(paste("Subjects",paste(data[idx,subject] ,collapse=", ")  ,
+                    "have progression date after death date. This is invalid and should be fixed"))
+    }
+  }
+  
+  #This vector will be populated with subjects time on study and returned 
+  ans <- rep(NA,nrow(data))
+  
+  #first deal with subjects who are censored
+  are.censored <- which(!data[,has.event] & !data[,withdrawn])
+  ans <- Time.Deal.With.Censored(ans,data,rand.date,are.censored,time.list,allowed.colnames[1:4],subject)
+  
+  
+  #next deal with subjects who have event
+  subs.had.event <- which(data[,has.event]==1)
+  ans <- Time.Deal.With.Had.Event(ans,data,rand.date,subs.had.event,time.list,allowed.colnames[1:3],subject,has.event)
+  
+  #finally subjects who have withdrawn
+  has.withdrawn <- which(data[,withdrawn]==1 & data[,has.event]==0)
+  ans <- Time.Deal.With.Withdrawn(ans,data,rand.date,has.withdrawn,time.list,withdrawn,subject,has.event)
+  
+    
+  #output warning if subject has withdranw date but have not been withdrawn
+  if(!is.null(time.list[["withdrawn.date"]])){
+    r <- which(any(!is.na(data[,"withdrawn.date"]) & data[,withdrawn]==0))
+    if(any(r)){
+      warning(paste("Subjects",paste(data[r,subject],collapse=", "), "have not withdrawn yet have a withdrawn date.",
+              "The withdrawn dates are ignored"))
+    }
+  }
+
+  #if we have not been able to calculate a time then output
+  #an error if had event or warning if censored/withdrawn
+  if(any(is.na(ans))){
+    r <- which(is.na(ans))
+    if(any(data[r,has.event]==1)){
+      s <- intersect(which(data[,has.event]==1), r)
+      stop(paste("Subjects",paste(data[s,subject],collapse=", "), "have an event but no time on study can be calculated.",
+                 "Is there any missing data in your data set?"))
+    }
+    
+    warning(paste("Subjects",paste(data[r,subject],collapse=", "),
+                  "are censored/withdrawn subject(s) and no time on study can be calculated.",
+                  "For these subjects the time on study is set to 0. Is there any missing data",
+                  "in your data set?"))
+  }  
+  
+  return(as.numeric(ans))
+      
+} 
+
+
+# Maximum Likelihood estimate of uniform
+# accrual parameter k
+# 
+# @param B recruitment period
+# @param s.i a vector of recruitment
+# times (numeric, time since start of recruitment period)
+# @return estimate of k = 1/(logB- mean(log(s.i)))
+MLestimateK <- function(B,s.i){
+  if(any(s.i<=0) || B <= 0 || any(s.i > B)) stop("Invalid arguments in MLestimateK")
+  
+  s <- mean(log(s.i))
+  return(1/(log(B)-s))
+}
+
+
+# Calculate the average recruitment rate (subjects/day)
+# @param N number of subjects recruited
+# @param first.date the date of the first recruitment (numeric)
+# @param last.date the date of the last recruitment (numeric)
+# @return The average recruitment
+average.rec <- function(N,first.date,last.date){ 
+  N/(last.date-first.date+1)
+}
+
+