biomkrAccrual / Git / [d9ee58] /R/prevalence.R

Models:
joseph-gordon/
biomkrAccrual
Downloads: 1
[d9ee58]: / R / prevalence.R
History
Download this file
471 lines (419 with data), 15.4 kB

#' A class for an object to contain the proportions of arriving patients to be
#' allocated to each arm of the trial.
#' 
#' @slot recruit_arm_prevalence Numeric vector of the expected proportion 
#' of patients eligible for each recruitment arm.
#' @slot recruit_arm_prevalence_start Numeric vector of the initial proportions
#' of patients eligible for each recruitment arm.
#' @slot recruit_arm_names Character vector of the names of the recruitment 
#' arms.
#' @slot shared_control TRUE if using a shared control arm for all 
#' experimental arms.
#' @slot ctrl_ratio Proportion of patients assigned to control
#' @slot treatment_arm_ids Named list of lists of recruitment arms by 
#' treatment arm.
#' @slot treatment_arm_ids_start Named list of lists of the initial 
#' configuration of recruitment arms by treatment arm.
#' @slot recruit_arm_id Automatically generated integer vector of the ID 
#' numbers of the recruitment arms.
#' @slot treatment_counts Automatically generated named integer vector of 
#' number of recruitment arms recruiting to each treatment arm.
#' @slot treatment_arm_struct Automatically generated logical matrix of 
#' treatment arms by recruitment arms.
#' @slot treatment_arm_struct_start Automatically generated logical matrix of 
#' the initial configuration of treatment arms by recruitment arms.
#' @slot experimental_arm_prevalence Automatically generated matrix of 
#' prevalences of treatment arms by recruitment arms
#' 
#' @param props_df Dataframe of expected biomarker prevalences for the 
#' regions, with one column `category` containing names for the 
#' biomarkers, and one column per region.
#' @param arms_ls List of lists of recruitment arms which recruit to
#' each treatment arm.
#' @param centres_df Dataframe containing columns `site`, the index
#' number of each site; `start_month`, the month in which that site
#' is expected to start recruiting; `mean_rate`, the expected number 
#' of patients from that site per month; `region`, the index of the
#' region the site is in (should be in the same order as the columns
#' in `props_df`); and an optional column `site_cap`, if there is a 
#' recruitment cap on any of the sites.
#' @param precision For the Dirichlet model of biomarker prevalences, 
#' variability decreases as precision increases. Defaults to 10.
#' @param shared_control TRUE if all experimental arms share one 
#' control arm; FALSE if they each have separate control arms.
#' @param ctrl_ratio Proportion of patients assigned to control
#' @param fixed_region_prevalences TRUE if biomarker prevalences 
#' should be considered to be identical for all sites within a 
#' region; FALSE if they should be drawn from a Dirichlet distribution
#' with a mean of the specified prevalence.
#' 
#' @name trial_structure
#' 
#' @import S7
#' 
trial_structure <- S7::new_class("trial_structure",
  package = "biomkrAccrual",
  properties = list(
    # These need explicitly setting
    recruit_arm_prevalence = S7::class_double,
    recruit_arm_prevalence_start = S7::class_double,
    recruit_arm_names = S7::class_character,
    shared_control = S7::class_logical,
    ctrl_ratio = S7::class_vector,
    treatment_arm_ids = S7::class_list,
    treatment_arm_ids_start = S7::class_list,
    # These are generated from existing properties at the time they execute
    recruit_arm_ids = S7::new_property(
      getter = function(self) seq_len(nrow(self@recruit_arm_prevalence))
    ),
    treatment_counts = S7::new_property(
      getter = function(self) length(self@treatment_arm_ids)
    ),
    # Will inherit class matrix despite S7 silliness
    treatment_arm_struct = S7::new_property(
      getter = function(self) {
        get_matrix_struct(self@treatment_arm_ids, self@recruit_arm_prevalence)
      }
    ),
    treatment_arm_struct_start = S7::new_property(
      getter = function(self) {
        get_matrix_struct(
          self@treatment_arm_ids_start, self@recruit_arm_prevalence_start
        )
      }
    ),
    # array[recruit arms, treat arms, prevalence set]
    experimental_arm_prevalence = S7::new_property(
      getter = 
        function(self) {
          get_array_prevalence(
            self@treatment_arm_struct, 
            self@recruit_arm_prevalence,
            self@shared_control,
            self@ctrl_ratio
          )
        }
    )
  ),
  # Make new instance by calling trial_structure(props_df, arms_ls)
  constructor = function(
    props_df = S7::class_missing, 
    arms_ls = S7::class_missing,
    centres_df = S7::class_missing,
    precision = S7::class_missing,
    shared_control = S7::class_missing,
    ctrl_ratio = S7::class_missing,
    fixed_region_prevalences = S7::class_missing
  ) {
    # Create the object and populate it
    S7::new_object(
      # Parent class
      S7::S7_object(),
      recruit_arm_names = props_df$category, 
      recruit_arm_prevalence = 
        get_recruit_arm_prevalence(
          props_df, centres_df, precision, fixed_region_prevalences
        ),
      recruit_arm_prevalence_start = 
        get_recruit_arm_prevalence(
          props_df, centres_df, precision, fixed_region_prevalences
        ),
      shared_control = shared_control,
      ctrl_ratio = ctrl_ratio,
      treatment_arm_ids = arms_ls,
      treatment_arm_ids_start = arms_ls
    )
  },
  validator = function(self) {
    if (!is.numeric(self@recruit_arm_prevalence[, -1])) {
      "Recruitment arm prevalences must be numbers"
    } else if (
      length(self@recruit_arm_names) != nrow(self@recruit_arm_prevalence)
    ) {
      "Different number of recruitment arm names supplied than prevalences"
    } else if (
      !(all(sapply(
        self@treatment_arm_ids, 
        function(v) is.integer(unlist(v)) || is.na(v)
      )))
    ) {
      "Elements from the treatment arm list should be integer vectors or NA"
    } 
  }

)


#' Dirichet regression model for biomarker prevalence
#' 
#' For each site, draws from the Dirichlet distribution using the
#' expected prevalences for the region. 
#' expected prevalences by region
#' 
#' @param props_df Dataframe with one row per biomarker.  Has one column 
#' `category`, containing names of the biomarkers, plus one column for 
#' each region, containing the expected biomarker prevalences for the regions.
#' @param centres_df Dataframe with one row per site, including one column
#' `region`, containing the index number for the region for each site.
#' Indices are assumed to be in the same order as the columns in `props_df`.
#' @param precision Variability decreases as precision increases.
#' @param fixed_region_prevalences TRUE if biomarker prevalences 
#' should be considered to be identical for all sites within a 
#' region; FALSE if they should be drawn from a Dirichlet distribution
#' with a mean of the specified prevalence.
#'   
#' @return Matrix of prevalences with one column per site and one 
#' row per biomarker; each column sums to 1.
#' 
#' @importFrom checkmate assert_names assert_data_frame assert_atomic_vector 
#' assert_integerish assert_numeric
#' 
get_recruit_arm_prevalence <- function(
  props_df, centres_df, precision, fixed_region_prevalences
) {

  # Check format of fixed_region_prevalences
  checkmate::assert_logical(
    fixed_region_prevalences, 
    len = 1, 
    any.missing = FALSE, 
    null.ok = FALSE
  )

  # If fixed_region_prevalences is FALSE, check contents of 
  # precision; otherwise should be NULL
  if (fixed_region_prevalences) {
    assert_null(precision)
  } else {
    # Check format and content of precision
    checkmate::assert_numeric(
      precision,
      lower = 10^-7,
      finite = TRUE,
      len = 1,
      any.missing = FALSE,
      null.ok = FALSE
    )
  }

  # Check format and content of centres_df

  checkmate::assert_data_frame(
    centres_df,
    types = "numeric",
    any.missing = FALSE,
    min.cols = 5,
    max.cols = 6,
    min.rows = 1,
    col.names = "named",
    null.ok = FALSE
  )

  checkmate::assert_names(
    names(centres_df),
    subset.of = c(
      "site", "start_month", "mean_rate", "region", "site_cap", "start_week"
    ),
    must.include = c(
      "site", "start_month", "mean_rate", "region", "start_week"
    )
  )

  sites_in_region <- centres_df$region

  checkmate::assert_atomic_vector(
    sites_in_region,
    any.missing = FALSE,
    min.len = 1,
    max.len = 10^4
  )

  checkmate::assert_integerish(
    sites_in_region,
    lower = 1,
    upper = 10^4,
    any.missing = FALSE,
    null.ok = FALSE
  )
  
  # Check format and content of props_df

  checkmate::assert_data_frame(
    props_df,
    types = c("numeric", "character"),
    any.missing = FALSE,
    # number of regions + "category"
    min.cols = max(sites_in_region) + 1,
    min.rows = 2,
    null.ok = FALSE
  )

  checkmate::assert_names(names(props_df), must.include = "category")


  # Any column that isn't "category" is assumed to be a region -
  # allows for named regions
  region_prevalence <- 
    props_df[, grep("^category$", names(props_df), invert = TRUE)]

  checkmate::assert_numeric(
    as.matrix(region_prevalence),
    lower = 0,
    upper = 1,
    finite = TRUE
  )

  if (fixed_region_prevalences) {
    # Use region prevalences unchanged
    recruit_arm_prevalence <- as.matrix(
      region_prevalence[, sites_in_region]
    )
    # Scale columns to sum to 1
    recruit_arm_prevalence <- sweep(
      recruit_arm_prevalence,
      2,
      colSums(recruit_arm_prevalence),
      FUN = "/"
    )

  } else {
    
    # Draw from Dirichlet distribution
    recruit_arm_prevalence <- do_dirichlet_draws(
      region_prevalence, sites_in_region, precision
    )
  }

  return(recruit_arm_prevalence)

}


#' Draws from dirichlet regression model for biomarker prevalences
#' to create the prevalence matrix.
#' 
#' @param region_prevalence Dataframe with one column for each 
#' region and one row for each biomarker, containing prevalences as 
#' probabilities.
#' @param sites_in_region Vector with a region index number for each
#' site, the index determined by the order of the columns in
#' `region_prevalence`.
#' @param precision Variability decreases as precision increases.
#'  
do_dirichlet_draws <- function(region_prevalence, sites_in_region, precision) {
  
  # Predeclare prevalence matrix
  recruit_arm_prevalence_mx <- matrix(
    data = NA, 
    ncol = length(sites_in_region),
    nrow = nrow(region_prevalence)
  )
  
  # Draw prevalences for sites in each region in turn
  for (region in unique(sites_in_region)) {

    # Sites in region
    site_indices <- which(sites_in_region == region)

    bio_prevalence <- rdirichlet_alt(
      n = length(site_indices),
      mu = region_prevalence[, region],
      phi = precision
    )
    # rdirichlet_alt produces the transpose of what we want,
    # for consistency with other implementations of rdirichlet
    recruit_arm_prevalence_mx[, site_indices] <- t(bio_prevalence)
  }

  return(recruit_arm_prevalence_mx)
}


#' Converts trial structure and prevalence information into matrix form
#' 
#' @param arms_ls List of lists of recruitment arms which recruit to
#' each treatment arm.
#' @param recruit_arm_prevalence Matrix of prevalences with one row per 
#' biomarker and one column per site; each column sums to 1.
#' 
#' @return Logical matrix with one row per biomarker and one column per
#' treatment arm; TRUE where the treatment recruits from that biomarker.
#' 
get_matrix_struct <- function(arms_ls, recruit_arm_prevalence) {

  # Predeclare matrix as no_biomarkers * no_treatments
  no_treats <- length(arms_ls)
  no_biomarkers <- nrow(recruit_arm_prevalence)
  
  # This one is logical, to avoid rounding errors
  arm_structure_mx <- 
    matrix(FALSE, max(unlist(arms_ls), no_biomarkers, na.rm = TRUE), no_treats)

  # Loop, changing to TRUE for arms including that treatment
  for (icol in seq_len(no_treats)) {
    
    if (any(!is.na(arms_ls[[icol]]))) {
      arm_structure_mx[unlist(arms_ls[[icol]]), icol] <- TRUE
    }
  }

  return(arm_structure_mx)
}


#' Make array with the prevalences by treatment arm, recruitment arm and 
#' prevalence set
#' @param arm_structure_mx Logical matrix of recruitment arms by treatment arm
#' @param recruit_arm_prevalence Data frame with sets of prevalences, in 
#' columns, for each arm (rows) 
#' @param shared_control TRUE if all treatment arms share the
#' same control arm; FALSE if each treatment arm has its own 
#' control. Defaults to TRUE.
#' @param ctrl_ratio Ratio of patients assigned to treatment versus control
#' 
#' @return arm_prevalence_ar Array of prevalences, recruitment arms * 
#' (treatment arms + control arms) * prevalence sets
#' 
get_array_prevalence <- function(
  arm_structure_mx, recruit_arm_prevalence, shared_control, ctrl_ratio
) {
  no_treatments <- ncol(arm_structure_mx)
  no_recruit_arms <- nrow(arm_structure_mx)
  no_regions <- ncol(recruit_arm_prevalence)

  scale_factor <- 
    ifelse(
      sapply(
        rowSums(arm_structure_mx), 
        function(x) isTRUE(all.equal(x, 0, tolerance = 1e-6))
      ),
      1, 
      rowSums(arm_structure_mx)
    )

  # List of matrices by centre 
  ## Total proportion recruited for centre on each open arm 
  ## - more efficient to fix later
  prev_ls <- lapply(
    seq(no_regions),
    function(i) recruit_arm_prevalence[, i] * arm_structure_mx / scale_factor
  )

  # Apply control configuration
  if (shared_control) {
    prev_ls <- lapply(
      prev_ls,
      function(mx) cbind(mx * ctrl_ratio[1], rowSums(mx) * ctrl_ratio[2])
    )
  } else {
    prev_ls <- lapply(
      prev_ls,
      function(mx) cbind(mx * ctrl_ratio[1], mx * ctrl_ratio[2])
    )
  }

  # Make into array
  arm_prevalence_ar <- array(
    data = do.call(cbind, prev_ls),
    dim = c(dim(prev_ls[[1]]), length(prev_ls))
  )
  
  return(arm_prevalence_ar)
}


#' Close off treatment arms.
#' In the list of treatment arm IDs, replaces the vector of recruitment
#' arm IDs with NA.
#' If any recruitment arms are closed, sets their prevalence to zero but 
#' does not recalculate prevalence vector, on the assumption that recruitment
#' from aites will fall because no patients with those characteristics will 
#' be recruited from that point.
#' Class object will automatically generate new trial structure and 
#' prevalence matrices.
#' 
#' @param structure_obj An object of class `trial_structure`.
#' @param arms Vector or scalar of integer arm ID numbers to remove
#' 
remove_treat_arms <- S7::new_generic("remove_treat_arms", "structure_obj")
S7::method(remove_treat_arms, trial_structure) <- function(
  structure_obj,
  arms
) {
  
  # Mark treatment arms as removed using NA; 
  # automatic getter for treatment_arm_struct does the rest
  structure_obj@treatment_arm_ids[arms] <- NA_integer_

  # Set prevalence to 0 for any recruitment arms which now have no
  # experimental arms to recruit to
  no_exp_arms <- rowSums(structure_obj@treatment_arm_struct) < 1
  structure_obj@recruit_arm_prevalence[no_exp_arms, ] <- 0

  return(structure_obj)
}


#' Check whether an object is of class "trial_structure".
#' 
#' @param x Object to test
#' 
#' @return TRUE or FALSE
#' 
#' @importFrom S7 new_generic method class_any
#' @export
#' 
is.trial_structure <- S7::new_generic("is.trial_structure", "x")
S7::method(is.trial_structure, S7::class_any) <- function(x) {
  inherits(x, "biomkrAccrual::trial_structure")
}