--- a +++ b/R/prevalence.R @@ -0,0 +1,470 @@ +#' A class for an object to contain the proportions of arriving patients to be +#' allocated to each arm of the trial. +#' +#' @slot recruit_arm_prevalence Numeric vector of the expected proportion +#' of patients eligible for each recruitment arm. +#' @slot recruit_arm_prevalence_start Numeric vector of the initial proportions +#' of patients eligible for each recruitment arm. +#' @slot recruit_arm_names Character vector of the names of the recruitment +#' arms. +#' @slot shared_control TRUE if using a shared control arm for all +#' experimental arms. +#' @slot ctrl_ratio Proportion of patients assigned to control +#' @slot treatment_arm_ids Named list of lists of recruitment arms by +#' treatment arm. +#' @slot treatment_arm_ids_start Named list of lists of the initial +#' configuration of recruitment arms by treatment arm. +#' @slot recruit_arm_id Automatically generated integer vector of the ID +#' numbers of the recruitment arms. +#' @slot treatment_counts Automatically generated named integer vector of +#' number of recruitment arms recruiting to each treatment arm. +#' @slot treatment_arm_struct Automatically generated logical matrix of +#' treatment arms by recruitment arms. +#' @slot treatment_arm_struct_start Automatically generated logical matrix of +#' the initial configuration of treatment arms by recruitment arms. +#' @slot experimental_arm_prevalence Automatically generated matrix of +#' prevalences of treatment arms by recruitment arms +#' +#' @param props_df Dataframe of expected biomarker prevalences for the +#' regions, with one column `category` containing names for the +#' biomarkers, and one column per region. +#' @param arms_ls List of lists of recruitment arms which recruit to +#' each treatment arm. +#' @param centres_df Dataframe containing columns `site`, the index +#' number of each site; `start_month`, the month in which that site +#' is expected to start recruiting; `mean_rate`, the expected number +#' of patients from that site per month; `region`, the index of the +#' region the site is in (should be in the same order as the columns +#' in `props_df`); and an optional column `site_cap`, if there is a +#' recruitment cap on any of the sites. +#' @param precision For the Dirichlet model of biomarker prevalences, +#' variability decreases as precision increases. Defaults to 10. +#' @param shared_control TRUE if all experimental arms share one +#' control arm; FALSE if they each have separate control arms. +#' @param ctrl_ratio Proportion of patients assigned to control +#' @param fixed_region_prevalences TRUE if biomarker prevalences +#' should be considered to be identical for all sites within a +#' region; FALSE if they should be drawn from a Dirichlet distribution +#' with a mean of the specified prevalence. +#' +#' @name trial_structure +#' +#' @import S7 +#' +trial_structure <- S7::new_class("trial_structure", + package = "biomkrAccrual", + properties = list( + # These need explicitly setting + recruit_arm_prevalence = S7::class_double, + recruit_arm_prevalence_start = S7::class_double, + recruit_arm_names = S7::class_character, + shared_control = S7::class_logical, + ctrl_ratio = S7::class_vector, + treatment_arm_ids = S7::class_list, + treatment_arm_ids_start = S7::class_list, + # These are generated from existing properties at the time they execute + recruit_arm_ids = S7::new_property( + getter = function(self) seq_len(nrow(self@recruit_arm_prevalence)) + ), + treatment_counts = S7::new_property( + getter = function(self) length(self@treatment_arm_ids) + ), + # Will inherit class matrix despite S7 silliness + treatment_arm_struct = S7::new_property( + getter = function(self) { + get_matrix_struct(self@treatment_arm_ids, self@recruit_arm_prevalence) + } + ), + treatment_arm_struct_start = S7::new_property( + getter = function(self) { + get_matrix_struct( + self@treatment_arm_ids_start, self@recruit_arm_prevalence_start + ) + } + ), + # array[recruit arms, treat arms, prevalence set] + experimental_arm_prevalence = S7::new_property( + getter = + function(self) { + get_array_prevalence( + self@treatment_arm_struct, + self@recruit_arm_prevalence, + self@shared_control, + self@ctrl_ratio + ) + } + ) + ), + # Make new instance by calling trial_structure(props_df, arms_ls) + constructor = function( + props_df = S7::class_missing, + arms_ls = S7::class_missing, + centres_df = S7::class_missing, + precision = S7::class_missing, + shared_control = S7::class_missing, + ctrl_ratio = S7::class_missing, + fixed_region_prevalences = S7::class_missing + ) { + # Create the object and populate it + S7::new_object( + # Parent class + S7::S7_object(), + recruit_arm_names = props_df$category, + recruit_arm_prevalence = + get_recruit_arm_prevalence( + props_df, centres_df, precision, fixed_region_prevalences + ), + recruit_arm_prevalence_start = + get_recruit_arm_prevalence( + props_df, centres_df, precision, fixed_region_prevalences + ), + shared_control = shared_control, + ctrl_ratio = ctrl_ratio, + treatment_arm_ids = arms_ls, + treatment_arm_ids_start = arms_ls + ) + }, + validator = function(self) { + if (!is.numeric(self@recruit_arm_prevalence[, -1])) { + "Recruitment arm prevalences must be numbers" + } else if ( + length(self@recruit_arm_names) != nrow(self@recruit_arm_prevalence) + ) { + "Different number of recruitment arm names supplied than prevalences" + } else if ( + !(all(sapply( + self@treatment_arm_ids, + function(v) is.integer(unlist(v)) || is.na(v) + ))) + ) { + "Elements from the treatment arm list should be integer vectors or NA" + } + } + +) + + +#' Dirichet regression model for biomarker prevalence +#' +#' For each site, draws from the Dirichlet distribution using the +#' expected prevalences for the region. +#' expected prevalences by region +#' +#' @param props_df Dataframe with one row per biomarker. Has one column +#' `category`, containing names of the biomarkers, plus one column for +#' each region, containing the expected biomarker prevalences for the regions. +#' @param centres_df Dataframe with one row per site, including one column +#' `region`, containing the index number for the region for each site. +#' Indices are assumed to be in the same order as the columns in `props_df`. +#' @param precision Variability decreases as precision increases. +#' @param fixed_region_prevalences TRUE if biomarker prevalences +#' should be considered to be identical for all sites within a +#' region; FALSE if they should be drawn from a Dirichlet distribution +#' with a mean of the specified prevalence. +#' +#' @return Matrix of prevalences with one column per site and one +#' row per biomarker; each column sums to 1. +#' +#' @importFrom checkmate assert_names assert_data_frame assert_atomic_vector +#' assert_integerish assert_numeric +#' +get_recruit_arm_prevalence <- function( + props_df, centres_df, precision, fixed_region_prevalences +) { + + # Check format of fixed_region_prevalences + checkmate::assert_logical( + fixed_region_prevalences, + len = 1, + any.missing = FALSE, + null.ok = FALSE + ) + + # If fixed_region_prevalences is FALSE, check contents of + # precision; otherwise should be NULL + if (fixed_region_prevalences) { + assert_null(precision) + } else { + # Check format and content of precision + checkmate::assert_numeric( + precision, + lower = 10^-7, + finite = TRUE, + len = 1, + any.missing = FALSE, + null.ok = FALSE + ) + } + + # Check format and content of centres_df + + checkmate::assert_data_frame( + centres_df, + types = "numeric", + any.missing = FALSE, + min.cols = 5, + max.cols = 6, + min.rows = 1, + col.names = "named", + null.ok = FALSE + ) + + checkmate::assert_names( + names(centres_df), + subset.of = c( + "site", "start_month", "mean_rate", "region", "site_cap", "start_week" + ), + must.include = c( + "site", "start_month", "mean_rate", "region", "start_week" + ) + ) + + sites_in_region <- centres_df$region + + checkmate::assert_atomic_vector( + sites_in_region, + any.missing = FALSE, + min.len = 1, + max.len = 10^4 + ) + + checkmate::assert_integerish( + sites_in_region, + lower = 1, + upper = 10^4, + any.missing = FALSE, + null.ok = FALSE + ) + + # Check format and content of props_df + + checkmate::assert_data_frame( + props_df, + types = c("numeric", "character"), + any.missing = FALSE, + # number of regions + "category" + min.cols = max(sites_in_region) + 1, + min.rows = 2, + null.ok = FALSE + ) + + checkmate::assert_names(names(props_df), must.include = "category") + + + # Any column that isn't "category" is assumed to be a region - + # allows for named regions + region_prevalence <- + props_df[, grep("^category$", names(props_df), invert = TRUE)] + + checkmate::assert_numeric( + as.matrix(region_prevalence), + lower = 0, + upper = 1, + finite = TRUE + ) + + if (fixed_region_prevalences) { + # Use region prevalences unchanged + recruit_arm_prevalence <- as.matrix( + region_prevalence[, sites_in_region] + ) + # Scale columns to sum to 1 + recruit_arm_prevalence <- sweep( + recruit_arm_prevalence, + 2, + colSums(recruit_arm_prevalence), + FUN = "/" + ) + + } else { + + # Draw from Dirichlet distribution + recruit_arm_prevalence <- do_dirichlet_draws( + region_prevalence, sites_in_region, precision + ) + } + + return(recruit_arm_prevalence) + +} + + +#' Draws from dirichlet regression model for biomarker prevalences +#' to create the prevalence matrix. +#' +#' @param region_prevalence Dataframe with one column for each +#' region and one row for each biomarker, containing prevalences as +#' probabilities. +#' @param sites_in_region Vector with a region index number for each +#' site, the index determined by the order of the columns in +#' `region_prevalence`. +#' @param precision Variability decreases as precision increases. +#' +do_dirichlet_draws <- function(region_prevalence, sites_in_region, precision) { + + # Predeclare prevalence matrix + recruit_arm_prevalence_mx <- matrix( + data = NA, + ncol = length(sites_in_region), + nrow = nrow(region_prevalence) + ) + + # Draw prevalences for sites in each region in turn + for (region in unique(sites_in_region)) { + + # Sites in region + site_indices <- which(sites_in_region == region) + + bio_prevalence <- rdirichlet_alt( + n = length(site_indices), + mu = region_prevalence[, region], + phi = precision + ) + # rdirichlet_alt produces the transpose of what we want, + # for consistency with other implementations of rdirichlet + recruit_arm_prevalence_mx[, site_indices] <- t(bio_prevalence) + } + + return(recruit_arm_prevalence_mx) +} + + +#' Converts trial structure and prevalence information into matrix form +#' +#' @param arms_ls List of lists of recruitment arms which recruit to +#' each treatment arm. +#' @param recruit_arm_prevalence Matrix of prevalences with one row per +#' biomarker and one column per site; each column sums to 1. +#' +#' @return Logical matrix with one row per biomarker and one column per +#' treatment arm; TRUE where the treatment recruits from that biomarker. +#' +get_matrix_struct <- function(arms_ls, recruit_arm_prevalence) { + + # Predeclare matrix as no_biomarkers * no_treatments + no_treats <- length(arms_ls) + no_biomarkers <- nrow(recruit_arm_prevalence) + + # This one is logical, to avoid rounding errors + arm_structure_mx <- + matrix(FALSE, max(unlist(arms_ls), no_biomarkers, na.rm = TRUE), no_treats) + + # Loop, changing to TRUE for arms including that treatment + for (icol in seq_len(no_treats)) { + + if (any(!is.na(arms_ls[[icol]]))) { + arm_structure_mx[unlist(arms_ls[[icol]]), icol] <- TRUE + } + } + + return(arm_structure_mx) +} + + +#' Make array with the prevalences by treatment arm, recruitment arm and +#' prevalence set +#' @param arm_structure_mx Logical matrix of recruitment arms by treatment arm +#' @param recruit_arm_prevalence Data frame with sets of prevalences, in +#' columns, for each arm (rows) +#' @param shared_control TRUE if all treatment arms share the +#' same control arm; FALSE if each treatment arm has its own +#' control. Defaults to TRUE. +#' @param ctrl_ratio Ratio of patients assigned to treatment versus control +#' +#' @return arm_prevalence_ar Array of prevalences, recruitment arms * +#' (treatment arms + control arms) * prevalence sets +#' +get_array_prevalence <- function( + arm_structure_mx, recruit_arm_prevalence, shared_control, ctrl_ratio +) { + no_treatments <- ncol(arm_structure_mx) + no_recruit_arms <- nrow(arm_structure_mx) + no_regions <- ncol(recruit_arm_prevalence) + + scale_factor <- + ifelse( + sapply( + rowSums(arm_structure_mx), + function(x) isTRUE(all.equal(x, 0, tolerance = 1e-6)) + ), + 1, + rowSums(arm_structure_mx) + ) + + # List of matrices by centre + ## Total proportion recruited for centre on each open arm + ## - more efficient to fix later + prev_ls <- lapply( + seq(no_regions), + function(i) recruit_arm_prevalence[, i] * arm_structure_mx / scale_factor + ) + + # Apply control configuration + if (shared_control) { + prev_ls <- lapply( + prev_ls, + function(mx) cbind(mx * ctrl_ratio[1], rowSums(mx) * ctrl_ratio[2]) + ) + } else { + prev_ls <- lapply( + prev_ls, + function(mx) cbind(mx * ctrl_ratio[1], mx * ctrl_ratio[2]) + ) + } + + # Make into array + arm_prevalence_ar <- array( + data = do.call(cbind, prev_ls), + dim = c(dim(prev_ls[[1]]), length(prev_ls)) + ) + + return(arm_prevalence_ar) +} + + +#' Close off treatment arms. +#' In the list of treatment arm IDs, replaces the vector of recruitment +#' arm IDs with NA. +#' If any recruitment arms are closed, sets their prevalence to zero but +#' does not recalculate prevalence vector, on the assumption that recruitment +#' from aites will fall because no patients with those characteristics will +#' be recruited from that point. +#' Class object will automatically generate new trial structure and +#' prevalence matrices. +#' +#' @param structure_obj An object of class `trial_structure`. +#' @param arms Vector or scalar of integer arm ID numbers to remove +#' +remove_treat_arms <- S7::new_generic("remove_treat_arms", "structure_obj") +S7::method(remove_treat_arms, trial_structure) <- function( + structure_obj, + arms +) { + + # Mark treatment arms as removed using NA; + # automatic getter for treatment_arm_struct does the rest + structure_obj@treatment_arm_ids[arms] <- NA_integer_ + + # Set prevalence to 0 for any recruitment arms which now have no + # experimental arms to recruit to + no_exp_arms <- rowSums(structure_obj@treatment_arm_struct) < 1 + structure_obj@recruit_arm_prevalence[no_exp_arms, ] <- 0 + + return(structure_obj) +} + + +#' Check whether an object is of class "trial_structure". +#' +#' @param x Object to test +#' +#' @return TRUE or FALSE +#' +#' @importFrom S7 new_generic method class_any +#' @export +#' +is.trial_structure <- S7::new_generic("is.trial_structure", "x") +S7::method(is.trial_structure, S7::class_any) <- function(x) { + inherits(x, "biomkrAccrual::trial_structure") +} +