Diff of /R/utils-batches.R [000000] .. [d9ee58]

Switch to side-by-side view

--- a
+++ b/R/utils-batches.R
@@ -0,0 +1,278 @@
+#' Run a number of batches of recruitment prediction and
+#' collect summary statistics on arm closures, and final
+#' recruitment totals for all experimental and control arms
+#' @param n Number of instances to run (defaults to 100)
+#' @param centres_file File containing recruitment centre data
+#' (defaults to "centres.csv")
+#' @param arms_file File containing list of which recruitment
+#' arms recruit to which experimental arm
+#' @param data_path Folder where `centres_file`, `prop_file` and
+#' `arms_file` are located. Defaults to the location of the package
+#' example data in the package installation; this should be changed. 
+#' @param output_path Folder where data generated during execution
+#' will be stored; defaults to `../biomkrAccrual_output_data/`.
+#' @param figs_path Folder where figures generated during execution
+#' will be stored; defaults to the `figures` subdirectory in
+#' `output_path`.
+#' @param accrual_period Maximum number of weeks to recruit 
+#' (defaults to 80)
+#' @param target_arm_size Maximum size for all experimental arms
+#' (defaults to 308)
+#' @param target_interim Recruitment target for experimental arms 
+#' at interim analysis; defaults to `target_arm_size / 2`.
+#' @param target_control Maximum size for all control arms
+#' (defaults to 308)
+#' @param target_interim_control Recruitment target for control arms 
+#' at interim analysis; defaults to `target_control / 2`.
+#' @param shared_control = TRUE,
+#' @param fixed_centre_starts TRUE if centres are assumed to start
+#' exactly when planned; FALSE if some randomisation should be added.
+#' @param fixed_site_rates TRUE if centre recruitment rates should 
+#' be treated as exact; FALSE if they should be drawn from a gamma
+#' distribution with a mean of the specified rate.
+#' @param fixed_region_prevalences TRUE if biomarker prevalences 
+#' should be considered to be identical for all sites within a 
+#' region; FALSE if they should be drawn from a Dirichlet distribution
+#' with a mean of the specified prevalence.
+#' @param quietly If TRUE, do not display information and plots from
+#' individual runs within the batch. Defaults to TRUE.
+#' @param keep_files If FALSE, do not save data or plots from individual
+#' runs within the batch.  Defaults to FALSE.
+#' @return Dataframe of site closing times
+#' @return Dataframe of experimental arm totals
+#' @export
+#' 
+#' @importFrom jsonlite read_json
+#' @importFrom utils write.csv
+#' 
+biomkrAccrualSim <- function(
+  n = 100,
+  centres_file = "centres.csv",
+  arms_file = "arms.json",
+  data_path = "extdata/",
+  output_path = "../biomkrAccrual_output_data/",
+  figs_path = paste0(output_path, "figures/"),
+  accrual_period = 50 / 4,
+  interim_period = 25 / 4,
+  precision = 10,
+  # active : control ratio (all active the same)
+  ctrl_ratio = c(1, 1),
+  target_arm_size = 60,
+  target_interim = target_arm_size / 2,
+  target_control = 180,
+  target_interim_control = target_control / 2,
+  shared_control = TRUE,
+  fixed_centre_starts = TRUE,
+  fixed_site_rates = FALSE,
+  fixed_region_prevalences = FALSE,
+  quietly = TRUE,
+  keep_files = FALSE
+) {
+  # Timestamp for batch files (but not individual run files)
+  run_time <- format(Sys.time(), "%F-%H-%M-%S")
+
+  # Information for setting up dataframe
+  arms_ls <- 
+    jsonlite::read_json(system.file(
+      data_path, arms_file, package = "biomkrAccrual"
+    ), simplifyVector = TRUE)
+
+  # Define matrix of zeroes for efficiency
+  arm_closures_mx <- structure(
+    matrix(0, nrow = n, ncol = length(arms_ls)),
+    class = c("armtotals", "matrix", "array")
+  )
+  arm_totals_mx <- structure(
+    matrix(0, nrow = n, ncol = ifelse(
+      shared_control,
+      length(arms_ls) + 1,
+      2 * length(arms_ls)
+    )),
+    class = c("armtotals", "matrix", "array")
+  )
+  arm_interim_mx <- structure(
+    matrix(0, nrow = n, ncol = ifelse(
+      shared_control,
+      length(arms_ls) + 1,
+      2 * length(arms_ls)
+    )),
+    class = c("armtotals", "matrix", "array")
+  )
+
+  # Set column names
+  colnames(arm_closures_mx) <- names(arms_ls)
+  if (shared_control) {
+    colnames(arm_totals_mx) <- c(names(arms_ls), "Control")
+  } else {
+    colnames(arm_totals_mx) <- c(names(arms_ls), paste0("C-", names(arms_ls)))
+  }
+  colnames(arm_interim_mx) <- colnames(arm_totals_mx)
+
+  # Run batches
+  for (irun in seq(n)) {
+    accrual_instance <- biomkrAccrual(
+      centres_file = centres_file,
+      arms_file = arms_file,
+      data_path = data_path,
+      accrual_period = accrual_period,
+      interim_period = interim_period,
+      precision = precision,
+      ctrl_ratio = ctrl_ratio,
+      target_arm_size = target_arm_size,
+      target_interim = target_interim,
+      target_control = target_control,
+      shared_control = shared_control,
+      fixed_centre_starts = fixed_centre_starts,
+      fixed_site_rates = fixed_site_rates,
+      fixed_region_prevalences = fixed_region_prevalences,
+      quietly = TRUE,
+      keep_files = FALSE
+    )
+
+    arm_closures_mx[irun, ] <- accrual_instance@phase_changes
+    arm_totals_mx[irun, ] <- treat_sums(
+      accrual_instance@accrual[
+        seq_len(min(
+          nrow(accrual_instance@accrual), accrual_instance@accrual_period
+        )), ,
+      ]
+
+    )
+    arm_interim_mx[irun, ] <- treat_sums(
+      accrual_instance@accrual[
+        seq(accrual_instance@interim_period), , 
+      ]
+    )
+  }
+
+  # Keep copies of output, stamped with datetime
+  datetime <- format(Sys.time(), "%y-%m-%d_%H-%M-%S")
+
+  write.csv(
+    as.data.frame(arm_closures_mx),
+    paste0(output_path, "arm_closures_", datetime, ".csv"),
+    row.names = FALSE
+  )
+
+  write.csv(
+    as.data.frame(arm_totals_mx),
+    paste0(output_path, "arm_totals_", datetime, ".csv"),
+    row.names = FALSE
+  )
+
+  write.csv(
+    as.data.frame(arm_interim_mx),
+    paste0(output_path, "arm_interim_totals_", datetime, ".csv"),
+    row.names = FALSE
+  )
+
+  print(summary(as.data.frame(arm_closures_mx)))
+  print(summary(as.data.frame(arm_totals_mx)))
+  print(summary(as.data.frame(arm_interim_mx)))
+
+  # Interim plot
+
+  p <- plot(
+    arm_interim_mx, 
+    target = c(
+      target_interim, target_interim_control, 
+      target_arm_size, target_control
+    ), 
+    target_names = c(
+      "Interim", "Interim\ncontrol", 
+      "Accrual", "Accrual\ncontrol"
+    ),
+    target_week = interim_period
+  )
+
+  ggplot2::ggsave(
+    paste0(figs_path, "arm-totals-interim-", run_time, ".png"),
+    plot = p,
+    width = 12,
+    height = 8,
+    dpi = 400
+  )
+
+  print(p)
+
+  #
+
+
+  # Individual accrual plots
+  arm_names <- dimnames(arm_totals_mx)[[2]]
+
+  ## Mark arms as treatment or control
+  treatment_arms <- startsWith(arm_names, "T")
+
+  ## Same colours as in interim plot
+  col_order <- c(seq_len(length(treatment_arms))[-1], 1)
+  arm_colours <- grDevices::palette.colors(length(treatment_arms))[col_order]
+  
+  
+
+  # Total accrual plots
+
+  data_ls <- list(
+    Interim = as.data.frame(arm_interim_mx),
+    Accrual = as.data.frame(arm_totals_mx)
+  )
+  target_ls <- list(
+    Interim = c(target_interim, target_interim_control),
+    Accrual = c(target_arm_size, target_control)
+  )
+
+  ## Loop across interim and total
+  for (j in seq_len(length(data_ls))) {
+    # Loop across all arms
+    for (i in seq(treatment_arms)) {
+      p <- accrual_arm_plot(
+        data_ls[[j]],
+        arm_colours,
+        treatment_arms,
+        target_ls[[j]],
+        plot_id = names(data_ls)[j],
+        i
+      )
+
+      ggplot2::ggsave(
+        paste0(
+          figs_path, 
+          "arm-totals-",
+          tolower(names(data_ls)[j]),
+          "-",
+          arm_names[treatment_arms][i], "-", 
+          run_time, 
+          ".png"
+        ),
+        plot = p,
+        width = 12,
+        height = 8,
+        dpi = 400
+      )
+
+      print(p)
+    }
+  }
+}
+
+
+#' Format batch accrual data in long format for plotting.
+#' 
+#' @param data Matrix of accrual data.
+#' 
+matrix_to_long <- function(data) {
+
+  arm_names <- dimnames(data)[[2]]
+
+  data_df <- stats::reshape(
+    as.data.frame(data),
+    direction = "long",
+    varying = arm_names,
+    timevar = "Arm",
+    times = arm_names,
+    v.names = "Recruitment",
+    idvar = "Run"
+  )
+
+  return(data_df)
+}
\ No newline at end of file