Diff of /R/utils-batches.R [000000] .. [d9ee58]

Switch to unified view

a b/R/utils-batches.R
1
#' Run a number of batches of recruitment prediction and
2
#' collect summary statistics on arm closures, and final
3
#' recruitment totals for all experimental and control arms
4
#' @param n Number of instances to run (defaults to 100)
5
#' @param centres_file File containing recruitment centre data
6
#' (defaults to "centres.csv")
7
#' @param arms_file File containing list of which recruitment
8
#' arms recruit to which experimental arm
9
#' @param data_path Folder where `centres_file`, `prop_file` and
10
#' `arms_file` are located. Defaults to the location of the package
11
#' example data in the package installation; this should be changed. 
12
#' @param output_path Folder where data generated during execution
13
#' will be stored; defaults to `../biomkrAccrual_output_data/`.
14
#' @param figs_path Folder where figures generated during execution
15
#' will be stored; defaults to the `figures` subdirectory in
16
#' `output_path`.
17
#' @param accrual_period Maximum number of weeks to recruit 
18
#' (defaults to 80)
19
#' @param target_arm_size Maximum size for all experimental arms
20
#' (defaults to 308)
21
#' @param target_interim Recruitment target for experimental arms 
22
#' at interim analysis; defaults to `target_arm_size / 2`.
23
#' @param target_control Maximum size for all control arms
24
#' (defaults to 308)
25
#' @param target_interim_control Recruitment target for control arms 
26
#' at interim analysis; defaults to `target_control / 2`.
27
#' @param shared_control = TRUE,
28
#' @param fixed_centre_starts TRUE if centres are assumed to start
29
#' exactly when planned; FALSE if some randomisation should be added.
30
#' @param fixed_site_rates TRUE if centre recruitment rates should 
31
#' be treated as exact; FALSE if they should be drawn from a gamma
32
#' distribution with a mean of the specified rate.
33
#' @param fixed_region_prevalences TRUE if biomarker prevalences 
34
#' should be considered to be identical for all sites within a 
35
#' region; FALSE if they should be drawn from a Dirichlet distribution
36
#' with a mean of the specified prevalence.
37
#' @param quietly If TRUE, do not display information and plots from
38
#' individual runs within the batch. Defaults to TRUE.
39
#' @param keep_files If FALSE, do not save data or plots from individual
40
#' runs within the batch.  Defaults to FALSE.
41
#' @return Dataframe of site closing times
42
#' @return Dataframe of experimental arm totals
43
#' @export
44
#' 
45
#' @importFrom jsonlite read_json
46
#' @importFrom utils write.csv
47
#' 
48
biomkrAccrualSim <- function(
49
  n = 100,
50
  centres_file = "centres.csv",
51
  arms_file = "arms.json",
52
  data_path = "extdata/",
53
  output_path = "../biomkrAccrual_output_data/",
54
  figs_path = paste0(output_path, "figures/"),
55
  accrual_period = 50 / 4,
56
  interim_period = 25 / 4,
57
  precision = 10,
58
  # active : control ratio (all active the same)
59
  ctrl_ratio = c(1, 1),
60
  target_arm_size = 60,
61
  target_interim = target_arm_size / 2,
62
  target_control = 180,
63
  target_interim_control = target_control / 2,
64
  shared_control = TRUE,
65
  fixed_centre_starts = TRUE,
66
  fixed_site_rates = FALSE,
67
  fixed_region_prevalences = FALSE,
68
  quietly = TRUE,
69
  keep_files = FALSE
70
) {
71
  # Timestamp for batch files (but not individual run files)
72
  run_time <- format(Sys.time(), "%F-%H-%M-%S")
73
74
  # Information for setting up dataframe
75
  arms_ls <- 
76
    jsonlite::read_json(system.file(
77
      data_path, arms_file, package = "biomkrAccrual"
78
    ), simplifyVector = TRUE)
79
80
  # Define matrix of zeroes for efficiency
81
  arm_closures_mx <- structure(
82
    matrix(0, nrow = n, ncol = length(arms_ls)),
83
    class = c("armtotals", "matrix", "array")
84
  )
85
  arm_totals_mx <- structure(
86
    matrix(0, nrow = n, ncol = ifelse(
87
      shared_control,
88
      length(arms_ls) + 1,
89
      2 * length(arms_ls)
90
    )),
91
    class = c("armtotals", "matrix", "array")
92
  )
93
  arm_interim_mx <- structure(
94
    matrix(0, nrow = n, ncol = ifelse(
95
      shared_control,
96
      length(arms_ls) + 1,
97
      2 * length(arms_ls)
98
    )),
99
    class = c("armtotals", "matrix", "array")
100
  )
101
102
  # Set column names
103
  colnames(arm_closures_mx) <- names(arms_ls)
104
  if (shared_control) {
105
    colnames(arm_totals_mx) <- c(names(arms_ls), "Control")
106
  } else {
107
    colnames(arm_totals_mx) <- c(names(arms_ls), paste0("C-", names(arms_ls)))
108
  }
109
  colnames(arm_interim_mx) <- colnames(arm_totals_mx)
110
111
  # Run batches
112
  for (irun in seq(n)) {
113
    accrual_instance <- biomkrAccrual(
114
      centres_file = centres_file,
115
      arms_file = arms_file,
116
      data_path = data_path,
117
      accrual_period = accrual_period,
118
      interim_period = interim_period,
119
      precision = precision,
120
      ctrl_ratio = ctrl_ratio,
121
      target_arm_size = target_arm_size,
122
      target_interim = target_interim,
123
      target_control = target_control,
124
      shared_control = shared_control,
125
      fixed_centre_starts = fixed_centre_starts,
126
      fixed_site_rates = fixed_site_rates,
127
      fixed_region_prevalences = fixed_region_prevalences,
128
      quietly = TRUE,
129
      keep_files = FALSE
130
    )
131
132
    arm_closures_mx[irun, ] <- accrual_instance@phase_changes
133
    arm_totals_mx[irun, ] <- treat_sums(
134
      accrual_instance@accrual[
135
        seq_len(min(
136
          nrow(accrual_instance@accrual), accrual_instance@accrual_period
137
        )), ,
138
      ]
139
140
    )
141
    arm_interim_mx[irun, ] <- treat_sums(
142
      accrual_instance@accrual[
143
        seq(accrual_instance@interim_period), , 
144
      ]
145
    )
146
  }
147
148
  # Keep copies of output, stamped with datetime
149
  datetime <- format(Sys.time(), "%y-%m-%d_%H-%M-%S")
150
151
  write.csv(
152
    as.data.frame(arm_closures_mx),
153
    paste0(output_path, "arm_closures_", datetime, ".csv"),
154
    row.names = FALSE
155
  )
156
157
  write.csv(
158
    as.data.frame(arm_totals_mx),
159
    paste0(output_path, "arm_totals_", datetime, ".csv"),
160
    row.names = FALSE
161
  )
162
163
  write.csv(
164
    as.data.frame(arm_interim_mx),
165
    paste0(output_path, "arm_interim_totals_", datetime, ".csv"),
166
    row.names = FALSE
167
  )
168
169
  print(summary(as.data.frame(arm_closures_mx)))
170
  print(summary(as.data.frame(arm_totals_mx)))
171
  print(summary(as.data.frame(arm_interim_mx)))
172
173
  # Interim plot
174
175
  p <- plot(
176
    arm_interim_mx, 
177
    target = c(
178
      target_interim, target_interim_control, 
179
      target_arm_size, target_control
180
    ), 
181
    target_names = c(
182
      "Interim", "Interim\ncontrol", 
183
      "Accrual", "Accrual\ncontrol"
184
    ),
185
    target_week = interim_period
186
  )
187
188
  ggplot2::ggsave(
189
    paste0(figs_path, "arm-totals-interim-", run_time, ".png"),
190
    plot = p,
191
    width = 12,
192
    height = 8,
193
    dpi = 400
194
  )
195
196
  print(p)
197
198
  #
199
200
201
  # Individual accrual plots
202
  arm_names <- dimnames(arm_totals_mx)[[2]]
203
204
  ## Mark arms as treatment or control
205
  treatment_arms <- startsWith(arm_names, "T")
206
207
  ## Same colours as in interim plot
208
  col_order <- c(seq_len(length(treatment_arms))[-1], 1)
209
  arm_colours <- grDevices::palette.colors(length(treatment_arms))[col_order]
210
  
211
  
212
213
  # Total accrual plots
214
215
  data_ls <- list(
216
    Interim = as.data.frame(arm_interim_mx),
217
    Accrual = as.data.frame(arm_totals_mx)
218
  )
219
  target_ls <- list(
220
    Interim = c(target_interim, target_interim_control),
221
    Accrual = c(target_arm_size, target_control)
222
  )
223
224
  ## Loop across interim and total
225
  for (j in seq_len(length(data_ls))) {
226
    # Loop across all arms
227
    for (i in seq(treatment_arms)) {
228
      p <- accrual_arm_plot(
229
        data_ls[[j]],
230
        arm_colours,
231
        treatment_arms,
232
        target_ls[[j]],
233
        plot_id = names(data_ls)[j],
234
        i
235
      )
236
237
      ggplot2::ggsave(
238
        paste0(
239
          figs_path, 
240
          "arm-totals-",
241
          tolower(names(data_ls)[j]),
242
          "-",
243
          arm_names[treatment_arms][i], "-", 
244
          run_time, 
245
          ".png"
246
        ),
247
        plot = p,
248
        width = 12,
249
        height = 8,
250
        dpi = 400
251
      )
252
253
      print(p)
254
    }
255
  }
256
}
257
258
259
#' Format batch accrual data in long format for plotting.
260
#' 
261
#' @param data Matrix of accrual data.
262
#' 
263
matrix_to_long <- function(data) {
264
265
  arm_names <- dimnames(data)[[2]]
266
267
  data_df <- stats::reshape(
268
    as.data.frame(data),
269
    direction = "long",
270
    varying = arm_names,
271
    timevar = "Arm",
272
    times = arm_names,
273
    v.names = "Recruitment",
274
    idvar = "Run"
275
  )
276
277
  return(data_df)
278
}