Diff of /R/prevalence.R [000000] .. [d9ee58]

Switch to unified view

a b/R/prevalence.R
1
#' A class for an object to contain the proportions of arriving patients to be
2
#' allocated to each arm of the trial.
3
#' 
4
#' @slot recruit_arm_prevalence Numeric vector of the expected proportion 
5
#' of patients eligible for each recruitment arm.
6
#' @slot recruit_arm_prevalence_start Numeric vector of the initial proportions
7
#' of patients eligible for each recruitment arm.
8
#' @slot recruit_arm_names Character vector of the names of the recruitment 
9
#' arms.
10
#' @slot shared_control TRUE if using a shared control arm for all 
11
#' experimental arms.
12
#' @slot ctrl_ratio Proportion of patients assigned to control
13
#' @slot treatment_arm_ids Named list of lists of recruitment arms by 
14
#' treatment arm.
15
#' @slot treatment_arm_ids_start Named list of lists of the initial 
16
#' configuration of recruitment arms by treatment arm.
17
#' @slot recruit_arm_id Automatically generated integer vector of the ID 
18
#' numbers of the recruitment arms.
19
#' @slot treatment_counts Automatically generated named integer vector of 
20
#' number of recruitment arms recruiting to each treatment arm.
21
#' @slot treatment_arm_struct Automatically generated logical matrix of 
22
#' treatment arms by recruitment arms.
23
#' @slot treatment_arm_struct_start Automatically generated logical matrix of 
24
#' the initial configuration of treatment arms by recruitment arms.
25
#' @slot experimental_arm_prevalence Automatically generated matrix of 
26
#' prevalences of treatment arms by recruitment arms
27
#' 
28
#' @param props_df Dataframe of expected biomarker prevalences for the 
29
#' regions, with one column `category` containing names for the 
30
#' biomarkers, and one column per region.
31
#' @param arms_ls List of lists of recruitment arms which recruit to
32
#' each treatment arm.
33
#' @param centres_df Dataframe containing columns `site`, the index
34
#' number of each site; `start_month`, the month in which that site
35
#' is expected to start recruiting; `mean_rate`, the expected number 
36
#' of patients from that site per month; `region`, the index of the
37
#' region the site is in (should be in the same order as the columns
38
#' in `props_df`); and an optional column `site_cap`, if there is a 
39
#' recruitment cap on any of the sites.
40
#' @param precision For the Dirichlet model of biomarker prevalences, 
41
#' variability decreases as precision increases. Defaults to 10.
42
#' @param shared_control TRUE if all experimental arms share one 
43
#' control arm; FALSE if they each have separate control arms.
44
#' @param ctrl_ratio Proportion of patients assigned to control
45
#' @param fixed_region_prevalences TRUE if biomarker prevalences 
46
#' should be considered to be identical for all sites within a 
47
#' region; FALSE if they should be drawn from a Dirichlet distribution
48
#' with a mean of the specified prevalence.
49
#' 
50
#' @name trial_structure
51
#' 
52
#' @import S7
53
#' 
54
trial_structure <- S7::new_class("trial_structure",
55
  package = "biomkrAccrual",
56
  properties = list(
57
    # These need explicitly setting
58
    recruit_arm_prevalence = S7::class_double,
59
    recruit_arm_prevalence_start = S7::class_double,
60
    recruit_arm_names = S7::class_character,
61
    shared_control = S7::class_logical,
62
    ctrl_ratio = S7::class_vector,
63
    treatment_arm_ids = S7::class_list,
64
    treatment_arm_ids_start = S7::class_list,
65
    # These are generated from existing properties at the time they execute
66
    recruit_arm_ids = S7::new_property(
67
      getter = function(self) seq_len(nrow(self@recruit_arm_prevalence))
68
    ),
69
    treatment_counts = S7::new_property(
70
      getter = function(self) length(self@treatment_arm_ids)
71
    ),
72
    # Will inherit class matrix despite S7 silliness
73
    treatment_arm_struct = S7::new_property(
74
      getter = function(self) {
75
        get_matrix_struct(self@treatment_arm_ids, self@recruit_arm_prevalence)
76
      }
77
    ),
78
    treatment_arm_struct_start = S7::new_property(
79
      getter = function(self) {
80
        get_matrix_struct(
81
          self@treatment_arm_ids_start, self@recruit_arm_prevalence_start
82
        )
83
      }
84
    ),
85
    # array[recruit arms, treat arms, prevalence set]
86
    experimental_arm_prevalence = S7::new_property(
87
      getter = 
88
        function(self) {
89
          get_array_prevalence(
90
            self@treatment_arm_struct, 
91
            self@recruit_arm_prevalence,
92
            self@shared_control,
93
            self@ctrl_ratio
94
          )
95
        }
96
    )
97
  ),
98
  # Make new instance by calling trial_structure(props_df, arms_ls)
99
  constructor = function(
100
    props_df = S7::class_missing, 
101
    arms_ls = S7::class_missing,
102
    centres_df = S7::class_missing,
103
    precision = S7::class_missing,
104
    shared_control = S7::class_missing,
105
    ctrl_ratio = S7::class_missing,
106
    fixed_region_prevalences = S7::class_missing
107
  ) {
108
    # Create the object and populate it
109
    S7::new_object(
110
      # Parent class
111
      S7::S7_object(),
112
      recruit_arm_names = props_df$category, 
113
      recruit_arm_prevalence = 
114
        get_recruit_arm_prevalence(
115
          props_df, centres_df, precision, fixed_region_prevalences
116
        ),
117
      recruit_arm_prevalence_start = 
118
        get_recruit_arm_prevalence(
119
          props_df, centres_df, precision, fixed_region_prevalences
120
        ),
121
      shared_control = shared_control,
122
      ctrl_ratio = ctrl_ratio,
123
      treatment_arm_ids = arms_ls,
124
      treatment_arm_ids_start = arms_ls
125
    )
126
  },
127
  validator = function(self) {
128
    if (!is.numeric(self@recruit_arm_prevalence[, -1])) {
129
      "Recruitment arm prevalences must be numbers"
130
    } else if (
131
      length(self@recruit_arm_names) != nrow(self@recruit_arm_prevalence)
132
    ) {
133
      "Different number of recruitment arm names supplied than prevalences"
134
    } else if (
135
      !(all(sapply(
136
        self@treatment_arm_ids, 
137
        function(v) is.integer(unlist(v)) || is.na(v)
138
      )))
139
    ) {
140
      "Elements from the treatment arm list should be integer vectors or NA"
141
    } 
142
  }
143
144
)
145
146
147
#' Dirichet regression model for biomarker prevalence
148
#' 
149
#' For each site, draws from the Dirichlet distribution using the
150
#' expected prevalences for the region. 
151
#' expected prevalences by region
152
#' 
153
#' @param props_df Dataframe with one row per biomarker.  Has one column 
154
#' `category`, containing names of the biomarkers, plus one column for 
155
#' each region, containing the expected biomarker prevalences for the regions.
156
#' @param centres_df Dataframe with one row per site, including one column
157
#' `region`, containing the index number for the region for each site.
158
#' Indices are assumed to be in the same order as the columns in `props_df`.
159
#' @param precision Variability decreases as precision increases.
160
#' @param fixed_region_prevalences TRUE if biomarker prevalences 
161
#' should be considered to be identical for all sites within a 
162
#' region; FALSE if they should be drawn from a Dirichlet distribution
163
#' with a mean of the specified prevalence.
164
#'   
165
#' @return Matrix of prevalences with one column per site and one 
166
#' row per biomarker; each column sums to 1.
167
#' 
168
#' @importFrom checkmate assert_names assert_data_frame assert_atomic_vector 
169
#' assert_integerish assert_numeric
170
#' 
171
get_recruit_arm_prevalence <- function(
172
  props_df, centres_df, precision, fixed_region_prevalences
173
) {
174
175
  # Check format of fixed_region_prevalences
176
  checkmate::assert_logical(
177
    fixed_region_prevalences, 
178
    len = 1, 
179
    any.missing = FALSE, 
180
    null.ok = FALSE
181
  )
182
183
  # If fixed_region_prevalences is FALSE, check contents of 
184
  # precision; otherwise should be NULL
185
  if (fixed_region_prevalences) {
186
    assert_null(precision)
187
  } else {
188
    # Check format and content of precision
189
    checkmate::assert_numeric(
190
      precision,
191
      lower = 10^-7,
192
      finite = TRUE,
193
      len = 1,
194
      any.missing = FALSE,
195
      null.ok = FALSE
196
    )
197
  }
198
199
  # Check format and content of centres_df
200
201
  checkmate::assert_data_frame(
202
    centres_df,
203
    types = "numeric",
204
    any.missing = FALSE,
205
    min.cols = 5,
206
    max.cols = 6,
207
    min.rows = 1,
208
    col.names = "named",
209
    null.ok = FALSE
210
  )
211
212
  checkmate::assert_names(
213
    names(centres_df),
214
    subset.of = c(
215
      "site", "start_month", "mean_rate", "region", "site_cap", "start_week"
216
    ),
217
    must.include = c(
218
      "site", "start_month", "mean_rate", "region", "start_week"
219
    )
220
  )
221
222
  sites_in_region <- centres_df$region
223
224
  checkmate::assert_atomic_vector(
225
    sites_in_region,
226
    any.missing = FALSE,
227
    min.len = 1,
228
    max.len = 10^4
229
  )
230
231
  checkmate::assert_integerish(
232
    sites_in_region,
233
    lower = 1,
234
    upper = 10^4,
235
    any.missing = FALSE,
236
    null.ok = FALSE
237
  )
238
  
239
  # Check format and content of props_df
240
241
  checkmate::assert_data_frame(
242
    props_df,
243
    types = c("numeric", "character"),
244
    any.missing = FALSE,
245
    # number of regions + "category"
246
    min.cols = max(sites_in_region) + 1,
247
    min.rows = 2,
248
    null.ok = FALSE
249
  )
250
251
  checkmate::assert_names(names(props_df), must.include = "category")
252
253
254
  # Any column that isn't "category" is assumed to be a region -
255
  # allows for named regions
256
  region_prevalence <- 
257
    props_df[, grep("^category$", names(props_df), invert = TRUE)]
258
259
  checkmate::assert_numeric(
260
    as.matrix(region_prevalence),
261
    lower = 0,
262
    upper = 1,
263
    finite = TRUE
264
  )
265
266
  if (fixed_region_prevalences) {
267
    # Use region prevalences unchanged
268
    recruit_arm_prevalence <- as.matrix(
269
      region_prevalence[, sites_in_region]
270
    )
271
    # Scale columns to sum to 1
272
    recruit_arm_prevalence <- sweep(
273
      recruit_arm_prevalence,
274
      2,
275
      colSums(recruit_arm_prevalence),
276
      FUN = "/"
277
    )
278
279
  } else {
280
    
281
    # Draw from Dirichlet distribution
282
    recruit_arm_prevalence <- do_dirichlet_draws(
283
      region_prevalence, sites_in_region, precision
284
    )
285
  }
286
287
  return(recruit_arm_prevalence)
288
289
}
290
291
292
#' Draws from dirichlet regression model for biomarker prevalences
293
#' to create the prevalence matrix.
294
#' 
295
#' @param region_prevalence Dataframe with one column for each 
296
#' region and one row for each biomarker, containing prevalences as 
297
#' probabilities.
298
#' @param sites_in_region Vector with a region index number for each
299
#' site, the index determined by the order of the columns in
300
#' `region_prevalence`.
301
#' @param precision Variability decreases as precision increases.
302
#'  
303
do_dirichlet_draws <- function(region_prevalence, sites_in_region, precision) {
304
  
305
  # Predeclare prevalence matrix
306
  recruit_arm_prevalence_mx <- matrix(
307
    data = NA, 
308
    ncol = length(sites_in_region),
309
    nrow = nrow(region_prevalence)
310
  )
311
  
312
  # Draw prevalences for sites in each region in turn
313
  for (region in unique(sites_in_region)) {
314
315
    # Sites in region
316
    site_indices <- which(sites_in_region == region)
317
318
    bio_prevalence <- rdirichlet_alt(
319
      n = length(site_indices),
320
      mu = region_prevalence[, region],
321
      phi = precision
322
    )
323
    # rdirichlet_alt produces the transpose of what we want,
324
    # for consistency with other implementations of rdirichlet
325
    recruit_arm_prevalence_mx[, site_indices] <- t(bio_prevalence)
326
  }
327
328
  return(recruit_arm_prevalence_mx)
329
}
330
331
332
#' Converts trial structure and prevalence information into matrix form
333
#' 
334
#' @param arms_ls List of lists of recruitment arms which recruit to
335
#' each treatment arm.
336
#' @param recruit_arm_prevalence Matrix of prevalences with one row per 
337
#' biomarker and one column per site; each column sums to 1.
338
#' 
339
#' @return Logical matrix with one row per biomarker and one column per
340
#' treatment arm; TRUE where the treatment recruits from that biomarker.
341
#' 
342
get_matrix_struct <- function(arms_ls, recruit_arm_prevalence) {
343
344
  # Predeclare matrix as no_biomarkers * no_treatments
345
  no_treats <- length(arms_ls)
346
  no_biomarkers <- nrow(recruit_arm_prevalence)
347
  
348
  # This one is logical, to avoid rounding errors
349
  arm_structure_mx <- 
350
    matrix(FALSE, max(unlist(arms_ls), no_biomarkers, na.rm = TRUE), no_treats)
351
352
  # Loop, changing to TRUE for arms including that treatment
353
  for (icol in seq_len(no_treats)) {
354
    
355
    if (any(!is.na(arms_ls[[icol]]))) {
356
      arm_structure_mx[unlist(arms_ls[[icol]]), icol] <- TRUE
357
    }
358
  }
359
360
  return(arm_structure_mx)
361
}
362
363
364
#' Make array with the prevalences by treatment arm, recruitment arm and 
365
#' prevalence set
366
#' @param arm_structure_mx Logical matrix of recruitment arms by treatment arm
367
#' @param recruit_arm_prevalence Data frame with sets of prevalences, in 
368
#' columns, for each arm (rows) 
369
#' @param shared_control TRUE if all treatment arms share the
370
#' same control arm; FALSE if each treatment arm has its own 
371
#' control. Defaults to TRUE.
372
#' @param ctrl_ratio Ratio of patients assigned to treatment versus control
373
#' 
374
#' @return arm_prevalence_ar Array of prevalences, recruitment arms * 
375
#' (treatment arms + control arms) * prevalence sets
376
#' 
377
get_array_prevalence <- function(
378
  arm_structure_mx, recruit_arm_prevalence, shared_control, ctrl_ratio
379
) {
380
  no_treatments <- ncol(arm_structure_mx)
381
  no_recruit_arms <- nrow(arm_structure_mx)
382
  no_regions <- ncol(recruit_arm_prevalence)
383
384
  scale_factor <- 
385
    ifelse(
386
      sapply(
387
        rowSums(arm_structure_mx), 
388
        function(x) isTRUE(all.equal(x, 0, tolerance = 1e-6))
389
      ),
390
      1, 
391
      rowSums(arm_structure_mx)
392
    )
393
394
  # List of matrices by centre 
395
  ## Total proportion recruited for centre on each open arm 
396
  ## - more efficient to fix later
397
  prev_ls <- lapply(
398
    seq(no_regions),
399
    function(i) recruit_arm_prevalence[, i] * arm_structure_mx / scale_factor
400
  )
401
402
  # Apply control configuration
403
  if (shared_control) {
404
    prev_ls <- lapply(
405
      prev_ls,
406
      function(mx) cbind(mx * ctrl_ratio[1], rowSums(mx) * ctrl_ratio[2])
407
    )
408
  } else {
409
    prev_ls <- lapply(
410
      prev_ls,
411
      function(mx) cbind(mx * ctrl_ratio[1], mx * ctrl_ratio[2])
412
    )
413
  }
414
415
  # Make into array
416
  arm_prevalence_ar <- array(
417
    data = do.call(cbind, prev_ls),
418
    dim = c(dim(prev_ls[[1]]), length(prev_ls))
419
  )
420
  
421
  return(arm_prevalence_ar)
422
}
423
424
425
#' Close off treatment arms.
426
#' In the list of treatment arm IDs, replaces the vector of recruitment
427
#' arm IDs with NA.
428
#' If any recruitment arms are closed, sets their prevalence to zero but 
429
#' does not recalculate prevalence vector, on the assumption that recruitment
430
#' from aites will fall because no patients with those characteristics will 
431
#' be recruited from that point.
432
#' Class object will automatically generate new trial structure and 
433
#' prevalence matrices.
434
#' 
435
#' @param structure_obj An object of class `trial_structure`.
436
#' @param arms Vector or scalar of integer arm ID numbers to remove
437
#' 
438
remove_treat_arms <- S7::new_generic("remove_treat_arms", "structure_obj")
439
S7::method(remove_treat_arms, trial_structure) <- function(
440
  structure_obj,
441
  arms
442
) {
443
  
444
  # Mark treatment arms as removed using NA; 
445
  # automatic getter for treatment_arm_struct does the rest
446
  structure_obj@treatment_arm_ids[arms] <- NA_integer_
447
448
  # Set prevalence to 0 for any recruitment arms which now have no
449
  # experimental arms to recruit to
450
  no_exp_arms <- rowSums(structure_obj@treatment_arm_struct) < 1
451
  structure_obj@recruit_arm_prevalence[no_exp_arms, ] <- 0
452
453
  return(structure_obj)
454
}
455
456
457
#' Check whether an object is of class "trial_structure".
458
#' 
459
#' @param x Object to test
460
#' 
461
#' @return TRUE or FALSE
462
#' 
463
#' @importFrom S7 new_generic method class_any
464
#' @export
465
#' 
466
is.trial_structure <- S7::new_generic("is.trial_structure", "x")
467
S7::method(is.trial_structure, S7::class_any) <- function(x) {
468
  inherits(x, "biomkrAccrual::trial_structure")
469
}
470