Diff of /R/sim_ctd.R [000000] .. [dfe06d]

Switch to unified view

a b/R/sim_ctd.R
1
#' Simulate contact data from a transmission tree
2
#'
3
#' This function simulates contact data from a transmission tree. The
4
#' model assumes that all transmission pairs have experienced contact, and that
5
#' there is no false-positive reporting of contacts. The probability of contact
6
#' occuring between a non-transmision pair is given by the parameter lambda. The
7
#' probability of reporting a contact (transmission pair or not) is given by the
8
#' parameters eps.
9
#'
10
#' @importFrom magrittr %>%
11
#'
12
#' @param tTree a dataframe or matrix of two columns, with each row providing
13
#'     the ids (numerical or as characters) of a transmission pair
14
#'
15
#' @param eps the contact reporting coverage, defined as the probability of
16
#'     reporting a contact (transmission pair or not)
17
#'
18
#' @param lambda the non-infectious contact rate, defined as the probability
19
#'     of contact between a non-transmission pair.
20
#'
21
#' @author Finlay Campbell (\email{finlaycampbell93@@gmail.com})
22
#'
23
#' @examples
24
#'
25
#' ## load data
26
#' x <- fake_outbreak
27
#' tTree <- data.frame(from = x$ances, to = seq_along(x$ances))
28
#'
29
#' ## simulate contact data with coverage of 80% and 10% probability of
30
#' ## contact between non-transmission pairs
31
#' ctd <- outbreaker2:::sim_ctd(tTree, eps = 0.8, lambda = 0.1)
32
#'
33
#' ## inspect contact data
34
#' head(ctd)
35
36
sim_ctd <- function(tTree, eps, lambda) {
37
38
  tTree <- apply(tTree, 2, as.character)
39
40
  if(any(c(eps, lambda) < 0) | any(c(eps, lambda) > 1)) {
41
    stop('eps and lambda must be probabilities')
42
  }
43
44
  if(ncol(tTree) != 2) {
45
    stop("tTree must have two columns")
46
  }
47
48
  id <- unique(c(tTree[,1], tTree[,2]))
49
  id <- id[!is.na(id)]
50
51
  ## Sort tTree by value or alphabetically, This ensures A:B and B:A are both
52
  ## recognised when querying the contacts dataframe for transmission pairs
53
  tTree <- tTree %>%
54
    stats::na.omit() %>%
55
    apply(1, sort, decreasing = FALSE) %>%
56
    t() %>%
57
    as.data.frame(stringsAsFactors = FALSE)
58
59
  tTree <- tTree[order(tTree[,1]),]
60
  names(tTree) <- c('V1', 'V2')
61
  if(nrow(tTree) == 0) stop("No transmission observed")
62
63
  ## Create a dataframe of all potential contacts
64
  contacts <- as.data.frame(t(utils::combn(sort(id), 2)),
65
                            stringsAsFactors = FALSE)
66
67
  ## Create a column of logicals indicating whether a pair represents a
68
  ## transmission pair or not
69
  tTree$tPair <- TRUE
70
71
  ## Mark non-transmission pairs in the contacts dataframe. The merge function
72
  ## will mark pairs found in contacts but not in tTree as 'NA'. These are
73
  ## then converted to FALSE
74
  contacts <- merge(contacts, tTree, by = c('V1', 'V2'), all.x = TRUE)
75
  contacts$tPair[is.na(contacts$tPair)] <- FALSE
76
77
  ## Sample a number of rows given by a binomial distribution
78
  sampler <- function(x, prob) {
79
    x[sample(1:nrow(x), stats::rbinom(1, nrow(x), prob)), 1:3]
80
  }
81
82
  ## Sample transmission pairs with probability eps
83
  ## Sample non-transmission pairs with probability eps*lambda
84
  ctd <- rbind(sampler(contacts[contacts$tPair,], eps),
85
               sampler(contacts[!contacts$tPair,], eps*lambda))
86
87
  ctd <- ctd[, c(1, 2)]
88
89
  colnames(ctd) <- c('i', 'j')
90
  rownames(ctd) <- NULL
91
92
  return(ctd)
93
94
}