|
a |
|
b/R/sim_ctd.R |
|
|
1 |
#' Simulate contact data from a transmission tree |
|
|
2 |
#' |
|
|
3 |
#' This function simulates contact data from a transmission tree. The |
|
|
4 |
#' model assumes that all transmission pairs have experienced contact, and that |
|
|
5 |
#' there is no false-positive reporting of contacts. The probability of contact |
|
|
6 |
#' occuring between a non-transmision pair is given by the parameter lambda. The |
|
|
7 |
#' probability of reporting a contact (transmission pair or not) is given by the |
|
|
8 |
#' parameters eps. |
|
|
9 |
#' |
|
|
10 |
#' @importFrom magrittr %>% |
|
|
11 |
#' |
|
|
12 |
#' @param tTree a dataframe or matrix of two columns, with each row providing |
|
|
13 |
#' the ids (numerical or as characters) of a transmission pair |
|
|
14 |
#' |
|
|
15 |
#' @param eps the contact reporting coverage, defined as the probability of |
|
|
16 |
#' reporting a contact (transmission pair or not) |
|
|
17 |
#' |
|
|
18 |
#' @param lambda the non-infectious contact rate, defined as the probability |
|
|
19 |
#' of contact between a non-transmission pair. |
|
|
20 |
#' |
|
|
21 |
#' @author Finlay Campbell (\email{finlaycampbell93@@gmail.com}) |
|
|
22 |
#' |
|
|
23 |
#' @examples |
|
|
24 |
#' |
|
|
25 |
#' ## load data |
|
|
26 |
#' x <- fake_outbreak |
|
|
27 |
#' tTree <- data.frame(from = x$ances, to = seq_along(x$ances)) |
|
|
28 |
#' |
|
|
29 |
#' ## simulate contact data with coverage of 80% and 10% probability of |
|
|
30 |
#' ## contact between non-transmission pairs |
|
|
31 |
#' ctd <- outbreaker2:::sim_ctd(tTree, eps = 0.8, lambda = 0.1) |
|
|
32 |
#' |
|
|
33 |
#' ## inspect contact data |
|
|
34 |
#' head(ctd) |
|
|
35 |
|
|
|
36 |
sim_ctd <- function(tTree, eps, lambda) { |
|
|
37 |
|
|
|
38 |
tTree <- apply(tTree, 2, as.character) |
|
|
39 |
|
|
|
40 |
if(any(c(eps, lambda) < 0) | any(c(eps, lambda) > 1)) { |
|
|
41 |
stop('eps and lambda must be probabilities') |
|
|
42 |
} |
|
|
43 |
|
|
|
44 |
if(ncol(tTree) != 2) { |
|
|
45 |
stop("tTree must have two columns") |
|
|
46 |
} |
|
|
47 |
|
|
|
48 |
id <- unique(c(tTree[,1], tTree[,2])) |
|
|
49 |
id <- id[!is.na(id)] |
|
|
50 |
|
|
|
51 |
## Sort tTree by value or alphabetically, This ensures A:B and B:A are both |
|
|
52 |
## recognised when querying the contacts dataframe for transmission pairs |
|
|
53 |
tTree <- tTree %>% |
|
|
54 |
stats::na.omit() %>% |
|
|
55 |
apply(1, sort, decreasing = FALSE) %>% |
|
|
56 |
t() %>% |
|
|
57 |
as.data.frame(stringsAsFactors = FALSE) |
|
|
58 |
|
|
|
59 |
tTree <- tTree[order(tTree[,1]),] |
|
|
60 |
names(tTree) <- c('V1', 'V2') |
|
|
61 |
if(nrow(tTree) == 0) stop("No transmission observed") |
|
|
62 |
|
|
|
63 |
## Create a dataframe of all potential contacts |
|
|
64 |
contacts <- as.data.frame(t(utils::combn(sort(id), 2)), |
|
|
65 |
stringsAsFactors = FALSE) |
|
|
66 |
|
|
|
67 |
## Create a column of logicals indicating whether a pair represents a |
|
|
68 |
## transmission pair or not |
|
|
69 |
tTree$tPair <- TRUE |
|
|
70 |
|
|
|
71 |
## Mark non-transmission pairs in the contacts dataframe. The merge function |
|
|
72 |
## will mark pairs found in contacts but not in tTree as 'NA'. These are |
|
|
73 |
## then converted to FALSE |
|
|
74 |
contacts <- merge(contacts, tTree, by = c('V1', 'V2'), all.x = TRUE) |
|
|
75 |
contacts$tPair[is.na(contacts$tPair)] <- FALSE |
|
|
76 |
|
|
|
77 |
## Sample a number of rows given by a binomial distribution |
|
|
78 |
sampler <- function(x, prob) { |
|
|
79 |
x[sample(1:nrow(x), stats::rbinom(1, nrow(x), prob)), 1:3] |
|
|
80 |
} |
|
|
81 |
|
|
|
82 |
## Sample transmission pairs with probability eps |
|
|
83 |
## Sample non-transmission pairs with probability eps*lambda |
|
|
84 |
ctd <- rbind(sampler(contacts[contacts$tPair,], eps), |
|
|
85 |
sampler(contacts[!contacts$tPair,], eps*lambda)) |
|
|
86 |
|
|
|
87 |
ctd <- ctd[, c(1, 2)] |
|
|
88 |
|
|
|
89 |
colnames(ctd) <- c('i', 'j') |
|
|
90 |
rownames(ctd) <- NULL |
|
|
91 |
|
|
|
92 |
return(ctd) |
|
|
93 |
|
|
|
94 |
} |