--- a +++ b/R/sim_ctd.R @@ -0,0 +1,94 @@ +#' Simulate contact data from a transmission tree +#' +#' This function simulates contact data from a transmission tree. The +#' model assumes that all transmission pairs have experienced contact, and that +#' there is no false-positive reporting of contacts. The probability of contact +#' occuring between a non-transmision pair is given by the parameter lambda. The +#' probability of reporting a contact (transmission pair or not) is given by the +#' parameters eps. +#' +#' @importFrom magrittr %>% +#' +#' @param tTree a dataframe or matrix of two columns, with each row providing +#' the ids (numerical or as characters) of a transmission pair +#' +#' @param eps the contact reporting coverage, defined as the probability of +#' reporting a contact (transmission pair or not) +#' +#' @param lambda the non-infectious contact rate, defined as the probability +#' of contact between a non-transmission pair. +#' +#' @author Finlay Campbell (\email{finlaycampbell93@@gmail.com}) +#' +#' @examples +#' +#' ## load data +#' x <- fake_outbreak +#' tTree <- data.frame(from = x$ances, to = seq_along(x$ances)) +#' +#' ## simulate contact data with coverage of 80% and 10% probability of +#' ## contact between non-transmission pairs +#' ctd <- outbreaker2:::sim_ctd(tTree, eps = 0.8, lambda = 0.1) +#' +#' ## inspect contact data +#' head(ctd) + +sim_ctd <- function(tTree, eps, lambda) { + + tTree <- apply(tTree, 2, as.character) + + if(any(c(eps, lambda) < 0) | any(c(eps, lambda) > 1)) { + stop('eps and lambda must be probabilities') + } + + if(ncol(tTree) != 2) { + stop("tTree must have two columns") + } + + id <- unique(c(tTree[,1], tTree[,2])) + id <- id[!is.na(id)] + + ## Sort tTree by value or alphabetically, This ensures A:B and B:A are both + ## recognised when querying the contacts dataframe for transmission pairs + tTree <- tTree %>% + stats::na.omit() %>% + apply(1, sort, decreasing = FALSE) %>% + t() %>% + as.data.frame(stringsAsFactors = FALSE) + + tTree <- tTree[order(tTree[,1]),] + names(tTree) <- c('V1', 'V2') + if(nrow(tTree) == 0) stop("No transmission observed") + + ## Create a dataframe of all potential contacts + contacts <- as.data.frame(t(utils::combn(sort(id), 2)), + stringsAsFactors = FALSE) + + ## Create a column of logicals indicating whether a pair represents a + ## transmission pair or not + tTree$tPair <- TRUE + + ## Mark non-transmission pairs in the contacts dataframe. The merge function + ## will mark pairs found in contacts but not in tTree as 'NA'. These are + ## then converted to FALSE + contacts <- merge(contacts, tTree, by = c('V1', 'V2'), all.x = TRUE) + contacts$tPair[is.na(contacts$tPair)] <- FALSE + + ## Sample a number of rows given by a binomial distribution + sampler <- function(x, prob) { + x[sample(1:nrow(x), stats::rbinom(1, nrow(x), prob)), 1:3] + } + + ## Sample transmission pairs with probability eps + ## Sample non-transmission pairs with probability eps*lambda + ctd <- rbind(sampler(contacts[contacts$tPair,], eps), + sampler(contacts[!contacts$tPair,], eps*lambda)) + + ctd <- ctd[, c(1, 2)] + + colnames(ctd) <- c('i', 'j') + rownames(ctd) <- NULL + + return(ctd) + +}