Diff of /R/ref_likelihoods.R [000000] .. [dfe06d]

Switch to side-by-side view

--- a
+++ b/R/ref_likelihoods.R
@@ -0,0 +1,119 @@
+
+## ON THE PURPOSE OF THESE FUNCTIONS
+
+## These functions are no longer used in outbreaker2, but were part of the
+## original implementation, and are still used in testing procedures to ensure
+## that Rcpp versions give identical results.
+
+
+
+
+
+
+## This likelihood corresponds to the probability of observing a number of
+## mutations between cases and their ancestors. See src/likelihoods_cpp for
+## details of the Rcpp implmentation.
+
+.ll_genetic <- function(data, param, i = NULL) {
+    if (is.null(i)) {
+        i <- seq_len(data$N)
+    }
+
+    ## discard cases with no ancestors to avoid subsetting data$D with 'NA'
+    i <- i[!is.na(param$alpha[i])]
+
+    ## likelihood is based on the number of mutations between a case and its
+    ## ancestor; these are extracted from a pairwise genetic distance matrix
+    ## (data$D)
+    
+    ## the log-likelihood is computed as: sum(mu^n_mut + (1-mu)^(L-n_mut))
+    ## with:
+    ## 'mu' is the mutation probability
+    ## 'L' the number of sites in the alignment
+    ## 'n_mut' the number of mutations between an ancestor and its descendent
+    ##
+    ## for computer efficiency, we re-factorise it as:
+    ##  log(mu) * sum(n_mut) + log(1 - mu) * (L-n_mut + (kappa-1)*L)
+    ##
+    
+    n_mut <- data$D[cbind(i, param$alpha[i], deparse.level = 0)]
+    
+    n_non_mut <- (data$L - n_mut)
+
+    out <- sum(n_mut*log(param$kappa[i]*param$mu) +
+               n_non_mut*log(1 - param$kappa[i]*param$mu),
+               na.rm = TRUE)
+    
+    return(out)
+}
+
+
+
+
+
+
+## This likelihood corresponds to the probability of observing infection dates of cases given the
+## infection dates of their ancestors.
+
+.ll_timing_infections <- function(data, param, i = NULL) {
+    if (is.null(i)) {
+        i <- seq_len(data$N)
+    }
+
+    ## discard cases with no ancestors to avoid subsetting data$D with 'NA'
+    i <- i[!is.na(param$alpha[i])]
+
+
+    ## compute delays between infection dates of cases and of their ancestors
+    T <- param$t_inf[i] - param$t_inf[param$alpha[i]]
+
+    ## avoid over-shooting: delays outside the range of columns in pre-computed log-densities
+    ## (data$log_w_dens) will give a likelihood of zero
+    if (any(T<1 | T>ncol(data$log_w_dens), na.rm = TRUE)) return(-Inf)
+
+    ## output is a sum of log-densities
+    sum(data$log_w_dens[cbind(param$kappa[i], T)], na.rm = TRUE)
+}
+
+
+
+
+
+## This likelihood corresponds to the probability of reporting dates of cases given their
+## infection dates.
+
+.ll_timing_sampling <- function(data, param, i = NULL) {
+    if (is.null(i)) {
+        i <- seq_len(data$N)
+    }
+
+    ## compute delays
+    T <- data$dates[i] - param$t_inf[i]
+    T <- T[!is.na(T)]
+
+    ## avoid over-shooting
+    if (any(T<1 | T>length(data$log_f_dens))) return(-Inf)
+
+    ## output is a sum of log densities
+    sum(data$log_f_dens[T], na.rm = TRUE)
+}
+
+
+
+
+
+
+## This likelihood corresponds to the probability of a given number of
+## unreported cases on an ancestry.
+
+.ll_reporting <- function(data, param, i = NULL) {
+    if (is.null(i)) {
+        i <- seq_len(data$N)
+    }
+
+    sum(stats::dgeom(param$kappa[i]-1,
+                     prob = param$pi,
+                     log = TRUE), na.rm = TRUE)
+}
+
+