|
a |
|
b/R/ref_likelihoods.R |
|
|
1 |
|
|
|
2 |
## ON THE PURPOSE OF THESE FUNCTIONS |
|
|
3 |
|
|
|
4 |
## These functions are no longer used in outbreaker2, but were part of the |
|
|
5 |
## original implementation, and are still used in testing procedures to ensure |
|
|
6 |
## that Rcpp versions give identical results. |
|
|
7 |
|
|
|
8 |
|
|
|
9 |
|
|
|
10 |
|
|
|
11 |
|
|
|
12 |
|
|
|
13 |
## This likelihood corresponds to the probability of observing a number of |
|
|
14 |
## mutations between cases and their ancestors. See src/likelihoods_cpp for |
|
|
15 |
## details of the Rcpp implmentation. |
|
|
16 |
|
|
|
17 |
.ll_genetic <- function(data, param, i = NULL) { |
|
|
18 |
if (is.null(i)) { |
|
|
19 |
i <- seq_len(data$N) |
|
|
20 |
} |
|
|
21 |
|
|
|
22 |
## discard cases with no ancestors to avoid subsetting data$D with 'NA' |
|
|
23 |
i <- i[!is.na(param$alpha[i])] |
|
|
24 |
|
|
|
25 |
## likelihood is based on the number of mutations between a case and its |
|
|
26 |
## ancestor; these are extracted from a pairwise genetic distance matrix |
|
|
27 |
## (data$D) |
|
|
28 |
|
|
|
29 |
## the log-likelihood is computed as: sum(mu^n_mut + (1-mu)^(L-n_mut)) |
|
|
30 |
## with: |
|
|
31 |
## 'mu' is the mutation probability |
|
|
32 |
## 'L' the number of sites in the alignment |
|
|
33 |
## 'n_mut' the number of mutations between an ancestor and its descendent |
|
|
34 |
## |
|
|
35 |
## for computer efficiency, we re-factorise it as: |
|
|
36 |
## log(mu) * sum(n_mut) + log(1 - mu) * (L-n_mut + (kappa-1)*L) |
|
|
37 |
## |
|
|
38 |
|
|
|
39 |
n_mut <- data$D[cbind(i, param$alpha[i], deparse.level = 0)] |
|
|
40 |
|
|
|
41 |
n_non_mut <- (data$L - n_mut) |
|
|
42 |
|
|
|
43 |
out <- sum(n_mut*log(param$kappa[i]*param$mu) + |
|
|
44 |
n_non_mut*log(1 - param$kappa[i]*param$mu), |
|
|
45 |
na.rm = TRUE) |
|
|
46 |
|
|
|
47 |
return(out) |
|
|
48 |
} |
|
|
49 |
|
|
|
50 |
|
|
|
51 |
|
|
|
52 |
|
|
|
53 |
|
|
|
54 |
|
|
|
55 |
## This likelihood corresponds to the probability of observing infection dates of cases given the |
|
|
56 |
## infection dates of their ancestors. |
|
|
57 |
|
|
|
58 |
.ll_timing_infections <- function(data, param, i = NULL) { |
|
|
59 |
if (is.null(i)) { |
|
|
60 |
i <- seq_len(data$N) |
|
|
61 |
} |
|
|
62 |
|
|
|
63 |
## discard cases with no ancestors to avoid subsetting data$D with 'NA' |
|
|
64 |
i <- i[!is.na(param$alpha[i])] |
|
|
65 |
|
|
|
66 |
|
|
|
67 |
## compute delays between infection dates of cases and of their ancestors |
|
|
68 |
T <- param$t_inf[i] - param$t_inf[param$alpha[i]] |
|
|
69 |
|
|
|
70 |
## avoid over-shooting: delays outside the range of columns in pre-computed log-densities |
|
|
71 |
## (data$log_w_dens) will give a likelihood of zero |
|
|
72 |
if (any(T<1 | T>ncol(data$log_w_dens), na.rm = TRUE)) return(-Inf) |
|
|
73 |
|
|
|
74 |
## output is a sum of log-densities |
|
|
75 |
sum(data$log_w_dens[cbind(param$kappa[i], T)], na.rm = TRUE) |
|
|
76 |
} |
|
|
77 |
|
|
|
78 |
|
|
|
79 |
|
|
|
80 |
|
|
|
81 |
|
|
|
82 |
## This likelihood corresponds to the probability of reporting dates of cases given their |
|
|
83 |
## infection dates. |
|
|
84 |
|
|
|
85 |
.ll_timing_sampling <- function(data, param, i = NULL) { |
|
|
86 |
if (is.null(i)) { |
|
|
87 |
i <- seq_len(data$N) |
|
|
88 |
} |
|
|
89 |
|
|
|
90 |
## compute delays |
|
|
91 |
T <- data$dates[i] - param$t_inf[i] |
|
|
92 |
T <- T[!is.na(T)] |
|
|
93 |
|
|
|
94 |
## avoid over-shooting |
|
|
95 |
if (any(T<1 | T>length(data$log_f_dens))) return(-Inf) |
|
|
96 |
|
|
|
97 |
## output is a sum of log densities |
|
|
98 |
sum(data$log_f_dens[T], na.rm = TRUE) |
|
|
99 |
} |
|
|
100 |
|
|
|
101 |
|
|
|
102 |
|
|
|
103 |
|
|
|
104 |
|
|
|
105 |
|
|
|
106 |
## This likelihood corresponds to the probability of a given number of |
|
|
107 |
## unreported cases on an ancestry. |
|
|
108 |
|
|
|
109 |
.ll_reporting <- function(data, param, i = NULL) { |
|
|
110 |
if (is.null(i)) { |
|
|
111 |
i <- seq_len(data$N) |
|
|
112 |
} |
|
|
113 |
|
|
|
114 |
sum(stats::dgeom(param$kappa[i]-1, |
|
|
115 |
prob = param$pi, |
|
|
116 |
log = TRUE), na.rm = TRUE) |
|
|
117 |
} |
|
|
118 |
|
|
|
119 |
|