Diff of /R/ref_likelihoods.R [000000] .. [dfe06d]

Switch to unified view

a b/R/ref_likelihoods.R
1
2
## ON THE PURPOSE OF THESE FUNCTIONS
3
4
## These functions are no longer used in outbreaker2, but were part of the
5
## original implementation, and are still used in testing procedures to ensure
6
## that Rcpp versions give identical results.
7
8
9
10
11
12
13
## This likelihood corresponds to the probability of observing a number of
14
## mutations between cases and their ancestors. See src/likelihoods_cpp for
15
## details of the Rcpp implmentation.
16
17
.ll_genetic <- function(data, param, i = NULL) {
18
    if (is.null(i)) {
19
        i <- seq_len(data$N)
20
    }
21
22
    ## discard cases with no ancestors to avoid subsetting data$D with 'NA'
23
    i <- i[!is.na(param$alpha[i])]
24
25
    ## likelihood is based on the number of mutations between a case and its
26
    ## ancestor; these are extracted from a pairwise genetic distance matrix
27
    ## (data$D)
28
    
29
    ## the log-likelihood is computed as: sum(mu^n_mut + (1-mu)^(L-n_mut))
30
    ## with:
31
    ## 'mu' is the mutation probability
32
    ## 'L' the number of sites in the alignment
33
    ## 'n_mut' the number of mutations between an ancestor and its descendent
34
    ##
35
    ## for computer efficiency, we re-factorise it as:
36
    ##  log(mu) * sum(n_mut) + log(1 - mu) * (L-n_mut + (kappa-1)*L)
37
    ##
38
    
39
    n_mut <- data$D[cbind(i, param$alpha[i], deparse.level = 0)]
40
    
41
    n_non_mut <- (data$L - n_mut)
42
43
    out <- sum(n_mut*log(param$kappa[i]*param$mu) +
44
               n_non_mut*log(1 - param$kappa[i]*param$mu),
45
               na.rm = TRUE)
46
    
47
    return(out)
48
}
49
50
51
52
53
54
55
## This likelihood corresponds to the probability of observing infection dates of cases given the
56
## infection dates of their ancestors.
57
58
.ll_timing_infections <- function(data, param, i = NULL) {
59
    if (is.null(i)) {
60
        i <- seq_len(data$N)
61
    }
62
63
    ## discard cases with no ancestors to avoid subsetting data$D with 'NA'
64
    i <- i[!is.na(param$alpha[i])]
65
66
67
    ## compute delays between infection dates of cases and of their ancestors
68
    T <- param$t_inf[i] - param$t_inf[param$alpha[i]]
69
70
    ## avoid over-shooting: delays outside the range of columns in pre-computed log-densities
71
    ## (data$log_w_dens) will give a likelihood of zero
72
    if (any(T<1 | T>ncol(data$log_w_dens), na.rm = TRUE)) return(-Inf)
73
74
    ## output is a sum of log-densities
75
    sum(data$log_w_dens[cbind(param$kappa[i], T)], na.rm = TRUE)
76
}
77
78
79
80
81
82
## This likelihood corresponds to the probability of reporting dates of cases given their
83
## infection dates.
84
85
.ll_timing_sampling <- function(data, param, i = NULL) {
86
    if (is.null(i)) {
87
        i <- seq_len(data$N)
88
    }
89
90
    ## compute delays
91
    T <- data$dates[i] - param$t_inf[i]
92
    T <- T[!is.na(T)]
93
94
    ## avoid over-shooting
95
    if (any(T<1 | T>length(data$log_f_dens))) return(-Inf)
96
97
    ## output is a sum of log densities
98
    sum(data$log_f_dens[T], na.rm = TRUE)
99
}
100
101
102
103
104
105
106
## This likelihood corresponds to the probability of a given number of
107
## unreported cases on an ancestry.
108
109
.ll_reporting <- function(data, param, i = NULL) {
110
    if (is.null(i)) {
111
        i <- seq_len(data$N)
112
    }
113
114
    sum(stats::dgeom(param$kappa[i]-1,
115
                     prob = param$pi,
116
                     log = TRUE), na.rm = TRUE)
117
}
118
119