--- a +++ b/src/moves.cpp @@ -0,0 +1,596 @@ +#include <Rcpp.h> +#include <Rmath.h> +#include "internals.h" +#include "likelihoods.h" +#include "priors.h" + + + +// IMPORTANT: ON INDEXING VECTORS AND ANCESTRIES + +// Most of the functions implemented here are susceptible to be called from R +// via Rcpp, and are therefore treated as interfaces. This causes a number of +// headaches when using indices of cases defined in R (1:N) to refer to elements +// in Rcpp / Cpp vectors (0:N-1). By convention, we store all data on the +// original scale (1:N), and modify indices whenever accessing elements of +// vectors. In other words, in an expression like 'alpha[j]', 'j' should always +// be on the internal scale (0:N-1). + +// In all these functions, 'SEXP i' is an optional vector of case indices, on +// the 1:N scale. + + + + + + +// --------------------------- + +// Movement of the mutation rate 'mu' is done using a dumb normal proposal. This +// is satisfying for now - we only reject a few non-sensical values outside the +// range [0;1]. The SD of the proposal (implicitely contained in rand$mu.rnorm1, +// but really provided through 'config', seems fine as the range of real values +// will never change much. Probably not much point in using auto-tuning here. + +// [[Rcpp::export(rng = true)]] +Rcpp::List cpp_move_mu(Rcpp::List param, Rcpp::List data, Rcpp::List config, + Rcpp::RObject custom_ll = R_NilValue, + Rcpp::RObject custom_prior = R_NilValue) { + + // deep copy here for now, ultimately should be an arg. + Rcpp::List new_param = clone(param); + Rcpp::NumericVector mu = param["mu"]; + Rcpp::NumericVector new_mu = new_param["mu"]; + + double sd_mu = static_cast<double>(config["sd_mu"]); + + double old_logpost = 0.0, new_logpost = 0.0, p_accept = 0.0; + + // proposal (normal distribution with SD: config$sd_mu) + + new_mu[0] += R::rnorm(0.0, sd_mu); // new proposed value + + + // automatic rejection of negative mu + + if (new_mu[0] < 0.0) { + return param; + } + + + // compute likelihoods + old_logpost = cpp_ll_genetic(data, param, R_NilValue, custom_ll); + new_logpost = cpp_ll_genetic(data, new_param, R_NilValue, custom_ll); + + + // compute priors + + old_logpost += cpp_prior_mu(param, config, custom_prior); + new_logpost += cpp_prior_mu(new_param, config, custom_prior); + + + // acceptance term + + p_accept = exp(new_logpost - old_logpost); + + + // acceptance: the new value is already in mu, so we only act if the move is + // rejected, in which case we restore the previous ('old') value + + if (p_accept < unif_rand()) { // reject new values + return param; + } + + return new_param; +} + + + + + + +// --------------------------- + +// movement of the Reporting probability 'pi' is done using a dumb normal +// proposal. This is satisfying for now - we only reject a few non-sensical +// values outside the range [0;1]. The SD of the proposal (implicitely contained +// in rand$pi.rnorm1, but really provided through 'config', seems fine as the +// range of real values will never change much. Probably not much point in using +// auto-tuning here. + +// [[Rcpp::export(rng = true)]] +Rcpp::List cpp_move_pi(Rcpp::List param, Rcpp::List data, Rcpp::List config, + Rcpp::RObject custom_ll = R_NilValue, + Rcpp::RObject custom_prior = R_NilValue) { + + // deep copy here for now, ultimately should be an arg. + + Rcpp::List new_param = clone(param); + Rcpp::NumericVector pi = param["pi"]; // these are just pointers + Rcpp::NumericVector new_pi = new_param["pi"]; // these are just pointers + + double sd_pi = static_cast<double>(config["sd_pi"]); + + double old_logpost = 0.0, new_logpost = 0.0, p_accept = 0.0; + + + // proposal (normal distribution with SD: config$sd_pi) + + new_pi[0] += R::rnorm(0.0, sd_pi); // new proposed value + + + // automatic rejection of pi outside [0;1] + + if (new_pi[0] < 0.0 || new_pi[0] > 1.0) { + return param; + } + + + // compute likelihoods + old_logpost = cpp_ll_reporting(data, param, R_NilValue, custom_ll); + new_logpost = cpp_ll_reporting(data, new_param, R_NilValue, custom_ll); + + + // compute priors + + old_logpost += cpp_prior_pi(param, config, custom_prior); + new_logpost += cpp_prior_pi(new_param, config, custom_prior); + + + // acceptance term + + p_accept = exp(new_logpost - old_logpost); + + + // acceptance: the new value is already in pi, so we only act if the move is + // rejected, in which case we restore the previous ('old') value + + if (p_accept < unif_rand()) { // reject new values + return param; + } + + return new_param; +} + + + + + + +// --------------------------- + +// movement of the contact reporting coverage 'eps' is done using a dumb normal +// proposal. This is satisfying for now - we only reject a few non-sensical +// values outside the range [0;1]. The SD of the proposal is provided through +// 'config'; this seems fine as the range of real values will never change +// much. Probably not much point in using auto-tuning here. + +// [[Rcpp::export(rng = true)]] +Rcpp::List cpp_move_eps(Rcpp::List param, Rcpp::List data, Rcpp::List config, + Rcpp::RObject custom_ll = R_NilValue, + Rcpp::RObject custom_prior = R_NilValue) { + + // deep copy here for now, ultimately should be an arg. + + Rcpp::List new_param = clone(param); + Rcpp::NumericVector eps = param["eps"]; // these are just pointers + Rcpp::NumericVector new_eps = new_param["eps"]; // these are just pointers + + double sd_eps = static_cast<double>(config["sd_eps"]); + + double old_logpost = 0.0, new_logpost = 0.0, p_accept = 0.0; + + + // proposal (normal distribution with SD: config$sd_eps) + + new_eps[0] += R::rnorm(0.0, sd_eps); // new proposed value + + + // automatic rejection of eps outside [0;1] + + if (new_eps[0] < 0.0 || new_eps[0] > 1.0) { + return param; + } + + + // compute likelihoods + old_logpost = cpp_ll_contact(data, param, R_NilValue, custom_ll); + new_logpost = cpp_ll_contact(data, new_param, R_NilValue, custom_ll); + + + // compute priors + + old_logpost += cpp_prior_eps(param, config, custom_prior); + new_logpost += cpp_prior_eps(new_param, config, custom_prior); + + + // acceptance term + + p_accept = exp(new_logpost - old_logpost); + + + // acceptance: the new value is already in eps, so we only act if the move is + // rejected, in which case we restore the previous ('old') value + + if (p_accept < unif_rand()) { // reject new values + return param; + } + + return new_param; +} + + + + + + +// --------------------------- + +// movement of the non-infectious contact rate 'eps' is done using a dumb +// normal proposal. This is satisfying for now - we only reject a few +// non-sensical values outside the range [0;1]. The SD of the proposal is +// provided through 'config'; this seems fine as the range of real values will +// never change much. Probably not much point in using auto-tuning here. + +// [[Rcpp::export(rng = true)]] +Rcpp::List cpp_move_lambda(Rcpp::List param, Rcpp::List data, Rcpp::List config, + Rcpp::RObject custom_ll = R_NilValue, + Rcpp::RObject custom_prior = R_NilValue) { + + // deep copy here for now, ultimately should be an arg. + + Rcpp::List new_param = clone(param); + Rcpp::NumericVector lambda = param["lambda"]; // these are just pointers + Rcpp::NumericVector new_lambda = new_param["lambda"]; // these are just pointers + + double sd_lambda = static_cast<double>(config["sd_lambda"]); + + double old_logpost = 0.0, new_logpost = 0.0, p_accept = 0.0; + + + // proposal (normal distribution with SD: config$sd_lambda) + + new_lambda[0] += R::rnorm(0.0, sd_lambda); // new proposed value + + + // automatic rejection of lambda outside [0;1] + + if (new_lambda[0] < 0.0 || new_lambda[0] > 1.0) { + return param; + } + + + // compute likelihoods + old_logpost = cpp_ll_contact(data, param, R_NilValue, custom_ll); + new_logpost = cpp_ll_contact(data, new_param, R_NilValue, custom_ll); + + + // compute priors + + old_logpost += cpp_prior_lambda(param, config, custom_prior); + new_logpost += cpp_prior_lambda(new_param, config, custom_prior); + + + // acceptance term + + p_accept = exp(new_logpost - old_logpost); + + + // acceptance: the new value is already in lambda, so we only act if the move is + // rejected, in which case we restore the previous ('old') value + + if (p_accept < unif_rand()) { // reject new values + return param; + } + + return new_param; +} + + + + + + +// --------------------------- + +// Movement of infection dates are +/- 1 from current states. These movements +// are currently vectorised, i.e. a bunch of dates are proposed all together; +// this may not be sustainable for larger datasets. The non-vectorised option +// will be slower and speed-up with C/C++ will be more substantial then. + +// This version differs from the initial R implementation in several points: + +// 1. all cases are moved +// 2. cases are moved one by one +// 3. movement for each case is +/- 1 time unit + +// Notes + +// - when computing the timing log-likelihood, the descendents of each +// case are also affected. + +// - we generate a new vector 'new_t_inf', which will replace the +// previous pointer defining param["t_inf"]. + +// [[Rcpp::export(rng = true)]] +Rcpp::List cpp_move_t_inf(Rcpp::List param, Rcpp::List data, + Rcpp::RObject list_custom_ll = R_NilValue) { + + // deep copy here for now, ultimately should be an arg. + + Rcpp::List new_param = clone(param); + Rcpp::IntegerVector t_inf = param["t_inf"]; + Rcpp::IntegerVector new_t_inf = new_param["t_inf"]; + Rcpp::IntegerVector alpha = param["alpha"]; + Rcpp::IntegerVector local_cases; + + size_t N = static_cast<size_t>(data["N"]); + + double old_loc_loglike = 0.0, new_loc_loglike = 0.0, p_loc_accept = 0.0; + + + for (size_t i = 0; i < N; i++) { + + local_cases = cpp_find_descendents(param["alpha"], i+1); + + // loglike with current value + old_loc_loglike = cpp_ll_timing(data, param, i+1, list_custom_ll); // term for case 'i' with offset + + // term descendents of 'i' + if (local_cases.size() > 0) { + old_loc_loglike += cpp_ll_timing(data, param, local_cases, list_custom_ll); + } + + // proposal (+/- 1) + new_t_inf[i] += unif_rand() > 0.5 ? 1 : -1; // new proposed value + + // loglike with new value + new_loc_loglike = cpp_ll_timing(data, new_param, i+1, list_custom_ll); // term for case 'i' with offset + + // term descendents of 'i' + if (local_cases.size() > 0) { + new_loc_loglike += cpp_ll_timing(data, new_param, local_cases, list_custom_ll); + } + + + // acceptance term + p_loc_accept = exp(new_loc_loglike - old_loc_loglike); + + + // acceptance: the new value is already in t_inf, so we only act if the move + // is rejected, in which case we restore the previous ('old') value + + if (p_loc_accept < unif_rand()) { // reject new values + new_t_inf[i] = t_inf[i]; + } + } + + return new_param; + +} + + + + + + +// --------------------------- + +// Movement of ancestries ('alpha') is not vectorised, movements are made one +// case at a time. This procedure is simply about picking an infector at random +// amongst cases preceeding the case considered. The original version in +// 'outbreaker' used to move simultaneously 'alpha', 'kappa' and 't_inf', but +// current implementation is simpler and seems to mix at least as well. Proper +// movement of 'alpha' needs this procedure as well as a swapping procedure +// (swaps are not possible through move.alpha only); in all instances, 'alpha' +// is on the scale 1:N. + +// [[Rcpp::export(rng = true)]] +Rcpp::List cpp_move_alpha(Rcpp::List param, Rcpp::List data, + Rcpp::RObject list_custom_ll = R_NilValue) { + Rcpp::List new_param = clone(param); + Rcpp::IntegerVector alpha = param["alpha"]; // pointer to param$alpha + Rcpp::IntegerVector t_inf = param["t_inf"]; // pointer to param$t_inf + Rcpp::IntegerVector new_alpha = new_param["alpha"]; + + size_t N = static_cast<size_t>(data["N"]); + + double old_loglike = 0.0, new_loglike = 0.0, p_accept = 0.0; + + + for (size_t i = 0; i < N; i++) { + + // only non-NA ancestries are moved, if there is at least 1 choice + if (alpha[i] != NA_INTEGER && sum(t_inf < t_inf[i]) > 0) { + + // loglike with current value + // old_loglike = cpp_ll_all(data, param, R_NilValue); + old_loglike = cpp_ll_all(data, param, i+1, list_custom_ll); // offset + + // proposal (+/- 1) + new_alpha[i] = cpp_pick_possible_ancestor(t_inf, i+1); // new proposed value (on scale 1 ... N) + + // loglike with current value + new_loglike = cpp_ll_all(data, new_param, i+1, list_custom_ll); + + // acceptance term + p_accept = exp(new_loglike - old_loglike); + + // which case we restore the previous ('old') value + if (p_accept < unif_rand()) { // reject new values + new_alpha[i] = alpha[i]; + } else { + alpha[i] = new_alpha[i]; + } + } + } + + return new_param; +} + + + + + + +// --------------------------- + +// The basic movement of ancestries (picking an ancestor at random amongst in +// previous cases) makes swaps of ancestries (A->B) to (B->A) very +// difficult. This function addresses the issue. It is computer-intensive, but +// likely a determining factor for faster mixing. Unlike previous versions in +// the original 'outbreaker' package, all cases are 'moved' here. A swap is +// defined as: + +// x -> a -> b becomes a -> x -> b + +// Obviously cases are moved one at a time. We need to use local likelihood +// changes for this move to scale well with outbreak size. The complicated bit +// is that the move impacts all descendents from 'a' as well as 'x'. + +// [[Rcpp::export(rng = true)]] +Rcpp::List cpp_move_swap_cases(Rcpp::List param, Rcpp::List data, + Rcpp::RObject list_custom_ll = R_NilValue) { + + Rcpp::List new_param = clone(param); + Rcpp::IntegerVector alpha = param["alpha"]; // pointer to param$alpha + Rcpp::IntegerVector t_inf = param["t_inf"]; // pointer to param$t_inf + Rcpp::List swapinfo; // contains alpha and t_inf + Rcpp::IntegerVector local_cases; + + size_t N = static_cast<size_t>(data["N"]); + int N_int = data["N"]; + + double old_loglike = 0.0, new_loglike = 0.0, p_accept = 0.0; + + // Shuffle indices to make equal cases equally likely + Rcpp::IntegerVector idx = Rcpp::seq(0, N-1); + idx = Rcpp::sample(idx, N_int, false); + + for (size_t j = 0; j < N; ++j) { + size_t i = (size_t)idx[j]; + + // only non-NA ancestries are moved, if there is at least 1 choice + if (alpha[i] != NA_INTEGER && sum(t_inf < t_inf[i]) > 0) { + + // The local likelihood is defined as the likelihood computed for the + // cases affected by the swap; these include: + + // - 'i' + // - the descendents of 'i' + // - 'alpha[i]' + // - the descendents of 'alpha[i]' (other than 'i') + + local_cases = cpp_find_local_cases(param["alpha"], i+1); + + + // loglike with current parameters + + old_loglike = cpp_ll_all(data, param, local_cases, list_custom_ll); // offset + + + // proposal: swap case 'i' and its ancestor + + swapinfo = cpp_swap_cases(param, i+1); + new_param["alpha"] = swapinfo["alpha"]; + new_param["t_inf"] = swapinfo["t_inf"]; + new_param["kappa"] = swapinfo["kappa"]; + + + // loglike with new parameters + + new_loglike = cpp_ll_all(data, new_param, local_cases, list_custom_ll); + + + // acceptance term + + p_accept = exp(new_loglike - old_loglike); + + + // acceptance: change param only if new values is accepted + + if (p_accept >= unif_rand()) { // accept new parameters + param["alpha"] = new_param["alpha"]; + param["t_inf"] = new_param["t_inf"]; + param["kappa"] = new_param["kappa"]; + } + } + } + + return param; +} + + + + + + +// --------------------------- + + +// Movement of the number of generations on transmission chains ('kappa') is +// done for one ancestry at a time. As for infection times ('t_inf') we use a +// dumb, symmetric +/- 1 proposal. But because values are typically in a short +// range (e.g. [1-3]) we probably propose more dumb values here. We may +// eventually want to bounce back or use and correct for assymetric proposals. + +// [[Rcpp::export(rng = true)]] +Rcpp::List cpp_move_kappa(Rcpp::List param, Rcpp::List data, Rcpp::List config, + Rcpp::RObject list_custom_ll = R_NilValue) { + Rcpp::List new_param = clone(param); + Rcpp::IntegerVector alpha = param["alpha"]; // pointer to param$alpha + Rcpp::IntegerVector kappa = param["kappa"]; // pointer to param$kappa + Rcpp::IntegerVector t_inf = param["t_inf"]; // pointer to param$t_inf + Rcpp::IntegerVector new_kappa = new_param["kappa"]; + + size_t N = static_cast<size_t>(data["N"]); + size_t K = static_cast<size_t>(config["max_kappa"]); + size_t jump; + + double old_loglike = 0.0, new_loglike = 0.0, p_accept = 0.0; + + for (size_t i = 0; i < N; i++) { + + // only non-NA ancestries are moved + if (alpha[i] != NA_INTEGER) { + + // propose new kappa + jump = (unif_rand() > 0.5) ? 1 : -1; + new_kappa[i] = kappa[i] + jump; + + + // only look into this move if new kappa is positive and smaller than the + // maximum value; if not, remember to reset the value of new_kappa to that + // of kappa, otherwise we implicitely accept stupid moves automatically + + if (new_kappa[i] < 1 || new_kappa[i] > K) { + new_kappa[i] = kappa[i]; + } else { + + + // loglike with current parameters + old_loglike = cpp_ll_all(data, param, i+1, list_custom_ll); + + + // loglike with new parameters + new_loglike = cpp_ll_all(data, new_param, i+1, list_custom_ll); + + + // acceptance term + p_accept = exp(new_loglike - old_loglike); + + // acceptance: change param only if new values is accepted + if (p_accept >= unif_rand()) { // accept new parameters + // Rprintf("\naccepting kappa:%d (p: %f old ll: %f new ll: %f", + // new_kappa[i], p_accept, old_loglike, new_loglike); + kappa[i] = new_kappa[i]; + } else { + new_kappa[i] = kappa[i]; + } + } + } + + } + + return param; +}