Diff of /src/moves.cpp [000000] .. [dfe06d]

Switch to unified view

a b/src/moves.cpp
1
#include <Rcpp.h>
2
#include <Rmath.h>
3
#include "internals.h"
4
#include "likelihoods.h"
5
#include "priors.h"
6
7
8
9
// IMPORTANT: ON INDEXING VECTORS AND ANCESTRIES
10
11
// Most of the functions implemented here are susceptible to be called from R
12
// via Rcpp, and are therefore treated as interfaces. This causes a number of
13
// headaches when using indices of cases defined in R (1:N) to refer to elements
14
// in Rcpp / Cpp vectors (0:N-1). By convention, we store all data on the
15
// original scale (1:N), and modify indices whenever accessing elements of
16
// vectors. In other words, in an expression like 'alpha[j]', 'j' should always
17
// be on the internal scale (0:N-1).
18
19
// In all these functions, 'SEXP i' is an optional vector of case indices, on
20
// the 1:N scale.
21
22
23
24
25
26
27
// ---------------------------
28
29
// Movement of the mutation rate 'mu' is done using a dumb normal proposal. This
30
// is satisfying for now - we only reject a few non-sensical values outside the
31
// range [0;1]. The SD of the proposal (implicitely contained in rand$mu.rnorm1,
32
// but really provided through 'config', seems fine as the range of real values
33
// will never change much. Probably not much point in using auto-tuning here.
34
35
// [[Rcpp::export(rng = true)]]
36
Rcpp::List cpp_move_mu(Rcpp::List param, Rcpp::List data, Rcpp::List config,
37
               Rcpp::RObject custom_ll = R_NilValue,
38
               Rcpp::RObject custom_prior = R_NilValue) {
39
40
  // deep copy here for now, ultimately should be an arg.
41
  Rcpp::List new_param = clone(param);
42
  Rcpp::NumericVector mu = param["mu"];
43
  Rcpp::NumericVector new_mu = new_param["mu"];
44
45
  double sd_mu = static_cast<double>(config["sd_mu"]);
46
47
  double old_logpost = 0.0, new_logpost = 0.0, p_accept = 0.0;
48
49
  // proposal (normal distribution with SD: config$sd_mu)
50
51
  new_mu[0] += R::rnorm(0.0, sd_mu); // new proposed value
52
53
54
  // automatic rejection of negative mu
55
56
  if (new_mu[0] < 0.0) {
57
    return param;
58
  }
59
60
61
  // compute likelihoods
62
  old_logpost = cpp_ll_genetic(data, param, R_NilValue, custom_ll);
63
  new_logpost = cpp_ll_genetic(data, new_param, R_NilValue, custom_ll);
64
65
66
  // compute priors
67
68
  old_logpost += cpp_prior_mu(param, config, custom_prior);
69
  new_logpost += cpp_prior_mu(new_param, config, custom_prior);
70
71
72
  // acceptance term
73
74
  p_accept = exp(new_logpost - old_logpost);
75
76
77
  // acceptance: the new value is already in mu, so we only act if the move is
78
  // rejected, in which case we restore the previous ('old') value
79
80
  if (p_accept < unif_rand()) { // reject new values
81
    return param;
82
  }
83
84
  return new_param;
85
}
86
87
88
89
90
91
92
// ---------------------------
93
94
// movement of the Reporting probability 'pi' is done using a dumb normal
95
// proposal. This is satisfying for now - we only reject a few non-sensical
96
// values outside the range [0;1]. The SD of the proposal (implicitely contained
97
// in rand$pi.rnorm1, but really provided through 'config', seems fine as the
98
// range of real values will never change much. Probably not much point in using
99
// auto-tuning here.
100
101
// [[Rcpp::export(rng = true)]]
102
Rcpp::List cpp_move_pi(Rcpp::List param, Rcpp::List data, Rcpp::List config,
103
               Rcpp::RObject custom_ll = R_NilValue,
104
               Rcpp::RObject custom_prior = R_NilValue) {
105
106
  // deep copy here for now, ultimately should be an arg.
107
108
  Rcpp::List new_param = clone(param);
109
  Rcpp::NumericVector pi = param["pi"]; // these are just pointers
110
  Rcpp::NumericVector new_pi = new_param["pi"]; // these are just pointers
111
112
  double sd_pi = static_cast<double>(config["sd_pi"]);
113
114
  double old_logpost = 0.0, new_logpost = 0.0, p_accept = 0.0;
115
116
117
  // proposal (normal distribution with SD: config$sd_pi)
118
119
  new_pi[0] += R::rnorm(0.0, sd_pi); // new proposed value
120
121
122
  // automatic rejection of pi outside [0;1]
123
124
  if (new_pi[0] < 0.0 || new_pi[0] > 1.0) {
125
    return param;
126
  }
127
128
129
  // compute likelihoods
130
  old_logpost = cpp_ll_reporting(data, param, R_NilValue, custom_ll);
131
  new_logpost = cpp_ll_reporting(data, new_param, R_NilValue, custom_ll);
132
133
134
  // compute priors
135
136
  old_logpost += cpp_prior_pi(param, config, custom_prior);
137
  new_logpost += cpp_prior_pi(new_param, config, custom_prior);
138
139
140
  // acceptance term
141
142
  p_accept = exp(new_logpost - old_logpost);
143
144
145
  // acceptance: the new value is already in pi, so we only act if the move is
146
  // rejected, in which case we restore the previous ('old') value
147
148
  if (p_accept < unif_rand()) { // reject new values
149
    return param;
150
  }
151
152
  return new_param;
153
}
154
155
156
157
158
159
160
// ---------------------------
161
162
// movement of the contact reporting coverage 'eps' is done using a dumb normal
163
// proposal. This is satisfying for now - we only reject a few non-sensical
164
// values outside the range [0;1]. The SD of the proposal is provided through
165
// 'config'; this seems fine as the range of real values will never change
166
// much. Probably not much point in using auto-tuning here.
167
168
// [[Rcpp::export(rng = true)]]
169
Rcpp::List cpp_move_eps(Rcpp::List param, Rcpp::List data, Rcpp::List config,
170
               Rcpp::RObject custom_ll = R_NilValue,
171
               Rcpp::RObject custom_prior = R_NilValue) {
172
173
  // deep copy here for now, ultimately should be an arg.
174
175
  Rcpp::List new_param = clone(param);
176
  Rcpp::NumericVector eps = param["eps"]; // these are just pointers
177
  Rcpp::NumericVector new_eps = new_param["eps"]; // these are just pointers
178
179
  double sd_eps = static_cast<double>(config["sd_eps"]);
180
181
  double old_logpost = 0.0, new_logpost = 0.0, p_accept = 0.0;
182
183
184
  // proposal (normal distribution with SD: config$sd_eps)
185
186
  new_eps[0] += R::rnorm(0.0, sd_eps); // new proposed value
187
188
189
  // automatic rejection of eps outside [0;1]
190
191
  if (new_eps[0] < 0.0 || new_eps[0] > 1.0) {
192
    return param;
193
  }
194
195
196
  // compute likelihoods
197
  old_logpost = cpp_ll_contact(data, param, R_NilValue, custom_ll);
198
  new_logpost = cpp_ll_contact(data, new_param, R_NilValue, custom_ll);
199
200
201
  // compute priors
202
203
  old_logpost += cpp_prior_eps(param, config, custom_prior);
204
  new_logpost += cpp_prior_eps(new_param, config, custom_prior);
205
206
207
  // acceptance term
208
209
  p_accept = exp(new_logpost - old_logpost);
210
211
212
  // acceptance: the new value is already in eps, so we only act if the move is
213
  // rejected, in which case we restore the previous ('old') value
214
215
  if (p_accept < unif_rand()) { // reject new values
216
    return param;
217
  }
218
219
  return new_param;
220
}
221
222
223
224
225
226
227
// ---------------------------
228
229
// movement of the non-infectious contact rate 'eps' is done using a dumb
230
// normal proposal. This is satisfying for now - we only reject a few
231
// non-sensical values outside the range [0;1]. The SD of the proposal is
232
// provided through 'config'; this seems fine as the range of real values will
233
// never change much. Probably not much point in using auto-tuning here.
234
235
// [[Rcpp::export(rng = true)]]
236
Rcpp::List cpp_move_lambda(Rcpp::List param, Rcpp::List data, Rcpp::List config,
237
               Rcpp::RObject custom_ll = R_NilValue,
238
               Rcpp::RObject custom_prior = R_NilValue) {
239
240
  // deep copy here for now, ultimately should be an arg.
241
242
  Rcpp::List new_param = clone(param);
243
  Rcpp::NumericVector lambda = param["lambda"]; // these are just pointers
244
  Rcpp::NumericVector new_lambda = new_param["lambda"]; // these are just pointers
245
246
  double sd_lambda = static_cast<double>(config["sd_lambda"]);
247
248
  double old_logpost = 0.0, new_logpost = 0.0, p_accept = 0.0;
249
250
251
  // proposal (normal distribution with SD: config$sd_lambda)
252
253
  new_lambda[0] += R::rnorm(0.0, sd_lambda); // new proposed value
254
255
256
  // automatic rejection of lambda outside [0;1]
257
258
  if (new_lambda[0] < 0.0 || new_lambda[0] > 1.0) {
259
    return param;
260
  }
261
262
263
  // compute likelihoods
264
  old_logpost = cpp_ll_contact(data, param, R_NilValue, custom_ll);
265
  new_logpost = cpp_ll_contact(data, new_param, R_NilValue, custom_ll);
266
267
268
  // compute priors
269
270
  old_logpost += cpp_prior_lambda(param, config, custom_prior);
271
  new_logpost += cpp_prior_lambda(new_param, config, custom_prior);
272
273
274
  // acceptance term
275
276
  p_accept = exp(new_logpost - old_logpost);
277
278
279
  // acceptance: the new value is already in lambda, so we only act if the move is
280
  // rejected, in which case we restore the previous ('old') value
281
282
  if (p_accept < unif_rand()) { // reject new values
283
    return param;
284
  }
285
286
  return new_param;
287
}
288
289
290
291
292
293
294
// ---------------------------
295
296
// Movement of infection dates are +/- 1 from current states. These movements
297
// are currently vectorised, i.e. a bunch of dates are proposed all together;
298
// this may not be sustainable for larger datasets. The non-vectorised option
299
// will be slower and speed-up with C/C++ will be more substantial then.
300
301
// This version differs from the initial R implementation in several points:
302
303
// 1. all cases are moved
304
// 2. cases are moved one by one
305
// 3. movement for each case is +/- 1 time unit
306
307
// Notes
308
309
// - when computing the timing log-likelihood, the descendents of each
310
// case are also affected.
311
312
// - we generate a new vector 'new_t_inf', which will replace the
313
// previous pointer defining param["t_inf"].
314
315
// [[Rcpp::export(rng = true)]]
316
Rcpp::List cpp_move_t_inf(Rcpp::List param, Rcpp::List data,
317
              Rcpp::RObject list_custom_ll = R_NilValue) {
318
319
  // deep copy here for now, ultimately should be an arg.
320
321
  Rcpp::List new_param = clone(param);
322
  Rcpp::IntegerVector t_inf = param["t_inf"];
323
  Rcpp::IntegerVector new_t_inf = new_param["t_inf"];
324
  Rcpp::IntegerVector alpha = param["alpha"];
325
  Rcpp::IntegerVector local_cases;
326
327
  size_t N = static_cast<size_t>(data["N"]);
328
329
  double old_loc_loglike = 0.0, new_loc_loglike = 0.0, p_loc_accept = 0.0;
330
331
332
  for (size_t i = 0; i < N; i++) {
333
334
    local_cases = cpp_find_descendents(param["alpha"], i+1);
335
336
    // loglike with current value
337
    old_loc_loglike = cpp_ll_timing(data, param, i+1, list_custom_ll); // term for case 'i' with offset
338
339
    // term descendents of 'i'
340
    if (local_cases.size() > 0) {
341
      old_loc_loglike += cpp_ll_timing(data, param, local_cases, list_custom_ll);
342
    }
343
344
    // proposal (+/- 1)
345
    new_t_inf[i] += unif_rand() > 0.5 ? 1 : -1; // new proposed value
346
347
    // loglike with new value
348
    new_loc_loglike = cpp_ll_timing(data, new_param, i+1, list_custom_ll); // term for case 'i' with offset
349
350
    // term descendents of 'i'
351
    if (local_cases.size() > 0) {
352
      new_loc_loglike += cpp_ll_timing(data, new_param, local_cases, list_custom_ll);
353
    }
354
355
356
    // acceptance term
357
    p_loc_accept = exp(new_loc_loglike - old_loc_loglike);
358
359
360
    // acceptance: the new value is already in t_inf, so we only act if the move
361
    // is rejected, in which case we restore the previous ('old') value
362
363
    if (p_loc_accept < unif_rand()) { // reject new values
364
      new_t_inf[i] = t_inf[i];
365
    }
366
  }
367
368
  return new_param;
369
370
}
371
372
373
374
375
376
377
// ---------------------------
378
379
// Movement of ancestries ('alpha') is not vectorised, movements are made one
380
// case at a time. This procedure is simply about picking an infector at random
381
// amongst cases preceeding the case considered. The original version in
382
// 'outbreaker' used to move simultaneously 'alpha', 'kappa' and 't_inf', but
383
// current implementation is simpler and seems to mix at least as well. Proper
384
// movement of 'alpha' needs this procedure as well as a swapping procedure
385
// (swaps are not possible through move.alpha only); in all instances, 'alpha'
386
// is on the scale 1:N.
387
388
// [[Rcpp::export(rng = true)]]
389
Rcpp::List cpp_move_alpha(Rcpp::List param, Rcpp::List data,
390
              Rcpp::RObject list_custom_ll = R_NilValue) {
391
  Rcpp::List new_param = clone(param);
392
  Rcpp::IntegerVector alpha = param["alpha"]; // pointer to param$alpha
393
  Rcpp::IntegerVector t_inf = param["t_inf"]; // pointer to param$t_inf
394
  Rcpp::IntegerVector new_alpha = new_param["alpha"];
395
396
  size_t N = static_cast<size_t>(data["N"]);
397
398
  double old_loglike = 0.0, new_loglike = 0.0, p_accept = 0.0;
399
400
401
  for (size_t i = 0; i < N; i++) {
402
403
    // only non-NA ancestries are moved, if there is at least 1 choice
404
    if (alpha[i] != NA_INTEGER && sum(t_inf < t_inf[i]) > 0) {
405
406
      // loglike with current value
407
      // old_loglike = cpp_ll_all(data, param, R_NilValue);
408
      old_loglike = cpp_ll_all(data, param, i+1, list_custom_ll); // offset
409
410
      // proposal (+/- 1)
411
      new_alpha[i] = cpp_pick_possible_ancestor(t_inf, i+1); // new proposed value (on scale 1 ... N)
412
413
      // loglike with current value
414
      new_loglike = cpp_ll_all(data, new_param, i+1, list_custom_ll);
415
416
      // acceptance term
417
      p_accept = exp(new_loglike - old_loglike);
418
419
      // which case we restore the previous ('old') value
420
      if (p_accept < unif_rand()) { // reject new values
421
    new_alpha[i] = alpha[i];
422
      } else {
423
    alpha[i] = new_alpha[i];
424
      }
425
    }
426
  }
427
428
  return new_param;
429
}
430
431
432
433
434
435
436
// ---------------------------
437
438
// The basic movement of ancestries (picking an ancestor at random amongst in
439
// previous cases) makes swaps of ancestries (A->B) to (B->A) very
440
// difficult. This function addresses the issue. It is computer-intensive, but
441
// likely a determining factor for faster mixing. Unlike previous versions in
442
// the original 'outbreaker' package, all cases are 'moved' here. A swap is
443
// defined as:
444
445
// x -> a -> b  becomes a -> x -> b
446
447
// Obviously cases are moved one at a time. We need to use local likelihood
448
// changes for this move to scale well with outbreak size. The complicated bit
449
// is that the move impacts all descendents from 'a' as well as 'x'.
450
451
// [[Rcpp::export(rng = true)]]
452
Rcpp::List cpp_move_swap_cases(Rcpp::List param, Rcpp::List data,
453
                   Rcpp::RObject list_custom_ll = R_NilValue) {
454
455
  Rcpp::List new_param = clone(param);
456
  Rcpp::IntegerVector alpha = param["alpha"]; // pointer to param$alpha
457
  Rcpp::IntegerVector t_inf = param["t_inf"]; // pointer to param$t_inf
458
  Rcpp::List swapinfo; // contains alpha and t_inf
459
  Rcpp::IntegerVector local_cases;
460
461
  size_t N = static_cast<size_t>(data["N"]);
462
  int N_int = data["N"];
463
  
464
  double old_loglike = 0.0, new_loglike = 0.0, p_accept = 0.0;
465
466
  // Shuffle indices to make equal cases equally likely
467
  Rcpp::IntegerVector idx = Rcpp::seq(0, N-1);
468
  idx = Rcpp::sample(idx, N_int, false);
469
470
  for (size_t j = 0; j < N; ++j) {
471
    size_t i = (size_t)idx[j];
472
473
    // only non-NA ancestries are moved, if there is at least 1 choice
474
    if (alpha[i] != NA_INTEGER && sum(t_inf < t_inf[i]) > 0) {
475
476
      // The local likelihood is defined as the likelihood computed for the
477
      // cases affected by the swap; these include:
478
479
      // - 'i'
480
      // - the descendents of 'i'
481
      // - 'alpha[i]'
482
      // - the descendents of 'alpha[i]' (other than 'i')
483
484
      local_cases = cpp_find_local_cases(param["alpha"], i+1);
485
486
487
      // loglike with current parameters
488
489
      old_loglike = cpp_ll_all(data, param, local_cases, list_custom_ll); // offset
490
491
492
      // proposal: swap case 'i' and its ancestor
493
494
      swapinfo = cpp_swap_cases(param, i+1);
495
      new_param["alpha"] = swapinfo["alpha"];
496
      new_param["t_inf"] = swapinfo["t_inf"];
497
      new_param["kappa"] = swapinfo["kappa"];
498
499
500
      // loglike with new parameters
501
502
      new_loglike = cpp_ll_all(data, new_param, local_cases, list_custom_ll);
503
504
505
      // acceptance term
506
507
      p_accept = exp(new_loglike - old_loglike);
508
509
510
      // acceptance: change param only if new values is accepted
511
512
      if (p_accept >= unif_rand()) { // accept new parameters
513
    param["alpha"] = new_param["alpha"];
514
    param["t_inf"] = new_param["t_inf"];
515
    param["kappa"] = new_param["kappa"];
516
      }
517
    }
518
  }
519
520
  return param;
521
}
522
523
524
525
526
527
528
// ---------------------------
529
530
531
// Movement of the number of generations on transmission chains ('kappa') is
532
// done for one ancestry at a time. As for infection times ('t_inf') we use a
533
// dumb, symmetric +/- 1 proposal. But because values are typically in a short
534
// range (e.g. [1-3]) we probably propose more dumb values here. We may
535
// eventually want to bounce back or use and correct for assymetric proposals.
536
537
// [[Rcpp::export(rng = true)]]
538
Rcpp::List cpp_move_kappa(Rcpp::List param, Rcpp::List data, Rcpp::List config,
539
              Rcpp::RObject list_custom_ll = R_NilValue) {
540
  Rcpp::List new_param = clone(param);
541
  Rcpp::IntegerVector alpha = param["alpha"]; // pointer to param$alpha
542
  Rcpp::IntegerVector kappa = param["kappa"]; // pointer to param$kappa
543
  Rcpp::IntegerVector t_inf = param["t_inf"]; // pointer to param$t_inf
544
  Rcpp::IntegerVector new_kappa = new_param["kappa"];
545
546
  size_t N = static_cast<size_t>(data["N"]);
547
  size_t K = static_cast<size_t>(config["max_kappa"]);
548
  size_t jump;
549
550
  double old_loglike = 0.0, new_loglike = 0.0, p_accept = 0.0;
551
552
  for (size_t i = 0; i < N; i++) {
553
554
    // only non-NA ancestries are moved
555
    if (alpha[i] != NA_INTEGER) {
556
557
      // propose new kappa
558
      jump = (unif_rand() > 0.5) ? 1 : -1;
559
      new_kappa[i] = kappa[i] + jump;
560
561
562
      // only look into this move if new kappa is positive and smaller than the
563
      // maximum value; if not, remember to reset the value of new_kappa to that
564
      // of kappa, otherwise we implicitely accept stupid moves automatically
565
566
      if (new_kappa[i] < 1 || new_kappa[i] > K) {
567
    new_kappa[i] = kappa[i];
568
      } else {
569
570
571
    // loglike with current parameters
572
    old_loglike = cpp_ll_all(data, param, i+1, list_custom_ll);
573
574
    
575
    // loglike with new parameters
576
    new_loglike = cpp_ll_all(data, new_param, i+1, list_custom_ll);
577
578
579
    // acceptance term
580
    p_accept = exp(new_loglike - old_loglike);
581
582
    // acceptance: change param only if new values is accepted
583
    if (p_accept >= unif_rand()) { // accept new parameters
584
      //       Rprintf("\naccepting kappa:%d  (p: %f  old ll:  %f  new ll: %f",
585
      //        new_kappa[i], p_accept, old_loglike, new_loglike);
586
      kappa[i] = new_kappa[i];
587
    } else {
588
      new_kappa[i] = kappa[i];
589
    }
590
      }
591
    }
592
593
  }
594
595
  return param;
596
}