Diff of /src/internals.cpp [000000] .. [dfe06d]

Switch to unified view

a b/src/internals.cpp
1
2
#include "internals.h"
3
4
5
6
// IMPORTANT: ON INDEXING VECTORS AND ANCESTRIES
7
8
// Most of the functions implemented here are susceptible to be called from R
9
// via Rcpp, and are therefore treated as interfaces. This causes a number of
10
// headaches when using indices of cases defined in R (1:N) to refer to elements
11
// in Rcpp / Cpp vectors (0:N-1). By convention, we store all data on the
12
// original scale (1:N), and modify indices whenever accessing elements of
13
// vectors. In other words, in an expression like 'alpha[j]', 'j' should always
14
// be on the internal scale (0:N-1).
15
16
// In all these functions, 'SEXP i' is an optional vector of case indices, on
17
// the 1:N scale.
18
19
20
21
22
23
// ---------------------------
24
25
//   This function returns a vector of indices of cases which could be infector
26
//   of 'i' (i.e., their infection dates preceed that of 'i'). Only tricky bit
27
//   here is keep in mind that 't_inf' is indexed from 0 to N-1, while 'i' and
28
//   'alpha' (ancestors) are values from 1 to N.
29
30
//   Original R code:
31
32
// are.possible.alpha <- function(t_inf, i) {
33
//     if (length(i)>1) {
34
//         stop("i has a length > 1")
35
//     }
36
//     if (any(t_inf[i]==min(t_inf))) {
37
//         return(NA)
38
//     }
39
//     return(which(t_inf < t_inf[i[1]]))
40
// }
41
42
// [[Rcpp::export()]]
43
std::vector<int> cpp_are_possible_ancestors(Rcpp::IntegerVector t_inf, size_t i) {
44
  size_t n = t_inf.size();
45
  std::vector<int> out;
46
  out.reserve(n);
47
  for (size_t j = 0; j < n; j++) {
48
    if (t_inf[j] < t_inf[i-1]) { // offset
49
      out.push_back(j+1); // +1 needed for range 1 ... N
50
    }
51
  }
52
  return out;
53
}
54
55
56
57
58
59
// ---------------------------
60
61
//  This function samples a single value from a vector of integers.
62
63
// [[Rcpp::export()]]
64
size_t cpp_sample1(std::vector<int> x) {
65
  if (x.size() < 1) {
66
    Rcpp::Rcerr << "Trying to sample from empty vector" << std::endl;
67
    Rcpp::stop("Trying to sample from empty vector");
68
  }
69
70
  return x[unif_rand() * x.size()];
71
}
72
73
74
75
76
// ---------------------------
77
78
//    This function choose a possible infector for case 'i'; 'i' is on the scale
79
//    1:N
80
81
//    Original R version:
82
83
// .choose.possible.alpha <- function(t_inf, i) {
84
//     return(sample(are.possible.alpha(t_inf=t_inf, i=i), 1))
85
// }
86
87
// [[Rcpp::export()]]
88
size_t cpp_pick_possible_ancestor(Rcpp::IntegerVector t_inf, size_t i) {
89
  return cpp_sample1(cpp_are_possible_ancestors(t_inf, i));
90
}
91
92
93
94
95
96
97
// ---------------------------
98
99
// This function returns the descendents of a given case 'i' in the current
100
// ancestries; 'i' is on the scale 1:N. The output is also on the scale 1:N.
101
102
// Original R version:
103
104
// find.descendents <- function(param, i) {
105
//   ## find descendents
106
//     which(param.current$alpha==i)
107
//  }
108
109
// [[Rcpp::export()]]
110
Rcpp::IntegerVector cpp_find_descendents(Rcpp::IntegerVector alpha, size_t i) {
111
  size_t counter = 0, n = 0;
112
113
  // determine size of output vector and create it
114
  for (size_t j = 0; j < alpha.size(); j++) {
115
    if (alpha[j] == i) n++;
116
  }
117
  
118
  Rcpp::IntegerVector out(n);
119
120
  // fill in output vector
121
  for (size_t j = 0; j < alpha.size(); j++) {
122
    if (alpha[j] == i) {
123
      out[counter++] = j + 1; // offset
124
    }
125
  }
126
  return out;
127
}
128
129
130
131
132
133
134
135
// ---------------------------
136
137
// This function returns a vector of indices of cases which are 'local' to a
138
// case 'i'. Locality is defined as the following set of cases:
139
140
// - 'i'
141
// - the descendents of 'i'
142
// - 'alpha[i-1]'
143
// - the descendents of 'alpha[i]' (excluding 'i')
144
145
// where 'alpha' is a IntegerVector storing ancestries. Note that 'i' and
146
// 'alpha' are on the scale 1:N. 
147
148
// [[Rcpp::export()]]
149
Rcpp::IntegerVector cpp_find_local_cases(Rcpp::IntegerVector alpha, size_t i) {
150
  // determine descendents of 'i':
151
  Rcpp::IntegerVector desc_i = cpp_find_descendents(alpha, i);
152
  size_t n = desc_i.size() + 1; // +1 is to count 'i' itself
153
  
154
  // determine descendents of 'alpha[i]':
155
  Rcpp::IntegerVector desc_alpha_i = cpp_find_descendents(alpha,
156
                              (size_t) alpha[i-1]);
157
  if (alpha[i-1] != NA_INTEGER) {
158
    n += desc_alpha_i.size();
159
  }
160
161
  // create output
162
  Rcpp::IntegerVector out(n);
163
  size_t counter = 0;
164
165
  // 'i'
166
  out[counter++] = i;
167
168
  // 'descendents of 'i'
169
  for (size_t j = 0; j < desc_i.size(); j++) {
170
    out[counter++] = desc_i[j];
171
  }
172
  
173
  if (alpha[i-1] != NA_INTEGER) {
174
    // alpha[i-1] ...
175
    out[counter++] = alpha[i-1];
176
    
177
    // ... and its descendents
178
    for (size_t j = 0; j < desc_alpha_i.size(); j++) {
179
      if ( desc_alpha_i[j] != i) {
180
    out[counter++] = desc_alpha_i[j];
181
      }
182
    }
183
  }
184
185
  return out;
186
}
187
188
189
190
191
192
193
194
195
// ---------------------------
196
197
// This function swaps cases in a transmission tree. The focus case is 'i', and
198
// is swapped with its ancestor 'x=alpha[i-1]'. In other words the change is
199
// from: x -> i to i -> x
200
// Involved changes are:
201
202
// - descendents of 'i' become descendents of 'x'
203
// - descendents of 'x' become descendents of 'i'
204
// - the infector if 'i' becomes the infector of 'x' (i.e. alpha[x-1])
205
// - the infector if 'x' becomes 'i'
206
// - infection time of 'i' becomes that of 'x'
207
// - infection time of 'x' becomes that of 'i'
208
209
// Note on indexing: 'i', 'x', and values of alpha are on the scale 1:N. The
210
// function's output is a list with new alpha and t_inf.
211
212
// Note on forbidden swaps: two types of swaps are excluded:
213
// - 'i' is imported, so that 'alpha[i-1]' is NA_INTEGER
214
// - 'x' is imported, so that 'alpha[x-1]' is NA_INTEGER
215
216
// [[Rcpp::export()]]
217
Rcpp::List cpp_swap_cases(Rcpp::List param, size_t i) {
218
  Rcpp::IntegerVector alpha_in = param["alpha"];
219
  Rcpp::IntegerVector t_inf_in = param["t_inf"];
220
  Rcpp::IntegerVector kappa_in = param["kappa"];
221
  Rcpp::IntegerVector alpha_out = clone(alpha_in);
222
  Rcpp::IntegerVector t_inf_out = clone(t_inf_in);
223
  Rcpp::IntegerVector kappa_out = clone(kappa_in);
224
  Rcpp::List out;
225
  out["alpha"] = alpha_out;
226
  out["t_inf"] = t_inf_out;
227
  out["kappa"] = kappa_out;
228
      
229
  size_t N = alpha_in.size();
230
  
231
  // escape if the case is imported, i.e. alpha[i-1] is NA
232
  
233
  if (alpha_in[i-1] == NA_INTEGER) {
234
    return out;
235
  }
236
237
238
  // escape if ancestor of the case is imported, i.e. alpha[x-1] is NA
239
  
240
  size_t x = (size_t) alpha_in[i-1];
241
  //if (alpha_in[x-1] == NA_INTEGER) {
242
  // return out;
243
  //}
244
  
245
 
246
  // replace ancestries:
247
  // - descendents of 'i' become descendents of 'x'
248
  // - descendents of 'x' become descendents of 'i'
249
250
  for (size_t j = 0; j < N; j++) {
251
    if (alpha_in[j] == i) {
252
      alpha_out[j] = x;
253
    } else if (alpha_in[j] == x) {
254
      alpha_out[j] = i;
255
    }
256
  }
257
258
259
  // the ancestor of 'i' becomes an ancestor of 'x'
260
261
  alpha_out[i-1] = alpha_in[x-1];
262
263
  
264
  // 'i' is now the ancestor of 'x'
265
  alpha_out[x-1] = i;
266
  
267
268
  // swap infections times of 'i' and 'x'
269
  t_inf_out[i-1] =   t_inf_in[x-1];
270
  t_inf_out[x-1] =   t_inf_in[i-1];
271
272
  kappa_out[i-1] =   kappa_in[x-1];
273
  kappa_out[x-1] =   kappa_in[i-1];
274
275
276
  return out;
277
}
278
279
280
281
282
283
284
// ---------------------------
285
286
// This function returns the number of mutations between two cases from a 'data'
287
// object. It uses the indexing of cases in the DNA matrix to ensure
288
// correspondance between cases and their sequences (not all cases may have a
289
// sequence). 
290
291
// i and j are indices of cases on the scale 1:N; note that the vectors and
292
// matrices are indexed on 0:(N-1).
293
294
// [[Rcpp::export()]]
295
size_t cpp_get_n_mutations(Rcpp::List data, size_t i, size_t j) {
296
  Rcpp::LogicalVector has_dna = data["has_dna"];
297
  Rcpp::IntegerVector id_in_dna = data["id_in_dna"];
298
  Rcpp::IntegerMatrix D = data["D"];
299
300
  
301
  // Ideally we should return NA_integer here, but then the type of the function
302
  // should be a Rcpp::IntegerVector, which would complicate things. The second
303
  // best thing we can do here really is to issue an error when trying to get
304
  // number of mutations between cases with missing sequences.
305
  
306
  if (!(has_dna[i-1] && has_dna[j-1])) {
307
    Rcpp::stop("Trying to get genetic distances between missing sequences.");
308
  } 
309
310
  size_t out = D(id_in_dna[i-1] - 1, id_in_dna[j-1] - 1);
311
312
  return out;
313
  
314
}
315
316
317
318
319
320
321
// ---------------------------
322
323
// This function looks up a transmission chain to find the most recent ancestor
324
// with a sequence, for a given case 'i'. It stops at two conditions: i) it
325
// finds a sequenced ancestor, or ii) the current ancestor is 'NA'. It returns a
326
// List with three values: i) the index of the most recent ancestor (on the
327
// scale 1:N), ii) the total number of generations between this case and 'i',
328
// and iii) a logical 'found_sequenced_ancestor'. If the latter is FALSE, then
329
// previous values are 'NA_INTEGER'.
330
331
// This is the exported interface. It calls upon a non-exported function
332
// (lookup_sequenced_ancestor) which does not make memory allocation for the
333
// output, but instead modifies one of its arguments. This trade-off pays as it
334
// allows for unit testing via the interface, but remains quite fast as the
335
// non-exported function can be used internally. "i" is indexed on 1:N.
336
337
// [[Rcpp::export()]]
338
Rcpp::List cpp_lookup_sequenced_ancestor(Rcpp::List data, Rcpp::List param, size_t i) {
339
 
340
  Rcpp::IntegerVector alpha = param["alpha"];
341
  Rcpp::IntegerVector kappa = param["kappa"];
342
  Rcpp::LogicalVector has_dna = data["has_dna"];
343
  
344
  Rcpp::List out;
345
  Rcpp::IntegerVector out_ances(1);
346
  Rcpp::IntegerVector out_n_generations(1);
347
  Rcpp::LogicalVector out_found_sequenced_ancestor(1);
348
  out["alpha"] = out_ances;
349
  out["n_generations"] = out_n_generations;
350
  out["found_sequenced_ancestor"] = out_found_sequenced_ancestor;
351
  
352
  size_t ances[1];
353
  size_t n_generations[1];
354
  bool found_sequenced_ancestor[1];
355
356
  ances[0] = NA_INTEGER;
357
  n_generations[0] = NA_INTEGER;
358
  found_sequenced_ancestor[0] = false;
359
360
  // This function modifies its last argument
361
  lookup_sequenced_ancestor(alpha, kappa, has_dna, i, // inputs
362
                ances, n_generations, 
363
                found_sequenced_ancestor); // outputs
364
365
366
  out_ances[0] = static_cast<int>(ances[0]);
367
  out_n_generations[0] =  static_cast<int>(n_generations[0]); 
368
  out_found_sequenced_ancestor[0] = found_sequenced_ancestor[0];
369
370
  return out;
371
}
372
373
374
375
376
377
378
// ---------------------------
379
380
// This function is the internal version of cpp_lookup_sequenced_ancestor. It is
381
// not meant to be called by users, only by internal procedures, as it modifies
382
// the content of its last argument rather than creating a new object, which is
383
// obviously dangerous. Only use it carefully if you handled the creating of its
384
// last argument 'out'. 'out_' are technically outputs with three components:
385
// "ances" (IntegerVector of size 1), "n_generations" (same), and
386
// "found_sequenced_ancestor" (LogicalVector of length 1). "i" is indexed on
387
// 1:N.
388
389
void lookup_sequenced_ancestor(Rcpp::IntegerVector alpha, Rcpp::IntegerVector kappa, 
390
                   Rcpp::LogicalVector has_dna, size_t i, 
391
                   size_t *out_alpha, 
392
                   size_t *out_n_generations, 
393
                   bool *out_found_sequenced_ancestor
394
                   ) {
395
396
  if (!has_dna[i - 1] || alpha[i - 1] == NA_INTEGER) {
397
    return;
398
  }
399
  
400
 
401
  size_t current_case = i; // this one is indexed on 1:N
402
  size_t n_generations = kappa[current_case - 1];
403
  bool ances_has_dna = has_dna[alpha[current_case - 1] - 1]; // offset for indexing vectors
404
    
405
406
  // look recursively for ancestor with sequence if needed
407
        
408
  while (!ances_has_dna && (alpha[current_case - 1] != NA_INTEGER)) {
409
    current_case = alpha[current_case - 1]; // 1 step back up the transmission chain
410
    ances_has_dna = (alpha[current_case - 1] != NA_INTEGER) && // need to test for NA *first*
411
      has_dna[alpha[current_case - 1] - 1]; // offset for indexing vectors
412
    n_generations += kappa[current_case - 1];
413
  }
414
415
416
  // change outputs as needed
417
418
  
419
  if (ances_has_dna) {
420
      out_alpha[0] =  alpha[current_case - 1];
421
      out_n_generations[0] = n_generations;
422
      out_found_sequenced_ancestor[0] = true;
423
  } else {
424
    out_alpha[0] = NA_INTEGER;
425
    out_n_generations[0] = NA_INTEGER;
426
    out_found_sequenced_ancestor[0] = false;
427
  }
428
  
429
}
430
431