--- a
+++ b/src/likelihoods.cpp
@@ -0,0 +1,619 @@
+#include <Rmath.h>
+#include <Rcpp.h>
+#include "internals.h"
+#include "likelihoods.h"
+
+
+// IMPORTANT: ON INDEXING VECTORS AND ANCESTRIES
+
+// Most of the functions implemented here are susceptible to be called from R
+// via Rcpp, and are therefore treated as interfaces. This causes a number of
+// headaches when using indices of cases defined in R (1:N) to refer to elements
+// in Rcpp / Cpp vectors (0:N-1). By convention, we store all data on the
+// original scale (1:N), and modify indices whenever accessing elements of
+// vectors. In other words, in an expression like 'alpha[j]', 'j' should always
+// be on the internal scale (0:N-1).
+
+// In all these functions, 'SEXP i' is an optional vector of case indices, on
+// the 1:N scale.
+
+
+
+
+
+
+// ---------------------------
+
+// This likelihood corresponds to the probability of observing a number of
+// mutations between cases and their ancestors. See src/likelihoods.cpp for
+// details of the Rcpp implmentation.
+
+// The likelihood is based on the number of mutations between a case and its
+// ancestor; these are extracted from a pairwise genetic distance matrix
+// (data$D) the log-likelihood is computed as: sum(mu^nmut + (1-mu)^(L-nmut))
+// with:
+
+// 'mu' is the mutation probability
+// 'L' the number of sites in the alignment
+// 'n_mut' the number of mutations between an ancestor and its descendent
+// 'n_non_mut' the number of sites that have not mutated
+
+// For any given case at 'nmut' mutations from its ancestor, with kappa
+// generations in between, the log-likelihood is defined as:
+
+// log(mu) * n_mut + log(1 - mu) * {(L - n_mut) + (L * (kappa-1))}
+
+
+// when summing over several individuals, it becomes:
+
+// log(mu) * sum_i(n_mut_i) + log(1-mu) * sum_i((L - n_mut_i) + (L * (kappa_i - 1)))
+
+double cpp_ll_genetic(Rcpp::List data, Rcpp::List param, SEXP i,
+		      Rcpp::RObject custom_function) {
+  Rcpp::IntegerMatrix D = data["D"];
+  if (D.ncol() < 1) return 0.0;
+
+  size_t N = static_cast<size_t>(data["N"]);
+  if (N < 2) return 0.0;
+
+  Rcpp::List l;
+  if (custom_function != R_NilValue) {
+    l = Rcpp::as<Rcpp::List>(custom_function);
+  }
+  if (custom_function == R_NilValue || (l.size() > 0 && l[0] == R_NilValue)) {
+
+    // Variables from the data & param
+    Rcpp::NumericMatrix w_dens = data["log_w_dens"];
+    size_t K = w_dens.nrow();
+    double mu = Rcpp::as<double>(param["mu"]);
+    long int L = Rcpp::as<int>(data["L"]);
+    Rcpp::IntegerVector alpha = param["alpha"]; // values are on 1:N
+    Rcpp::IntegerVector kappa = param["kappa"];
+    Rcpp::LogicalVector has_dna = data["has_dna"];
+
+
+    // Local variables used for computatoins
+    size_t n_mut = 0;
+    size_t n_non_mut = 0;
+    double out = 0;
+    bool found[1];
+    size_t ances[1];
+    size_t n_generations[1];
+    found[0] = false;
+    ances[0] = NA_INTEGER;
+    n_generations[0] = NA_INTEGER;
+
+  
+    // Invalid values of mu
+    if (mu < 0.0 || mu > 1.0) {
+      return R_NegInf;
+    }
+
+    
+    // NOTE ON MISSING SEQUENCES
+
+    // Terms participating to the genetic likelihood correspond to pairs
+    // of ancestor-descendent which have a genetic sequence. The
+    // log-likelihood of other pairs is 0.0, and can therefore be
+    // ommitted. Note the possible source of confusion in indices here:
+
+    // 'has_dna' is a vector, thus indexed from 0:(N-1)
+	  
+    // 'cpp_get_n_mutations' is a function, and thus takes indices on 1:N
+
+
+    
+    // all cases are retained
+    
+    if (i == R_NilValue) {
+      for (size_t j = 0; j < N; j++) { // 'j' on 0:(N-1)
+	if (alpha[j] != NA_INTEGER) {
+
+	  // kappa restriction
+
+	  if (kappa[j] < 1 || kappa[j] > K) {
+	    return R_NegInf;
+	  }
+
+	  // missing sequences handled here
+	  
+	  if (has_dna[j]) {
+	 
+	    lookup_sequenced_ancestor(alpha, kappa, has_dna, j + 1,
+				      ances, n_generations, found);
+
+	    if (found[0]) {
+
+	      n_mut = cpp_get_n_mutations(data, j + 1, ances[0]); // remember the offset
+	      n_non_mut = L - n_mut;
+
+	      out += n_mut*log(n_generations[0]*mu) +
+		n_non_mut*log(1 - n_generations[0]*mu);
+		
+	    }
+	  }
+	}
+      }
+
+    } else {
+      // only the cases listed in 'i' are retained
+      size_t length_i = static_cast<size_t>(LENGTH(i));
+      Rcpp::IntegerVector vec_i(i);
+      for (size_t k = 0; k < length_i; k++) {
+	size_t j = vec_i[k] - 1; // offset
+	if (alpha[j] != NA_INTEGER) {
+	  // kappa restriction
+	  if (kappa[j] < 1 || kappa[j] > K) {
+	    return R_NegInf;
+	  }
+
+	  // missing sequences handled here
+	  	  
+	  if (has_dna[j]) {
+	 
+	    lookup_sequenced_ancestor(alpha, kappa, has_dna, j + 1, 
+				      ances, n_generations, found);
+
+	    if (found[0]) {
+
+	      n_mut = cpp_get_n_mutations(data, j + 1, ances[0]); // remember the offset
+	      n_non_mut = L - n_mut;
+	      
+	      out += n_mut*log(n_generations[0]*mu) +
+		n_non_mut*log(1 - n_generations[0]*mu);
+	      
+	    }
+	  }
+	  
+	}
+
+      }
+    }
+
+    return(out);
+    
+  } else { // use of a customized likelihood function
+    Rcpp::Function f = Rcpp::as<Rcpp::Function>(l[0]);
+    int arity = l[1];
+    if (arity == 3) return Rcpp::as<double>(f(data, param, i));
+    return Rcpp::as<double>(f(data, param));
+  }
+}
+
+
+double cpp_ll_genetic(Rcpp::List data, Rcpp::List param, size_t i,
+              Rcpp::RObject custom_function) {
+  SEXP si = PROTECT(Rcpp::wrap(i));
+  double ret = cpp_ll_genetic(data, param, si, custom_function);
+  UNPROTECT(1);
+  return ret;
+}
+
+
+
+
+
+// ---------------------------
+
+// This likelihood corresponds to the probability of observing infection dates
+// of cases given the infection dates of their ancestors.
+
+double cpp_ll_timing_infections(Rcpp::List data, Rcpp::List param, SEXP i,
+				Rcpp::RObject custom_function) {
+  size_t N = static_cast<size_t>(data["N"]);
+  if(N < 2) return 0.0;
+
+  Rcpp::List l;
+  if (custom_function != R_NilValue) {
+    l = Rcpp::as<Rcpp::List>(custom_function);
+  }
+  if (custom_function == R_NilValue || (l.size() > 0 && l[0] == R_NilValue)) {
+
+    Rcpp::IntegerVector alpha = param["alpha"];
+    Rcpp::IntegerVector t_inf = param["t_inf"];
+    Rcpp::IntegerVector kappa = param["kappa"];
+    Rcpp::NumericMatrix w_dens = data["log_w_dens"];
+    size_t K = w_dens.nrow();
+
+    double out = 0.0;
+
+    // all cases are retained
+    if (i == R_NilValue) {
+      for (size_t j = 0; j < N; j++) {
+	if (alpha[j] != NA_INTEGER) {
+	  size_t delay = t_inf[j] - t_inf[alpha[j] - 1]; // offset
+	  if (delay < 1 || delay > w_dens.ncol()) {
+	    return  R_NegInf;
+	  }
+	  if (kappa[j] < 1 || kappa[j] > K) {
+	    return  R_NegInf;
+	  }
+
+	  out += w_dens(kappa[j] - 1, delay - 1);
+	}
+      }
+    } else {
+      // only the cases listed in 'i' are retained
+      size_t length_i = static_cast<size_t>(LENGTH(i));
+      Rcpp::IntegerVector vec_i(i);
+      for (size_t k = 0; k < length_i; k++) {
+	size_t j = vec_i[k] - 1; // offset
+	if (alpha[j] != NA_INTEGER) {
+	  size_t delay = t_inf[j] - t_inf[alpha[j] - 1]; // offset
+	  if (delay < 1 || delay > w_dens.ncol()) {
+	    return  R_NegInf;
+	  }
+	  if (kappa[j] < 1 || kappa[j] > K) {
+	    return  R_NegInf;
+	  }
+
+	  out += w_dens(kappa[j] - 1, delay - 1);
+	}
+
+      }
+    }
+
+    return out;
+  } else { // use of a customized likelihood function
+    Rcpp::Function f = Rcpp::as<Rcpp::Function>(l[0]);
+    int arity = l[1];
+    if (arity == 3) return Rcpp::as<double>(f(data, param, i));
+    return Rcpp::as<double>(f(data, param));
+  }
+}
+
+
+double cpp_ll_timing_infections(Rcpp::List data, Rcpp::List param, size_t i,
+              Rcpp::RObject custom_function) {
+  SEXP si = PROTECT(Rcpp::wrap(i));
+  double ret = cpp_ll_timing_infections(data, param, si, custom_function);
+  UNPROTECT(1);
+  return ret;
+}
+
+
+
+
+
+// ---------------------------
+
+// This likelihood corresponds to the probability of reporting dates of cases
+// given their infection dates.
+
+double cpp_ll_timing_sampling(Rcpp::List data, Rcpp::List param, SEXP i,
+			      Rcpp::RObject custom_function) {
+  size_t N = static_cast<size_t>(data["N"]);
+  if(N < 2) return 0.0;
+
+  Rcpp::List l;
+  if (custom_function != R_NilValue) {
+    l = Rcpp::as<Rcpp::List>(custom_function);
+  }
+  if (custom_function == R_NilValue || (l.size() > 0 && l[0] == R_NilValue)) {
+
+    Rcpp::IntegerVector dates = data["dates"];
+    Rcpp::IntegerVector t_inf = param["t_inf"];
+    Rcpp::NumericVector f_dens = data["log_f_dens"];
+
+    double out = 0.0;
+
+    // all cases are retained
+    if (i == R_NilValue) {
+      for (size_t j = 0; j < N; j++) {
+	size_t delay = dates[j] - t_inf[j];
+	if (delay < 1 || delay > f_dens.size()) {
+	  return  R_NegInf;
+	}
+	out += f_dens[delay - 1];
+      }
+    } else {
+      // only the cases listed in 'i' are retained
+      size_t length_i = static_cast<size_t>(LENGTH(i));
+      Rcpp::IntegerVector vec_i(i);
+      for (size_t k = 0; k < length_i; k++) {
+	size_t j = vec_i[k] - 1; // offset
+	size_t delay = dates[j] - t_inf[j];
+	if (delay < 1 || delay > f_dens.size()) {
+	  return  R_NegInf;
+	}
+	out += f_dens[delay - 1];
+      }
+    }
+
+    return out;
+  }  else { // use of a customized likelihood function
+    Rcpp::Function f = Rcpp::as<Rcpp::Function>(l[0]);
+    int arity = l[1];
+    if (arity == 3) return Rcpp::as<double>(f(data, param, i));
+    return Rcpp::as<double>(f(data, param));
+  }
+}
+
+
+double cpp_ll_timing_sampling(Rcpp::List data, Rcpp::List param, size_t i,
+              Rcpp::RObject custom_function) {
+  SEXP si = PROTECT(Rcpp::wrap(i));
+  double ret = cpp_ll_timing_sampling(data, param, si, custom_function);
+  UNPROTECT(1);
+  return ret;
+}
+
+
+
+
+
+// ---------------------------
+
+// This likelihood corresponds to the probability of a given number of
+// unreported cases on an ancestry.
+
+// The likelihood is given by a geometric distribution with probability 'pi'
+// to report a case
+
+// - 'kappa' is the number of generation between two successive cases
+// - 'kappa-1' is the number of unreported cases
+
+double cpp_ll_reporting(Rcpp::List data, Rcpp::List param, SEXP i,
+			Rcpp::RObject custom_function) {
+  Rcpp::NumericMatrix w_dens = data["log_w_dens"];
+  size_t K = w_dens.nrow();
+
+  size_t N = static_cast<size_t>(data["N"]);
+  if(N < 2) return 0.0;
+
+  double pi = static_cast<double>(param["pi"]);
+  Rcpp::IntegerVector kappa = param["kappa"];
+
+  // p(pi < 0) = p(pi > 1) = 0
+  if (pi < 0.0 || pi > 1.0) {
+    return R_NegInf;
+  }
+
+  Rcpp::List l;
+  if (custom_function != R_NilValue) {
+    l = Rcpp::as<Rcpp::List>(custom_function);
+  }
+  if (custom_function == R_NilValue || (l.size() > 0 && l[0] == R_NilValue)) {
+
+    double out = 0.0;
+
+    // all cases are retained
+    if (i == R_NilValue) {
+      for (size_t j = 0; j < N; j++) {
+	if (kappa[j] != NA_INTEGER) {
+	  if (kappa[j] < 1 || kappa[j] > K) {
+	    return  R_NegInf;
+	  }
+	  out += R::dgeom(kappa[j] - 1.0, pi, 1); // first arg must be cast to double
+	}
+      }
+    } else {
+      // only the cases listed in 'i' are retained
+      size_t length_i = static_cast<size_t>(LENGTH(i));
+      Rcpp::IntegerVector vec_i(i);
+      for (size_t k = 0; k < length_i; k++) {
+	size_t j = vec_i[k] - 1; // offset
+	if (kappa[j] != NA_INTEGER) {
+	  if (kappa[j] < 1 || kappa[j] > K) {
+	    return  R_NegInf;
+	  }
+	  out += R::dgeom(kappa[j] - 1.0, pi, 1); // first arg must be cast to double
+	}
+      }
+    }
+
+    return out;
+  } else { // use of a customized likelihood function
+    Rcpp::Function f = Rcpp::as<Rcpp::Function>(l[0]);
+    int arity = l[1];
+    if (arity == 3) return Rcpp::as<double>(f(data, param, i));
+    return Rcpp::as<double>(f(data, param));
+  }
+}
+
+
+double cpp_ll_reporting(Rcpp::List data, Rcpp::List param, size_t i,
+              Rcpp::RObject custom_function) {
+  SEXP si = PROTECT(Rcpp::wrap(i));
+  double ret = cpp_ll_reporting(data, param, si, custom_function);
+  UNPROTECT(1);
+  return ret;
+}
+
+
+
+
+
+// ---------------------------
+
+// This likelihood corresponds to the probability of observing a a reported
+// contact between cases and their ancestors. See
+// src/likelihoods.cpp for details of the Rcpp implmentation.
+
+// The likelihood is based on the contact status between a case and its
+// ancestor; this is extracted from a pairwise contact matrix (data$C), the
+// log-likelihood is computed as:
+// true_pos*eps + false_pos*eps*xi +
+// false_neg*(1- eps) + true_neg*(1 - eps*xi)
+//
+// with:
+// 'eps' is the contact reporting coverage
+// 'lambda' is the non-infectious contact rate
+// 'true_pos' is the number of contacts between transmission pairs
+// 'false_pos' is the number of contact between non-transmission pairs
+// 'false_neg' is the number of transmission pairs without contact
+// 'true_neg' is the number of non-transmission pairs without contact
+
+double cpp_ll_contact(Rcpp::List data, Rcpp::List param, SEXP i,
+		       Rcpp::RObject custom_function) {
+  Rcpp::NumericMatrix contacts = data["contacts"];
+  if (contacts.ncol() < 1) return 0.0;
+
+  size_t C_combn = static_cast<size_t>(data["C_combn"]);
+  size_t C_nrow = static_cast<size_t>(data["C_nrow"]);
+
+  size_t N = static_cast<size_t>(data["N"]);
+  if (N < 2) return 0.0;
+
+  Rcpp::List l;
+  if (custom_function != R_NilValue) {
+    l = Rcpp::as<Rcpp::List>(custom_function);
+  }
+  if (custom_function == R_NilValue || (l.size() > 0 && l[0] == R_NilValue)) {
+
+    double out;
+    double eps = Rcpp::as<double>(param["eps"]);
+    double lambda = Rcpp::as<double>(param["lambda"]);
+    Rcpp::IntegerVector alpha = param["alpha"];
+    Rcpp::IntegerVector kappa = param["kappa"];
+
+    size_t true_pos = 0;
+    size_t false_pos = 0;
+    size_t false_neg = 0;
+    size_t true_neg = 0;
+    size_t imports = 0;
+    size_t unobsv_case = 0;
+
+    // p(eps < 0 || lambda < 0) = 0
+    if (eps < 0.0 || lambda < 0.0) {
+      return R_NegInf;
+    }
+
+    // all cases are retained (currently no support for i subsetting)
+    for (size_t j = 0; j < N; j++) {
+      if (alpha[j] == NA_INTEGER) {
+	imports += 1;
+      } else if (kappa[j] > 1) {
+	unobsv_case += 1;
+      } else {
+	true_pos += contacts(j, alpha[j] - 1); // offset
+      }
+    }
+
+    false_pos = C_nrow - true_pos;
+    false_neg = N - imports - unobsv_case - true_pos;
+    true_neg = C_combn - true_pos - false_pos - false_neg;
+
+    // deal with special case when lambda == 0 and eps == 1, to avoid log(0)
+    if(lambda == 0.0) {
+      if(false_pos > 0) {
+	out = R_NegInf;
+      } else {
+	out = log(eps) * (double) true_pos +
+	  log(1 - eps) * (double) false_neg +
+	  log(1 - eps*lambda) * (double) true_neg;
+      }
+    } else if(eps == 1.0) {
+      if(false_neg > 0) {
+	out = R_NegInf;
+      } else {
+	out = log(eps) * (double) true_pos +
+	  log(eps*lambda) * (double) false_pos +
+	  log(1 - eps*lambda) * (double) true_neg;
+      }
+    } else {
+      out = log(eps) * (double) true_pos +
+	log(eps*lambda) * (double) false_pos +
+	log(1 - eps) * (double) false_neg +
+	log(1 - eps*lambda) * (double) true_neg;
+    }
+
+    return out;
+    
+  } else { //use of a customized likelihood function
+    Rcpp::Function f = Rcpp::as<Rcpp::Function>(l[0]);
+    int arity = l[1];
+    if (arity == 3) return Rcpp::as<double>(f(data, param, i));
+    return Rcpp::as<double>(f(data, param));
+  }
+}
+
+
+double cpp_ll_contact(Rcpp::List data, Rcpp::List param, size_t i,
+              Rcpp::RObject custom_function) {
+  SEXP si = PROTECT(Rcpp::wrap(i));
+  double ret = cpp_ll_contact(data, param, si, custom_function);
+  UNPROTECT(1);
+  return ret;
+}
+
+
+
+
+
+// ---------------------------
+
+// This likelihood corresponds to the sums of the separate timing likelihoods,
+// which include:
+
+// - p(infection dates): see function cpp_ll_timing_infections
+// - p(collection dates): see function cpp_ll_timing_sampling
+
+double cpp_ll_timing(Rcpp::List data, Rcpp::List param, SEXP i,
+		     Rcpp::RObject custom_functions) {
+
+  if (custom_functions == R_NilValue) {
+    return cpp_ll_timing_infections(data, param, i) +
+      cpp_ll_timing_sampling(data, param, i);
+  } else { // use of a customized likelihood functions
+    Rcpp::List list_functions = Rcpp::as<Rcpp::List>(custom_functions);
+    return cpp_ll_timing_infections(data, param, i, list_functions["timing_infections"]) +
+      cpp_ll_timing_sampling(data, param, i, list_functions["timing_sampling"]);
+
+  }
+}
+
+
+double cpp_ll_timing(Rcpp::List data, Rcpp::List param, size_t i,
+              Rcpp::RObject custom_function) {
+  SEXP si = PROTECT(Rcpp::wrap(i));
+  double ret = cpp_ll_timing(data, param, si, custom_function);
+  UNPROTECT(1);
+  return ret;
+}
+
+
+
+
+// ---------------------------
+
+// This likelihood corresponds to the sums of the separate likelihoods, which
+// include:
+
+// - p(infection dates): see function cpp_ll_timing_infections
+// - p(collection dates): see function cpp_ll_timing_sampling
+// - p(genetic diversity): see function cpp_ll_genetic
+// - p(missing cases): see function cpp_ll_reporting
+// - p(contact): see function cpp_ll_contact 
+
+double cpp_ll_all(Rcpp::List data, Rcpp::List param, SEXP i,
+		  Rcpp::RObject custom_functions) {
+
+  if (custom_functions == R_NilValue) {
+
+    return cpp_ll_timing_infections(data, param, i) +
+      cpp_ll_timing_sampling(data, param, i) +
+      cpp_ll_genetic(data, param, i) +
+      cpp_ll_reporting(data, param, i) +
+      cpp_ll_contact(data, param, i);
+
+  }  else { // use of a customized likelihood functions
+    Rcpp::List list_functions = Rcpp::as<Rcpp::List>(custom_functions);
+
+    return cpp_ll_timing_infections(data, param, i, list_functions["timing_infections"]) +
+      cpp_ll_timing_sampling(data, param, i, list_functions["timing_sampling"]) +
+      cpp_ll_genetic(data, param, i, list_functions["genetic"]) +
+      cpp_ll_reporting(data, param, i, list_functions["reporting"]) +
+      cpp_ll_contact(data, param, i, list_functions["contact"]);
+
+  }
+}
+
+
+double cpp_ll_all(Rcpp::List data, Rcpp::List param, size_t i,
+              Rcpp::RObject custom_function) {
+  SEXP si = PROTECT(Rcpp::wrap(i));
+  double ret = cpp_ll_all(data, param, si, custom_function);
+  UNPROTECT(1);
+  return ret;
+}