Switch to side-by-side view

--- a
+++ b/R/outbreaker_find_imports.R
@@ -0,0 +1,89 @@
+
+## This function is not exported and is meant for internal use in
+## outbreaker2. It is near identical to the MCMC implemented by
+## 'outbreaker.move'. The only difference is it has its own number of iterations
+## ('config$n.iter.import') and sampling frequency
+## ('config$sample.every.import'), and stores individual likelihoods for each
+## case and saved iteration. The rationale is to use these chains to compute the
+## 'global influences' of each case, flag outliers based on these values and an
+## arbitrary threshold ('config$outlier.threshold'), and mark these cases as
+## imported, i.e. for which the ancestor will be 'NA'.
+
+
+outbreaker_find_imports <- function(moves, data, param_current,
+                                    param_store, config,
+                                    likelihoods) {
+  ## send back unchanged chains if config disabled the detection of imported cases ##
+  if (!config$find_import) {
+    return(list(config = config,
+                param_current = param_current,
+                param_store = param_store))
+  }
+
+  ## store initial param values ##
+  ini_param <- list(current = param_current, store = param_store)
+
+  ## get number of moves ##
+  J <- length(moves)
+
+  ## create matrix of individual influences ##
+  n_measures <- floor((config$n_iter_import - 1000) / config$sample_every_import)
+  influences <- matrix(0, ncol = data$N, nrow = n_measures)
+  counter <- 1L
+
+  ## remove the contact likelihood from outlier detection
+  tmp_likelihoods <- likelihoods
+  tmp_likelihoods$contact <- list(function(data, param) return(0), 0)
+
+  ## RUN MCMC ##
+  for (i in seq.int(2, config$n_iter_import, 1)) {
+    ## move parameters / augmented data
+    for (j in seq_len(J)) {
+      ## move parameters
+      param_current <- moves[[j]](param_current)
+
+      ## safemode
+      if (config$paranoid) {
+        diagnostic <- look_for_trouble(param_current, param_store, data)
+        if (!diagnostic$pass) {
+          stop(paste0(
+            "\n\n PARANOID MODE DETECTED AN ERROR WHILE FINDING IMPORTS:\n",
+            sprintf("at iteration %d, movement %d (%s) with the following diagnostics:\n%s\n",
+                    i, j, names(moves)[j], diagnostic$msg)))
+        }
+      }
+    }
+
+    ## store outputs if needed
+    if ((i %% config$sample_every_import) == 0 && i>1000) {
+      influences[counter,] <- - vapply(seq_len(data$N),
+                                       function(i) cpp_ll_all(data, param_current, i, tmp_likelihoods),
+                                       numeric(1))
+      counter <- counter + 1L
+    }
+  } # end of the chain
+
+  ## FIND OUTLIERS BASED ON INFLUENCE ##
+  mean_influences <- colMeans(influences)
+  mean_influence <- mean(mean_influences, na.rm = TRUE)
+  threshold <- mean_influence * config$outlier_threshold
+  outliers <- mean_influences > threshold
+
+
+  ## All outliers are considered as introductions, so that ancestries (alpha) are set to 'NA' and
+  ## the number of generations between cases and their ancestor (kappa) is set to NA; the
+  ## movements of alpha and kappa for these cases is also disabled; because the config has been
+  ## altered in these cases, we systematically return the config as well as the initial
+  ## parameters.
+
+  ini_param$store$alpha[[1]][outliers] <- ini_param$current$alpha[outliers] <- NA
+  ini_param$store$kappa[[1]][outliers] <- ini_param$current$kappa[outliers] <- NA
+  ini_param$store$influences <- mean_influence
+  ini_param$store$threshold <- threshold
+  config$move_alpha[outliers] <- FALSE
+  config$move_kappa[outliers] <- FALSE
+
+  return(list(config = config,
+              param_current = ini_param$current,
+              param_store = ini_param$store))
+}