Diff of /src/snn_graph.cpp [000000] .. [413088]

Switch to side-by-side view

--- a
+++ b/src/snn_graph.cpp
@@ -0,0 +1,176 @@
+#include "Rcpp.h"
+#include <deque>
+#include <algorithm>
+
+/* Adapted from bluster package
+ * https://bioconductor.org/packages/release/bioc/html/bluster.html
+ */
+
+/* The 'rank' version performs the original SNN clustering described by Xu and Su (2015, Bioinformatics).
+ * This defines the weight between two nodes as (k - 0.5 * r), where r is the smallest sum of ranks for any node in both NN-sets.
+ * The rank is computed separately in each NN set, with each node being 0-rank in its own set.
+ */
+
+// [[Rcpp::export(rng=false)]]
+Rcpp::List build_snn_rank(Rcpp::IntegerMatrix neighbors) {
+  const size_t k=neighbors.ncol();
+  const size_t ncells=neighbors.nrow();
+
+  // Building a host table, identifying the reverse relation from nearest neighbours to cells.
+  auto mIt=neighbors.begin();
+  std::deque<std::deque<std::pair<size_t, int> > > hosts(ncells);
+  for (size_t i=1; i<=k; ++i) {
+    for (size_t j=0; j<ncells; ++j, ++mIt) {
+      hosts[*mIt - 1].push_back(std::make_pair(i, j)); // Getting to 0-based index, keeping 1-based ranks for now.
+    }
+  }
+
+  std::deque<int> output_pairs;
+  std::deque<double> output_weights;
+  std::deque<size_t> current_added;
+  std::deque<size_t> current_score(ncells);
+
+  for (size_t j=0; j<ncells; ++j) {
+    auto rowtmp=neighbors.row(j);
+    auto rtIt=rowtmp.begin();
+
+    int cur_neighbor;
+    for (size_t i=0; i<=k; ++i) {
+      if (i==0) {
+        cur_neighbor=j;
+      } else {
+        // Adding the actual nearest neighbors for cell 'j'.
+        cur_neighbor=*rtIt - 1;
+        ++rtIt;
+
+        if (static_cast<size_t>(cur_neighbor) < j) { // avoid duplicates from symmetry in the SNN calculations.
+          const size_t& currank=i; // +0, as neighbour 'i' is rank 0 with respect to itself.
+          size_t& existing_other=current_score[cur_neighbor];
+          if (existing_other==0) {
+            existing_other=currank;
+            current_added.push_back(cur_neighbor);
+          } else if (existing_other > currank) {
+            existing_other=currank;
+          }
+        }
+      }
+
+      // Adding the cells connected by shared nearest neighbors, again recording the lowest combined rank per neighbor.
+      const auto& hosted=hosts[cur_neighbor];
+      for (auto hIt=hosted.begin(); hIt!=hosted.end(); ++hIt) {
+        const int& othernode=hIt->second;
+
+        if (static_cast<size_t>(othernode) < j) { // avoid duplicates from symmetry in the SNN calculations.
+          size_t currank=hIt->first + i;
+          size_t& existing_other=current_score[othernode];
+          if (existing_other==0) {
+            existing_other=currank;
+            current_added.push_back(othernode);
+          } else if (existing_other > currank) {
+            existing_other=currank;
+          }
+        }
+      }
+    }
+
+    for (auto othernode : current_added) {
+      // Converting to edges.
+      output_pairs.push_back(j + 1);
+      output_pairs.push_back(othernode + 1);
+
+      // Ensuring that an edge with a positive weight is always reported.
+      size_t& otherscore=current_score[othernode];
+      double finalscore = static_cast<double>(k) - 0.5 * static_cast<double>(otherscore);
+      output_weights.push_back(std::max(finalscore, 1e-6));
+
+      // Resetting all those added to zero.
+      otherscore=0;
+    }
+    current_added.clear();
+  }
+
+  Rcpp::IntegerVector pout(output_pairs.begin(), output_pairs.end());
+  Rcpp::NumericVector wout(output_weights.begin(), output_weights.end());
+  return Rcpp::List::create(pout, wout);
+}
+
+/* The 'number' version performs a much simpler SNN clustering.
+ * Here, the weight between two nodes is simply the number of shared nodes in both NN-sets.
+ * Each node is also included in its own set, yielding a range of [0, k+1] weights.
+ */
+
+// [[Rcpp::export(rng=false)]]
+Rcpp::List build_snn_number(Rcpp::IntegerMatrix neighbors) {
+  const size_t k=neighbors.ncol();
+  const size_t ncells=neighbors.nrow();
+
+  // Building a host table, identifying the reverse relation from nearest neighbours to cells.
+  auto mIt=neighbors.begin();
+  std::deque<std::deque<size_t> > hosts(ncells);
+  for (size_t i=1; i<=k; ++i) {
+    for (size_t j=0; j<ncells; ++j, ++mIt) {
+      hosts[*mIt - 1].push_back(j); // Getting to 0-based index.
+    }
+  }
+
+  std::deque<int> output_pairs;
+  std::deque<double> output_weights;
+  std::deque<size_t> current_added;
+  std::deque<size_t> current_score(ncells);
+
+  for (size_t j=0; j<ncells; ++j) {
+    auto rowtmp=neighbors.row(j);
+    auto rtIt=rowtmp.begin();
+
+    int cur_neighbor;
+    for (size_t i=0; i<=k; ++i) {
+      if (i==0) {
+        cur_neighbor=j;
+      } else {
+        // Adding the actual nearest neighbors for cell 'j'.
+        cur_neighbor=*rtIt - 1;
+        ++rtIt;
+
+        if (static_cast<size_t>(cur_neighbor) < j) { // avoid duplicates from symmetry in the SNN calculations.
+          size_t& existing_other=current_score[cur_neighbor];
+          if (existing_other==0) {
+            current_added.push_back(cur_neighbor);
+          }
+          ++existing_other;
+        }
+      }
+
+      // Adding the cells connected by shared nearest neighbors, recording the number.
+      const auto& hosted=hosts[cur_neighbor];
+      for (auto hIt=hosted.begin(); hIt!=hosted.end(); ++hIt) {
+        const int& othernode=*hIt;
+
+        if (static_cast<size_t>(othernode) < j) { // avoid duplicates from symmetry in the SNN calculations.
+          size_t& existing_other=current_score[othernode];
+          if (existing_other==0) {
+            current_added.push_back(othernode);
+          }
+          ++existing_other;
+        }
+      }
+    }
+
+    for (auto othernode : current_added) {
+      // Converting to edges.
+      output_pairs.push_back(j + 1);
+      output_pairs.push_back(othernode + 1);
+
+      // Ensuring that an edge with a positive weight is always reported.
+      size_t& otherscore=current_score[othernode];
+      output_weights.push_back(std::max(static_cast<double>(otherscore), 1e-6));
+
+      // Resetting all those added to zero.
+      otherscore=0;
+    }
+    current_added.clear();
+  }
+
+  Rcpp::IntegerVector pout(output_pairs.begin(), output_pairs.end());
+  Rcpp::NumericVector wout(output_weights.begin(), output_weights.end());
+  return Rcpp::List::create(pout, wout);
+}
\ No newline at end of file