--- a +++ b/src/snn_graph.cpp @@ -0,0 +1,176 @@ +#include "Rcpp.h" +#include <deque> +#include <algorithm> + +/* Adapted from bluster package + * https://bioconductor.org/packages/release/bioc/html/bluster.html + */ + +/* The 'rank' version performs the original SNN clustering described by Xu and Su (2015, Bioinformatics). + * This defines the weight between two nodes as (k - 0.5 * r), where r is the smallest sum of ranks for any node in both NN-sets. + * The rank is computed separately in each NN set, with each node being 0-rank in its own set. + */ + +// [[Rcpp::export(rng=false)]] +Rcpp::List build_snn_rank(Rcpp::IntegerMatrix neighbors) { + const size_t k=neighbors.ncol(); + const size_t ncells=neighbors.nrow(); + + // Building a host table, identifying the reverse relation from nearest neighbours to cells. + auto mIt=neighbors.begin(); + std::deque<std::deque<std::pair<size_t, int> > > hosts(ncells); + for (size_t i=1; i<=k; ++i) { + for (size_t j=0; j<ncells; ++j, ++mIt) { + hosts[*mIt - 1].push_back(std::make_pair(i, j)); // Getting to 0-based index, keeping 1-based ranks for now. + } + } + + std::deque<int> output_pairs; + std::deque<double> output_weights; + std::deque<size_t> current_added; + std::deque<size_t> current_score(ncells); + + for (size_t j=0; j<ncells; ++j) { + auto rowtmp=neighbors.row(j); + auto rtIt=rowtmp.begin(); + + int cur_neighbor; + for (size_t i=0; i<=k; ++i) { + if (i==0) { + cur_neighbor=j; + } else { + // Adding the actual nearest neighbors for cell 'j'. + cur_neighbor=*rtIt - 1; + ++rtIt; + + if (static_cast<size_t>(cur_neighbor) < j) { // avoid duplicates from symmetry in the SNN calculations. + const size_t& currank=i; // +0, as neighbour 'i' is rank 0 with respect to itself. + size_t& existing_other=current_score[cur_neighbor]; + if (existing_other==0) { + existing_other=currank; + current_added.push_back(cur_neighbor); + } else if (existing_other > currank) { + existing_other=currank; + } + } + } + + // Adding the cells connected by shared nearest neighbors, again recording the lowest combined rank per neighbor. + const auto& hosted=hosts[cur_neighbor]; + for (auto hIt=hosted.begin(); hIt!=hosted.end(); ++hIt) { + const int& othernode=hIt->second; + + if (static_cast<size_t>(othernode) < j) { // avoid duplicates from symmetry in the SNN calculations. + size_t currank=hIt->first + i; + size_t& existing_other=current_score[othernode]; + if (existing_other==0) { + existing_other=currank; + current_added.push_back(othernode); + } else if (existing_other > currank) { + existing_other=currank; + } + } + } + } + + for (auto othernode : current_added) { + // Converting to edges. + output_pairs.push_back(j + 1); + output_pairs.push_back(othernode + 1); + + // Ensuring that an edge with a positive weight is always reported. + size_t& otherscore=current_score[othernode]; + double finalscore = static_cast<double>(k) - 0.5 * static_cast<double>(otherscore); + output_weights.push_back(std::max(finalscore, 1e-6)); + + // Resetting all those added to zero. + otherscore=0; + } + current_added.clear(); + } + + Rcpp::IntegerVector pout(output_pairs.begin(), output_pairs.end()); + Rcpp::NumericVector wout(output_weights.begin(), output_weights.end()); + return Rcpp::List::create(pout, wout); +} + +/* The 'number' version performs a much simpler SNN clustering. + * Here, the weight between two nodes is simply the number of shared nodes in both NN-sets. + * Each node is also included in its own set, yielding a range of [0, k+1] weights. + */ + +// [[Rcpp::export(rng=false)]] +Rcpp::List build_snn_number(Rcpp::IntegerMatrix neighbors) { + const size_t k=neighbors.ncol(); + const size_t ncells=neighbors.nrow(); + + // Building a host table, identifying the reverse relation from nearest neighbours to cells. + auto mIt=neighbors.begin(); + std::deque<std::deque<size_t> > hosts(ncells); + for (size_t i=1; i<=k; ++i) { + for (size_t j=0; j<ncells; ++j, ++mIt) { + hosts[*mIt - 1].push_back(j); // Getting to 0-based index. + } + } + + std::deque<int> output_pairs; + std::deque<double> output_weights; + std::deque<size_t> current_added; + std::deque<size_t> current_score(ncells); + + for (size_t j=0; j<ncells; ++j) { + auto rowtmp=neighbors.row(j); + auto rtIt=rowtmp.begin(); + + int cur_neighbor; + for (size_t i=0; i<=k; ++i) { + if (i==0) { + cur_neighbor=j; + } else { + // Adding the actual nearest neighbors for cell 'j'. + cur_neighbor=*rtIt - 1; + ++rtIt; + + if (static_cast<size_t>(cur_neighbor) < j) { // avoid duplicates from symmetry in the SNN calculations. + size_t& existing_other=current_score[cur_neighbor]; + if (existing_other==0) { + current_added.push_back(cur_neighbor); + } + ++existing_other; + } + } + + // Adding the cells connected by shared nearest neighbors, recording the number. + const auto& hosted=hosts[cur_neighbor]; + for (auto hIt=hosted.begin(); hIt!=hosted.end(); ++hIt) { + const int& othernode=*hIt; + + if (static_cast<size_t>(othernode) < j) { // avoid duplicates from symmetry in the SNN calculations. + size_t& existing_other=current_score[othernode]; + if (existing_other==0) { + current_added.push_back(othernode); + } + ++existing_other; + } + } + } + + for (auto othernode : current_added) { + // Converting to edges. + output_pairs.push_back(j + 1); + output_pairs.push_back(othernode + 1); + + // Ensuring that an edge with a positive weight is always reported. + size_t& otherscore=current_score[othernode]; + output_weights.push_back(std::max(static_cast<double>(otherscore), 1e-6)); + + // Resetting all those added to zero. + otherscore=0; + } + current_added.clear(); + } + + Rcpp::IntegerVector pout(output_pairs.begin(), output_pairs.end()); + Rcpp::NumericVector wout(output_weights.begin(), output_weights.end()); + return Rcpp::List::create(pout, wout); +} \ No newline at end of file