[413088]: / src / snn_graph.cpp

Download this file

176 lines (149 with data), 6.2 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
#include "Rcpp.h"
#include <deque>
#include <algorithm>
/* Adapted from bluster package
* https://bioconductor.org/packages/release/bioc/html/bluster.html
*/
/* The 'rank' version performs the original SNN clustering described by Xu and Su (2015, Bioinformatics).
* This defines the weight between two nodes as (k - 0.5 * r), where r is the smallest sum of ranks for any node in both NN-sets.
* The rank is computed separately in each NN set, with each node being 0-rank in its own set.
*/
// [[Rcpp::export(rng=false)]]
Rcpp::List build_snn_rank(Rcpp::IntegerMatrix neighbors) {
const size_t k=neighbors.ncol();
const size_t ncells=neighbors.nrow();
// Building a host table, identifying the reverse relation from nearest neighbours to cells.
auto mIt=neighbors.begin();
std::deque<std::deque<std::pair<size_t, int> > > hosts(ncells);
for (size_t i=1; i<=k; ++i) {
for (size_t j=0; j<ncells; ++j, ++mIt) {
hosts[*mIt - 1].push_back(std::make_pair(i, j)); // Getting to 0-based index, keeping 1-based ranks for now.
}
}
std::deque<int> output_pairs;
std::deque<double> output_weights;
std::deque<size_t> current_added;
std::deque<size_t> current_score(ncells);
for (size_t j=0; j<ncells; ++j) {
auto rowtmp=neighbors.row(j);
auto rtIt=rowtmp.begin();
int cur_neighbor;
for (size_t i=0; i<=k; ++i) {
if (i==0) {
cur_neighbor=j;
} else {
// Adding the actual nearest neighbors for cell 'j'.
cur_neighbor=*rtIt - 1;
++rtIt;
if (static_cast<size_t>(cur_neighbor) < j) { // avoid duplicates from symmetry in the SNN calculations.
const size_t& currank=i; // +0, as neighbour 'i' is rank 0 with respect to itself.
size_t& existing_other=current_score[cur_neighbor];
if (existing_other==0) {
existing_other=currank;
current_added.push_back(cur_neighbor);
} else if (existing_other > currank) {
existing_other=currank;
}
}
}
// Adding the cells connected by shared nearest neighbors, again recording the lowest combined rank per neighbor.
const auto& hosted=hosts[cur_neighbor];
for (auto hIt=hosted.begin(); hIt!=hosted.end(); ++hIt) {
const int& othernode=hIt->second;
if (static_cast<size_t>(othernode) < j) { // avoid duplicates from symmetry in the SNN calculations.
size_t currank=hIt->first + i;
size_t& existing_other=current_score[othernode];
if (existing_other==0) {
existing_other=currank;
current_added.push_back(othernode);
} else if (existing_other > currank) {
existing_other=currank;
}
}
}
}
for (auto othernode : current_added) {
// Converting to edges.
output_pairs.push_back(j + 1);
output_pairs.push_back(othernode + 1);
// Ensuring that an edge with a positive weight is always reported.
size_t& otherscore=current_score[othernode];
double finalscore = static_cast<double>(k) - 0.5 * static_cast<double>(otherscore);
output_weights.push_back(std::max(finalscore, 1e-6));
// Resetting all those added to zero.
otherscore=0;
}
current_added.clear();
}
Rcpp::IntegerVector pout(output_pairs.begin(), output_pairs.end());
Rcpp::NumericVector wout(output_weights.begin(), output_weights.end());
return Rcpp::List::create(pout, wout);
}
/* The 'number' version performs a much simpler SNN clustering.
* Here, the weight between two nodes is simply the number of shared nodes in both NN-sets.
* Each node is also included in its own set, yielding a range of [0, k+1] weights.
*/
// [[Rcpp::export(rng=false)]]
Rcpp::List build_snn_number(Rcpp::IntegerMatrix neighbors) {
const size_t k=neighbors.ncol();
const size_t ncells=neighbors.nrow();
// Building a host table, identifying the reverse relation from nearest neighbours to cells.
auto mIt=neighbors.begin();
std::deque<std::deque<size_t> > hosts(ncells);
for (size_t i=1; i<=k; ++i) {
for (size_t j=0; j<ncells; ++j, ++mIt) {
hosts[*mIt - 1].push_back(j); // Getting to 0-based index.
}
}
std::deque<int> output_pairs;
std::deque<double> output_weights;
std::deque<size_t> current_added;
std::deque<size_t> current_score(ncells);
for (size_t j=0; j<ncells; ++j) {
auto rowtmp=neighbors.row(j);
auto rtIt=rowtmp.begin();
int cur_neighbor;
for (size_t i=0; i<=k; ++i) {
if (i==0) {
cur_neighbor=j;
} else {
// Adding the actual nearest neighbors for cell 'j'.
cur_neighbor=*rtIt - 1;
++rtIt;
if (static_cast<size_t>(cur_neighbor) < j) { // avoid duplicates from symmetry in the SNN calculations.
size_t& existing_other=current_score[cur_neighbor];
if (existing_other==0) {
current_added.push_back(cur_neighbor);
}
++existing_other;
}
}
// Adding the cells connected by shared nearest neighbors, recording the number.
const auto& hosted=hosts[cur_neighbor];
for (auto hIt=hosted.begin(); hIt!=hosted.end(); ++hIt) {
const int& othernode=*hIt;
if (static_cast<size_t>(othernode) < j) { // avoid duplicates from symmetry in the SNN calculations.
size_t& existing_other=current_score[othernode];
if (existing_other==0) {
current_added.push_back(othernode);
}
++existing_other;
}
}
}
for (auto othernode : current_added) {
// Converting to edges.
output_pairs.push_back(j + 1);
output_pairs.push_back(othernode + 1);
// Ensuring that an edge with a positive weight is always reported.
size_t& otherscore=current_score[othernode];
output_weights.push_back(std::max(static_cast<double>(otherscore), 1e-6));
// Resetting all those added to zero.
otherscore=0;
}
current_added.clear();
}
Rcpp::IntegerVector pout(output_pairs.begin(), output_pairs.end());
Rcpp::NumericVector wout(output_weights.begin(), output_weights.end());
return Rcpp::List::create(pout, wout);
}