a b/src/snn_graph.cpp
1
#include "Rcpp.h"
2
#include <deque>
3
#include <algorithm>
4
5
/* Adapted from bluster package
6
 * https://bioconductor.org/packages/release/bioc/html/bluster.html
7
 */
8
9
/* The 'rank' version performs the original SNN clustering described by Xu and Su (2015, Bioinformatics).
10
 * This defines the weight between two nodes as (k - 0.5 * r), where r is the smallest sum of ranks for any node in both NN-sets.
11
 * The rank is computed separately in each NN set, with each node being 0-rank in its own set.
12
 */
13
14
// [[Rcpp::export(rng=false)]]
15
Rcpp::List build_snn_rank(Rcpp::IntegerMatrix neighbors) {
16
  const size_t k=neighbors.ncol();
17
  const size_t ncells=neighbors.nrow();
18
19
  // Building a host table, identifying the reverse relation from nearest neighbours to cells.
20
  auto mIt=neighbors.begin();
21
  std::deque<std::deque<std::pair<size_t, int> > > hosts(ncells);
22
  for (size_t i=1; i<=k; ++i) {
23
    for (size_t j=0; j<ncells; ++j, ++mIt) {
24
      hosts[*mIt - 1].push_back(std::make_pair(i, j)); // Getting to 0-based index, keeping 1-based ranks for now.
25
    }
26
  }
27
28
  std::deque<int> output_pairs;
29
  std::deque<double> output_weights;
30
  std::deque<size_t> current_added;
31
  std::deque<size_t> current_score(ncells);
32
33
  for (size_t j=0; j<ncells; ++j) {
34
    auto rowtmp=neighbors.row(j);
35
    auto rtIt=rowtmp.begin();
36
37
    int cur_neighbor;
38
    for (size_t i=0; i<=k; ++i) {
39
      if (i==0) {
40
        cur_neighbor=j;
41
      } else {
42
        // Adding the actual nearest neighbors for cell 'j'.
43
        cur_neighbor=*rtIt - 1;
44
        ++rtIt;
45
46
        if (static_cast<size_t>(cur_neighbor) < j) { // avoid duplicates from symmetry in the SNN calculations.
47
          const size_t& currank=i; // +0, as neighbour 'i' is rank 0 with respect to itself.
48
          size_t& existing_other=current_score[cur_neighbor];
49
          if (existing_other==0) {
50
            existing_other=currank;
51
            current_added.push_back(cur_neighbor);
52
          } else if (existing_other > currank) {
53
            existing_other=currank;
54
          }
55
        }
56
      }
57
58
      // Adding the cells connected by shared nearest neighbors, again recording the lowest combined rank per neighbor.
59
      const auto& hosted=hosts[cur_neighbor];
60
      for (auto hIt=hosted.begin(); hIt!=hosted.end(); ++hIt) {
61
        const int& othernode=hIt->second;
62
63
        if (static_cast<size_t>(othernode) < j) { // avoid duplicates from symmetry in the SNN calculations.
64
          size_t currank=hIt->first + i;
65
          size_t& existing_other=current_score[othernode];
66
          if (existing_other==0) {
67
            existing_other=currank;
68
            current_added.push_back(othernode);
69
          } else if (existing_other > currank) {
70
            existing_other=currank;
71
          }
72
        }
73
      }
74
    }
75
76
    for (auto othernode : current_added) {
77
      // Converting to edges.
78
      output_pairs.push_back(j + 1);
79
      output_pairs.push_back(othernode + 1);
80
81
      // Ensuring that an edge with a positive weight is always reported.
82
      size_t& otherscore=current_score[othernode];
83
      double finalscore = static_cast<double>(k) - 0.5 * static_cast<double>(otherscore);
84
      output_weights.push_back(std::max(finalscore, 1e-6));
85
86
      // Resetting all those added to zero.
87
      otherscore=0;
88
    }
89
    current_added.clear();
90
  }
91
92
  Rcpp::IntegerVector pout(output_pairs.begin(), output_pairs.end());
93
  Rcpp::NumericVector wout(output_weights.begin(), output_weights.end());
94
  return Rcpp::List::create(pout, wout);
95
}
96
97
/* The 'number' version performs a much simpler SNN clustering.
98
 * Here, the weight between two nodes is simply the number of shared nodes in both NN-sets.
99
 * Each node is also included in its own set, yielding a range of [0, k+1] weights.
100
 */
101
102
// [[Rcpp::export(rng=false)]]
103
Rcpp::List build_snn_number(Rcpp::IntegerMatrix neighbors) {
104
  const size_t k=neighbors.ncol();
105
  const size_t ncells=neighbors.nrow();
106
107
  // Building a host table, identifying the reverse relation from nearest neighbours to cells.
108
  auto mIt=neighbors.begin();
109
  std::deque<std::deque<size_t> > hosts(ncells);
110
  for (size_t i=1; i<=k; ++i) {
111
    for (size_t j=0; j<ncells; ++j, ++mIt) {
112
      hosts[*mIt - 1].push_back(j); // Getting to 0-based index.
113
    }
114
  }
115
116
  std::deque<int> output_pairs;
117
  std::deque<double> output_weights;
118
  std::deque<size_t> current_added;
119
  std::deque<size_t> current_score(ncells);
120
121
  for (size_t j=0; j<ncells; ++j) {
122
    auto rowtmp=neighbors.row(j);
123
    auto rtIt=rowtmp.begin();
124
125
    int cur_neighbor;
126
    for (size_t i=0; i<=k; ++i) {
127
      if (i==0) {
128
        cur_neighbor=j;
129
      } else {
130
        // Adding the actual nearest neighbors for cell 'j'.
131
        cur_neighbor=*rtIt - 1;
132
        ++rtIt;
133
134
        if (static_cast<size_t>(cur_neighbor) < j) { // avoid duplicates from symmetry in the SNN calculations.
135
          size_t& existing_other=current_score[cur_neighbor];
136
          if (existing_other==0) {
137
            current_added.push_back(cur_neighbor);
138
          }
139
          ++existing_other;
140
        }
141
      }
142
143
      // Adding the cells connected by shared nearest neighbors, recording the number.
144
      const auto& hosted=hosts[cur_neighbor];
145
      for (auto hIt=hosted.begin(); hIt!=hosted.end(); ++hIt) {
146
        const int& othernode=*hIt;
147
148
        if (static_cast<size_t>(othernode) < j) { // avoid duplicates from symmetry in the SNN calculations.
149
          size_t& existing_other=current_score[othernode];
150
          if (existing_other==0) {
151
            current_added.push_back(othernode);
152
          }
153
          ++existing_other;
154
        }
155
      }
156
    }
157
158
    for (auto othernode : current_added) {
159
      // Converting to edges.
160
      output_pairs.push_back(j + 1);
161
      output_pairs.push_back(othernode + 1);
162
163
      // Ensuring that an edge with a positive weight is always reported.
164
      size_t& otherscore=current_score[othernode];
165
      output_weights.push_back(std::max(static_cast<double>(otherscore), 1e-6));
166
167
      // Resetting all those added to zero.
168
      otherscore=0;
169
    }
170
    current_added.clear();
171
  }
172
173
  Rcpp::IntegerVector pout(output_pairs.begin(), output_pairs.end());
174
  Rcpp::NumericVector wout(output_weights.begin(), output_weights.end());
175
  return Rcpp::List::create(pout, wout);
176
}