Diff of /R/getNEMO.R [000000] .. [494cbf]

Switch to unified view

a b/R/getNEMO.R
1
#' @name getNEMO
2
#' @title Get subtypes from NEMO
3
#' @description This function wraps the NEMO (Neighborhood based multi-omics clustering) algorithm and provides standard output for `getMoHeatmap()` and `getConsensusMOIC()`.
4
#' @param data List of matrices.
5
#' @param N.clust Number of clusters.
6
#' @param num.neighbors The number of neighbors to use for each omic.
7
#' @param type Data type corresponding to the list of matrics, which can be gaussian, binomial or possion.
8
#' @return A list with the following components:
9
#'
10
#'         \code{fit}       an object returned by \link[NEMO]{nemo.clustering}.
11
#'
12
#'         \code{clust.res} a data.frame storing sample ID and corresponding clusters.
13
#'
14
#'         \code{mo.method} a string value indicating the method used for multi-omics integrative clustering.
15
#' @import SNFtool
16
#' @export
17
#' @references Rappoport N, Shamir R (2019). NEMO: cancer subtyping by integration of partial multi-omic data. Bioinformatics, 35(18):3348-3356.
18
#' @examples # There is no example and please refer to vignette.
19
getNEMO <- function(data          = NULL,
20
                    N.clust       = NULL,
21
                    type          = rep("gaussian", length(data)),
22
                    num.neighbors = NA) {
23
24
  # check data
25
  n_dat <- length(data)
26
  if(n_dat > 6){
27
    stop('current verision of MOVICS can support up to 6 datasets.')
28
  }
29
  if(n_dat < 2){
30
    stop('current verision of MOVICS needs at least 2 omics data.')
31
  }
32
33
  useless.argument <- type
34
  fit <- nemo.clustering(omics.list    = data,
35
                         num.clusters  = N.clust,
36
                         num.neighbors = num.neighbors)
37
38
  clustres <- data.frame(samID = colnames(data[[1]]),
39
                         clust = as.numeric(fit),
40
                         row.names = colnames(data[[1]]),
41
                         stringsAsFactors = FALSE)
42
  #clustres <- clustres[order(clustres$clust),]
43
44
  return(list(fit = fit, clust.res = clustres, mo.method = "NEMO"))
45
}
46
47
#' @title affinityMatrix
48
#' @name affinityMatrix
49
#' @param Diff Distance Matrix.
50
#' @param K Number of nearest neighbors.
51
#' @param sigma Variance for local model.
52
#' @author Bo Wang, Aziz Mezlini, Feyyaz Demir, Marc Fiume, Zhuowen Tu, Michael Brudno, Benjamin Haibe-Kains, Anna Goldenberg
53
#' @references Wang B, Mezlini AM, Demir F, et al (2014). Similarity network fusion for aggregating data types on a genomic scale. Nat Methods, 11(3):333-337.
54
#' @keywords internal
55
#' @return affinityMatrix
56
affinityMatrix <- function(Diff,K=20,sigma=0.5) {
57
  ###This function constructs similarity networks.
58
  N = nrow(Diff)
59
60
  Diff = (Diff + t(Diff)) / 2
61
  diag(Diff) = 0;
62
  sortedColumns = as.matrix(t(apply(Diff,2,sort)))
63
  finiteMean <- function(x) { mean(x[is.finite(x)]) }
64
  means = apply(sortedColumns[,1:K+1],1,finiteMean)+.Machine$double.eps;
65
66
  avg <- function(x,y) ((x+y)/2)
67
  Sig = outer(means,means,avg)/3*2 + Diff/3 + .Machine$double.eps;
68
  Sig[Sig <= .Machine$double.eps] = .Machine$double.eps
69
  densities = dnorm(Diff,0,sigma*Sig,log = FALSE)
70
71
  W = (densities + t(densities)) / 2
72
  return(W)
73
}
74
75
#' @title dist2
76
#' @name dist2
77
#' @param X A data matrix where each row is a different data point.
78
#' @param C A data matrix where each row is a different data point. If this matrix is the same as X, pairwise distances for all data points are computed.
79
#' @author Bo Wang, Aziz Mezlini, Feyyaz Demir, Marc Fiume, Zhuowen Tu, Michael Brudno, Benjamin Haibe-Kains, Anna Goldenberg
80
#' @references Wang B, Mezlini AM, Demir F, et al (2014). Similarity network fusion for aggregating data types on a genomic scale. Nat Methods, 11(3):333-337.
81
#' @keywords internal
82
#' @return dist2
83
dist2 <- function(X,C) {
84
  ndata = nrow(X)
85
  ncentres = nrow(C)
86
87
  sumsqX = rowSums(X^2)
88
  sumsqC = rowSums(C^2)
89
90
  XC = 2 * (X %*% t(C))
91
92
  res = matrix(rep(sumsqX,times=ncentres),ndata,ncentres) + t(matrix(rep(sumsqC,times=ndata),ncentres,ndata)) - XC
93
  res[res < 0] = 0
94
  return(res)
95
}
96
97
98
#' @title Spectral clustering
99
#' @name spectralClustering
100
#' @keywords internal
101
#' @author Bo Wang, Aziz Mezlini, Feyyaz Demir, Marc Fiume, Zhuowen Tu, Michael Brudno, Benjamin Haibe-Kains, Anna Goldenberg
102
#' @references Wang B, Mezlini AM, Demir F, et al (2014). Similarity network fusion for aggregating data types on a genomic scale. Nat Methods, 11(3):333-337.
103
#' @return spectralClustering
104
spectralClustering = SNFtool::spectralClustering
105
106
#' @title NEMO num clusters
107
#' @name nemo.num.clusters
108
#' @description Estimates the number of clusters in an affinity graph.
109
#' @param W the affinity graph.
110
#' @param NUMC possible values for the number of clusters. Defaults to 2:15.
111
#' @return the estimated number of clusters in the graph.
112
#' @author Nimrod Rappoport
113
#' @references Rappoport N, Shamir R (2019). NEMO: cancer subtyping by integration of partial multi-omic data. Bioinformatics, 35(18):3348-3356.
114
#' @keywords internal
115
nemo.num.clusters <- function(W, NUMC=2:15) {
116
  if (min(NUMC) == 1) {
117
    warning("Note that we always assume there are more than one cluster.")
118
    NUMC = NUMC[NUMC > 1]
119
  }
120
  W = (W + t(W))/2
121
  diag(W) = 0
122
  if (length(NUMC) > 0) {
123
    degs = rowSums(W)
124
    degs[degs == 0] = .Machine$double.eps
125
    D = diag(degs)
126
    L = D - W
127
    Di = diag(1/sqrt(degs))
128
    L = Di %*% L %*% Di
129
    print(dim(L))
130
    eigs = eigen(L)
131
    eigs_order = sort(eigs$values, index.return = TRUE)$ix
132
    eigs$values = eigs$values[eigs_order]
133
    eigs$vectors = eigs$vectors[, eigs_order]
134
    eigengap = abs(diff(eigs$values))
135
    eigengap = (1:length(eigengap)) * eigengap
136
137
    t1 <- sort(eigengap[NUMC], decreasing = TRUE, index.return = TRUE)$ix
138
    return(NUMC[t1[1]])
139
  }
140
}
141
142
#' @title NEMO affinity graph
143
#' @name nemo.affinity.graph
144
#' @description Constructs a single affinity graph measuring similarity across different omics.
145
#' @param raw.data A list of the data to be clustered, where each an entry is a matrix of features x samples.
146
#' @param k The number of neighbors to use for each omic. It can either be a number, a list of numbers
147
#' or NA. If it is a number, this is the number of neighbors used for all omics. If this is a list,
148
#' the number of neighbors are taken for each omic from that list. If it is NA, each omic chooses the
149
#' number of neighbors to be the number of samples divided by NUM.NEIGHBORS.RATIO.
150
#' @return A single matrix measuring similarity between the samples across all omics.
151
#' @author Nimrod Rappoport
152
#' @references Rappoport N, Shamir R (2019). NEMO: cancer subtyping by integration of partial multi-omic data. Bioinformatics, 35(18):3348-3356.
153
#' @keywords internal
154
nemo.affinity.graph <- function(raw.data, k = NA, NUM.NEIGHBORS.RATIO = 6) {
155
  if (is.na(k)) {
156
    k = as.numeric(lapply(1:length(raw.data), function(i) round(ncol(raw.data[[i]]) / NUM.NEIGHBORS.RATIO)))
157
  } else if (length(k) == 1) {
158
    k = rep(k, length(raw.data))
159
  }
160
  sim.data = lapply(1:length(raw.data), function(i) {affinityMatrix(dist2(as.matrix(t(raw.data[[i]])),
161
                                                                          as.matrix(t(raw.data[[i]]))), k[i], 0.5)})
162
  affinity.per.omic = lapply(1:length(raw.data), function(i) {
163
    sim.datum = sim.data[[i]]
164
    non.sym.knn = apply(sim.datum, 1, function(sim.row) {
165
      returned.row = sim.row
166
      threshold = sort(sim.row, decreasing = TRUE)[k[i]]
167
      returned.row[sim.row < threshold] = 0
168
      row.sum = sum(returned.row)
169
      returned.row[sim.row >= threshold] = returned.row[sim.row >= threshold] / row.sum
170
      return(returned.row)
171
    })
172
    sym.knn = non.sym.knn + t(non.sym.knn)
173
    return(sym.knn)
174
  })
175
  patient.names = Reduce(union, lapply(raw.data, colnames))
176
  num.patients = length(patient.names)
177
  returned.affinity.matrix = matrix(0, ncol = num.patients, nrow=num.patients)
178
  rownames(returned.affinity.matrix) = patient.names
179
  colnames(returned.affinity.matrix) = patient.names
180
181
  shared.omic.count = matrix(0, ncol = num.patients, nrow=num.patients)
182
  rownames(shared.omic.count) = patient.names
183
  colnames(shared.omic.count) = patient.names
184
185
  for (j in 1:length(raw.data)) {
186
    curr.omic.patients = colnames(raw.data[[j]])
187
    returned.affinity.matrix[curr.omic.patients, curr.omic.patients] = returned.affinity.matrix[curr.omic.patients, curr.omic.patients] + affinity.per.omic[[j]][curr.omic.patients, curr.omic.patients]
188
    shared.omic.count[curr.omic.patients, curr.omic.patients] = shared.omic.count[curr.omic.patients, curr.omic.patients] + 1
189
  }
190
191
  final.ret = returned.affinity.matrix / shared.omic.count
192
  lower.tri.ret = final.ret[lower.tri(final.ret)]
193
  final.ret[shared.omic.count == 0] = mean(lower.tri.ret[!is.na(lower.tri.ret)])
194
195
  return(final.ret)
196
}
197
198
#' @title NEMO clustering
199
#' @name nemo.clustering
200
#' @description Performs multi-omic clustering on a datset using the NEMO algorithm.
201
#' Uses nemo.num.clusters to estimate the number of clusters.
202
#' @param omics.list A list of the data to be clustered, where each an entry is a matrix of features x samples.
203
#' @param k The number of neighbors to use for each omic. It can either be a number, a list of numbers
204
#' or NA. If it is a number, this is the number of neighbors used for all omics. If this is a list,
205
#' the number of neighbors are taken for each omic from that list. If it is NA, each omic chooses the
206
#' number of neighbors to be the number of samples divided by NUM.NEIGHBORS.RATIO.
207
#' @return A single matrix measuring similarity between the samples across all omics.
208
#' @author Nimrod Rappoport
209
#' @references Rappoport N, Shamir R (2019). NEMO: cancer subtyping by integration of partial multi-omic data. Bioinformatics, 35(18):3348-3356.
210
#' @keywords internal
211
nemo.clustering <- function(omics.list, num.clusters = NULL, num.neighbors = NA) {
212
  if (is.null(num.clusters)) {
213
    num.clusters = NA
214
  }
215
216
  graph = nemo.affinity.graph(omics.list, k = num.neighbors)
217
  if (is.na(num.clusters)) {
218
    num.clusters = nemo.num.clusters(graph)
219
  }
220
  clustering = spectralClustering(graph, num.clusters)
221
  names(clustering) = colnames(graph)
222
  return(clustering)
223
}