Diff of /R/getConsensusMOIC.R [000000] .. [494cbf]

Switch to unified view

a b/R/getConsensusMOIC.R
1
#' @name getConsensusMOIC
2
#' @title Get subtypes from consensus clustering of multiple multi-omics integrative clustering algorithms
3
#' @description Since this R package integrates 10 mainstream multi-omics clustering algorithms, we borrow the idea of consensus clustering for later integration of the clustering results derived from different algorithms, so as to improve the clustering robustness. The simplest way to run `getConsensusMOIC()` is to pass a list of object returned by `get\%algorithm_name\%()` or by `getMOIC()` with specific argument of `methodslist`.
4
#' @param moic.res.list A list of object returned by `getMOIC()`.
5
#' @param distance A string value of distance measurement for hierarchical clustering; 'euclidean' by default.
6
#' @param linkage A string value of clustering method for hierarchical clustering; 'ward.D' by default.
7
#' @param mapcolor A string vector for heatmap mapping color.
8
#' @param clust.col A string vector storing colors for annotating each cluster at the top of heatmap.
9
#' @param showID A logic value to indicate if showing the sample ID; FALSE by default.
10
#' @param fig.path A string value to indicate the output path for storing the consensus heatmap.
11
#' @param fig.name A string value to indicate the name of the consensus heatmap.
12
#' @param width A numeric value to indicate the width of output figure.
13
#' @param height A numeric value to indicate the height of output figure.
14
#' @return A consensus heatmap  and a list contains the following components:
15
#'
16
#'         \code{consensus.hm} an object returned by \link[ComplexHeatmap]{pheatmap}
17
#'
18
#'         \code{similarity}   a similary matrix for pair-wise samples with entries ranging from 0 to 1
19
#'
20
#'         \code{sil}          a silhouette object that can be further passed to \link[MOVICS]{getSilhouette}
21
#'
22
#'         \code{clust.res}    a data.frame storing sample ID and corresponding clusters
23
#'
24
#'         \code{clust.dend}   a dendrogram of sample clustering
25
#'
26
#'         \code{mo.method}    a string value indicating the method used for multi-omics integrative clustering
27
#' @importFrom ClassDiscovery distanceMatrix
28
#' @importFrom grDevices pdf dev.off colorRampPalette
29
#' @importFrom ComplexHeatmap pheatmap
30
#' @import cluster
31
#' @export
32
#' @examples # There is no example and please refer to vignette.
33
#' @references Gu Z, Eils R, Schlesner M (2016). Complex heatmaps reveal patterns and correlations in multidimensional genomic data. Bioinformatics, 32(18):2847-2849.
34
getConsensusMOIC <- function(moic.res.list = NULL,
35
                             distance      = "euclidean",
36
                             linkage       = "ward.D",
37
                             mapcolor      = c("#000004FF", "#56106EFF", "#BB3754FF", "#F98C0AFF", "#FCFFA4FF"),
38
                             clust.col     = c("#2EC4B6", "#E71D36", "#FF9F1C", "#BDD5EA", "#FFA5AB", "#011627","#023E8A","#9D4EDD"),
39
                             showID        = FALSE,
40
                             fig.path      = getwd(),
41
                             fig.name      = "consensusheatmap",
42
                             width         = 5.5,
43
                             height        = 5) {
44
45
  if(!is.list(moic.res.list)) {stop("a list of clust.res returned by getMOIC() should be provided.")}
46
47
  if(length(moic.res.list) < 2) {stop("at least two multi-omics clustering methods should be included.")}
48
49
  moic.res <- lapply(moic.res.list, function(x) x$clust.res)
50
51
  if(!length(unique(sapply(moic.res, function(x) {length(unique(x$clust))}))) == 1) {
52
    stop("clustering number mismatched across different algorithms!")
53
  }
54
55
  N.clust <- length(unique(moic.res[[1]]$clust))
56
  colvec <- clust.col[1:N.clust]
57
  names(colvec) <- paste0("CS",1:N.clust)
58
59
  fixed.sam <- moic.res[[1]]$samID
60
61
  consensus.matrix <- matrix(0, nrow=length(fixed.sam), ncol=length(fixed.sam))
62
  for (i in 1:length(moic.res)) {
63
    res <- moic.res[[i]]
64
    grpInfo <- res$clust; names(grpInfo) <- res$samID
65
66
    ans = as.data.frame(matrix(0, nrow=length(fixed.sam), ncol=length(fixed.sam)))
67
    rownames(ans) = colnames(ans) <- fixed.sam
68
    unique.grp = unique(grpInfo)
69
70
    for (k in 1:length(unique.grp)){
71
      grpK.members = names(grpInfo)[grpInfo==unique.grp[k]]
72
      ans[grpK.members, grpK.members] = 1
73
    }
74
    consensus.matrix <- consensus.matrix + as.matrix(ans)
75
  }
76
77
  similarity.matrix <- as.data.frame(consensus.matrix/length(moic.res))
78
  # hcs <- hclust(vegdist(as.matrix(1-similarity.matrix), method = "jaccard"), "ward.D")
79
  hcs <- hclust(ClassDiscovery::distanceMatrix(as.matrix(1-similarity.matrix), distance), linkage)
80
  coca.moic <- cutree(hcs, N.clust)
81
82
  clustres <- data.frame(samID = fixed.sam,
83
                         clust = as.numeric(coca.moic),
84
                         row.names = fixed.sam,
85
                         stringsAsFactors = FALSE)
86
  #clustres <- clustres[order(clustres$clust),]
87
88
  annCol <- data.frame("Subtype" = paste0("CS", clustres[fixed.sam,"clust"]), # consensus multi-omics integrative clustering
89
                       row.names = fixed.sam)
90
  annColors <- list("Subtype" = colvec)
91
92
  # save to pdf
93
  outFig <- paste0(fig.name,".pdf")
94
  hm <- ComplexHeatmap::pheatmap(mat               = as.matrix(similarity.matrix),
95
                                 cluster_rows      = hcs,
96
                                 cluster_cols      = hcs,
97
                                 border_color      = NA,
98
                                 show_colnames     = showID,
99
                                 show_rownames     = showID,
100
                                 annotation_col    = annCol,
101
                                 annotation_colors = annColors,
102
                                 legend_breaks     = c(0,0.2,0.4,0.6,0.8,1),
103
                                 legend_labels     = c(0,0.2,0.4,0.6,0.8,1),
104
                                 color             = grDevices::colorRampPalette(mapcolor)(64))
105
106
  pdf(file.path(fig.path, outFig), width = width, height = height)
107
  draw(hm)
108
  invisible(dev.off())
109
110
  # print to screen
111
  draw(hm)
112
113
  # silhoutte
114
  similarity_matrix <- as.matrix(similarity.matrix)
115
  similarity_matrix <- (similarity_matrix+t(similarity_matrix))/2
116
  diag(similarity_matrix) <- 0
117
  normalize <- function(X) X / rowSums(X)
118
  similarity_matrix <- normalize(similarity_matrix)
119
120
  cluster_id <- 1:N.clust
121
  sil <- matrix(NA, length(coca.moic), 3, dimnames = list(names(coca.moic), c("cluster","neighbor","sil_width")))
122
  for(j in 1:N.clust) {
123
    index <- (coca.moic == cluster_id[j])
124
    Nj <- sum(index)
125
    sil[index, "cluster"] <- cluster_id[j]
126
    dindex <- rbind(apply(similarity_matrix[!index, index, drop = FALSE], 2,
127
                          function(r) tapply(r, coca.moic[!index], mean)))
128
    maxC <- apply(dindex, 2, which.max)
129
    sil[index,"neighbor"] <- cluster_id[-j][maxC]
130
    s.i <- if(Nj > 1) {
131
      a.i <- colSums(similarity_matrix[index, index])/(Nj - 1)
132
      b.i <- dindex[cbind(maxC, seq(along = maxC))]
133
      ifelse(a.i != b.i, (a.i - b.i) / pmax(b.i, a.i), 0)
134
    } else {0}
135
    sil[index,"sil_width"] <- s.i
136
  }
137
  attr(sil, "Ordered") <- FALSE
138
  class(sil) <- "silhouette"
139
140
  return(list(consensus.hm = hm, similarity.matrix = similarity.matrix, sil = sil, clust.res = clustres, clust.dend = hcs, mo.method = "consensusMOIC"))
141
}