Switch to unified view

a b/R/getConsensusClustering.R
1
#' @name getConsensusClustering
#' @title Get subtypes from ConsensusClustering
#' @description This function wraps the Consensus Clustering algorithm and provides standard output for `getMoHeatmap()` and `getConsensusMOIC()`.
#' @details The matrices in \code{data} are stacked by rows with \code{rbind()} into one matrix before clustering, so their columns (samples) are matched by position. Sample IDs in the result are taken from the column names of the first matrix.
#' @param data List of matrices (between 2 and 6 omics datasets).
#' @param N.clust Number of clusters; a single number of at least 2. ConsensusClusterPlus is always run with \code{maxK} of at least 3, so results for k = 3 are also computed when \code{N.clust = 2}.
#' @param type Data type corresponding to the list of matrices, which can be gaussian, binomial or poisson. This argument is accepted but not used by this function.
#' @param norMethod A string value to indicate the row-wise normalization method applied to the stacked matrix before consensus clustering; one of "median-centered", "mean-centered", "z-score" or "none". \code{NULL} is treated as "none".
#' @param reps An integer value to indicate the number of subsamples.
#' @param pItem A numerical value to indicate the proportion of items to sample.
#' @param pFeature A numerical value to indicate the proportion of features to sample.
#' @param clusterAlg A string value to indicate the cluster algorithm.
#' @param innerLinkage A string value to indicate the hierarchical linkage method for subsampling.
#' @param finalLinkage A string value to indicate the hierarchical linkage method for consensus matrix.
#' @param distance A string value to indicate the distance function.
#' @param plot A string value to indicate the output format for heatmap; passed to \link[ConsensusClusterPlus]{ConsensusClusterPlus}.
#' @param writeTable A logical value to indicate if writing output and log to csv.
#' @param title A string value for output directory.
#' @param seed A numerical value to set random seed for reproducible results.
#' @param verbose A logical value to indicate if printing messages to the screen to indicate progress.
#' @return A list with the following components:
#'
#'         \code{fit}        an object returned by \link[ConsensusClusterPlus]{ConsensusClusterPlus}.
#'
#'         \code{clust.res}  a data.frame storing sample ID and corresponding clusters.
#'
#'         \code{clust.dend} a dendrogram of sample clustering.
#'
#'         \code{mo.method}  a string value indicating the method used for multi-omics integrative clustering.
#' @export
#' @examples # There is no example and please refer to vignette.
#' @import ConsensusClusterPlus
#' @importFrom dplyr %>%
#' @references Monti S, Tamayo P, Mesirov J, et al (2003). Consensus Clustering: A Resampling-Based Method for Class Discovery and Visualization of Gene Expression Microarray Data. Mach Learn, 52:91-118.
getConsensusClustering <- function(data         = NULL,
                                   N.clust      = NULL,
                                   type         = rep("gaussian", length(data)),
                                   norMethod    = "none",
                                   reps         = 500,
                                   pItem        = 0.8,
                                   pFeature     = 0.8,
                                   clusterAlg   = "hc",
                                   innerLinkage = "ward.D",
                                   finalLinkage = "ward.D",
                                   distance     = "pearson",
                                   plot         = NULL,
                                   writeTable   = FALSE,
                                   title        = file.path(getwd(), "consensuscluster"),
                                   seed         = 123456,
                                   verbose      = FALSE) {

  # check data
  n_dat <- length(data)
  if (n_dat > 6) {
    stop('current verision of MOVICS can support up to 6 datasets.')
  }
  if (n_dat < 2) {
    stop('current verision of MOVICS needs at least 2 omics data.')
  }

  # check N.clust up front: it defaults to NULL and is later used both to
  # derive maxK and to index the fit, so a missing or invalid value would
  # otherwise only fail late with an obscure message
  if (!is.numeric(N.clust) || length(N.clust) != 1L || is.na(N.clust) || N.clust < 2) {
    stop("N.clust must be a single number greater than or equal to 2.")
  }

  # check normalization method (NULL is accepted and means no normalization)
  if (!is.null(norMethod)) {
    if (length(norMethod) != 1L ||
        !is.element(norMethod, c("median-centered", "mean-centered", "z-score", "none"))) {
      stop("the normalized method should be one of median-centered, mean-centered, z-score or none!")
    }
  }

  # NOTE: `type` is intentionally not used; it is only kept in the signature.

  # stack all omics layers by rows (features); columns are samples
  d <- do.call(rbind, data)

  # optional row-wise (per-feature) normalization of the stacked matrix
  if (!is.null(norMethod)) {
    d <- switch(norMethod,
                "median-centered" = sweep(d, 1, apply(d, 1, stats::median, na.rm = TRUE)),
                "mean-centered"   = sweep(d, 1, apply(d, 1, mean, na.rm = TRUE)),
                "z-score"         = t(scale(t(d))),
                "none"            = d)
  }

  fit <- ConsensusClusterPlus(d            = as.matrix(d),
                              maxK         = max(N.clust, 3), # cannot set as 2
                              reps         = reps,
                              pItem        = pItem,
                              pFeature     = pFeature,
                              clusterAlg   = clusterAlg,
                              innerLinkage = innerLinkage,
                              finalLinkage = finalLinkage,
                              distance     = distance,
                              seed         = seed,
                              verbose      = verbose,
                              plot         = plot,
                              writeTable   = writeTable,
                              title        = title)

  # the fit is indexed by number of clusters; pick the requested solution
  res <- fit[[N.clust]]

  # sample IDs come from the first matrix (all layers share columns by position)
  clustres <- data.frame(samID = colnames(data[[1]]),
                         clust = as.numeric(res$consensusClass),
                         row.names = colnames(data[[1]]),
                         stringsAsFactors = FALSE)
  #clustres <- clustres[order(clustres$clust),]

  list(fit = fit, clust.res = clustres, clust.dend = res$consensusTree, mo.method = "ConsensusClustering")
}