MOVICS / Git / Diff of /R/getConsensusClustering.R

Models:
AlyssaS/
MOVICS
Downloads: 1
Diff of /R/getConsensusClustering.R [000000] .. [494cbf]
Switch to side-by-side view

--- a
+++ b/R/getConsensusClustering.R
@@ -0,0 +1,107 @@
+#' @name getConsensusClustering
+#' @title Get subtypes from ConsensusClustering
+#' @description This function wraps the Consensus Clustering algorithm and provides standard output for `getMoHeatmap()` and `getConsensusMOIC()`.
+#' @param data List of 2 to 6 matrices (features in rows, samples in columns) sharing the same unique sample IDs as column names; matrices are stacked by rows after their columns are aligned to the sample order of the first matrix.
+#' @param N.clust Number of clusters; a single numeric value no less than 2.
+#' @param norMethod A string value to indicate the normalization method applied to each feature before consensus clustering, which can be median-centered, mean-centered, z-score or none; NULL is treated as none.
+#' @param reps An integer value to indicate the number of subsamples.
+#' @param pItem A numerical value to indicate the proportion of items to sample.
+#' @param pFeature A numerical value to indicate the proportion of features to sample.
+#' @param clusterAlg A string value to indicate the cluster algorithm.
+#' @param innerLinkage A string value to indicate the hierarchical linkage method for subsampling.
+#' @param finalLinkage A string value to indicate the hierarchical linkage method for consensus matrix.
+#' @param distance A string value to indicate the distance function.
+#' @param plot A string value to indicate the output format for heatmap.
+#' @param writeTable A logical value to indicate if writing output and log to csv.
+#' @param title A string value for output directory.
+#' @param seed A numerical value to set random seed for reproducible results.
+#' @param verbose A logical value to indicate if printing messages to the screen to indicate progress.
+#' @param type Data type corresponding to the list of matrices, which can be gaussian, binomial or poisson; accepted but not used by this function.
+#' @return A list with the following components:
+#'
+#'         \code{fit}        an object returned by \link[ConsensusClusterPlus]{ConsensusClusterPlus}.
+#'
+#'         \code{clust.res}  a data.frame storing sample ID and corresponding clusters.
+#'
+#'         \code{clust.dend} a dendrogram of sample clustering.
+#'
+#'         \code{mo.method}  a string value indicating the method used for multi-omics integrative clustering.
+#' @export
+#' @examples # There is no example and please refer to vignette.
+#' @import ConsensusClusterPlus
+#' @importFrom dplyr %>%
+#' @references Monti S, Tamayo P, Mesirov J, et al (2003). Consensus Clustering: A Resampling-Based Method for Class Discovery and Visualization of Gene Expression Microarray Data. Mach Learn, 52:91-118.
+getConsensusClustering <- function(data         = NULL,
+                                   N.clust      = NULL,
+                                   type         = rep("gaussian", length(data)),
+                                   norMethod    = "none",
+                                   reps         = 500,
+                                   pItem        = 0.8,
+                                   pFeature     = 0.8,
+                                   clusterAlg   = "hc",
+                                   innerLinkage = "ward.D",
+                                   finalLinkage = "ward.D",
+                                   distance     = "pearson",
+                                   plot         = NULL,
+                                   writeTable   = FALSE,
+                                   title        = file.path(getwd(),"consensuscluster"),
+                                   seed         = 123456,
+                                   verbose      = FALSE){
+
+  # check data (NOTE: `type` is accepted but never used by this function)
+  n_dat <- length(data)
+  if(n_dat > 6){
+    stop('current verision of MOVICS can support up to 6 datasets.')
+  }
+  if(n_dat < 2){
+    stop('current verision of MOVICS needs at least 2 omics data.')
+  }
+
+  # check N.clust up front: NULL, NA or a vector would otherwise fail obscurely downstream
+  if(!is.numeric(N.clust) || length(N.clust) != 1 || is.na(N.clust) || N.clust < 2){
+    stop("N.clust should be a single numeric value no less than 2.")
+  }
+
+  # check samples: rbind() stacks matrices by column position, not by column name,
+  # so every dataset is validated against, and reordered to, the samples of the first one
+  sam.id <- colnames(data[[1]])
+  same.sam <- vapply(data, function(x) isTRUE(ncol(x) == length(sam.id)) && setequal(colnames(x), sam.id), logical(1))
+  if(is.null(sam.id) || anyDuplicated(sam.id) > 0 || !all(same.sam)){
+    stop("all omics data should share the same unique sample IDs as column names.")
+  }
+  d <- as.matrix(do.call(rbind, lapply(data, function(x) x[, sam.id, drop = FALSE])))
+
+  # normalize each feature (row) across samples; NULL is treated the same as "none"
+  if(is.null(norMethod)) norMethod <- "none"
+  if(length(norMethod) != 1 || !is.element(norMethod, c("median-centered","mean-centered","z-score","none"))) {
+    stop("the normalized method should be one of median-centered, mean-centered, z-score or none!")
+  }
+  d <- switch(norMethod,
+              "median-centered" = sweep(d, 1, apply(d, 1, median, na.rm = TRUE)),
+              "mean-centered"   = sweep(d, 1, apply(d, 1, mean, na.rm = TRUE)),
+              "z-score"         = t(scale(t(d))),
+              "none"            = d)
+
+  fit <- ConsensusClusterPlus(d            = d,
+                              maxK         = max(N.clust, 3), # cannot set as 2
+                              reps         = reps,
+                              pItem        = pItem,
+                              pFeature     = pFeature,
+                              clusterAlg   = clusterAlg,
+                              innerLinkage = innerLinkage,
+                              finalLinkage = finalLinkage,
+                              distance     = distance,
+                              seed         = seed,
+                              verbose      = verbose,
+                              plot         = plot,
+                              writeTable   = writeTable,
+                              title        = title)
+  res <- fit[[N.clust]] # result for the requested number of clusters
+
+  clustres <- data.frame(samID = sam.id,
+                         clust = as.numeric(res$consensusClass),
+                         row.names = sam.id,
+                         stringsAsFactors = FALSE)
+
+  return(list(fit = fit, clust.res = clustres, clust.dend = res$consensusTree, mo.method = "ConsensusClustering"))
+}