Diff of /R/getiClusterBayes.R [000000] .. [494cbf]

Switch to side-by-side view

--- a
+++ b/R/getiClusterBayes.R
@@ -0,0 +1,164 @@
+#' @name getiClusterBayes
+#' @title Get subtypes from iClusterBayes
+#' @description This function wraps the iClusterBayes (Integrative clustering by Bayesian latent variable model) algorithm and provides standard output for `getMoHeatmap()` and `getConsensusMOIC()`.
+#' @param data List of matrices with maximum of 6 subdatasets.
+#' @param N.clust Number of clusters.
+#' @param type Data type corresponding to the list of matrics, which can be gaussian, binomial or possion.
+#' @param n.burnin An integer value to indicate the number of MCMC burnin.
+#' @param n.draw An integer value to indicate the number of MCMC draw.
+#' @param prior.gamma A numerical vector to indicate the prior probability for the indicator variable gamma of each subdataset.
+#' @param sdev A numerical value to indicate the standard deviation of random walk proposal for the latent variable.
+#' @param thin A numerical value to thin the MCMC chain in order to reduce autocorrelation.
+#' @export
+#' @return A list with the following components:
+#'
+#'         \code{fit}       an object returned by \link[iClusterPlus]{iClusterBayes}.
+#'
+#'         \code{clust.res} a data.frame storing sample ID and corresponding clusters.
+#'
+#'         \code{feat.res}  the results of features selection process.
+#'
+#'         \code{mo.method} a string value indicating the method used for multi-omics integrative clustering.
+#' @import iClusterPlus
+#' @importFrom dplyr %>%
+#' @examples # There is no example and please refer to vignette.
+#' @references Mo Q, Shen R, Guo C, Vannucci M, Chan KS, Hilsenbeck SG (2018). A fully Bayesian latent variable model for integrative clustering analysis of multi-type omics data. Biostatistics, 19(1):71-86.
+getiClusterBayes <- function(data        = NULL,
+                             N.clust     = NULL,
+                             type        = rep("gaussian", length(data)),
+                             n.burnin    = 18000,
+                             n.draw      = 12000,
+                             prior.gamma = rep(0.5,length(data)),
+                             sdev        = 0.05,
+                             thin        = 3) {
+
+  # check data
+  n_dat <- length(data)
+  if(n_dat > 6){
+    stop('current verision of MOVICS can support up to 6 datasets.')
+  }
+  if(n_dat < 2){
+    stop('current verision of MOVICS needs at least 2 omics data.')
+  }
+
+  # remove features that made of categories not equal to 2
+  if(is.element("binomial",type)) {
+    bindex <- which(type == "binomial")
+    for (i in bindex) {
+      a <- which(rowSums(data[[i]]) == 0)
+      b <- which(rowSums(data[[i]]) == ncol(data[[i]]))
+      if(length(a) > 0) {
+        data[[i]] <- data[[i]][which(rowSums(data[[i]]) != 0),] # remove all zero
+      }
+
+      if(length(b) > 0) {
+        data[[i]] <- data[[i]][which(rowSums(data[[i]]) != ncol(data[[i]])),] # remove all one
+      }
+
+      if(length(a) + length(b) > 0) {
+        message(paste0("--", names(data)[i],": a total of ",length(a) + length(b), " features were removed due to the categories were not equal to 2!"))
+      }
+    }
+  }
+
+  data.backup <- data
+  data <- lapply(data, t)
+
+  if(n_dat == 1){
+    res <- iClusterBayes(dt1         = data[[1]],
+                         type        = type,
+                         K           = N.clust - 1,
+                         n.burnin    = n.burnin,
+                         n.draw      = n.draw,
+                         prior.gamma = prior.gamma,
+                         sdev        = sdev,
+                         thin        = thin)
+  }
+  if(n_dat == 2){
+    res <- iClusterBayes(dt1         = data[[1]],
+                         dt2         = data[[2]],
+                         type        = type,
+                         K           = N.clust - 1,
+                         n.burnin    = n.burnin,
+                         n.draw      = n.draw,
+                         prior.gamma = prior.gamma,
+                         sdev        = sdev,
+                         thin        = thin)
+  }
+  if(n_dat == 3){
+    res <- iClusterBayes(dt1         = data[[1]],
+                         dt2         = data[[2]],
+                         dt3         = data[[3]],
+                         type        = type,
+                         K           = N.clust - 1,
+                         n.burnin    = n.burnin,
+                         n.draw      = n.draw,
+                         prior.gamma = prior.gamma,
+                         sdev        = sdev,
+                         thin        = thin)
+  }
+  if(n_dat == 4){
+    res <- iClusterBayes(dt1         = data[[1]],
+                         dt2         = data[[2]],
+                         dt3         = data[[3]],
+                         dt4         = data[[4]],
+                         type        = type,
+                         K           = N.clust - 1,
+                         n.burnin    = n.burnin,
+                         n.draw      = n.draw,
+                         prior.gamma = prior.gamma,
+                         sdev        = sdev,
+                         thin        = thin)
+  }
+  if(n_dat == 5){
+    res <- iClusterBayes(dt1         = data[[1]],
+                         dt2         = data[[2]],
+                         dt3         = data[[3]],
+                         dt4         = data[[4]],
+                         dt5         = data[[5]],
+                         type        = type,
+                         K           = N.clust - 1,
+                         n.burnin    = n.burnin,
+                         n.draw      = n.draw,
+                         prior.gamma = prior.gamma,
+                         sdev        = sdev,
+                         thin        = thin)
+  }
+  if(n_dat == 6){
+    res <- iClusterBayes(dt1         = data[[1]],
+                         dt2         = data[[2]],
+                         dt3         = data[[3]],
+                         dt4         = data[[4]],
+                         dt5         = data[[5]],
+                         dt6         = data[[6]],
+                         type        = type,
+                         K           = N.clust - 1,
+                         n.burnin    = n.burnin,
+                         n.draw      = n.draw,
+                         prior.gamma = prior.gamma,
+                         sdev        = sdev,
+                         thin        = thin)
+  }
+  message("clustering done...")
+  clustres <- data.frame(samID = rownames(data[[1]]),
+                         clust = res$clusters,
+                         row.names = rownames(data[[1]]),
+                         stringsAsFactors = FALSE)
+  #clustres <- clustres[order(clustres$clust),]
+
+  featres <- data.frame(feature = as.character(unlist(lapply(data.backup, rownames))),
+                        dataset = rep(names(data),as.numeric(sapply(data.backup, function(x) length(rownames(x))))),
+                        post.prob = unlist(res$beta.pp),
+                        stringsAsFactors = FALSE)
+
+  feat.res <- NULL
+  for (d in unique(featres$dataset)) {
+    tmp <- featres[which(featres$dataset == d),]
+    tmp <- tmp[order(tmp$post.prob, decreasing = TRUE),]
+    tmp$rank <- 1:nrow(tmp)
+    feat.res <- rbind.data.frame(feat.res,tmp)
+  }
+  message("feature selection done...")
+
+  return(list(fit = res, clust.res = clustres, feat.res = feat.res, mo.method = "iClusterBayes"))
+}