a b/R/getiClusterBayes.R
1
#' @name getiClusterBayes
2
#' @title Get subtypes from iClusterBayes
3
#' @description This function wraps the iClusterBayes (Integrative clustering by Bayesian latent variable model) algorithm and provides standard output for `getMoHeatmap()` and `getConsensusMOIC()`.
4
#' @param data List of matrices with maximum of 6 subdatasets.
5
#' @param N.clust Number of clusters.
6
#' @param type Data type corresponding to the list of matrics, which can be gaussian, binomial or possion.
7
#' @param n.burnin An integer value to indicate the number of MCMC burnin.
8
#' @param n.draw An integer value to indicate the number of MCMC draw.
9
#' @param prior.gamma A numerical vector to indicate the prior probability for the indicator variable gamma of each subdataset.
10
#' @param sdev A numerical value to indicate the standard deviation of random walk proposal for the latent variable.
11
#' @param thin A numerical value to thin the MCMC chain in order to reduce autocorrelation.
12
#' @export
13
#' @return A list with the following components:
14
#'
15
#'         \code{fit}       an object returned by \link[iClusterPlus]{iClusterBayes}.
16
#'
17
#'         \code{clust.res} a data.frame storing sample ID and corresponding clusters.
18
#'
19
#'         \code{feat.res}  the results of features selection process.
20
#'
21
#'         \code{mo.method} a string value indicating the method used for multi-omics integrative clustering.
22
#' @import iClusterPlus
23
#' @importFrom dplyr %>%
24
#' @examples # There is no example and please refer to vignette.
25
#' @references Mo Q, Shen R, Guo C, Vannucci M, Chan KS, Hilsenbeck SG (2018). A fully Bayesian latent variable model for integrative clustering analysis of multi-type omics data. Biostatistics, 19(1):71-86.
26
getiClusterBayes <- function(data        = NULL,
27
                             N.clust     = NULL,
28
                             type        = rep("gaussian", length(data)),
29
                             n.burnin    = 18000,
30
                             n.draw      = 12000,
31
                             prior.gamma = rep(0.5,length(data)),
32
                             sdev        = 0.05,
33
                             thin        = 3) {
34
35
  # check data
36
  n_dat <- length(data)
37
  if(n_dat > 6){
38
    stop('current verision of MOVICS can support up to 6 datasets.')
39
  }
40
  if(n_dat < 2){
41
    stop('current verision of MOVICS needs at least 2 omics data.')
42
  }
43
44
  # remove features that made of categories not equal to 2
45
  if(is.element("binomial",type)) {
46
    bindex <- which(type == "binomial")
47
    for (i in bindex) {
48
      a <- which(rowSums(data[[i]]) == 0)
49
      b <- which(rowSums(data[[i]]) == ncol(data[[i]]))
50
      if(length(a) > 0) {
51
        data[[i]] <- data[[i]][which(rowSums(data[[i]]) != 0),] # remove all zero
52
      }
53
54
      if(length(b) > 0) {
55
        data[[i]] <- data[[i]][which(rowSums(data[[i]]) != ncol(data[[i]])),] # remove all one
56
      }
57
58
      if(length(a) + length(b) > 0) {
59
        message(paste0("--", names(data)[i],": a total of ",length(a) + length(b), " features were removed due to the categories were not equal to 2!"))
60
      }
61
    }
62
  }
63
64
  data.backup <- data
65
  data <- lapply(data, t)
66
67
  if(n_dat == 1){
68
    res <- iClusterBayes(dt1         = data[[1]],
69
                         type        = type,
70
                         K           = N.clust - 1,
71
                         n.burnin    = n.burnin,
72
                         n.draw      = n.draw,
73
                         prior.gamma = prior.gamma,
74
                         sdev        = sdev,
75
                         thin        = thin)
76
  }
77
  if(n_dat == 2){
78
    res <- iClusterBayes(dt1         = data[[1]],
79
                         dt2         = data[[2]],
80
                         type        = type,
81
                         K           = N.clust - 1,
82
                         n.burnin    = n.burnin,
83
                         n.draw      = n.draw,
84
                         prior.gamma = prior.gamma,
85
                         sdev        = sdev,
86
                         thin        = thin)
87
  }
88
  if(n_dat == 3){
89
    res <- iClusterBayes(dt1         = data[[1]],
90
                         dt2         = data[[2]],
91
                         dt3         = data[[3]],
92
                         type        = type,
93
                         K           = N.clust - 1,
94
                         n.burnin    = n.burnin,
95
                         n.draw      = n.draw,
96
                         prior.gamma = prior.gamma,
97
                         sdev        = sdev,
98
                         thin        = thin)
99
  }
100
  if(n_dat == 4){
101
    res <- iClusterBayes(dt1         = data[[1]],
102
                         dt2         = data[[2]],
103
                         dt3         = data[[3]],
104
                         dt4         = data[[4]],
105
                         type        = type,
106
                         K           = N.clust - 1,
107
                         n.burnin    = n.burnin,
108
                         n.draw      = n.draw,
109
                         prior.gamma = prior.gamma,
110
                         sdev        = sdev,
111
                         thin        = thin)
112
  }
113
  if(n_dat == 5){
114
    res <- iClusterBayes(dt1         = data[[1]],
115
                         dt2         = data[[2]],
116
                         dt3         = data[[3]],
117
                         dt4         = data[[4]],
118
                         dt5         = data[[5]],
119
                         type        = type,
120
                         K           = N.clust - 1,
121
                         n.burnin    = n.burnin,
122
                         n.draw      = n.draw,
123
                         prior.gamma = prior.gamma,
124
                         sdev        = sdev,
125
                         thin        = thin)
126
  }
127
  if(n_dat == 6){
128
    res <- iClusterBayes(dt1         = data[[1]],
129
                         dt2         = data[[2]],
130
                         dt3         = data[[3]],
131
                         dt4         = data[[4]],
132
                         dt5         = data[[5]],
133
                         dt6         = data[[6]],
134
                         type        = type,
135
                         K           = N.clust - 1,
136
                         n.burnin    = n.burnin,
137
                         n.draw      = n.draw,
138
                         prior.gamma = prior.gamma,
139
                         sdev        = sdev,
140
                         thin        = thin)
141
  }
142
  message("clustering done...")
143
  clustres <- data.frame(samID = rownames(data[[1]]),
144
                         clust = res$clusters,
145
                         row.names = rownames(data[[1]]),
146
                         stringsAsFactors = FALSE)
147
  #clustres <- clustres[order(clustres$clust),]
148
149
  featres <- data.frame(feature = as.character(unlist(lapply(data.backup, rownames))),
150
                        dataset = rep(names(data),as.numeric(sapply(data.backup, function(x) length(rownames(x))))),
151
                        post.prob = unlist(res$beta.pp),
152
                        stringsAsFactors = FALSE)
153
154
  feat.res <- NULL
155
  for (d in unique(featres$dataset)) {
156
    tmp <- featres[which(featres$dataset == d),]
157
    tmp <- tmp[order(tmp$post.prob, decreasing = TRUE),]
158
    tmp$rank <- 1:nrow(tmp)
159
    feat.res <- rbind.data.frame(feat.res,tmp)
160
  }
161
  message("feature selection done...")
162
163
  return(list(fit = res, clust.res = clustres, feat.res = feat.res, mo.method = "iClusterBayes"))
164
}