[494cbf]: / R / getiClusterBayes.R

Download this file

165 lines (155 with data), 7.3 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
#' @name getiClusterBayes
#' @title Get subtypes from iClusterBayes
#' @description This function wraps the iClusterBayes (Integrative clustering by Bayesian latent variable model) algorithm and provides standard output for `getMoHeatmap()` and `getConsensusMOIC()`.
#' @param data List of matrices with maximum of 6 subdatasets.
#' @param N.clust Number of clusters.
#' @param type Data type corresponding to the list of matrics, which can be gaussian, binomial or possion.
#' @param n.burnin An integer value to indicate the number of MCMC burnin.
#' @param n.draw An integer value to indicate the number of MCMC draw.
#' @param prior.gamma A numerical vector to indicate the prior probability for the indicator variable gamma of each subdataset.
#' @param sdev A numerical value to indicate the standard deviation of random walk proposal for the latent variable.
#' @param thin A numerical value to thin the MCMC chain in order to reduce autocorrelation.
#' @export
#' @return A list with the following components:
#'
#' \code{fit} an object returned by \link[iClusterPlus]{iClusterBayes}.
#'
#' \code{clust.res} a data.frame storing sample ID and corresponding clusters.
#'
#' \code{feat.res} the results of features selection process.
#'
#' \code{mo.method} a string value indicating the method used for multi-omics integrative clustering.
#' @import iClusterPlus
#' @importFrom dplyr %>%
#' @examples # There is no example and please refer to vignette.
#' @references Mo Q, Shen R, Guo C, Vannucci M, Chan KS, Hilsenbeck SG (2018). A fully Bayesian latent variable model for integrative clustering analysis of multi-type omics data. Biostatistics, 19(1):71-86.
getiClusterBayes <- function(data = NULL,
N.clust = NULL,
type = rep("gaussian", length(data)),
n.burnin = 18000,
n.draw = 12000,
prior.gamma = rep(0.5,length(data)),
sdev = 0.05,
thin = 3) {
# check data
n_dat <- length(data)
if(n_dat > 6){
stop('current verision of MOVICS can support up to 6 datasets.')
}
if(n_dat < 2){
stop('current verision of MOVICS needs at least 2 omics data.')
}
# remove features that made of categories not equal to 2
if(is.element("binomial",type)) {
bindex <- which(type == "binomial")
for (i in bindex) {
a <- which(rowSums(data[[i]]) == 0)
b <- which(rowSums(data[[i]]) == ncol(data[[i]]))
if(length(a) > 0) {
data[[i]] <- data[[i]][which(rowSums(data[[i]]) != 0),] # remove all zero
}
if(length(b) > 0) {
data[[i]] <- data[[i]][which(rowSums(data[[i]]) != ncol(data[[i]])),] # remove all one
}
if(length(a) + length(b) > 0) {
message(paste0("--", names(data)[i],": a total of ",length(a) + length(b), " features were removed due to the categories were not equal to 2!"))
}
}
}
data.backup <- data
data <- lapply(data, t)
if(n_dat == 1){
res <- iClusterBayes(dt1 = data[[1]],
type = type,
K = N.clust - 1,
n.burnin = n.burnin,
n.draw = n.draw,
prior.gamma = prior.gamma,
sdev = sdev,
thin = thin)
}
if(n_dat == 2){
res <- iClusterBayes(dt1 = data[[1]],
dt2 = data[[2]],
type = type,
K = N.clust - 1,
n.burnin = n.burnin,
n.draw = n.draw,
prior.gamma = prior.gamma,
sdev = sdev,
thin = thin)
}
if(n_dat == 3){
res <- iClusterBayes(dt1 = data[[1]],
dt2 = data[[2]],
dt3 = data[[3]],
type = type,
K = N.clust - 1,
n.burnin = n.burnin,
n.draw = n.draw,
prior.gamma = prior.gamma,
sdev = sdev,
thin = thin)
}
if(n_dat == 4){
res <- iClusterBayes(dt1 = data[[1]],
dt2 = data[[2]],
dt3 = data[[3]],
dt4 = data[[4]],
type = type,
K = N.clust - 1,
n.burnin = n.burnin,
n.draw = n.draw,
prior.gamma = prior.gamma,
sdev = sdev,
thin = thin)
}
if(n_dat == 5){
res <- iClusterBayes(dt1 = data[[1]],
dt2 = data[[2]],
dt3 = data[[3]],
dt4 = data[[4]],
dt5 = data[[5]],
type = type,
K = N.clust - 1,
n.burnin = n.burnin,
n.draw = n.draw,
prior.gamma = prior.gamma,
sdev = sdev,
thin = thin)
}
if(n_dat == 6){
res <- iClusterBayes(dt1 = data[[1]],
dt2 = data[[2]],
dt3 = data[[3]],
dt4 = data[[4]],
dt5 = data[[5]],
dt6 = data[[6]],
type = type,
K = N.clust - 1,
n.burnin = n.burnin,
n.draw = n.draw,
prior.gamma = prior.gamma,
sdev = sdev,
thin = thin)
}
message("clustering done...")
clustres <- data.frame(samID = rownames(data[[1]]),
clust = res$clusters,
row.names = rownames(data[[1]]),
stringsAsFactors = FALSE)
#clustres <- clustres[order(clustres$clust),]
featres <- data.frame(feature = as.character(unlist(lapply(data.backup, rownames))),
dataset = rep(names(data),as.numeric(sapply(data.backup, function(x) length(rownames(x))))),
post.prob = unlist(res$beta.pp),
stringsAsFactors = FALSE)
feat.res <- NULL
for (d in unique(featres$dataset)) {
tmp <- featres[which(featres$dataset == d),]
tmp <- tmp[order(tmp$post.prob, decreasing = TRUE),]
tmp$rank <- 1:nrow(tmp)
feat.res <- rbind.data.frame(feat.res,tmp)
}
message("feature selection done...")
return(list(fit = res, clust.res = clustres, feat.res = feat.res, mo.method = "iClusterBayes"))
}