|
a |
|
b/R/getiClusterBayes.R |
|
|
1 |
#' @name getiClusterBayes |
|
|
2 |
#' @title Get subtypes from iClusterBayes |
|
|
3 |
#' @description This function wraps the iClusterBayes (Integrative clustering by Bayesian latent variable model) algorithm and provides standard output for `getMoHeatmap()` and `getConsensusMOIC()`. |
|
|
4 |
#' @param data List of matrices with maximum of 6 subdatasets. |
|
|
5 |
#' @param N.clust Number of clusters. |
|
|
6 |
#' @param type Data type corresponding to the list of matrics, which can be gaussian, binomial or possion. |
|
|
7 |
#' @param n.burnin An integer value to indicate the number of MCMC burnin. |
|
|
8 |
#' @param n.draw An integer value to indicate the number of MCMC draw. |
|
|
9 |
#' @param prior.gamma A numerical vector to indicate the prior probability for the indicator variable gamma of each subdataset. |
|
|
10 |
#' @param sdev A numerical value to indicate the standard deviation of random walk proposal for the latent variable. |
|
|
11 |
#' @param thin A numerical value to thin the MCMC chain in order to reduce autocorrelation. |
|
|
12 |
#' @export |
|
|
13 |
#' @return A list with the following components: |
|
|
14 |
#' |
|
|
15 |
#' \code{fit} an object returned by \link[iClusterPlus]{iClusterBayes}. |
|
|
16 |
#' |
|
|
17 |
#' \code{clust.res} a data.frame storing sample ID and corresponding clusters. |
|
|
18 |
#' |
|
|
19 |
#' \code{feat.res} the results of features selection process. |
|
|
20 |
#' |
|
|
21 |
#' \code{mo.method} a string value indicating the method used for multi-omics integrative clustering. |
|
|
22 |
#' @import iClusterPlus |
|
|
23 |
#' @importFrom dplyr %>% |
|
|
24 |
#' @examples # There is no example and please refer to vignette. |
|
|
25 |
#' @references Mo Q, Shen R, Guo C, Vannucci M, Chan KS, Hilsenbeck SG (2018). A fully Bayesian latent variable model for integrative clustering analysis of multi-type omics data. Biostatistics, 19(1):71-86. |
|
|
26 |
getiClusterBayes <- function(data = NULL, |
|
|
27 |
N.clust = NULL, |
|
|
28 |
type = rep("gaussian", length(data)), |
|
|
29 |
n.burnin = 18000, |
|
|
30 |
n.draw = 12000, |
|
|
31 |
prior.gamma = rep(0.5,length(data)), |
|
|
32 |
sdev = 0.05, |
|
|
33 |
thin = 3) { |
|
|
34 |
|
|
|
35 |
# check data |
|
|
36 |
n_dat <- length(data) |
|
|
37 |
if(n_dat > 6){ |
|
|
38 |
stop('current verision of MOVICS can support up to 6 datasets.') |
|
|
39 |
} |
|
|
40 |
if(n_dat < 2){ |
|
|
41 |
stop('current verision of MOVICS needs at least 2 omics data.') |
|
|
42 |
} |
|
|
43 |
|
|
|
44 |
# remove features that made of categories not equal to 2 |
|
|
45 |
if(is.element("binomial",type)) { |
|
|
46 |
bindex <- which(type == "binomial") |
|
|
47 |
for (i in bindex) { |
|
|
48 |
a <- which(rowSums(data[[i]]) == 0) |
|
|
49 |
b <- which(rowSums(data[[i]]) == ncol(data[[i]])) |
|
|
50 |
if(length(a) > 0) { |
|
|
51 |
data[[i]] <- data[[i]][which(rowSums(data[[i]]) != 0),] # remove all zero |
|
|
52 |
} |
|
|
53 |
|
|
|
54 |
if(length(b) > 0) { |
|
|
55 |
data[[i]] <- data[[i]][which(rowSums(data[[i]]) != ncol(data[[i]])),] # remove all one |
|
|
56 |
} |
|
|
57 |
|
|
|
58 |
if(length(a) + length(b) > 0) { |
|
|
59 |
message(paste0("--", names(data)[i],": a total of ",length(a) + length(b), " features were removed due to the categories were not equal to 2!")) |
|
|
60 |
} |
|
|
61 |
} |
|
|
62 |
} |
|
|
63 |
|
|
|
64 |
data.backup <- data |
|
|
65 |
data <- lapply(data, t) |
|
|
66 |
|
|
|
67 |
if(n_dat == 1){ |
|
|
68 |
res <- iClusterBayes(dt1 = data[[1]], |
|
|
69 |
type = type, |
|
|
70 |
K = N.clust - 1, |
|
|
71 |
n.burnin = n.burnin, |
|
|
72 |
n.draw = n.draw, |
|
|
73 |
prior.gamma = prior.gamma, |
|
|
74 |
sdev = sdev, |
|
|
75 |
thin = thin) |
|
|
76 |
} |
|
|
77 |
if(n_dat == 2){ |
|
|
78 |
res <- iClusterBayes(dt1 = data[[1]], |
|
|
79 |
dt2 = data[[2]], |
|
|
80 |
type = type, |
|
|
81 |
K = N.clust - 1, |
|
|
82 |
n.burnin = n.burnin, |
|
|
83 |
n.draw = n.draw, |
|
|
84 |
prior.gamma = prior.gamma, |
|
|
85 |
sdev = sdev, |
|
|
86 |
thin = thin) |
|
|
87 |
} |
|
|
88 |
if(n_dat == 3){ |
|
|
89 |
res <- iClusterBayes(dt1 = data[[1]], |
|
|
90 |
dt2 = data[[2]], |
|
|
91 |
dt3 = data[[3]], |
|
|
92 |
type = type, |
|
|
93 |
K = N.clust - 1, |
|
|
94 |
n.burnin = n.burnin, |
|
|
95 |
n.draw = n.draw, |
|
|
96 |
prior.gamma = prior.gamma, |
|
|
97 |
sdev = sdev, |
|
|
98 |
thin = thin) |
|
|
99 |
} |
|
|
100 |
if(n_dat == 4){ |
|
|
101 |
res <- iClusterBayes(dt1 = data[[1]], |
|
|
102 |
dt2 = data[[2]], |
|
|
103 |
dt3 = data[[3]], |
|
|
104 |
dt4 = data[[4]], |
|
|
105 |
type = type, |
|
|
106 |
K = N.clust - 1, |
|
|
107 |
n.burnin = n.burnin, |
|
|
108 |
n.draw = n.draw, |
|
|
109 |
prior.gamma = prior.gamma, |
|
|
110 |
sdev = sdev, |
|
|
111 |
thin = thin) |
|
|
112 |
} |
|
|
113 |
if(n_dat == 5){ |
|
|
114 |
res <- iClusterBayes(dt1 = data[[1]], |
|
|
115 |
dt2 = data[[2]], |
|
|
116 |
dt3 = data[[3]], |
|
|
117 |
dt4 = data[[4]], |
|
|
118 |
dt5 = data[[5]], |
|
|
119 |
type = type, |
|
|
120 |
K = N.clust - 1, |
|
|
121 |
n.burnin = n.burnin, |
|
|
122 |
n.draw = n.draw, |
|
|
123 |
prior.gamma = prior.gamma, |
|
|
124 |
sdev = sdev, |
|
|
125 |
thin = thin) |
|
|
126 |
} |
|
|
127 |
if(n_dat == 6){ |
|
|
128 |
res <- iClusterBayes(dt1 = data[[1]], |
|
|
129 |
dt2 = data[[2]], |
|
|
130 |
dt3 = data[[3]], |
|
|
131 |
dt4 = data[[4]], |
|
|
132 |
dt5 = data[[5]], |
|
|
133 |
dt6 = data[[6]], |
|
|
134 |
type = type, |
|
|
135 |
K = N.clust - 1, |
|
|
136 |
n.burnin = n.burnin, |
|
|
137 |
n.draw = n.draw, |
|
|
138 |
prior.gamma = prior.gamma, |
|
|
139 |
sdev = sdev, |
|
|
140 |
thin = thin) |
|
|
141 |
} |
|
|
142 |
message("clustering done...") |
|
|
143 |
clustres <- data.frame(samID = rownames(data[[1]]), |
|
|
144 |
clust = res$clusters, |
|
|
145 |
row.names = rownames(data[[1]]), |
|
|
146 |
stringsAsFactors = FALSE) |
|
|
147 |
#clustres <- clustres[order(clustres$clust),] |
|
|
148 |
|
|
|
149 |
featres <- data.frame(feature = as.character(unlist(lapply(data.backup, rownames))), |
|
|
150 |
dataset = rep(names(data),as.numeric(sapply(data.backup, function(x) length(rownames(x))))), |
|
|
151 |
post.prob = unlist(res$beta.pp), |
|
|
152 |
stringsAsFactors = FALSE) |
|
|
153 |
|
|
|
154 |
feat.res <- NULL |
|
|
155 |
for (d in unique(featres$dataset)) { |
|
|
156 |
tmp <- featres[which(featres$dataset == d),] |
|
|
157 |
tmp <- tmp[order(tmp$post.prob, decreasing = TRUE),] |
|
|
158 |
tmp$rank <- 1:nrow(tmp) |
|
|
159 |
feat.res <- rbind.data.frame(feat.res,tmp) |
|
|
160 |
} |
|
|
161 |
message("feature selection done...") |
|
|
162 |
|
|
|
163 |
return(list(fit = res, clust.res = clustres, feat.res = feat.res, mo.method = "iClusterBayes")) |
|
|
164 |
} |