|
a |
|
b/R/getConsensusClustering.R |
|
|
1 |
#' @name getConsensusClustering
#' @title Get subtypes from ConsensusClustering
#' @description This function wraps the Consensus Clustering algorithm and
#'   provides standard output for `getMoHeatmap()` and `getConsensusMOIC()`.
#'   The input matrices are stacked row-wise (features in rows, samples in
#'   columns), optionally normalized per row, and passed to
#'   \link[ConsensusClusterPlus]{ConsensusClusterPlus}.
#' @param data List of matrices (between 2 and 6 datasets).
#' @param N.clust Number of clusters; a single whole number no less than 2.
#' @param norMethod A string value to indicate the normalization method for
#'   consensus clustering; one of "median-centered", "mean-centered",
#'   "z-score" or "none". \code{NULL} is treated as "none".
#' @param reps An integer value to indicate the number of subsamples.
#' @param pItem A numerical value to indicate the proportion of items to sample.
#' @param pFeature A numerical value to indicate the proportion of features to sample.
#' @param clusterAlg A string value to indicate the cluster algorithm.
#' @param innerLinkage A string value to indicate the hierarchical linkage method for subsampling.
#' @param finalLinkage A string value to indicate the hierarchical method for consensus matrix.
#' @param distance A string value to indicate the distance function.
#' @param plot A string value to indicate the output format for heatmap.
#' @param writeTable A logical value to indicate if writing output and log to csv.
#' @param title A string value for output directory.
#' @param seed A numerical value to set random seed for reproducible results.
#' @param verbose A logical value to indicate if printing messages to the screen to indicate progress.
#' @param type Data type corresponding to the list of matrices, which can be
#'   gaussian, binomial or poisson. Accepted for interface consistency; it is
#'   not used by this function.
#' @return A list with the following components:
#'
#'         \code{fit}        an object returned by \link[ConsensusClusterPlus]{ConsensusClusterPlus}.
#'
#'         \code{clust.res}  a data.frame storing sample ID and corresponding clusters.
#'
#'         \code{clust.dend} a dendrogram of sample clustering.
#'
#'         \code{mo.method}  a string value indicating the method used for multi-omics integrative clustering.
#' @export
#' @examples # There is no example and please refer to vignette.
#' @import ConsensusClusterPlus
#' @importFrom dplyr %>%
#' @references Monti S, Tamayo P, Mesirov J, et al (2003). Consensus Clustering: A Resampling-Based Method for Class Discovery and Visualization of Gene Expression Microarray Data. Mach Learn, 52:91-118.
getConsensusClustering <- function(data         = NULL,
                                   N.clust      = NULL,
                                   type         = rep("gaussian", length(data)),
                                   norMethod    = "none",
                                   reps         = 500,
                                   pItem        = 0.8,
                                   pFeature     = 0.8,
                                   clusterAlg   = "hc",
                                   innerLinkage = "ward.D",
                                   finalLinkage = "ward.D",
                                   distance     = "pearson",
                                   plot         = NULL,
                                   writeTable   = FALSE,
                                   title        = file.path(getwd(), "consensuscluster"),
                                   seed         = 123456,
                                   verbose      = FALSE) {

  # check data
  n_dat <- length(data)
  if (n_dat > 6) {
    stop('current verision of MOVICS can support up to 6 datasets.')
  }
  if (n_dat < 2) {
    stop('current verision of MOVICS needs at least 2 omics data.')
  }

  # check N.clust up front: it is used both to derive maxK and to index the
  # fit, so a NULL / vector / fractional value would otherwise fail late with
  # an unrelated error message
  if (is.null(N.clust) || length(N.clust) != 1L || !is.numeric(N.clust) ||
      is.na(N.clust) || N.clust < 2 || N.clust != round(N.clust)) {
    stop("N.clust must be a single integer no less than 2.")
  }

  # check normalization method; NULL means no normalization
  if (is.null(norMethod)) {
    norMethod <- "none"
  }
  valid.methods <- c("median-centered", "mean-centered", "z-score", "none")
  if (length(norMethod) != 1L || !is.element(norMethod, valid.methods)) {
    stop("the normalized method should be one of median-centered, mean-centered, z-score or none!")
  }

  # stack all omics row-wise (features x samples), then normalize each row
  d <- do.call(rbind, data)
  d <- switch(norMethod,
              "median-centered" = sweep(d, 1, apply(d, 1, median, na.rm = TRUE)),
              "mean-centered"   = sweep(d, 1, apply(d, 1, mean, na.rm = TRUE)),
              "z-score"         = t(scale(t(d))),
              "none"            = d)

  # maxK cannot be set as 2, so 3 is requested when two clusters are wanted;
  # the solution for N.clust is still the one extracted below
  max.k <- if (N.clust == 2) 3 else N.clust

  fit <- ConsensusClusterPlus(d            = as.matrix(d),
                              maxK         = max.k,
                              reps         = reps,
                              pItem        = pItem,
                              pFeature     = pFeature,
                              clusterAlg   = clusterAlg,
                              innerLinkage = innerLinkage,
                              finalLinkage = finalLinkage,
                              distance     = distance,
                              seed         = seed,
                              verbose      = verbose,
                              plot         = plot,
                              writeTable   = writeTable,
                              title        = title)
  res <- fit[[N.clust]]

  # sample IDs come from the first dataset; rows stay in input sample order
  # (not sorted by cluster)
  sam.id <- colnames(data[[1]])
  clustres <- data.frame(samID            = sam.id,
                         clust            = as.numeric(res$consensusClass),
                         row.names        = sam.id,
                         stringsAsFactors = FALSE)

  list(fit        = fit,
       clust.res  = clustres,
       clust.dend = res$consensusTree,
       mo.method  = "ConsensusClustering")
}