|
a |
|
b/R/getMOIC.R |
|
|
1 |
#' @name getMOIC
#' @title Get subtypes from multi-omics integrative clustering
#' @description Using `getMOIC()`, users can choose one or several of the ten algorithms embedded in `MOVICS`. Users can implement multi-omics clustering in the simplest way: the only requirement is to specify at least a list of matrices (argument `data`), a number of clusters (argument `N.clust`), and a clustering method (argument `methodslist`) in `getMOIC()`. It is possible to pass various arguments that are specific to each method. Of course, users can also directly call different algorithms by using functions that start with `get` and end with the name of the algorithm (e.g., `getSNF`; please refer to `?get%algorithm_name%` for more details about the editable arguments).
#' @param data List of matrices with samples in columns (at least 2 and at most 6 matrices); all matrices must contain the same number of samples.
#' @param methodslist A string list specifying one or multiple methods to run (See Details).
#' @param N.clust Number of clusters.
#' @param type Data type corresponding to the list of matrices, which can be gaussian, binomial or poisson.
#' @param ... Additional parameters forwarded to the chosen method (only used when exactly one method is chosen; ignored with a warning when several methods are chosen).
#' @examples # There is no example and please refer to vignette.
#' @export
#' @return If a single algorithm is specified, the result returned by that algorithm; otherwise a named list of results, one element per specified algorithm.
#' @import SNFtool
#' @import IntNMF
#' @import mogsa
#' @import coca
#' @import iClusterPlus
#' @import CIMLR
#' @import PINSPlus
#' @import ConsensusClusterPlus
#' @details
#' Method for integrative clustering will be chosen according to the value of argument 'methodslist':
#'
#' If \code{methodslist == "IntNMF"}, Integrative clustering methods using Non-Negative Matrix Factorization
#'
#' If \code{methodslist == "SNF"}, Similarity network fusion.
#'
#' If \code{methodslist == "LRAcluster"}, Integrated cancer omics data analysis by low rank approximation.
#'
#' If \code{methodslist == "PINSPlus"}, Perturbation Clustering for data integration and disease subtyping
#'
#' If \code{methodslist == "ConsensusClustering"}, Consensus clustering
#'
#' If \code{methodslist == "NEMO"}, Neighborhood based multi-omics clustering
#'
#' If \code{methodslist == "COCA"}, Cluster Of Clusters Analysis
#'
#' If \code{methodslist == "CIMLR"}, Cancer Integration via Multikernel Learning (Support Feature Selection)
#'
#' If \code{methodslist == "MoCluster"}, Identifying joint patterns across multiple omics data sets (Support Feature Selection)
#'
#' If \code{methodslist == "iClusterBayes"}, Integrative clustering of multiple genomic data by fitting a Bayesian latent variable model (Support Feature Selection)
#'
#' @references
#' Pierre-Jean M, Deleuze J F, Le Floch E, et al. Clustering and variable selection evaluation of 13 unsupervised methods for multi-omics data integration[J]. Briefings in Bioinformatics, 2019.
#'
#' intNMF:
#' Chalise P, Fridley BL. Integrative clustering of multi-level omic data based on non-negative matrix factorization algorithm. PLoS One. 2017;12(5):e0176278.
#'
#' iClusterBayes:
#' Mo Q, Shen R, Guo C, Vannucci M, Chan KS, Hilsenbeck SG. A fully Bayesian latent variable model for integrative clustering analysis of multi-type omics data. Biostatistics. 2018;19(1):71-86.
#'
#' SNF:
#' Wang B, Mezlini AM, Demir F, et al. Similarity network fusion for aggregating data types on a genomic scale. Nat Methods. 2014;11(3):333-337.
#'
#' Mocluster:
#' Meng C, Helm D, Frejno M, Kuster B. moCluster: Identifying Joint Patterns Across Multiple Omics Data Sets. J Proteome Res. 2016;15(3):755-765.
#'
#' LRAcluster:
#' Wu D, Wang D, Zhang MQ, Gu J. Fast dimension reduction and integrative clustering of multi-omics data using low-rank approximation: application to cancer molecular classification. BMC Genomics. 2015;16:1022.
#'
#' CIMLR:
#' Ramazzotti D, Lal A, Wang B, Batzoglou S, Sidow A. Multi-omic tumor data reveal diversity of molecular mechanisms that correlate with survival. Nat Commun. 2018;9(1):4453.
#'
#' PINSPlus:
#' Nguyen H, Shrestha S, Draghici S, Nguyen T. PINSPlus: a tool for tumor subtype discovery in integrated genomic data. Bioinformatics. 2019;35(16):2843-2846.
#'
#' ConsensusClustering:
#' Monti S, Tamayo P, Mesirov J, et al. Consensus Clustering: A Resampling-Based Method for Class Discovery and Visualization of Gene Expression Microarray Data. Machine Learning. 2003;52:91-118.
#'
#' NEMO:
#' Rappoport N, Shamir R. NEMO: cancer subtyping by integration of partial multi-omic data. Bioinformatics. 2019;35(18):3348-3356.
#'
#' COCA:
#' Hoadley KA, Yau C, Wolf DM, et al. Multiplatform analysis of 12 cancer types reveals molecular classification within and across tissues of origin. Cell. 2014;158(4):929-944.
getMOIC <- function(data        = NULL,
                    methodslist = list("SNF", "CIMLR", "PINSPlus", "NEMO", "COCA", "MoCluster", "LRAcluster", "ConsensusClustering", "IntNMF", "iClusterBayes"),
                    N.clust     = NULL,
                    type        = rep("gaussian", length(data)),
                    ...){

  # check argument
  if (!is.list(data)) {
    stop("data is not a list!")
  }

  n_dat <- length(data)
  if (n_dat > 6) {
    stop('current verision of MOVICS can support up to 6 omics data.')
  }
  if (n_dat < 2) {
    stop('current verision of MOVICS needs at least 2 omics data.')
  }

  # give unnamed omics layers default names dat1, dat2, ...
  if (is.null(names(data))) {
    names(data) <- sprintf("dat%s", seq_along(data))
  }

  # single source of truth for the algorithms this dispatcher knows about
  supported <- c("SNF", "CIMLR", "PINSPlus", "NEMO", "COCA", "MoCluster", "LRAcluster", "ConsensusClustering", "IntNMF", "iClusterBayes")

  # flatten so that both character vectors and lists of strings are accepted
  methods     <- unlist(methodslist)
  num.methods <- length(methods)

  # an empty selection would otherwise fall through and return an empty list
  if (num.methods == 0) {
    stop("methodslist is empty; please specify at least one algorithm.")
  }
  if (!all(is.element(methods, supported))) {
    stop("current version of MOVICS supports 10 algorithms. Allowed values contain c('SNF', 'CIMLR', 'PINSPlus', 'NEMO', 'COCA', 'MoCluster', 'LRAcluster', 'ConsensusClustering', 'IntNMF', 'iClusterBayes').")
  }

  if (num.methods > 1) {
    message("--you choose more than 1 algorithm and all of them shall be run with parameters by default.")
    # method-specific extras cannot be shared across algorithms; tell the user they are dropped
    if (...length() > 0) {
      warning("additional arguments in '...' are ignored when more than one algorithm is chosen.", call. = FALSE)
    }
  }

  # Check dimension: every element must be two-dimensional, samples are counted by columns
  n_samples <- vapply(data, function(x) {
    d <- dim(x)
    if (length(d) == 2L) as.integer(d[2L]) else NA_integer_
  }, integer(1))
  if (anyNA(n_samples)) {
    stop("each element of data must be a matrix (or data frame) with samples in columns!")
  }
  if (max(n_samples) != min(n_samples)) {
    message(sprintf("number of samples in dat %s is %s\n", seq_along(data), n_samples))
    stop("data do not contain the same number of samples!")
  }

  reslist <- list()
  for (method in methods) {
    # switch() is evaluated lazily, so only the selected wrapper is looked up
    doMOIC <- switch(method,
                     "IntNMF"              = getIntNMF,
                     "iClusterBayes"       = getiClusterBayes,
                     "SNF"                 = getSNF,
                     "MoCluster"           = getMoCluster,
                     "LRAcluster"          = getLRAcluster,
                     "CIMLR"               = getCIMLR,
                     "PINSPlus"            = getPINSPlus,
                     "ConsensusClustering" = getConsensusClustering,
                     "NEMO"                = getNEMO,
                     "COCA"                = getCOCA
    )
    # extra arguments are method-specific, so they are forwarded only in single-method mode
    if (num.methods == 1) {
      reslist[[method]] <- doMOIC(data, N.clust, type, ...)
    } else {
      reslist[[method]] <- doMOIC(data, N.clust, type)
    }
    message(paste0(method, " done..."))
  }

  # a single algorithm returns its result directly; several return a named list
  if (num.methods == 1) {
    return(reslist[[1]])
  } else {
    return(reslist)
  }
}