Diff of /R/getMOIC.R [000000] .. [494cbf]

Switch to unified view

a b/R/getMOIC.R
1
#' @name getMOIC
2
#' @title Get subtypes from multi-omics integrative clustering
3
#' @description Using `getMOIC()`, users can choose one or more of the ten algorithms embedded in `MOVICS`. Multi-omics clustering can be run in the simplest way: the only requirement is to specify at least a list of matrices (argument `data`), a number of clusters (argument `N.clust`), and a clustering method (argument `methodslist`) in `getMOIC()`. It is possible to pass various arguments that are specific to each method. Of course, users can also directly call different algorithms by using functions that start with `get` and end with the name of the algorithm (e.g., `getSNF`; please refer to `?get%algorithm_name%` for more details about the editable arguments).
4
#' @param data List of matrices (at least 2 and at most 6 matrices are supported).
5
#' @param methodslist A string list specifying one or multiple methods to run (See Details).
6
#' @param N.clust Number of clusters.
7
#' @param type Data type corresponding to the list of matrices, which can be gaussian, binomial or poisson.
8
#' @param ... Additional parameters for each method (only works when only one method is chosen)
9
#' @examples # There is no example and please refer to vignette.
10
#' @export
11
#' @return A list of results returned by each specified algorithm; if only one algorithm is specified, its result is returned directly.
12
#' @import SNFtool
13
#' @import IntNMF
14
#' @import mogsa
15
#' @import coca
16
#' @import iClusterPlus
17
#' @import CIMLR
18
#' @import PINSPlus
19
#' @import ConsensusClusterPlus
20
#' @details
21
#' Method for integrative clustering will be chosen according to the value of argument 'methodslist':
22
#'
23
#' If \code{methodslist == "IntNMF"}, Integrative clustering methods using Non-Negative Matrix Factorization
24
#'
25
#' If \code{methodslist == "SNF"}, Similarity network fusion.
26
#'
27
#' If \code{methodslist == "LRAcluster"}, Integrated cancer omics data analysis by low rank approximation.
28
#'
29
#' If \code{methodslist == "PINSPlus"}, Perturbation Clustering for data integration and disease subtyping
30
#'
31
#' If \code{methodslist == "ConsensusClustering"}, Consensus clustering
32
#'
33
#' If \code{methodslist == "NEMO"}, Neighborhood based multi-omics clustering
34
#'
35
#' If \code{methodslist == "COCA"}, Cluster Of Clusters Analysis
36
#'
37
#' If \code{methodslist == "CIMLR"}, Cancer Integration via Multikernel Learning (Support Feature Selection)
38
#'
39
#' If \code{methodslist == "MoCluster"}, Identifying joint patterns across multiple omics data sets (Support Feature Selection)
40
#'
41
#' If \code{methodslist == "iClusterBayes"}, Integrative clustering of multiple genomic data by fitting a Bayesian latent variable model (Support Feature Selection)
42
#'
43
#' @references
44
#' Pierre-Jean M, Deleuze J F, Le Floch E, et al. Clustering and variable selection evaluation of 13 unsupervised methods for multi-omics data integration[J]. Briefings in Bioinformatics, 2019.
45
#'
46
#' intNMF:
47
#' Chalise P, Fridley BL. Integrative clustering of multi-level omic data based on non-negative matrix factorization algorithm. PLoS One. 2017;12(5):e0176278.
48
#'
49
#' iClusterBayes:
50
#' Mo Q, Shen R, Guo C, Vannucci M, Chan KS, Hilsenbeck SG. A fully Bayesian latent variable model for integrative clustering analysis of multi-type omics data. Biostatistics. 2018;19(1):71-86.
51
#'
52
#' SNF:
53
#' Wang B, Mezlini AM, Demir F, et al. Similarity network fusion for aggregating data types on a genomic scale. Nat Methods. 2014;11(3):333-337.
54
#'
55
#' Mocluster:
56
#' Meng C, Helm D, Frejno M, Kuster B. moCluster: Identifying Joint Patterns Across Multiple Omics Data Sets. J Proteome Res. 2016;15(3):755-765.
57
#'
58
#' LRAcluster:
59
#' Wu D, Wang D, Zhang MQ, Gu J. Fast dimension reduction and integrative clustering of multi-omics data using low-rank approximation: application to cancer molecular classification. BMC Genomics. 2015;16:1022.
60
#'
61
#' CIMLR:
62
#' Ramazzotti D, Lal A, Wang B, Batzoglou S, Sidow A. Multi-omic tumor data reveal diversity of molecular mechanisms that correlate with survival. Nat Commun. 2018;9(1):4453.
63
#'
64
#' PINSPlus:
65
#' Nguyen H, Shrestha S, Draghici S, Nguyen T. PINSPlus: a tool for tumor subtype discovery in integrated genomic data. Bioinformatics. 2019;35(16):2843-2846.
66
#'
67
#' ConsensusClustering:
68
#' Monti S, Tamayo P, Mesirov J, et al. Consensus Clustering: A Resampling-Based Method for Class Discovery and Visualization of Gene Expression Microarray Data. Machine Learning. 2003;52:91-118.
69
#'
70
#' NEMO:
71
#' Rappoport N, Shamir R. NEMO: cancer subtyping by integration of partial multi-omic data. Bioinformatics. 2019;35(18):3348-3356.
72
#'
73
#' COCA:
74
#' Hoadley KA, Yau C, Wolf DM, et al. Multiplatform analysis of 12 cancer types reveals molecular classification within and across tissues of origin. Cell. 2014;158(4):929-944.
75
getMOIC <- function(data        = NULL,
                    methodslist = list("SNF", "CIMLR", "PINSPlus", "NEMO", "COCA", "MoCluster", "LRAcluster", "ConsensusClustering", "IntNMF", "iClusterBayes"),
                    N.clust     = NULL,
                    type        = rep("gaussian", length(data)),
                    ...){

  # Dispatcher for multi-omics integrative clustering: validates the inputs,
  # then runs every requested algorithm through its `get<Algorithm>()` wrapper.
  # Returns the single result directly when exactly one method is requested,
  # otherwise a list of results named by method.

  # check argument
  if (!is.list(data)) {
    stop("data is not a list!")
  }

  n_dat <- length(data)
  if (n_dat > 6) {
    stop('current verision of MOVICS can support up to 6 omics data.')
  }
  if (n_dat < 2) {
    stop('current verision of MOVICS needs at least 2 omics data.')
  }

  # give unnamed omics layers default names dat1, dat2, ...
  if (is.null(names(data))) {
    names(data) <- sprintf("dat%s", seq_along(data))
  }

  # flatten the requested methods (accepts a list or a character vector)
  supported   <- c("SNF", "CIMLR", "PINSPlus", "NEMO", "COCA", "MoCluster", "LRAcluster", "ConsensusClustering", "IntNMF", "iClusterBayes")
  methods     <- as.character(unlist(methodslist))
  num.methods <- length(methods)

  # an empty request would otherwise silently return an empty list
  if (num.methods == 0L) {
    stop("no algorithm is specified in methodslist!")
  }
  if (!all(methods %in% supported)) {
    stop("current version of MOVICS supports 10 algorithms. Allowed values contain c('SNF', 'CIMLR', 'PINSPlus', 'NEMO', 'COCA', 'MoCluster', 'LRAcluster', 'ConsensusClustering', 'IntNMF', 'iClusterBayes').")
  }

  if (num.methods > 1) {
    message("--you choose more than 1 algorithm and all of them shall be run with parameters by default.")
  }

  # Check dimension: every layer must be two-dimensional, and all layers must
  # share the same number of columns (columns are counted as samples here)
  is_2d <- vapply(data, function(x) length(dim(x)) == 2L, logical(1))
  if (!all(is_2d)) {
    stop("every element of data must be a two-dimensional object (matrix or data frame)!")
  }
  n_samples <- vapply(data, ncol, integer(1))
  if (length(unique(n_samples)) > 1L) {
    message(sprintf("number of samples in dat %s is %s\n", seq_along(data), n_samples))
    stop("data do not contain the same number of samples!")
  }

  reslist <- list()
  for (method in methods) {
    doMOIC <- switch(method,
                     "IntNMF"              = getIntNMF,
                     "iClusterBayes"       = getiClusterBayes,
                     "SNF"                 = getSNF,
                     "MoCluster"           = getMoCluster,
                     "LRAcluster"          = getLRAcluster,
                     "CIMLR"               = getCIMLR,
                     "PINSPlus"            = getPINSPlus,
                     "ConsensusClustering" = getConsensusClustering,
                     "NEMO"                = getNEMO,
                     "COCA"                = getCOCA
    )
    # Algorithm-specific arguments in `...` are only meaningful for a single
    # method; with several methods each one runs with its defaults, as the
    # message above announces, so `...` is not forwarded.
    reslist[[method]] <- if (num.methods == 1L) {
      doMOIC(data, N.clust, type, ...)
    } else {
      doMOIC(data, N.clust, type)
    }
    message(paste0(method, " done..."))
  }

  # a single method returns its result unwrapped
  if (num.methods == 1L) {
    return(reslist[[1]])
  }
  reslist
}