# R/getMoCluster.R
#' @name getMoCluster
#' @title Get subtypes from MoCluster
#' @description This function wraps the MoCluster (Multiple omics data integrative clustering) algorithm and provides standard output for `getMoHeatmap()` and `getConsensusMOIC()`.
#' @param data List of matrices (between 2 and 6 omics datasets). Sample IDs are taken from the column names of the first matrix.
#' @param N.clust Number of clusters; a single number (>= 1). Required.
#' @param ncomp An integer value to indicate the number of components to calculate. To calculate more components requires longer computational time. When \code{NULL} (default), \code{N.clust} is used.
#' @param method A string value can be one of CPCA, GCCA and MCIA; CPCA by default.
#' @param option A string value could be one of c('lambda1', 'inertia', 'uniform') to indicate how the different matrices should be normalized.
#' @param k A numeric value to indicate the absolute number (if k >= 1) or the proportion (if 0 < k < 1) of non-zero coefficients for the variable loading vectors. It could be a single value or a vector with the same length as \code{data} so the sparsity of individual matrix could be different.
#' @param center A logical value to indicate if the variables should be centered. TRUE by default.
#' @param scale A logical value to indicate if the variables should be scaled. TRUE by default.
#' @param clusterAlg A string value to indicate the agglomeration method passed to \code{hclust()} when clustering the distance matrix of sample scores; "ward.D" by default.
#' @param type Data type corresponding to the list of matrices, which can be gaussian, binomial or poisson. Currently ignored by this function.
#' @return A list with the following components:
#'
#'         \code{fit}        an object returned by \link[mogsa]{mbpca}.
#'
#'         \code{clust.res}  a data.frame storing sample ID and corresponding clusters.
#'
#'         \code{feat.res}   the results of features selection process: a data.frame with columns \code{feature}, \code{dataset} and \code{load} (loading on the first component), holding the features with non-zero loading grouped by dataset.
#'
#'         \code{clust.dend} a dendrogram of sample clustering.
#'
#'         \code{mo.method}  a string value indicating the method used for multi-omics integrative clustering.
#' @export
#' @examples # There is no example and please refer to vignette.
#' @import mogsa
#' @importFrom dplyr %>%
#' @references Meng C, Helm D, Frejno M, Kuster B (2016). moCluster: Identifying Joint Patterns Across Multiple Omics Data Sets. J Proteome Res, 15(3):755-765.
getMoCluster <- function(data       = NULL,
                         N.clust    = NULL,
                         type       = rep("gaussian", length(data)),
                         ncomp      = NULL,
                         method     = "CPCA",
                         option     = "lambda1",
                         k          = 10,
                         center     = TRUE,
                         scale      = TRUE,
                         clusterAlg = "ward.D") {

  # check data
  n_dat <- length(data)
  if (n_dat > 6) {
    stop('current verision of MOVICS can support up to 6 datasets.')
  }
  if (n_dat < 2) {
    stop('current verision of MOVICS needs at least 2 omics data.')
  }

  # NOTE: `type` is part of the signature but is not used by this
  # implementation.

  # check method (must be exactly one of the supported labels)
  if (length(method) != 1L || !is.element(method, c("CPCA", "GCCA", "MCIA"))) {
    stop("method should be one of CPCA [consensus PCA], GCCA [generalized canonical correlation analysis], or MCIA [multiple co-inertia analysis]!")
  }

  # check N.clust up front: it is needed by cutree() (and as the default for
  # ncomp), so fail before running the costly decomposition
  if (!is.numeric(N.clust) || length(N.clust) != 1L ||
      is.na(N.clust) || N.clust < 1) {
    stop("N.clust should be a single number (>= 1) indicating the number of clusters!")
  }

  if (is.null(ncomp)) {
    ncomp <- N.clust
  }

  # translate the user-facing method label into the mbpca() deflation method
  mbpca.method <- switch(method,
                         "CPCA" = "globalScore",
                         "GCCA" = "blockScore",
                         "MCIA" = "blockLoading")

  moas <- mogsa::mbpca(data,
                       ncomp      = ncomp,
                       k          = k,
                       method     = mbpca.method,
                       option     = option,
                       center     = center,
                       scale      = scale,
                       moa        = TRUE,
                       svd.solver = "fast",
                       maxiter    = 1000,
                       verbose    = FALSE)

  # hierarchical clustering of samples on the distance between their scores
  scrs       <- mogsa::moaScore(moas)
  scr.dist   <- dist(scrs)
  clust.dend <- hclust(scr.dist, method = clusterAlg)

  sam.id   <- colnames(data[[1]])
  clustres <- data.frame(samID            = sam.id,
                         clust            = cutree(clust.dend, k = N.clust),
                         row.names        = sam.id,
                         stringsAsFactors = FALSE)
  message("clustering done...")

  # keep features with a non-zero loading on the first component;
  # drop = FALSE keeps the matrix shape when only one row or one component
  # remains (otherwise rownames() and [, 1] below would fail)
  loading   <- moas@loading
  featres   <- loading[which(loading[, 1] != 0), , drop = FALSE]
  feat.name <- rownames(featres)

  # row names are split at the last underscore: text before it is taken as the
  # feature name, text after it as the dataset name
  featres <- data.frame(feature          = sub('_[^_]*$', '', feat.name),
                        dataset          = sub('.*_', '', feat.name),
                        load             = featres[, 1],
                        stringsAsFactors = FALSE)

  # group rows by dataset (datasets in order of first appearance, original row
  # order kept within each dataset) without growing a data.frame in a loop;
  # an empty selection yields a zero-row data.frame
  dataset.rank <- match(featres$dataset, unique(featres$dataset))
  feat.res     <- featres[order(dataset.rank), , drop = FALSE]
  message("feature selection done...")

  list(fit        = moas,
       clust.res  = clustres,
       feat.res   = feat.res,
       clust.dend = clust.dend,
       mo.method  = "MoCluster")
}