Diff of /R/getCOCA.R [000000] .. [494cbf]

Switch to unified view

a b/R/getCOCA.R
1
#' @name getCOCA
#' @title Get subtypes from COCA
#' @description This function wraps the COCA (Cluster-of-Clusters Analysis) algorithm and provides standard output for `getMoHeatmap()` and `getConsensusMOIC()`.
#' @param data List of matrices (between 2 and 6). Each matrix is transposed internally before clustering, and sample IDs are taken from the column names of the first matrix.
#' @param N.clust Number of clusters. Must be a single number; the default `NULL` is rejected.
#' @param methods A string vector storing the names of clustering methods to be used to cluster the observations in each subdataset.
#' @param distances A string vector storing the name of distances to be used in the clustering step for each subdataset.
#' @param type Data type corresponding to the list of matrices, which can be gaussian, binomial or poisson. Currently not used by this function.
#' @return A list with the following components:
#'
#'         \code{fit}        the object returned by \link[coca]{buildMOC}.
#'
#'         \code{clust.res}  a data.frame storing sample ID and corresponding clusters.
#'
#'         \code{clust.dend} an \code{hclust} object of sample clustering.
#'
#'         \code{mo.method}  a string value indicating the method used for multi-omics integrative clustering.
#' @import coca
#' @importFrom vegan vegdist
#' @export
#' @examples # There is no example and please refer to vignette.
#' @references Hoadley KA, Yau C, Wolf DM, et al (2014). Multiplatform analysis of 12 cancer types reveals molecular classification within and across tissues of origin. Cell, 158(4):929-944.
getCOCA <- function(data      = NULL,
                    N.clust   = NULL,
                    type      = rep("gaussian", length(data)),
                    methods   = "hclust",
                    distances = "euclidean") {

  # check data
  n_dat <- length(data)
  if (n_dat > 6) {
    stop("current version of MOVICS can support up to 6 datasets.")
  }
  if (n_dat < 2) {
    stop("current version of MOVICS needs at least 2 omics data.")
  }

  # check N.clust up front: cutree() below needs a single cluster count, and
  # failing here avoids running buildMOC() only to hit an unrelated error later
  if (is.null(N.clust) || length(N.clust) != 1L ||
      !is.numeric(N.clust) || is.na(N.clust)) {
    stop("N.clust must be a single number giving the number of clusters.")
  }

  # transpose every matrix; row names of the transposed first matrix are the
  # sample IDs reported in clust.res
  data <- lapply(data, t)

  ### Build matrix of clusters
  outputBuildMOC <- coca::buildMOC(data,
                                   M         = n_dat,
                                   K         = N.clust,
                                   methods   = methods,
                                   distances = distances)

  ### Extract matrix of clusters
  moc <- outputBuildMOC$moc

  # consensus step: hierarchical clustering on the Jaccard distance between
  # rows of the matrix of clusters, cut into N.clust groups
  # (used here instead of coca::coca(moc, K = N.clust))
  hcs <- stats::hclust(vegdist(as.matrix(moc), method = "jaccard"),
                       method = "ward.D")
  clust.label <- stats::cutree(hcs, N.clust)

  sam.id <- rownames(data[[1]])
  clustres <- data.frame(samID = sam.id,
                         clust = as.numeric(clust.label),
                         row.names = sam.id,
                         stringsAsFactors = FALSE)

  list(fit        = outputBuildMOC,
       clust.res  = clustres,
       clust.dend = hcs,
       mo.method  = "COCA")
}