a b/R/DIscBIO-generic-Exprmclust.R
1
#' @title Performing Model-based clustering on expression values
2
#' @description this function first uses principal component analysis (PCA) to
3
#'   reduce dimensionality of original data. It then performs model-based
4
#'   clustering on the transformed expression values.
5
#' @param object \code{DISCBIO} class object.
6
#' @param K An integer vector specifying all possible cluster numbers. Default
7
#'   is 3.
8
#' @param modelNames model to be used in model-based clustering. By default
9
#'   "ellipsoidal, varying volume, shape, and orientation" is used.
10
#' @param reduce A logical vector that allows performing the PCA on the
11
#'   expression data. Default is TRUE.
12
#' @param cluster A vector showing the ID of cells in the clusters.
13
#' @param quiet if `TRUE`, suppresses intermediary output
14
#' @importFrom mclust Mclust mclustBIC
15
#' @importFrom stats dist prcomp lm
16
#' @importFrom igraph graph.adjacency minimum.spanning.tree
17
#' @return If `object` is of class DISCBIO, the output is the same object  with
18
#'   the MBclusters slot filled. If the `object` is a data frame, the function
19
#'   returns a named list containing the four objects that together correspond
20
#'   to the contents of the MBclusters slot.
21
#'
22
setGeneric(
23
  name = "Exprmclust",
24
  def = function(
25
      object,
26
      K = 3,
27
      modelNames = "VVV",
28
      reduce = TRUE,
29
      cluster = NULL,
30
      quiet = FALSE) {
31
    standardGeneric("Exprmclust")
32
  }
33
)
34
35
#' @export
36
#' @rdname Exprmclust
37
setMethod(
38
  f = "Exprmclust",
39
  signature = "DISCBIO",
40
  definition = function(object, K, modelNames, reduce, cluster, quiet) {
41
    pcareduceres <- calc_pcareduceres(object@fdata, reduce)
42
    if (is.null(cluster)) {
43
      K <- K[K > 1]
44
      res <- Mclust(
45
        data = pcareduceres,
46
        G = K,
47
        modelNames = modelNames,
48
        warn = FALSE,
49
        verbose = !quiet
50
      )
51
      if (is.null(res)) stop("Unable to cluster. Try a lower value for K.")
52
      clusterid <- apply(res$z, 1, which.max)
53
      clunum <- res$G
54
    } else {
55
      clunum <- length(unique(cluster))
56
      clusterid <- cluster
57
    }
58
    clucenter <- matrix(0, ncol = ncol(pcareduceres), nrow = clunum)
59
    for (cid in 1:clunum) {
60
      clucenter[cid, ] <- colMeans(
61
        pcareduceres[names(clusterid[clusterid == cid]), , drop = FALSE]
62
      )
63
    }
64
    dp <- as.matrix(dist(clucenter))
65
    gp <- graph.adjacency(dp, mode = "undirected", weighted = TRUE)
66
    dp_mst <- minimum.spanning.tree(gp)
67
    full_List <- list(
68
      pcareduceres = pcareduceres,
69
      MSTtree = dp_mst,
70
      clusterid = clusterid,
71
      clucenter = clucenter
72
    )
73
    object@MBclusters <- full_List
74
    return(object)
75
  }
76
)
77
78
#' @export
79
#' @rdname Exprmclust
80
setMethod(
81
  f = "Exprmclust",
82
  signature = "data.frame",
83
  definition = function(object,
84
                        K = 3,
85
                        modelNames = "VVV",
86
                        reduce = TRUE,
87
                        cluster = NULL,
88
                        quiet = FALSE) {
89
    pcareduceres <- calc_pcareduceres(object, reduce)
90
    if (is.null(cluster)) {
91
      K <- K[K > 1]
92
      res <- Mclust(
93
        data = pcareduceres,
94
        G = K,
95
        modelNames = modelNames,
96
        warn = FALSE,
97
        verbose = !quiet
98
      )
99
      if (is.null(res)) {
100
        stop("Unable to cluster. Try a lower value for K.")
101
      }
102
      clusterid <- apply(res$z, 1, which.max)
103
      clunum <- res$G
104
    } else {
105
      clunum <- length(unique(cluster))
106
      clusterid <- cluster
107
    }
108
    clucenter <- matrix(0, ncol = ncol(pcareduceres), nrow = clunum)
109
    for (cid in 1:clunum) {
110
      clucenter[cid, ] <- colMeans(
111
        pcareduceres[names(clusterid[clusterid == cid]), , drop = FALSE]
112
      )
113
    }
114
    dp <- as.matrix(dist(clucenter))
115
    gp <-
116
      graph.adjacency(dp, mode = "undirected", weighted = TRUE)
117
    dp_mst <- minimum.spanning.tree(gp)
118
    object <- list(
119
      pcareduceres = pcareduceres,
120
      MSTtree = dp_mst,
121
      clusterid = clusterid,
122
      clucenter = clucenter
123
    )
124
    return(object)
125
  }
126
)