a b/R/DIscBIO-generic-Clustexp.R
1
#' @title Clustering of single-cell transcriptome data
2
#' @description This functions performs the initial clustering of the RaceID
3
#'   algorithm.
4
#' @docType methods
5
#' @param object \code{DISCBIO} class object.
6
#' @param clustnr Maximum number of clusters for the derivation of the cluster
7
#'   number by the saturation of mean within-cluster-dispersion. Default is 20.
8
#' @param bootnr A numeric value of booststrapping runs for \code{clusterboot}.
9
#'   Default is 50.
10
#' @param metric Is the method to transform the input data to a distance object.
11
#'   Metric has to be one of the following: ["spearman", "pearson", "kendall",
12
#'   "euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski"].
13
#' @param do.gap A logical vector that allows generating the number of clusters
14
#'   based on the gap statistics. Default is TRUE.
15
#' @param SE.method The SE.method determines the first local maximum of the gap
16
#'   statistics. The SE.method has to be one of the following:["firstSEmax",
17
#'   "Tibs2001SEmax", "globalSEmax", "firstmax", "globalmax"]. Default is
18
#'   "Tibs2001SEmax"
19
#' @param SE.factor A numeric value of the fraction of the standard deviation by
20
#'   which the local maximum is required to differ from the neighboring points
21
#'   it is compared to. Default is 0.25.
22
#' @param B.gap Number of bootstrap runs for the calculation of the gap
23
#'   statistics. Default is 50
24
#' @param cln Number of clusters to be used. Default is \code{NULL} and the
25
#'   cluster number is inferred by the saturation criterion.
26
#' @param rseed Random integer to enforce reproducible clustering results.
27
#' @param quiet if `TRUE`, intermediate output is suppressed
28
#' @importFrom stats as.dist cor kmeans
29
#' @importFrom cluster clusGap maxSE
30
#' @importFrom graphics pairs
31
#' @importFrom methods is
32
#' @return The DISCBIO-class object input with the cpart slot filled.
33
#' @examples
34
#' sc <- DISCBIO(valuesG1msTest) # changes signature of data
35
#' sc <- Clustexp(sc, cln = 2)
36
setGeneric("Clustexp", function(
37
    object, clustnr = 3, bootnr = 50,
38
    metric = "pearson", do.gap = TRUE, SE.method = "Tibs2001SEmax",
39
    SE.factor = .25, B.gap = 50, cln = 0, rseed = NULL, quiet = FALSE) {
40
  standardGeneric("Clustexp")
41
})
42
43
#' @export
44
#' @rdname Clustexp
45
setMethod(
46
  f = "Clustexp",
47
  signature = "DISCBIO",
48
  definition = function(object, clustnr, bootnr, metric, do.gap, SE.method,
49
                        SE.factor, B.gap, cln, rseed, quiet) {
50
    if (!is.numeric(clustnr)) {
51
      stop("clustnr has to be a positive integer")
52
    } else if (round(clustnr) != clustnr | clustnr <= 0) {
53
      stop("clustnr has to be a positive integer")
54
    }
55
    if (!is.numeric(bootnr)) {
56
      stop("bootnr has to be a positive integer")
57
    } else if (round(bootnr) != bootnr | bootnr <= 0) {
58
      stop("bootnr has to be a positive integer")
59
    }
60
    if (!(
61
      metric %in% c(
62
        "spearman", "pearson", "kendall", "euclidean", "maximum",
63
        "manhattan", "canberra", "binary", "minkowski"
64
      )
65
    )) {
66
      stop(
67
        "metric has to be one of the following: spearman, ",
68
        "pearson, kendall, euclidean, maximum, manhattan, ",
69
        "canberra, binary, minkowski"
70
      )
71
    }
72
    if (!(
73
      SE.method %in% c(
74
        "firstSEmax", "Tibs2001SEmax", "globalSEmax", "firstmax",
75
        "globalmax"
76
      )
77
    )) {
78
      stop(
79
        "SE.method has to be one of the following: ",
80
        "firstSEmax, Tibs2001SEmax, globalSEmax, ",
81
        "firstmax, globalmax"
82
      )
83
    }
84
    if (!is.numeric(SE.factor)) {
85
      stop("SE.factor has to be a non-negative integer")
86
    } else if (SE.factor < 0) {
87
      stop("SE.factor has to be a non-negative integer")
88
    }
89
    if (!(is.numeric(do.gap) | is.logical(do.gap))) {
90
      stop("do.gap has to be logical (TRUE or FALSE)")
91
    }
92
    if (!is.numeric(B.gap)) {
93
      stop("B.gap has to be a positive integer")
94
    } else if (round(B.gap) != B.gap | B.gap <= 0) {
95
      stop("B.gap has to be a positive integer")
96
    }
97
    if (!is.numeric(cln)) {
98
      stop("cln has to be a non-negative integer")
99
    } else if (round(cln) != cln | cln < 0) {
100
      stop("cln has to be a non-negative integer")
101
    }
102
    if (!is.null(rseed) & !is.numeric(rseed)) {
103
      stop("rseed has to be numeric or NULL")
104
    }
105
    if (!do.gap & cln == 0) {
106
      stop("cln has to be a positive integer or do.gap has to be TRUE")
107
    }
108
109
    # Operations
110
    object@clusterpar <-
111
      list(
112
        clustnr = clustnr,
113
        bootnr = bootnr,
114
        metric = metric,
115
        do.gap = do.gap,
116
        SE.method = SE.method,
117
        SE.factor = SE.factor,
118
        B.gap = B.gap,
119
        cln = cln,
120
        rseed = rseed
121
      )
122
    y <- clustfun(
123
      object@fdata,
124
      clustnr,
125
      bootnr,
126
      metric,
127
      do.gap,
128
      SE.method,
129
      SE.factor,
130
      B.gap,
131
      cln,
132
      rseed = rseed,
133
      quiet = quiet
134
    )
135
    object@kmeans <- list(
136
      kpart = y$clb$result$partition,
137
      jaccard = y$clb$bootmean,
138
      gap = y$gpr
139
    )
140
    object@distances <- as.matrix(y$di)
141
    object@fcol <- rainbow(max(y$clb$result$partition))
142
    object@cpart <- object@kmeans$kpart
143
    return(object)
144
  }
145
)