[381c22]: / R / DIscBIO-generic-Clustexp.R

Download this file

146 lines (143 with data), 5.1 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#' @title Clustering of single-cell transcriptome data
#' @description This functions performs the initial clustering of the RaceID
#' algorithm.
#' @docType methods
#' @param object \code{DISCBIO} class object.
#' @param clustnr Maximum number of clusters for the derivation of the cluster
#' number by the saturation of mean within-cluster-dispersion. Default is 20.
#' @param bootnr A numeric value of booststrapping runs for \code{clusterboot}.
#' Default is 50.
#' @param metric Is the method to transform the input data to a distance object.
#' Metric has to be one of the following: ["spearman", "pearson", "kendall",
#' "euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski"].
#' @param do.gap A logical vector that allows generating the number of clusters
#' based on the gap statistics. Default is TRUE.
#' @param SE.method The SE.method determines the first local maximum of the gap
#' statistics. The SE.method has to be one of the following:["firstSEmax",
#' "Tibs2001SEmax", "globalSEmax", "firstmax", "globalmax"]. Default is
#' "Tibs2001SEmax"
#' @param SE.factor A numeric value of the fraction of the standard deviation by
#' which the local maximum is required to differ from the neighboring points
#' it is compared to. Default is 0.25.
#' @param B.gap Number of bootstrap runs for the calculation of the gap
#' statistics. Default is 50
#' @param cln Number of clusters to be used. Default is \code{NULL} and the
#' cluster number is inferred by the saturation criterion.
#' @param rseed Random integer to enforce reproducible clustering results.
#' @param quiet if `TRUE`, intermediate output is suppressed
#' @importFrom stats as.dist cor kmeans
#' @importFrom cluster clusGap maxSE
#' @importFrom graphics pairs
#' @importFrom methods is
#' @return The DISCBIO-class object input with the cpart slot filled.
#' @examples
#' sc <- DISCBIO(valuesG1msTest) # changes signature of data
#' sc <- Clustexp(sc, cln = 2)
setGeneric("Clustexp", function(
object, clustnr = 3, bootnr = 50,
metric = "pearson", do.gap = TRUE, SE.method = "Tibs2001SEmax",
SE.factor = .25, B.gap = 50, cln = 0, rseed = NULL, quiet = FALSE) {
standardGeneric("Clustexp")
})
#' @export
#' @rdname Clustexp
setMethod(
f = "Clustexp",
signature = "DISCBIO",
definition = function(object, clustnr, bootnr, metric, do.gap, SE.method,
SE.factor, B.gap, cln, rseed, quiet) {
if (!is.numeric(clustnr)) {
stop("clustnr has to be a positive integer")
} else if (round(clustnr) != clustnr | clustnr <= 0) {
stop("clustnr has to be a positive integer")
}
if (!is.numeric(bootnr)) {
stop("bootnr has to be a positive integer")
} else if (round(bootnr) != bootnr | bootnr <= 0) {
stop("bootnr has to be a positive integer")
}
if (!(
metric %in% c(
"spearman", "pearson", "kendall", "euclidean", "maximum",
"manhattan", "canberra", "binary", "minkowski"
)
)) {
stop(
"metric has to be one of the following: spearman, ",
"pearson, kendall, euclidean, maximum, manhattan, ",
"canberra, binary, minkowski"
)
}
if (!(
SE.method %in% c(
"firstSEmax", "Tibs2001SEmax", "globalSEmax", "firstmax",
"globalmax"
)
)) {
stop(
"SE.method has to be one of the following: ",
"firstSEmax, Tibs2001SEmax, globalSEmax, ",
"firstmax, globalmax"
)
}
if (!is.numeric(SE.factor)) {
stop("SE.factor has to be a non-negative integer")
} else if (SE.factor < 0) {
stop("SE.factor has to be a non-negative integer")
}
if (!(is.numeric(do.gap) | is.logical(do.gap))) {
stop("do.gap has to be logical (TRUE or FALSE)")
}
if (!is.numeric(B.gap)) {
stop("B.gap has to be a positive integer")
} else if (round(B.gap) != B.gap | B.gap <= 0) {
stop("B.gap has to be a positive integer")
}
if (!is.numeric(cln)) {
stop("cln has to be a non-negative integer")
} else if (round(cln) != cln | cln < 0) {
stop("cln has to be a non-negative integer")
}
if (!is.null(rseed) & !is.numeric(rseed)) {
stop("rseed has to be numeric or NULL")
}
if (!do.gap & cln == 0) {
stop("cln has to be a positive integer or do.gap has to be TRUE")
}
# Operations
object@clusterpar <-
list(
clustnr = clustnr,
bootnr = bootnr,
metric = metric,
do.gap = do.gap,
SE.method = SE.method,
SE.factor = SE.factor,
B.gap = B.gap,
cln = cln,
rseed = rseed
)
y <- clustfun(
object@fdata,
clustnr,
bootnr,
metric,
do.gap,
SE.method,
SE.factor,
B.gap,
cln,
rseed = rseed,
quiet = quiet
)
object@kmeans <- list(
kpart = y$clb$result$partition,
jaccard = y$clb$bootmean,
gap = y$gpr
)
object@distances <- as.matrix(y$di)
object@fcol <- rainbow(max(y$clb$result$partition))
object@cpart <- object@kmeans$kpart
return(object)
}
)