|
a |
|
b/R/DIscBIO-generic-Clustexp.R |
|
|
1 |
#' @title Clustering of single-cell transcriptome data |
|
|
2 |
#' @description This functions performs the initial clustering of the RaceID |
|
|
3 |
#' algorithm. |
|
|
4 |
#' @docType methods |
|
|
5 |
#' @param object \code{DISCBIO} class object. |
|
|
6 |
#' @param clustnr Maximum number of clusters for the derivation of the cluster |
|
|
7 |
#' number by the saturation of mean within-cluster-dispersion. Default is 20. |
|
|
8 |
#' @param bootnr A numeric value of booststrapping runs for \code{clusterboot}. |
|
|
9 |
#' Default is 50. |
|
|
10 |
#' @param metric Is the method to transform the input data to a distance object. |
|
|
11 |
#' Metric has to be one of the following: ["spearman", "pearson", "kendall", |
|
|
12 |
#' "euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski"]. |
|
|
13 |
#' @param do.gap A logical vector that allows generating the number of clusters |
|
|
14 |
#' based on the gap statistics. Default is TRUE. |
|
|
15 |
#' @param SE.method The SE.method determines the first local maximum of the gap |
|
|
16 |
#' statistics. The SE.method has to be one of the following:["firstSEmax", |
|
|
17 |
#' "Tibs2001SEmax", "globalSEmax", "firstmax", "globalmax"]. Default is |
|
|
18 |
#' "Tibs2001SEmax" |
|
|
19 |
#' @param SE.factor A numeric value of the fraction of the standard deviation by |
|
|
20 |
#' which the local maximum is required to differ from the neighboring points |
|
|
21 |
#' it is compared to. Default is 0.25. |
|
|
22 |
#' @param B.gap Number of bootstrap runs for the calculation of the gap |
|
|
23 |
#' statistics. Default is 50 |
|
|
24 |
#' @param cln Number of clusters to be used. Default is \code{NULL} and the |
|
|
25 |
#' cluster number is inferred by the saturation criterion. |
|
|
26 |
#' @param rseed Random integer to enforce reproducible clustering results. |
|
|
27 |
#' @param quiet if `TRUE`, intermediate output is suppressed |
|
|
28 |
#' @importFrom stats as.dist cor kmeans |
|
|
29 |
#' @importFrom cluster clusGap maxSE |
|
|
30 |
#' @importFrom graphics pairs |
|
|
31 |
#' @importFrom methods is |
|
|
32 |
#' @return The DISCBIO-class object input with the cpart slot filled. |
|
|
33 |
#' @examples |
|
|
34 |
#' sc <- DISCBIO(valuesG1msTest) # changes signature of data |
|
|
35 |
#' sc <- Clustexp(sc, cln = 2) |
|
|
36 |
setGeneric("Clustexp", function( |
|
|
37 |
object, clustnr = 3, bootnr = 50, |
|
|
38 |
metric = "pearson", do.gap = TRUE, SE.method = "Tibs2001SEmax", |
|
|
39 |
SE.factor = .25, B.gap = 50, cln = 0, rseed = NULL, quiet = FALSE) { |
|
|
40 |
standardGeneric("Clustexp") |
|
|
41 |
}) |
|
|
42 |
|
|
|
43 |
#' @export |
|
|
44 |
#' @rdname Clustexp |
|
|
45 |
setMethod( |
|
|
46 |
f = "Clustexp", |
|
|
47 |
signature = "DISCBIO", |
|
|
48 |
definition = function(object, clustnr, bootnr, metric, do.gap, SE.method, |
|
|
49 |
SE.factor, B.gap, cln, rseed, quiet) { |
|
|
50 |
if (!is.numeric(clustnr)) { |
|
|
51 |
stop("clustnr has to be a positive integer") |
|
|
52 |
} else if (round(clustnr) != clustnr | clustnr <= 0) { |
|
|
53 |
stop("clustnr has to be a positive integer") |
|
|
54 |
} |
|
|
55 |
if (!is.numeric(bootnr)) { |
|
|
56 |
stop("bootnr has to be a positive integer") |
|
|
57 |
} else if (round(bootnr) != bootnr | bootnr <= 0) { |
|
|
58 |
stop("bootnr has to be a positive integer") |
|
|
59 |
} |
|
|
60 |
if (!( |
|
|
61 |
metric %in% c( |
|
|
62 |
"spearman", "pearson", "kendall", "euclidean", "maximum", |
|
|
63 |
"manhattan", "canberra", "binary", "minkowski" |
|
|
64 |
) |
|
|
65 |
)) { |
|
|
66 |
stop( |
|
|
67 |
"metric has to be one of the following: spearman, ", |
|
|
68 |
"pearson, kendall, euclidean, maximum, manhattan, ", |
|
|
69 |
"canberra, binary, minkowski" |
|
|
70 |
) |
|
|
71 |
} |
|
|
72 |
if (!( |
|
|
73 |
SE.method %in% c( |
|
|
74 |
"firstSEmax", "Tibs2001SEmax", "globalSEmax", "firstmax", |
|
|
75 |
"globalmax" |
|
|
76 |
) |
|
|
77 |
)) { |
|
|
78 |
stop( |
|
|
79 |
"SE.method has to be one of the following: ", |
|
|
80 |
"firstSEmax, Tibs2001SEmax, globalSEmax, ", |
|
|
81 |
"firstmax, globalmax" |
|
|
82 |
) |
|
|
83 |
} |
|
|
84 |
if (!is.numeric(SE.factor)) { |
|
|
85 |
stop("SE.factor has to be a non-negative integer") |
|
|
86 |
} else if (SE.factor < 0) { |
|
|
87 |
stop("SE.factor has to be a non-negative integer") |
|
|
88 |
} |
|
|
89 |
if (!(is.numeric(do.gap) | is.logical(do.gap))) { |
|
|
90 |
stop("do.gap has to be logical (TRUE or FALSE)") |
|
|
91 |
} |
|
|
92 |
if (!is.numeric(B.gap)) { |
|
|
93 |
stop("B.gap has to be a positive integer") |
|
|
94 |
} else if (round(B.gap) != B.gap | B.gap <= 0) { |
|
|
95 |
stop("B.gap has to be a positive integer") |
|
|
96 |
} |
|
|
97 |
if (!is.numeric(cln)) { |
|
|
98 |
stop("cln has to be a non-negative integer") |
|
|
99 |
} else if (round(cln) != cln | cln < 0) { |
|
|
100 |
stop("cln has to be a non-negative integer") |
|
|
101 |
} |
|
|
102 |
if (!is.null(rseed) & !is.numeric(rseed)) { |
|
|
103 |
stop("rseed has to be numeric or NULL") |
|
|
104 |
} |
|
|
105 |
if (!do.gap & cln == 0) { |
|
|
106 |
stop("cln has to be a positive integer or do.gap has to be TRUE") |
|
|
107 |
} |
|
|
108 |
|
|
|
109 |
# Operations |
|
|
110 |
object@clusterpar <- |
|
|
111 |
list( |
|
|
112 |
clustnr = clustnr, |
|
|
113 |
bootnr = bootnr, |
|
|
114 |
metric = metric, |
|
|
115 |
do.gap = do.gap, |
|
|
116 |
SE.method = SE.method, |
|
|
117 |
SE.factor = SE.factor, |
|
|
118 |
B.gap = B.gap, |
|
|
119 |
cln = cln, |
|
|
120 |
rseed = rseed |
|
|
121 |
) |
|
|
122 |
y <- clustfun( |
|
|
123 |
object@fdata, |
|
|
124 |
clustnr, |
|
|
125 |
bootnr, |
|
|
126 |
metric, |
|
|
127 |
do.gap, |
|
|
128 |
SE.method, |
|
|
129 |
SE.factor, |
|
|
130 |
B.gap, |
|
|
131 |
cln, |
|
|
132 |
rseed = rseed, |
|
|
133 |
quiet = quiet |
|
|
134 |
) |
|
|
135 |
object@kmeans <- list( |
|
|
136 |
kpart = y$clb$result$partition, |
|
|
137 |
jaccard = y$clb$bootmean, |
|
|
138 |
gap = y$gpr |
|
|
139 |
) |
|
|
140 |
object@distances <- as.matrix(y$di) |
|
|
141 |
object@fcol <- rainbow(max(y$clb$result$partition)) |
|
|
142 |
object@cpart <- object@kmeans$kpart |
|
|
143 |
return(object) |
|
|
144 |
} |
|
|
145 |
) |