[381c22]: / R / Jaccard.R

Download this file

47 lines (45 with data), 1.7 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#' @title Jaccard's similarity
#' @description Robustness of the clusters can be assessed by Jaccard's
#' similarity, which reflects the reproducibility of individual clusters
#' across bootstrapping runs. Jaccard's similarity is the intersect of two
#' clusters divided by the union.
#' @details For every cluster index from 1 to \code{K}, the columns of
#' \code{object@fdata} assigned to that cluster are passed to
#' \code{bootstrap()} together with \code{R}; the mean of the returned
#' values, rounded to three decimals, is stored for that cluster.
#' @export
#' @param object \code{DISCBIO} class object.
#' @param Clustering Clustering has to be one of the following:
#' \code{"K-means"} or \code{"MB"}. Default is \code{"K-means"}. Cluster
#' labels are read from \code{object@kmeans$kpart} for \code{"K-means"} and
#' from \code{object@MBclusters$clusterid} for \code{"MB"}.
#' @param K A single positive numeric value: the number of clusters.
#' @param plot if `TRUE`, plots the mean Jaccard similarities as a bar plot
#' @param R number of bootstrap replicates
#' @importFrom graphics barplot box
#' @return A numeric vector with one entry per cluster holding the mean
#' Jaccard similarity coefficient (rounded to three decimals). If
#' \code{plot = TRUE}, a bar plot of these values is drawn as a side effect.
Jaccard <- function(object, Clustering = "K-means", K, plot = TRUE, R = 100) {
  # Validation
  if (!(Clustering %in% c("K-means", "MB"))) {
    stop("Clustering has to be either K-means or MB")
  }
  # `1:K` counts backwards for K < 1 (e.g. 1:0 is c(1, 0)), so reject such
  # values explicitly instead of silently processing the wrong clusters.
  if (!is.numeric(K) || length(K) != 1L || is.na(K) || K < 1) {
    stop("K has to be a single positive number")
  }

  # The cluster assignment only depends on `Clustering`, so pick it once
  # instead of re-evaluating the branch on every loop iteration.
  if (Clustering == "K-means") {
    cluster_ids <- object@kmeans$kpart
  } else {
    cluster_ids <- object@MBclusters$clusterid
  }

  # A fractional K is truncated, which is what `1:K` did as well.
  n_clusters <- floor(K)
  mean_jaccard <- numeric(n_clusters)
  for (i in seq_len(n_clusters)) {
    results <- bootstrap(object@fdata[, which(cluster_ids == i)], R)
    # to get the mean of all bootstrappings (mean of mean Jaccard values)
    mean_jaccard[i] <- round(mean(results), digits = 3)
  }

  if (plot) {
    barplot(
      height = mean_jaccard,
      names.arg = seq_along(mean_jaccard),
      ylab = "Mean Jaccard's similarity values",
      xlab = "Clusters",
      las = 1,
      ylim = c(0, 1),
      # Six colours only; barplot recycles them when there are more clusters.
      col = c("black", "blue", "green", "red", "yellow", "gray")
    )
    box()
  }
  mean_jaccard
}