Diff of /R/Jaccard.R [000000] .. [28e211]

Switch to unified view

a b/R/Jaccard.R
1
#' @title Jaccard's similarity
#' @description Robustness of the clusters can be assessed by Jaccard's
#'   similarity, which reflects the reproducibility of individual clusters
#'   across bootstrapping runs. Jaccard's similarity is the intersect of two
#'   clusters divided by the union.
#' @export
#' @param object \code{DISCBIO} class object.
#' @param Clustering Clustering has to be one of the following:
#'   \code{"K-means"} or \code{"MB"}. Default is \code{"K-means"}.
#' @param K A single positive number giving the number of clusters. Clusters
#'   are addressed by the ids \code{1, ..., K}.
#' @param plot if `TRUE`, plots the mean Jaccard similarities
#' @param R number of bootstrap replicates
#' @importFrom graphics barplot box
#' @return A numeric vector with one entry per cluster holding the mean
#'   Jaccard similarity of that cluster across the bootstrap replicates,
#'   rounded to three decimals. If \code{plot} is `TRUE`, a bar plot of these
#'   values is drawn as a side effect.
Jaccard <- function(object, Clustering = "K-means", K, plot = TRUE, R = 100) {
  # Validation
  if (!(Clustering %in% c("K-means", "MB"))) {
    stop("Clustering has to be either K-means or MB")
  }
  # Reject K < 1 explicitly: `1:K` would silently iterate over c(1, 0) for
  # K = 0 and report a similarity for a cluster that was never requested.
  if (!is.numeric(K) || length(K) != 1L || is.na(K) || K < 1) {
    stop("K has to be a single positive number")
  }

  # The cluster assignment only depends on the clustering method, so it is
  # looked up once instead of on every loop iteration.
  if (Clustering == "K-means") {
    target_col <- object@kmeans$kpart
  } else {
    target_col <- object@MBclusters$clusterid
  }

  cluster_ids <- seq_len(K)
  JACCARD <- numeric(length(cluster_ids))
  for (i in cluster_ids) {
    # Columns of the filtered data assigned to cluster i.
    # NOTE(review): when a cluster holds a single column this selection may
    # collapse to a plain vector; confirm bootstrap() copes with that before
    # adding `drop = FALSE`.
    results <- bootstrap(object@fdata[, which(target_col == i)], R)
    # to get the mean of all bootstrappings (mean of mean Jaccard values)
    JACCARD[i] <- round(mean(results), digits = 3)
  }

  if (plot) {
    barplot(
      height    = JACCARD,
      names.arg = seq_along(JACCARD),
      ylab      = "Mean Jaccard's similarity values",
      xlab      = "Clusters",
      las       = 1,
      ylim      = c(0, 1),
      # The palette is recycled by barplot() when there are more than six
      # clusters.
      col       = c("black", "blue", "green", "red", "yellow", "gray")
    )
    box()
  }
  return(JACCARD)
}