|
a |
|
b/R/silhouette.R |
|
|
1 |
#' Get data for silhouette demo |
|
|
2 |
#' |
|
|
3 |
#' @return A matrix of expression profile, sample in raws, time in columns. |
|
|
4 |
#' |
|
|
5 |
#' @examples |
|
|
6 |
#' data <- get_demo_silhouette() |
|
|
7 |
#' |
|
|
8 |
#' @export |
|
|
9 |
get_demo_silhouette <- function() { |
|
|
10 |
readRDS(system.file("extdata/data_silhouette.rds", package="timeOmics", |
|
|
11 |
mustWork = TRUE)) |
|
|
12 |
} |
|
|
13 |
|
|
|
14 |
|
|
|
15 |
#' @importFrom dplyr group_by |
|
|
16 |
#' @importFrom dplyr summarise |
|
|
17 |
silhouette <- function(dmatrix, # distance matrix |
|
|
18 |
cluster) # cluster vector of size ncol(dmatrix) |
|
|
19 |
{ |
|
|
20 |
#-- checking input parameters ---------------------------------------------# |
|
|
21 |
#--------------------------------------------------------------------------# |
|
|
22 |
|
|
|
23 |
#-- check dmatrix |
|
|
24 |
stopifnot(is.matrix(dmatrix) || is.data.frame(dmatrix)) |
|
|
25 |
dmatrix <- as.matrix(dmatrix) |
|
|
26 |
stopifnot(nrow(dmatrix) == ncol(dmatrix)) |
|
|
27 |
|
|
|
28 |
#-- check cluster |
|
|
29 |
stopifnot(is.vector(cluster) || is.factor(cluster)) |
|
|
30 |
stopifnot(length(cluster) == ncol(dmatrix)) |
|
|
31 |
|
|
|
32 |
cluster <- factor(cluster) |
|
|
33 |
cluster.levels <- levels(cluster) |
|
|
34 |
|
|
|
35 |
|
|
|
36 |
#- compute silhouette -----------------------------------------------------# |
|
|
37 |
#--------------------------------------------------------------------------# |
|
|
38 |
average.dist <- matrix(ncol = length(cluster.levels), nrow = nrow(dmatrix)) |
|
|
39 |
result <- vector(length = ncol(dmatrix)) |
|
|
40 |
for(i in 1:nrow(dmatrix)){ |
|
|
41 |
for(j in 1:length(cluster.levels)){ |
|
|
42 |
index.tmp <- cluster == cluster.levels[j] |
|
|
43 |
if(cluster.levels[j] == cluster[i]) { |
|
|
44 |
# we do not include d(i,i) in the sum |
|
|
45 |
# can introduce NaN if size of cluster is 1 |
|
|
46 |
# but this is handle after because score is 0 when size of cluster is 1 |
|
|
47 |
index.tmp[i] <- FALSE |
|
|
48 |
} |
|
|
49 |
average.dist[i,j] <- mean(dmatrix[i, index.tmp]) |
|
|
50 |
} |
|
|
51 |
A <- average.dist[i, cluster[i]] # a : inside |
|
|
52 |
#B <- min(average.dist[i,-c(cluster[i])]) # b |
|
|
53 |
# fix R 4.1 |
|
|
54 |
B <- min(average.dist[i,-which(cluster[i] %in% cluster.levels)]) # b |
|
|
55 |
result[i] <- silhoutte.formula(A = A, B = B) |
|
|
56 |
} |
|
|
57 |
|
|
|
58 |
#-- return |
|
|
59 |
to_return <- list() |
|
|
60 |
|
|
|
61 |
#-- silhouette coefficient by feature |
|
|
62 |
to_return[["feature"]] <- cbind(colnames(dmatrix), cluster, as.data.frame(result)) |
|
|
63 |
colnames(to_return[["feature"]]) <- c("feature", "cluster", "silhouette.coef") |
|
|
64 |
|
|
|
65 |
#-- average silhouette coefficient |
|
|
66 |
to_return[["average"]] <- mean(to_return[["feature"]][["silhouette.coef"]]) |
|
|
67 |
|
|
|
68 |
#-- average silhouette coefficient by cluster |
|
|
69 |
to_return[["average.cluster"]] <- dplyr::group_by(to_return[["feature"]], cluster) %>% |
|
|
70 |
dplyr::summarise(silhouette.coef = mean(silhouette.coef)) %>% |
|
|
71 |
as.data.frame |
|
|
72 |
|
|
|
73 |
return(to_return) |
|
|
74 |
} |
|
|
75 |
|
|
|
76 |
#' dmatrix.spearman.dissimilarity |
|
|
77 |
#' |
|
|
78 |
#' Compute the spearman dissimilarity distance. |
|
|
79 |
#' |
|
|
80 |
#' @param X A numeric matrix with feature in colnames |
|
|
81 |
#' |
|
|
82 |
#' @return |
|
|
83 |
#' Return a dissimilarity matrix of size PxP. |
|
|
84 |
#' |
|
|
85 |
dmatrix.spearman.dissimilarity <- function(X){ |
|
|
86 |
# between 0 and 2 |
|
|
87 |
dmatrix <- cor(x = X, use = 'pairwise.complete.obs', |
|
|
88 |
method = 'spearman') |
|
|
89 |
dmatrix <- 1 - dmatrix |
|
|
90 |
return(dmatrix) |
|
|
91 |
} |
|
|
92 |
|
|
|
93 |
# dmatrix.proportionality.distance <- function(X){ |
|
|
94 |
# # clr first |
|
|
95 |
# dmatrix <- matrix(ncol = ncol(X), nrow = ncol(X)) |
|
|
96 |
# rownames(dmatrix) <- colnames(dmatrix) <- colnames(X) |
|
|
97 |
# for(i in 1:ncol(X)){ |
|
|
98 |
# for(j in 1:ncol(X)){ |
|
|
99 |
# dmatrix[i,j] <- var(X[,i] - X[,j])/var(X[,i] + X[,j]) |
|
|
100 |
# } |
|
|
101 |
# } |
|
|
102 |
# return(dmatrix) |
|
|
103 |
# } |
|
|
104 |
|
|
|
105 |
silhoutte.formula <- function(A,B){ |
|
|
106 |
# A average dist inside cluster; # B: min average dist outside |
|
|
107 |
stopifnot(is.vector(A) && is.vector(B)) |
|
|
108 |
stopifnot(is.numeric(A) && is.numeric(B)) |
|
|
109 |
if(!(is.finite(A))){ |
|
|
110 |
return(0) |
|
|
111 |
}else{ |
|
|
112 |
return((B - A)/(max(A,B))) |
|
|
113 |
} |
|
|
114 |
} |
|
|
115 |
|
|
|
116 |
|
|
|
117 |
wrapper.silhouette <- function(X, cluster) |
|
|
118 |
{ |
|
|
119 |
#-- checking input parameters ---------------------------------------------# |
|
|
120 |
#--------------------------------------------------------------------------# |
|
|
121 |
|
|
|
122 |
#-- check X |
|
|
123 |
stopifnot(is.matrix(X) || is.data.frame(X)) |
|
|
124 |
X <- as.matrix(X) |
|
|
125 |
|
|
|
126 |
#-- check cluster |
|
|
127 |
stopifnot(is.vector(cluster) || is.factor(cluster)) |
|
|
128 |
stopifnot(length(cluster) == ncol(X)) |
|
|
129 |
|
|
|
130 |
cluster <- factor(cluster) |
|
|
131 |
cluster.levels <- levels(cluster) |
|
|
132 |
|
|
|
133 |
#-- compute distance matrix -----------------------------------------------# |
|
|
134 |
#--------------------------------------------------------------------------# |
|
|
135 |
dmatrix <- dmatrix.spearman.dissimilarity(X) |
|
|
136 |
|
|
|
137 |
#- compute silhouette -----------------------------------------------------# |
|
|
138 |
#--------------------------------------------------------------------------# |
|
|
139 |
silhouette(dmatrix = dmatrix, cluster = cluster) |
|
|
140 |
} |