Diff of /R/silhouette.R [000000] .. [d79ff0]

Switch to unified view

a b/R/silhouette.R
1
#' Get data for silhouette demo
2
#'
3
#' @return A matrix of expression profile, sample in raws, time in columns.
4
#'
5
#' @examples
6
#' data <- get_demo_silhouette()
7
#'
8
#' @export
9
get_demo_silhouette <- function() {
10
    readRDS(system.file("extdata/data_silhouette.rds", package="timeOmics",
11
                        mustWork = TRUE))
12
}
13
14
15
#' @importFrom dplyr group_by
16
#' @importFrom dplyr summarise
17
silhouette <- function(dmatrix,  # distance matrix
18
                       cluster)  # cluster vector of size ncol(dmatrix)
19
    {
20
    #-- checking input parameters ---------------------------------------------#
21
    #--------------------------------------------------------------------------#
22
23
    #-- check dmatrix
24
    stopifnot(is.matrix(dmatrix) || is.data.frame(dmatrix))
25
    dmatrix <- as.matrix(dmatrix)
26
    stopifnot(nrow(dmatrix) == ncol(dmatrix))
27
28
    #-- check cluster
29
    stopifnot(is.vector(cluster) || is.factor(cluster))
30
    stopifnot(length(cluster) == ncol(dmatrix))
31
32
    cluster <- factor(cluster)
33
    cluster.levels <- levels(cluster)
34
35
36
    #- compute silhouette -----------------------------------------------------#
37
    #--------------------------------------------------------------------------#
38
    average.dist <- matrix(ncol = length(cluster.levels), nrow = nrow(dmatrix))
39
    result <- vector(length = ncol(dmatrix))
40
    for(i in 1:nrow(dmatrix)){
41
        for(j in 1:length(cluster.levels)){
42
            index.tmp <- cluster == cluster.levels[j]
43
            if(cluster.levels[j] == cluster[i]) {
44
                # we do not include d(i,i) in the sum
45
                # can introduce NaN if size of cluster is 1
46
                # but this is handle after because score is 0 when size of cluster is 1
47
                index.tmp[i] <- FALSE
48
            }
49
            average.dist[i,j] <- mean(dmatrix[i, index.tmp])
50
        }
51
        A <- average.dist[i, cluster[i]] # a : inside
52
        #B <- min(average.dist[i,-c(cluster[i])])  # b
53
        # fix R 4.1
54
        B <- min(average.dist[i,-which(cluster[i] %in% cluster.levels)])  # b
55
        result[i] <- silhoutte.formula(A = A, B = B)
56
    }
57
58
    #-- return
59
    to_return <- list()
60
61
    #-- silhouette coefficient by feature
62
    to_return[["feature"]] <- cbind(colnames(dmatrix), cluster,  as.data.frame(result))
63
    colnames(to_return[["feature"]]) <- c("feature", "cluster", "silhouette.coef")
64
65
    #-- average silhouette coefficient
66
    to_return[["average"]] <- mean(to_return[["feature"]][["silhouette.coef"]])
67
68
    #-- average silhouette coefficient by cluster
69
    to_return[["average.cluster"]] <- dplyr::group_by(to_return[["feature"]], cluster) %>%
70
        dplyr::summarise(silhouette.coef = mean(silhouette.coef)) %>%
71
        as.data.frame
72
73
    return(to_return)
74
}
75
76
#' dmatrix.spearman.dissimilarity
77
#'
78
#' Compute the spearman dissimilarity distance.
79
#'
80
#' @param X A numeric matrix with feature in colnames
81
#'
82
#' @return
83
#' Return a dissimilarity matrix of size PxP.
84
#'
85
dmatrix.spearman.dissimilarity <- function(X){
86
    # between 0 and 2
87
    dmatrix <- cor(x = X, use = 'pairwise.complete.obs',
88
                   method = 'spearman')
89
    dmatrix <- 1 - dmatrix
90
    return(dmatrix)
91
}
92
93
# dmatrix.proportionality.distance <- function(X){
94
#     # clr first
95
#     dmatrix <- matrix(ncol = ncol(X), nrow = ncol(X))
96
#     rownames(dmatrix) <- colnames(dmatrix) <- colnames(X)
97
#     for(i in 1:ncol(X)){
98
#         for(j in 1:ncol(X)){
99
#             dmatrix[i,j] <- var(X[,i] - X[,j])/var(X[,i] + X[,j])
100
#         }
101
#     }
102
#     return(dmatrix)
103
# }
104
105
silhoutte.formula <- function(A,B){
106
    # A average dist inside cluster; # B: min average dist outside
107
    stopifnot(is.vector(A) && is.vector(B))
108
    stopifnot(is.numeric(A) && is.numeric(B))
109
    if(!(is.finite(A))){
110
        return(0)
111
    }else{
112
        return((B - A)/(max(A,B)))
113
    }
114
}
115
116
117
wrapper.silhouette <- function(X, cluster)
118
{
119
    #-- checking input parameters ---------------------------------------------#
120
    #--------------------------------------------------------------------------#
121
122
    #-- check X
123
    stopifnot(is.matrix(X) || is.data.frame(X))
124
    X <- as.matrix(X)
125
126
    #-- check cluster
127
    stopifnot(is.vector(cluster) || is.factor(cluster))
128
    stopifnot(length(cluster) == ncol(X))
129
130
    cluster <- factor(cluster)
131
    cluster.levels <- levels(cluster)
132
133
    #-- compute distance matrix -----------------------------------------------#
134
    #--------------------------------------------------------------------------#
135
    dmatrix <- dmatrix.spearman.dissimilarity(X)
136
137
    #- compute silhouette -----------------------------------------------------#
138
    #--------------------------------------------------------------------------#
139
    silhouette(dmatrix = dmatrix, cluster = cluster)
140
}