--- a +++ b/R/silhouette.R @@ -0,0 +1,140 @@ +#' Get data for silhouette demo +#' +#' @return A matrix of expression profile, sample in raws, time in columns. +#' +#' @examples +#' data <- get_demo_silhouette() +#' +#' @export +get_demo_silhouette <- function() { + readRDS(system.file("extdata/data_silhouette.rds", package="timeOmics", + mustWork = TRUE)) +} + + +#' @importFrom dplyr group_by +#' @importFrom dplyr summarise +silhouette <- function(dmatrix, # distance matrix + cluster) # cluster vector of size ncol(dmatrix) + { + #-- checking input parameters ---------------------------------------------# + #--------------------------------------------------------------------------# + + #-- check dmatrix + stopifnot(is.matrix(dmatrix) || is.data.frame(dmatrix)) + dmatrix <- as.matrix(dmatrix) + stopifnot(nrow(dmatrix) == ncol(dmatrix)) + + #-- check cluster + stopifnot(is.vector(cluster) || is.factor(cluster)) + stopifnot(length(cluster) == ncol(dmatrix)) + + cluster <- factor(cluster) + cluster.levels <- levels(cluster) + + + #- compute silhouette -----------------------------------------------------# + #--------------------------------------------------------------------------# + average.dist <- matrix(ncol = length(cluster.levels), nrow = nrow(dmatrix)) + result <- vector(length = ncol(dmatrix)) + for(i in 1:nrow(dmatrix)){ + for(j in 1:length(cluster.levels)){ + index.tmp <- cluster == cluster.levels[j] + if(cluster.levels[j] == cluster[i]) { + # we do not include d(i,i) in the sum + # can introduce NaN if size of cluster is 1 + # but this is handle after because score is 0 when size of cluster is 1 + index.tmp[i] <- FALSE + } + average.dist[i,j] <- mean(dmatrix[i, index.tmp]) + } + A <- average.dist[i, cluster[i]] # a : inside + #B <- min(average.dist[i,-c(cluster[i])]) # b + # fix R 4.1 + B <- min(average.dist[i,-which(cluster[i] %in% cluster.levels)]) # b + result[i] <- silhoutte.formula(A = A, B = B) + } + + #-- return + to_return <- list() + + #-- silhouette coefficient by feature + to_return[["feature"]] <- cbind(colnames(dmatrix), cluster, as.data.frame(result)) + colnames(to_return[["feature"]]) <- c("feature", "cluster", "silhouette.coef") + + #-- average silhouette coefficient + to_return[["average"]] <- mean(to_return[["feature"]][["silhouette.coef"]]) + + #-- average silhouette coefficient by cluster + to_return[["average.cluster"]] <- dplyr::group_by(to_return[["feature"]], cluster) %>% + dplyr::summarise(silhouette.coef = mean(silhouette.coef)) %>% + as.data.frame + + return(to_return) +} + +#' dmatrix.spearman.dissimilarity +#' +#' Compute the spearman dissimilarity distance. +#' +#' @param X A numeric matrix with feature in colnames +#' +#' @return +#' Return a dissimilarity matrix of size PxP. +#' +dmatrix.spearman.dissimilarity <- function(X){ + # between 0 and 2 + dmatrix <- cor(x = X, use = 'pairwise.complete.obs', + method = 'spearman') + dmatrix <- 1 - dmatrix + return(dmatrix) +} + +# dmatrix.proportionality.distance <- function(X){ +# # clr first +# dmatrix <- matrix(ncol = ncol(X), nrow = ncol(X)) +# rownames(dmatrix) <- colnames(dmatrix) <- colnames(X) +# for(i in 1:ncol(X)){ +# for(j in 1:ncol(X)){ +# dmatrix[i,j] <- var(X[,i] - X[,j])/var(X[,i] + X[,j]) +# } +# } +# return(dmatrix) +# } + +silhoutte.formula <- function(A,B){ + # A average dist inside cluster; # B: min average dist outside + stopifnot(is.vector(A) && is.vector(B)) + stopifnot(is.numeric(A) && is.numeric(B)) + if(!(is.finite(A))){ + return(0) + }else{ + return((B - A)/(max(A,B))) + } +} + + +wrapper.silhouette <- function(X, cluster) +{ + #-- checking input parameters ---------------------------------------------# + #--------------------------------------------------------------------------# + + #-- check X + stopifnot(is.matrix(X) || is.data.frame(X)) + X <- as.matrix(X) + + #-- check cluster + stopifnot(is.vector(cluster) || is.factor(cluster)) + stopifnot(length(cluster) == ncol(X)) + + cluster <- factor(cluster) + cluster.levels <- levels(cluster) + + #-- compute distance matrix -----------------------------------------------# + #--------------------------------------------------------------------------# + dmatrix <- dmatrix.spearman.dissimilarity(X) + + #- compute silhouette -----------------------------------------------------# + #--------------------------------------------------------------------------# + silhouette(dmatrix = dmatrix, cluster = cluster) +}