Diff of /R/getNcomp.R [000000] .. [d79ff0]

Switch to side-by-side view

--- a
+++ b/R/getNcomp.R
@@ -0,0 +1,218 @@
+#' Get optimal number of components
+#'
+#' Compute the average silhouette coefficient for a given set of components on a mixOmics result.
+#' Foreach given ncomp, the mixOmics method is performed with the sames arguments and the given `ncomp`.
+#' Longitudinal clustering is performed and average silhouette coefficient is computed.
+#'
+#' @param object A mixOmics object of the class `pca`, `spca`, `mixo_pls`, `mixo_spls`, `block.pls`, `block.spls`
+#'
+#' @param max.ncomp integer, maximum number of component to include.
+#' If no argument is given, `max.ncomp=object$ncomp`
+#' 
+#' @param X a numeric matrix/data.frame or a list of data.frame for \code{block.pls}
+#' 
+#' @param Y (only for \code{pls}, optional for \code{block.spls}) a numeric matrix, with the same nrow as \code{X}
+#' 
+#' @param indY (optional and only for \code{block.pls}, if Y is not provided), an integer which indicates the position of the matrix response in the list X
+#' 
+#' @param ... Other arguments to be passed to methods (pca, pls, block.pls)
+#' 
+#' @return
+#' \code{getNcomp} returns a list with class "ncomp.tune.silhouette" containing the following components:
+#'
+#' \item{ncomp}{a vector containing the tested ncomp}
+#' \item{silhouette}{a vector containing the average silhouette coefficient by ncomp}
+#' \item{dmatrix}{the distance matrix used to compute silhouette coefficient}
+#'
+#' @seealso 
+#' \code{\link{getCluster}}, \code{\link{silhouette}}, \code{\link[mixOmics]{pca}}, \code{\link[mixOmics]{pls}}, \code{\link[mixOmics]{block.pls}}
+#'
+#' @examples
+#' # random input data
+#' demo <- suppressWarnings(get_demo_cluster())
+#'
+#' # pca
+#' pca.res <- mixOmics::pca(X=demo$X, ncomp = 5)
+#' res.ncomp <- getNcomp(pca.res, max.ncomp = 4, X = demo$X)
+#' plot(res.ncomp)
+#' 
+#' # pls
+#' pls.res <- mixOmics::pls(X=demo$X, Y=demo$Y)
+#' res.ncomp <- getNcomp(pls.res, max.ncomp = 4, X = demo$X, Y=demo$Y)
+#' plot(res.ncomp)
+#' 
+#' # block.pls
+#' block.pls.res <- suppressWarnings(mixOmics::block.pls(X=list(X=demo$X, Z=demo$Z), Y=demo$Y))
+#' res.ncomp <- suppressWarnings(getNcomp(block.pls.res, max.ncomp = 4,
+#'                                        X=list(X=demo$X, Z=demo$Z), Y=demo$Y))
+#' plot(res.ncomp)
+#'
+#' @export
+#' @import mixOmics
+getNcomp <- function(object, max.ncomp = NULL, X, Y = NULL, indY = NULL, ...){
+    #-- checking input parameters ---------------------------------------------#
+    #--------------------------------------------------------------------------#
+
+    #-- object
+    allowed_object = c("pca", "mixo_pls", "block.pls")
+    if(!any(class(object) %in% allowed_object)){
+        stop("invalid object, should be one of c(pca, mixo_pls, block.pls)")
+    }
+    
+
+    #-- max.ncomp
+    if(is_almostInteger(max.ncomp)){
+        if (max.ncomp < 1)
+            stop("'max.ncomp' should be greater than 1")
+
+        if(is(object, "block.pls")){
+            if (max.ncomp > min(ncol(object$X[[1]]), nrow(object$X[[1]])))
+                stop("use smaller 'max.ncomp'")   
+        } else {
+            if (max.ncomp > min(ncol(object$X), nrow(object$X)))
+                stop("use smaller 'max.ncomp'")
+        }
+    } else {
+        max.ncomp <- unique(object$ncomp)
+    }
+
+    #-- run  #pca / pls / block.pls
+    ncomp.opt.res <- ncomp.silhouette(object, X, Y, max.ncomp, indY, ...)
+    
+    to_return <- list()
+    to_return[["ncomp"]] <- c(0,1:max.ncomp)
+    to_return[["silhouette"]] <- c(0,ncomp.opt.res$silhouette.res)
+    to_return[["dmatrix"]] <- ncomp.opt.res$dmatrix
+    to_return[["choice.ncomp"]] <- to_return[["ncomp"]][which.max(to_return[["silhouette"]])]
+
+    class(to_return) <- "ncomp.tune.silhouette"
+    return(invisible(to_return))
+}
+
+ncomp.silhouette <- function(object, X = X, max.ncomp = max.ncomp, ...){
+    UseMethod("ncomp.silhouette")
+}
+
+#' @import mixOmics
+ncomp.silhouette.pca <- function(object, X, Y, max.ncomp, indY, ...){
+    #-- check X
+    X <- validate_matrix_X(X)
+    
+    #-- dmatrix
+    dmatrix <- dmatrix.spearman.dissimilarity(X)
+    
+    silhouette.res <- vector(length = max.ncomp)
+    #-- iterative ncomp silhouette coef.
+    for(comp in 1:max.ncomp){
+        #-- mixo pca
+        mixo.res <- mixOmics::pca(X = X, ncomp = comp, ...)
+        
+        #-- cluster
+        cluster.res <- getCluster(X = mixo.res)
+        # same names, same cluster
+        stopifnot(all(cluster.res$molecule == colnames(dmatrix))) 
+        
+        #-- silhouette
+        sil <- silhouette(dmatrix, cluster.res$cluster)
+        
+        #-- store
+        silhouette.res[comp] <- sil$average
+    }
+    
+    return(list(silhouette.res = silhouette.res, dmatrix = dmatrix))
+}
+
+#' @import mixOmics
+ncomp.silhouette.mixo_pls <- function(object, X, Y, max.ncomp, indY, ...){
+    #-- check X
+    X <- validate_matrix_X(X)
+    
+    #-- check Y
+    Y <- validate_matrix_Y(Y)
+    
+    #-- dmatrix
+    dmatrix <- dmatrix.spearman.dissimilarity(cbind(X,Y))
+    
+    silhouette.res <- vector(length = max.ncomp)
+    #-- iterative ncomp silhouette coef.
+    for(comp in 1:max.ncomp){
+        #-- mixo pls
+        mixo.res <- mixOmics::pls(X = X, Y=Y, ncomp = comp, ...)
+        
+        #-- cluster
+        cluster.res <- getCluster(X = mixo.res)
+        # same names, same cluster
+        stopifnot(all(cluster.res$molecule == colnames(dmatrix))) 
+        
+        #-- silhouette
+        sil <- silhouette(dmatrix, cluster.res$cluster)
+        
+        #-- store
+        silhouette.res[comp] <- sil$average
+    }
+    return(list(silhouette.res = silhouette.res, dmatrix = dmatrix))
+}
+
+#' @import mixOmics
+ncomp.silhouette.block.pls <- function(object, X, Y, max.ncomp, indY, ...){
+    #-- check X
+    X <- validate_list_matrix_X(X)
+    
+    data <- do.call("cbind", X)
+    
+    #-- Y
+    if(!is.null(Y)){
+        Y <- validate_matrix_Y(Y)
+        dmatrix <- dmatrix.spearman.dissimilarity(cbind(data,Y))
+        indY <- NULL
+    } else {
+        indY <- validate_indY(indY = indY, X=X)
+        dmatrix <- dmatrix.spearman.dissimilarity(data)
+    }
+    
+    silhouette.res <- vector(length = max.ncomp)
+    #-- iterative ncomp silhouette coef.
+    for(comp in 1:max.ncomp){
+        #-- mixo block.pls
+        if(is.null(indY)){
+            mixo.res <- mixOmics::block.pls(X = X, ncomp = comp, Y = Y, ...)
+        }else{
+            mixo.res <- mixOmics::block.pls(X = X, ncomp = comp, indY = indY, ...)
+        }
+        
+        #-- cluster
+        cluster.res <- getCluster(X = mixo.res)
+        # same names, same cluster
+        stopifnot(all(cluster.res$molecule == colnames(dmatrix))) 
+        
+        #-- silhouette
+        sil <- silhouette(dmatrix, cluster.res$cluster)
+        
+        #-- store
+        silhouette.res[comp] <- sil$average
+    }
+    return(list(silhouette.res = silhouette.res, dmatrix = dmatrix))
+}
+
+#' @export
+#' @import ggplot2
+plot.ncomp.tune.silhouette <- function(x, title = NULL, ...){
+    stopifnot(is(x, "ncomp.tune.silhouette"))
+    
+    # check title
+    if(!is.character(title)){title = NULL}
+
+    data <- as.data.frame(list(ncomp = x$ncomp, silhouette = x$silhouette))
+    ggplot_df <- ggplot2::ggplot(data, aes(x=ncomp, y = silhouette)) + geom_line() + geom_point() +
+        geom_vline(xintercept = x$choice.ncomp, lty=2, col = "grey") +
+        theme_bw() +
+        xlab("Number of Principal Components") + 
+        ylab("Average Silhouette Coefficient")
+    
+    # add title
+    if(is.character(title)){
+        ggplot_df <- ggplot_df + ggtitle(title)
+    }
+    print(ggplot_df)
+    return(invisible(ggplot_df))
+}