timeOmics / Git / Diff of /R/getCluster.R

Models:
AlyssaS/
timeOmics
Downloads: 1
Diff of /R/getCluster.R [000000] .. [d79ff0]
Switch to side-by-side view

--- a
+++ b/R/getCluster.R
@@ -0,0 +1,358 @@
+#' Get variable cluster from (s)PCA, (s)PLS or block.(s)PLS
+#'
+#' This function returns the cluster associated to each feature from a mixOmics object.
+#'
+#' @param X an object of the class: \code{pca}, \code{spca}, \code{pls}, \code{spls}, \code{block.pls} or \code{block.spls}
+#' @param user.cluster a vector to filter the result and return only the features of the specified clusters
+#' @param user.block a vector to filter the result and return the features of the specified blocks.
+#'
+#' @return
+#' A data.frame containing the name of feature, its assigned cluster and other information such as selected component, contribution, sign, ...
+#'
+#' @details
+#' For each feature, the cluster is assigned according to the maximum contribution on a component and the sign of that contribution.
+#' 
+#' @seealso 
+#' \code{\link[mixOmics]{selectVar}}
+#' 
+#' @examples
+#' demo <- suppressWarnings(get_demo_cluster())
+#' pca.cluster <- getCluster(demo$pca)
+#' spca.cluster <- getCluster(demo$spca)
+#' pls.cluster <- getCluster(demo$pls)
+#' spls.cluster <- getCluster(demo$spls)
+#' block.pls.cluster <- getCluster(demo$block.pls)
+#' block.spls.cluster <- getCluster(demo$block.spls)
+#'
+#' @export
+getCluster <- function(X, user.block = NULL, user.cluster = NULL) UseMethod("getCluster")
+
+#' get_demo_cluster
+#' 
+#' Generates random data to be used in examples.
+#' 
+#' @return  a list containg:
+#' \item{X}{data.frame}
+#' \item{Y}{data.frame}
+#' \item{Z}{data.frame}
+#' \item{pca}{a mixOmics pca result}
+#' \item{spca}{a mixOmics spca result}
+#' \item{pls}{a mixOmics pls result}
+#' \item{spls}{a mixOmics spls result}
+#' \item{block.pls}{a mixOmics block.pls result}
+#' \item{block.spls}{a mixOmics block.spls result}
+#' 
+#' @examples
+#' # Random data could lead to "The SGCCA algorithm did not converge" warning which is not important for a demo
+#' demo <- suppressWarnings(get_demo_cluster())
+#' @export
+get_demo_cluster<- function(){
+    X <- matrix(sample(1:1000), nrow = 10,
+                dimnames = list(1:10, paste0("X_",1:100)))
+
+    Y <- matrix(sample(1:100), nrow = 10, 
+                dimnames = list(1:10, paste0("Y_",1:10)))
+
+    Z <- matrix(sample(1:500), nrow = 10, 
+                dimnames = list(1:10, Y = paste0("Z_",1:50)))
+    
+    list.res = list()
+    list.res$X <- X
+    list.res$Y <- Y
+    list.res$Z <- Z
+    list.res$pca <- mixOmics::pca(X = X, ncomp = 5)
+    list.res$spca <- mixOmics::spca(X = X, ncomp = 5, keepX = c(5, 15, 4,5,6))
+
+    list.res$pls <- mixOmics::pls(X = X, Y = Y, ncomp = 5, mode = "canonical")
+    list.res$spls <- mixOmics::spls(X = X, Y = Y, ncomp = 5, mode = "canonical",
+                                keepX = c(5,6,4,5,6), keepY = c(5,1,4,5,6))
+
+    list.res$block.pls <- mixOmics::block.pls(X = list("X" = X, "Y" = Y, "Z" = Z), indY = 1,
+                                             ncomp = 5, mode = "canonical")
+
+    list.res$block.spls <- mixOmics::block.spls(X = list("X" = X, "Y" = Y, "Z" = Z), indY = 1, ncomp = 3,
+                                             mode = "canonical", keepX = list("X" = c(5,6,4), "Y" = c(5,5,5), "Z" = c(4,2,4)))
+    list.res$UpDown <- getUpDownCluster(X = X)
+    return(invisible(list.res))
+}
+
+#' @importFrom dplyr mutate
+#' @importFrom tibble rownames_to_column
+#' @importFrom stringr str_remove
+#' @importFrom magrittr %>%
+#' @export
+getCluster.pca <- function(X, user.block = NULL, user.cluster = NULL){
+
+    #print("getCluster.pca")
+    # colnames = PC1, PC2...
+    loadings.max <- getMaxContrib(X$loadings$X)
+
+    loadings.max <- loadings.max %>% 
+        rownames_to_column("molecule") %>%
+        mutate(cluster = stringr::str_remove(comp, "^PC") %>% 
+                   as.numeric()) %>%
+        mutate(block = "X") %>%
+        .mutate_cluster()
+    Valid.getCluster(loadings.max)
+    loadings.max <- filter.cluster.df(.data = loadings.max, user.block = user.block, user.cluster = user.cluster)
+    class(loadings.max) <- c("cluster.df", "data.frame")
+    return(loadings.max)
+}
+
+#' @export
+#' @importFrom dplyr mutate
+#' @importFrom tibble rownames_to_column
+#' @importFrom stringr str_remove
+#' @importFrom magrittr %>%
+getCluster.spca <- function(X,  user.block = NULL, user.cluster = NULL){
+    # print(class(X))
+    selected.features.loadings <- X$loadings$X[rowSums(X$loadings$X) != 0,,drop=FALSE]
+    loadings.max <- getMaxContrib(selected.features.loadings)
+
+    loadings.max <- loadings.max %>% 
+        rownames_to_column("molecule") %>%
+        mutate(cluster = stringr::str_remove(comp, "^PC") %>% 
+                   as.numeric()) %>%
+        mutate(block = "X") %>%
+        .mutate_cluster()
+    Valid.getCluster(loadings.max)
+    
+    loadings.max <- filter.cluster.df(.data = loadings.max, user.block = user.block, user.cluster = user.cluster)
+    class(loadings.max) <- c("cluster.df", "data.frame")
+    return(loadings.max)
+}
+
+#' @export
+#' @importFrom dplyr mutate
+#' @importFrom tibble rownames_to_column
+#' @importFrom stringr str_remove
+#' @importFrom magrittr %>%
+getCluster.mixo_pls <- function(X,  user.block = NULL, user.cluster = NULL){
+    # print(class(X))
+    # block X
+    loadings.max.X <- getMaxContrib(X$loadings$X)
+
+    loadings.max.X <- loadings.max.X %>% 
+        rownames_to_column("molecule") %>%
+        mutate(cluster = stringr::str_remove(comp, "^comp") %>% 
+                   as.numeric()) %>%
+        mutate(block = "X")
+
+
+    # block Y
+    loadings.max.Y <- getMaxContrib(X$loadings$Y)
+
+    loadings.max.Y <- loadings.max.Y %>% 
+        rownames_to_column("molecule") %>%
+        mutate(cluster = stringr::str_remove(comp, "^comp") %>% 
+                   as.numeric()) %>%
+        mutate(block = "Y")
+
+    loadings.max <- rbind(loadings.max.X, loadings.max.Y) %>%
+        .mutate_cluster()
+
+    Valid.getCluster(loadings.max)
+    
+    loadings.max <- filter.cluster.df(.data = loadings.max, user.block = user.block, user.cluster = user.cluster)
+    class(loadings.max) <- c("cluster.df", "data.frame")
+    return(loadings.max)
+}
+
+#' @export
+#' @importFrom dplyr mutate
+#' @importFrom tibble rownames_to_column
+#' @importFrom stringr str_remove
+#' @importFrom magrittr %>%
+getCluster.mixo_spls <- function(X,  user.block = NULL, user.cluster = NULL){
+    # note : can not concatenate X and Y
+    # because they can have the same features names contrary to block.(s)pls
+
+    # print(class(X))
+    # block X
+    X.selected.features.loadings <- X$loadings$X[rowSums(X$loadings$X) != 0,,drop=FALSE]
+    loadings.max.X <- getMaxContrib(X.selected.features.loadings)
+
+    loadings.max.X <- loadings.max.X %>%
+        rownames_to_column("molecule") %>%
+        mutate(cluster = stringr::str_remove(comp, "^comp") %>%
+                   as.numeric()) %>%
+        mutate(block = "X")
+
+    # block Y
+    Y.selected.features.loadings <- X$loadings$Y[rowSums(X$loadings$Y) != 0,,drop = FALSE]
+    loadings.max.Y <- getMaxContrib(Y.selected.features.loadings)
+
+    loadings.max.Y <- loadings.max.Y %>%
+        rownames_to_column("molecule") %>%
+        mutate(cluster = stringr::str_remove(comp, "^comp") %>%
+                   as.numeric()) %>%
+        mutate(block = "Y")
+
+    loadings.max <- rbind(loadings.max.X, loadings.max.Y)  %>%
+        .mutate_cluster()
+
+    Valid.getCluster(loadings.max)
+    
+    loadings.max <- filter.cluster.df(.data = loadings.max, user.block = user.block, user.cluster = user.cluster)
+    class(loadings.max) <- c("cluster.df", "data.frame")
+    return(loadings.max)
+}
+
+#' @export
+#' @importFrom purrr imap set_names
+#' @importFrom dplyr mutate left_join
+#' @importFrom tibble rownames_to_column
+#' @importFrom stringr str_remove
+#' @importFrom magrittr %>%
+getCluster.block.pls <- function(X,  user.block = NULL, user.cluster = NULL){
+    # print(class(X))
+    # get block info
+    block.info <- purrr::imap(X$loadings, function(x,y) rownames(x) %>%
+                           as.data.frame %>%
+                           set_names("molecule") %>%
+                           mutate("block" = y))
+    block.info <- do.call("rbind", block.info) %>% as.data.frame() %>%
+        mutate(block = factor(block, levels = names(X$loadings))) %>%
+        mutate(molecule = as.character(molecule))
+
+    loadings <- do.call("rbind", X$loadings)
+    loadings.max <- getMaxContrib(loadings)
+
+    loadings.max <- loadings.max %>%
+        rownames_to_column("molecule") %>%
+        mutate(cluster = stringr::str_remove(comp, "^comp") %>%
+                   as.numeric()) %>%
+        left_join(block.info, by = c("molecule"= "molecule")) %>%
+        .mutate_cluster()
+
+    Valid.getCluster(loadings.max)
+    
+    loadings.max <- filter.cluster.df(.data = loadings.max, user.block = user.block, user.cluster = user.cluster)
+    class(loadings.max) <- c("cluster.df", "data.frame")
+    return(loadings.max)
+}
+
+#' @export
+#' @importFrom purrr imap set_names
+#' @importFrom dplyr mutate left_join
+#' @importFrom tibble rownames_to_column
+#' @importFrom stringr str_remove
+#' @importFrom magrittr %>%
+getCluster.block.spls <- function(X,  user.block = NULL, user.cluster = NULL){
+
+    # print(class(X))
+    # get block info
+    block.info <- purrr::imap(X$loadings, function(x,y) rownames(x) %>%
+                                  as.data.frame %>%
+                                  set_names("molecule") %>%
+                                  mutate("block" = y))
+    block.info <- do.call("rbind", block.info) %>%
+        as.data.frame() %>%
+        mutate(block = factor(block, levels = names(X$loadings))) %>%
+        mutate(molecule = as.character(molecule))
+
+    # sparse
+    loadings <- do.call("rbind", X$loadings)
+    X.selected.features.loadings <- loadings[rowSums(loadings) != 0,, drop=FALSE]
+    loadings.max <- getMaxContrib(X.selected.features.loadings)
+
+    loadings.max <- loadings.max %>%
+        rownames_to_column("molecule") %>%
+        mutate(cluster = stringr::str_remove(comp, "^comp") %>%
+                   as.numeric()) %>%
+        left_join(block.info, by = c("molecule"= "molecule")) %>%
+        .mutate_cluster()
+
+    Valid.getCluster(loadings.max)
+    
+    loadings.max <- filter.cluster.df(.data = loadings.max, user.block = user.block, user.cluster = user.cluster)
+    class(loadings.max) <- c("cluster.df", "data.frame")
+    return(loadings.max)
+}
+
+#' Get Max Contrib from loading matrix
+#' 
+#' @param X loading matrix from mixOmics
+#' @return a matrix
+#' 
+#' @keywords internal
+#' @noRd 
+#' @importFrom purrr set_names
+#' @importFrom magrittr %>%
+getMaxContrib <- function(X){
+    # loadings matrix, features in rows, comp in columns
+    contrib.max <- apply(X = X, FUN = function(x) { x[which.max( abs(x) )][1]}, MARGIN = 1) %>%
+        as.data.frame() %>%
+        purrr::set_names("contrib.max")
+
+    cluster.info <- apply(X = X, FUN = function(x) { colnames(X)[which.max( abs(x) )[1]]}, MARGIN = 1) %>%
+        as.data.frame() %>%
+        purrr::set_names("comp")
+
+    stopifnot(rownames(contrib.max) == rownames(cluster.info))
+    return(cbind(cluster.info, contrib.max))
+}
+
+# absmax <- function(x) { x[which.max( abs(x) )][1]}
+# absmax.index <- function(x) { which.max( abs(x) )[1]}
+
+
+#' @importFrom dplyr mutate case_when pull
+#' @importFrom magrittr %>%
+.mutate_cluster <- function(loadings.max){
+    X <- loadings.max %>%
+        mutate(cluster = cluster * sign(contrib.max)) %>%
+    mutate(contribution = case_when(sign(contrib.max) == 1 ~ "positive",
+                                    sign(contrib.max) == -1 ~ "negative",
+                                    sign(contrib.max) == 0 ~ "NULL"))
+
+    cluster.order <-  X %>%
+        pull(cluster) %>%
+        abs %>%
+        unique %>% 
+        sort %>%
+        rep(each=2) %>% `*`(c(1,-1)) %>% unique
+
+    X <- X %>%
+        mutate(cluster = factor(cluster, levels = cluster.order))
+    return(X)
+}
+
+Valid.getCluster <- function(X){
+    col_names <- c("molecule","comp","contrib.max","cluster","block","contribution")
+    stopifnot(all(col_names %in% colnames(X)))
+    # other check ?
+    # all comp present? sometimes not true
+    # idem for number of cluster
+    # also a molecule can be found in different cluster
+}
+
+
+#' @importFrom dplyr filter
+#' @export
+filter.cluster.df <- function(.data, user.block = NULL, user.cluster = NULL){
+    # X <- getCluster(pca); pca.cluster
+    
+    X.filter <- .data
+    if(!is.null(user.block)){
+        X.filter <- dplyr::filter(X.filter, block %in% user.block)
+    }
+    if(!is.null(user.cluster)){
+        X.filter <- dplyr::filter(X.filter, cluster %in% user.cluster)
+    }
+    return(X.filter)
+}
+
+
+# add getCluster for getCluster (cluser.df) to easily apply filter
+#' @export
+getCluster.cluster.df <- function(X, user.block = NULL, user.cluster = NULL){
+    results <- X
+    results <- filter.cluster.df(.data = results, user.block = user.block, user.cluster = user.cluster)
+    class(results) <- c("cluster.df", "data.frame")
+    return(results)
+}
+
+
+
+