Diff of /R/proprotionality.R [000000] .. [d79ff0]

Switch to side-by-side view

--- a
+++ b/R/proprotionality.R
@@ -0,0 +1,198 @@
+#' Proportionality Distance
+#' 
+#' \code{proportionality} is a wrapper that compute proportionality distance for 
+#' a clustering result (\code{pca}, \code{spca}, \code{pls}, \code{spls}, \code{block.pls}, \code{block.spls}).
+#' and it performs a u-test to compare the median within a cluster to the median of the entire background set.
+#' 
+#' @param X an object of the class: \code{pca}, \code{spca}, \code{pls}, \code{spls}, \code{block.pls} or \code{block.spls}
+#' 
+#' @return 
+#' Return a list containing the following components:
+#'   \item{propr.distance}{Square matrix with proportionality distance between pairs of features}
+#'   \item{propr.distance.w.cluster}{distance between pairs with cluster label}
+#'   \item{pvalue}{Wilcoxon U-test p-value comparing the medians within clusters and with the entire background set}
+#' 
+#' 
+#' @references 
+#' Lovell, D., Pawlowsky-Glahn, V., Egozcue, J. J., Marguerat, S., Bähler, J. (2015). Proportionality: a valid alternative to correlation for relative data. PLoS Comput. Biol. 11, e1004075. doi: 10.1371/journal.pcbi.1004075
+#' 
+#' Quinn, T. P., Richardson, M. F., Lovell, D., Crowley, T. M. (2017). propr: an r-package for identifying proportionally abundant features using compositional data analysis. Sci. Rep. 7, 16252. doi: 10.1038/s41598-017-16520-0
+#' 
+#' @examples 
+#' demo <- suppressWarnings(get_demo_cluster())
+#' 
+#' # pca
+#' X <- demo$pca
+#' propr.res <- proportionality(X)
+#' plot(propr.res)
+#' 
+#' # pls
+#' X <- demo$spls
+#' propr.res <- proportionality(X)
+#' plot(propr.res)
+#' 
+#' # block.pls
+#' X <- demo$block.spls
+#' propr.res <- proportionality(X)
+#' plot(propr.res)
+#' 
+#' @importFrom dplyr mutate filter rename left_join
+#' @importFrom magrittr %>%
+#' @importFrom tibble rownames_to_column
+#' @importFrom tidyr pivot_longer
+#' 
+#' @export
+proportionality <- function(X){
+    #stopifnot(is(X, c("pca", "spca", "mixo_pls", "mixo_spls", "block.pls", "block.spls")))
+    # modif bioc 3.16
+    stopifnot(any(class(X) %in% c("pca", "spca", "mixo_pls", "mixo_spls", "block.pls", "block.spls")))
+    # 1. get cluster
+    cluster.info <- getCluster(X) %>%
+        dplyr::select(molecule, cluster)
+    
+    # 2. extract data and add cluster
+    
+    # pca / spca
+    if(any(class(X) %in% c("pca", "spca"))){
+        # unscaling + positive value
+        data <- unscale(X$X) %>% `+`(abs(min(.))) 
+    } else if(any(class(X) %in% c("mixo_pls", "mixo_spls"))){
+        # unscale X, unscale Y, cat
+        data.X <- unscale(X$X) %>%
+            `+`(abs(min(.)))
+        data.Y <- unscale(X$Y) %>%
+            `+`(abs(min(.)))
+        data <- cbind(data.X, data.Y)
+    } else if(any(class(X) %in% c("block.pls", "block.spls"))){
+        # if(is.null(X$Y)){  ## no need: Y is passed to X
+        data <- lapply(X$X, function(x) x %>%
+                           unscale %>%
+                           `+`(abs(min(.)))) %>%
+            do.call(what="cbind")
+        # } else {
+        #     data.X <- lapply(X$X, function(x) x %>% unscale %>% `+`(abs(min(.)))) %>%
+        #         do.call(what="cbind")
+        #     data.Y <- unscale(X$Y) %>% `+`(abs(min(.)))
+        #     data <- cbind(data.X, data.Y)
+        # }
+    }
+    
+    # 3. compute phi_s
+    
+    # update 16 march 2023: propr deprecated;
+    # implementation of phi_s only based on Quinn paper
+    # data.propr <- suppressMessages(as.data.frame(propr::propr(data, metric = "phs")@matrix))
+    
+    # gather, add cluster info, compute basic stats
+    data.propr <- get_phi_s(data)
+    data.propr.gather <- data.propr %>%
+        tibble::rownames_to_column("feature1") %>%
+        tidyr::pivot_longer(-feature1, names_to = "feature2", values_to = 'value') %>%
+        dplyr::filter(feature1 %in% cluster.info$molecule) %>%
+        dplyr::left_join(cluster.info, by = c("feature1"="molecule")) %>%
+        dplyr::rename("cluster1"="cluster") %>%
+        dplyr::filter(feature2 %in% cluster.info$molecule) %>%
+        dplyr::left_join(cluster.info, by = c("feature2"="molecule")) %>%
+        dplyr::rename("cluster2"="cluster") %>%
+        dplyr::mutate(insideout = ifelse(cluster1 == cluster2, "inside", "outside"))
+        
+    # compute stat (median, u-test pval, adj.pval)
+    res.stat <- stat_median(data.propr.gather) %>%
+        na.omit()
+    
+    res <- list()
+    res[["propr.distance"]] <- data.propr
+    res[["propr.distance.w.cluster"]] <- data.propr.gather
+    res[["pvalue"]] <- res.stat
+    class(res) <- "proportionality"
+    return(res)
+}
+
+#' @importFrom dplyr mutate filter pull
+#' @importFrom purrr set_names
+#' @importFrom magrittr %>%
+stat_median <- function(res.phs.X){
+    i = 1
+    res.pval <- matrix(ncol = 4, nrow = 4) %>%
+        as.data.frame() %>%
+        purrr::set_names("cluster", "median_inside", "median_outside", "Pvalue")
+    for(clu in unique(res.phs.X$cluster1)){
+        inside <- res.phs.X %>%
+            filter(cluster1 == clu) %>% 
+            dplyr::filter(cluster2==clu) %>% 
+            dplyr::pull(value)
+        outside <- res.phs.X %>% 
+            filter(cluster1 == clu) %>% 
+            dplyr::filter(cluster2!=clu) %>% 
+            dplyr::pull(value)
+        
+        #ttest.pval <- t.test(inside, outside)$p.value
+        utest.pval <- stats::wilcox.test(inside, outside)$p.value
+        
+        res.pval[i,] <- c(clu, round(median(inside), digits = 2),
+                          round(median(outside), digits = 2), utest.pval)
+        i = i+1
+    }
+    as.data.frame(res.pval) %>% 
+        dplyr::mutate("Adj.Pvalue" = stats::p.adjust(Pvalue, method = "fdr")) %>%
+        na.omit
+    return(res.pval)
+}
+
+#' @export
+#' @import ggplot2
+#' @importFrom mixOmics color.mixo
+plot.proportionality <- function(x, ...){
+    ggplot2::ggplot(data = x$propr.distance.w.cluster, 
+                    aes(x=as.factor(cluster1), y=value, col=insideout)) + 
+        geom_boxplot() + theme_bw() + 
+        xlab("Cluster ID") + 
+        ylab("Proportionality distance") + 
+        labs(color = "Proportionality distance") +
+        scale_color_manual(values = mixOmics::color.mixo(1:2))
+}
+
+# based on the works of Thom Quinn (https://github.com/tpq/propr)
+get_phi_s <- function(counts){
+    ct <- counts
+    
+    if (any(as.matrix(counts) == 0)) {
+        #message("Alert: Replacing 0s with next smallest value.")
+        zeros <- ct == 0
+        ct[zeros] <- min(ct[!zeros])
+    }
+    #ivar = "clr"
+    #use <- propr::ivar2index(ct, ivar)
+    use <- 1:ncol(counts)
+    
+    
+    logX <- log(ct)
+    logSet <- logX[, use, drop = FALSE]
+    ref <- rowMeans(logSet)
+    lr <- sweep(logX, 1, ref, "-")
+
+    mat <- as.data.frame(to_lr2phs(lr))
+    colnames(mat) <- colnames(lr)
+    rownames(mat) <- colnames(lr)
+    return(mat)
+}
+
+
+to_lr2phs <- function(lr){
+    # Calculate phs = var(a-b)/var(a+b)
+    nfeats <- ncol(lr);
+    mat <- matrix(nrow = nfeats, ncol = nfeats)
+    for(i in 1:nfeats){
+        for(j in 1:nfeats){
+            if(i == j){
+                mat[i,j] <- 0
+            } else {
+                a <- lr[,i]; b <- lr[,j]
+                mat[i,j] <- var(a-b)/var(a+b)
+            }
+        }
+    }
+    # return the same matrix as propr
+    return(mat)
+}
+    
\ No newline at end of file