--- a +++ b/R/proprotionality.R @@ -0,0 +1,198 @@ +#' Proportionality Distance +#' +#' \code{proportionality} is a wrapper that compute proportionality distance for +#' a clustering result (\code{pca}, \code{spca}, \code{pls}, \code{spls}, \code{block.pls}, \code{block.spls}). +#' and it performs a u-test to compare the median within a cluster to the median of the entire background set. +#' +#' @param X an object of the class: \code{pca}, \code{spca}, \code{pls}, \code{spls}, \code{block.pls} or \code{block.spls} +#' +#' @return +#' Return a list containing the following components: +#' \item{propr.distance}{Square matrix with proportionality distance between pairs of features} +#' \item{propr.distance.w.cluster}{distance between pairs with cluster label} +#' \item{pvalue}{Wilcoxon U-test p-value comparing the medians within clusters and with the entire background set} +#' +#' +#' @references +#' Lovell, D., Pawlowsky-Glahn, V., Egozcue, J. J., Marguerat, S., Bähler, J. (2015). Proportionality: a valid alternative to correlation for relative data. PLoS Comput. Biol. 11, e1004075. doi: 10.1371/journal.pcbi.1004075 +#' +#' Quinn, T. P., Richardson, M. F., Lovell, D., Crowley, T. M. (2017). propr: an r-package for identifying proportionally abundant features using compositional data analysis. Sci. Rep. 7, 16252. doi: 10.1038/s41598-017-16520-0 +#' +#' @examples +#' demo <- suppressWarnings(get_demo_cluster()) +#' +#' # pca +#' X <- demo$pca +#' propr.res <- proportionality(X) +#' plot(propr.res) +#' +#' # pls +#' X <- demo$spls +#' propr.res <- proportionality(X) +#' plot(propr.res) +#' +#' # block.pls +#' X <- demo$block.spls +#' propr.res <- proportionality(X) +#' plot(propr.res) +#' +#' @importFrom dplyr mutate filter rename left_join +#' @importFrom magrittr %>% +#' @importFrom tibble rownames_to_column +#' @importFrom tidyr pivot_longer +#' +#' @export +proportionality <- function(X){ + #stopifnot(is(X, c("pca", "spca", "mixo_pls", "mixo_spls", "block.pls", "block.spls"))) + # modif bioc 3.16 + stopifnot(any(class(X) %in% c("pca", "spca", "mixo_pls", "mixo_spls", "block.pls", "block.spls"))) + # 1. get cluster + cluster.info <- getCluster(X) %>% + dplyr::select(molecule, cluster) + + # 2. extract data and add cluster + + # pca / spca + if(any(class(X) %in% c("pca", "spca"))){ + # unscaling + positive value + data <- unscale(X$X) %>% `+`(abs(min(.))) + } else if(any(class(X) %in% c("mixo_pls", "mixo_spls"))){ + # unscale X, unscale Y, cat + data.X <- unscale(X$X) %>% + `+`(abs(min(.))) + data.Y <- unscale(X$Y) %>% + `+`(abs(min(.))) + data <- cbind(data.X, data.Y) + } else if(any(class(X) %in% c("block.pls", "block.spls"))){ + # if(is.null(X$Y)){ ## no need: Y is passed to X + data <- lapply(X$X, function(x) x %>% + unscale %>% + `+`(abs(min(.)))) %>% + do.call(what="cbind") + # } else { + # data.X <- lapply(X$X, function(x) x %>% unscale %>% `+`(abs(min(.)))) %>% + # do.call(what="cbind") + # data.Y <- unscale(X$Y) %>% `+`(abs(min(.))) + # data <- cbind(data.X, data.Y) + # } + } + + # 3. compute phi_s + + # update 16 march 2023: propr deprecated; + # implementation of phi_s only based on Quinn paper + # data.propr <- suppressMessages(as.data.frame(propr::propr(data, metric = "phs")@matrix)) + + # gather, add cluster info, compute basic stats + data.propr <- get_phi_s(data) + data.propr.gather <- data.propr %>% + tibble::rownames_to_column("feature1") %>% + tidyr::pivot_longer(-feature1, names_to = "feature2", values_to = 'value') %>% + dplyr::filter(feature1 %in% cluster.info$molecule) %>% + dplyr::left_join(cluster.info, by = c("feature1"="molecule")) %>% + dplyr::rename("cluster1"="cluster") %>% + dplyr::filter(feature2 %in% cluster.info$molecule) %>% + dplyr::left_join(cluster.info, by = c("feature2"="molecule")) %>% + dplyr::rename("cluster2"="cluster") %>% + dplyr::mutate(insideout = ifelse(cluster1 == cluster2, "inside", "outside")) + + # compute stat (median, u-test pval, adj.pval) + res.stat <- stat_median(data.propr.gather) %>% + na.omit() + + res <- list() + res[["propr.distance"]] <- data.propr + res[["propr.distance.w.cluster"]] <- data.propr.gather + res[["pvalue"]] <- res.stat + class(res) <- "proportionality" + return(res) +} + +#' @importFrom dplyr mutate filter pull +#' @importFrom purrr set_names +#' @importFrom magrittr %>% +stat_median <- function(res.phs.X){ + i = 1 + res.pval <- matrix(ncol = 4, nrow = 4) %>% + as.data.frame() %>% + purrr::set_names("cluster", "median_inside", "median_outside", "Pvalue") + for(clu in unique(res.phs.X$cluster1)){ + inside <- res.phs.X %>% + filter(cluster1 == clu) %>% + dplyr::filter(cluster2==clu) %>% + dplyr::pull(value) + outside <- res.phs.X %>% + filter(cluster1 == clu) %>% + dplyr::filter(cluster2!=clu) %>% + dplyr::pull(value) + + #ttest.pval <- t.test(inside, outside)$p.value + utest.pval <- stats::wilcox.test(inside, outside)$p.value + + res.pval[i,] <- c(clu, round(median(inside), digits = 2), + round(median(outside), digits = 2), utest.pval) + i = i+1 + } + as.data.frame(res.pval) %>% + dplyr::mutate("Adj.Pvalue" = stats::p.adjust(Pvalue, method = "fdr")) %>% + na.omit + return(res.pval) +} + +#' @export +#' @import ggplot2 +#' @importFrom mixOmics color.mixo +plot.proportionality <- function(x, ...){ + ggplot2::ggplot(data = x$propr.distance.w.cluster, + aes(x=as.factor(cluster1), y=value, col=insideout)) + + geom_boxplot() + theme_bw() + + xlab("Cluster ID") + + ylab("Proportionality distance") + + labs(color = "Proportionality distance") + + scale_color_manual(values = mixOmics::color.mixo(1:2)) +} + +# based on the works of Thom Quinn (https://github.com/tpq/propr) +get_phi_s <- function(counts){ + ct <- counts + + if (any(as.matrix(counts) == 0)) { + #message("Alert: Replacing 0s with next smallest value.") + zeros <- ct == 0 + ct[zeros] <- min(ct[!zeros]) + } + #ivar = "clr" + #use <- propr::ivar2index(ct, ivar) + use <- 1:ncol(counts) + + + logX <- log(ct) + logSet <- logX[, use, drop = FALSE] + ref <- rowMeans(logSet) + lr <- sweep(logX, 1, ref, "-") + + mat <- as.data.frame(to_lr2phs(lr)) + colnames(mat) <- colnames(lr) + rownames(mat) <- colnames(lr) + return(mat) +} + + +to_lr2phs <- function(lr){ + # Calculate phs = var(a-b)/var(a+b) + nfeats <- ncol(lr); + mat <- matrix(nrow = nfeats, ncol = nfeats) + for(i in 1:nfeats){ + for(j in 1:nfeats){ + if(i == j){ + mat[i,j] <- 0 + } else { + a <- lr[,i]; b <- lr[,j] + mat[i,j] <- var(a-b)/var(a+b) + } + } + } + # return the same matrix as propr + return(mat) +} + \ No newline at end of file