|
a |
|
b/R/proprotionality.R |
|
|
1 |
#' Proportionality Distance |
|
|
2 |
#' |
|
|
3 |
#' \code{proportionality} is a wrapper that compute proportionality distance for |
|
|
4 |
#' a clustering result (\code{pca}, \code{spca}, \code{pls}, \code{spls}, \code{block.pls}, \code{block.spls}). |
|
|
5 |
#' and it performs a u-test to compare the median within a cluster to the median of the entire background set. |
|
|
6 |
#' |
|
|
7 |
#' @param X an object of the class: \code{pca}, \code{spca}, \code{pls}, \code{spls}, \code{block.pls} or \code{block.spls} |
|
|
8 |
#' |
|
|
9 |
#' @return |
|
|
10 |
#' Return a list containing the following components: |
|
|
11 |
#' \item{propr.distance}{Square matrix with proportionality distance between pairs of features} |
|
|
12 |
#' \item{propr.distance.w.cluster}{distance between pairs with cluster label} |
|
|
13 |
#' \item{pvalue}{Wilcoxon U-test p-value comparing the medians within clusters and with the entire background set} |
|
|
14 |
#' |
|
|
15 |
#' |
|
|
16 |
#' @references |
|
|
17 |
#' Lovell, D., Pawlowsky-Glahn, V., Egozcue, J. J., Marguerat, S., Bähler, J. (2015). Proportionality: a valid alternative to correlation for relative data. PLoS Comput. Biol. 11, e1004075. doi: 10.1371/journal.pcbi.1004075 |
|
|
18 |
#' |
|
|
19 |
#' Quinn, T. P., Richardson, M. F., Lovell, D., Crowley, T. M. (2017). propr: an r-package for identifying proportionally abundant features using compositional data analysis. Sci. Rep. 7, 16252. doi: 10.1038/s41598-017-16520-0 |
|
|
20 |
#' |
|
|
21 |
#' @examples |
|
|
22 |
#' demo <- suppressWarnings(get_demo_cluster()) |
|
|
23 |
#' |
|
|
24 |
#' # pca |
|
|
25 |
#' X <- demo$pca |
|
|
26 |
#' propr.res <- proportionality(X) |
|
|
27 |
#' plot(propr.res) |
|
|
28 |
#' |
|
|
29 |
#' # pls |
|
|
30 |
#' X <- demo$spls |
|
|
31 |
#' propr.res <- proportionality(X) |
|
|
32 |
#' plot(propr.res) |
|
|
33 |
#' |
|
|
34 |
#' # block.pls |
|
|
35 |
#' X <- demo$block.spls |
|
|
36 |
#' propr.res <- proportionality(X) |
|
|
37 |
#' plot(propr.res) |
|
|
38 |
#' |
|
|
39 |
#' @importFrom dplyr mutate filter rename left_join |
|
|
40 |
#' @importFrom magrittr %>% |
|
|
41 |
#' @importFrom tibble rownames_to_column |
|
|
42 |
#' @importFrom tidyr pivot_longer |
|
|
43 |
#' |
|
|
44 |
#' @export |
|
|
45 |
proportionality <- function(X){ |
|
|
46 |
#stopifnot(is(X, c("pca", "spca", "mixo_pls", "mixo_spls", "block.pls", "block.spls"))) |
|
|
47 |
# modif bioc 3.16 |
|
|
48 |
stopifnot(any(class(X) %in% c("pca", "spca", "mixo_pls", "mixo_spls", "block.pls", "block.spls"))) |
|
|
49 |
# 1. get cluster |
|
|
50 |
cluster.info <- getCluster(X) %>% |
|
|
51 |
dplyr::select(molecule, cluster) |
|
|
52 |
|
|
|
53 |
# 2. extract data and add cluster |
|
|
54 |
|
|
|
55 |
# pca / spca |
|
|
56 |
if(any(class(X) %in% c("pca", "spca"))){ |
|
|
57 |
# unscaling + positive value |
|
|
58 |
data <- unscale(X$X) %>% `+`(abs(min(.))) |
|
|
59 |
} else if(any(class(X) %in% c("mixo_pls", "mixo_spls"))){ |
|
|
60 |
# unscale X, unscale Y, cat |
|
|
61 |
data.X <- unscale(X$X) %>% |
|
|
62 |
`+`(abs(min(.))) |
|
|
63 |
data.Y <- unscale(X$Y) %>% |
|
|
64 |
`+`(abs(min(.))) |
|
|
65 |
data <- cbind(data.X, data.Y) |
|
|
66 |
} else if(any(class(X) %in% c("block.pls", "block.spls"))){ |
|
|
67 |
# if(is.null(X$Y)){ ## no need: Y is passed to X |
|
|
68 |
data <- lapply(X$X, function(x) x %>% |
|
|
69 |
unscale %>% |
|
|
70 |
`+`(abs(min(.)))) %>% |
|
|
71 |
do.call(what="cbind") |
|
|
72 |
# } else { |
|
|
73 |
# data.X <- lapply(X$X, function(x) x %>% unscale %>% `+`(abs(min(.)))) %>% |
|
|
74 |
# do.call(what="cbind") |
|
|
75 |
# data.Y <- unscale(X$Y) %>% `+`(abs(min(.))) |
|
|
76 |
# data <- cbind(data.X, data.Y) |
|
|
77 |
# } |
|
|
78 |
} |
|
|
79 |
|
|
|
80 |
# 3. compute phi_s |
|
|
81 |
|
|
|
82 |
# update 16 march 2023: propr deprecated; |
|
|
83 |
# implementation of phi_s only based on Quinn paper |
|
|
84 |
# data.propr <- suppressMessages(as.data.frame(propr::propr(data, metric = "phs")@matrix)) |
|
|
85 |
|
|
|
86 |
# gather, add cluster info, compute basic stats |
|
|
87 |
data.propr <- get_phi_s(data) |
|
|
88 |
data.propr.gather <- data.propr %>% |
|
|
89 |
tibble::rownames_to_column("feature1") %>% |
|
|
90 |
tidyr::pivot_longer(-feature1, names_to = "feature2", values_to = 'value') %>% |
|
|
91 |
dplyr::filter(feature1 %in% cluster.info$molecule) %>% |
|
|
92 |
dplyr::left_join(cluster.info, by = c("feature1"="molecule")) %>% |
|
|
93 |
dplyr::rename("cluster1"="cluster") %>% |
|
|
94 |
dplyr::filter(feature2 %in% cluster.info$molecule) %>% |
|
|
95 |
dplyr::left_join(cluster.info, by = c("feature2"="molecule")) %>% |
|
|
96 |
dplyr::rename("cluster2"="cluster") %>% |
|
|
97 |
dplyr::mutate(insideout = ifelse(cluster1 == cluster2, "inside", "outside")) |
|
|
98 |
|
|
|
99 |
# compute stat (median, u-test pval, adj.pval) |
|
|
100 |
res.stat <- stat_median(data.propr.gather) %>% |
|
|
101 |
na.omit() |
|
|
102 |
|
|
|
103 |
res <- list() |
|
|
104 |
res[["propr.distance"]] <- data.propr |
|
|
105 |
res[["propr.distance.w.cluster"]] <- data.propr.gather |
|
|
106 |
res[["pvalue"]] <- res.stat |
|
|
107 |
class(res) <- "proportionality" |
|
|
108 |
return(res) |
|
|
109 |
} |
|
|
110 |
|
|
|
111 |
#' @importFrom dplyr mutate filter pull |
|
|
112 |
#' @importFrom purrr set_names |
|
|
113 |
#' @importFrom magrittr %>% |
|
|
114 |
stat_median <- function(res.phs.X){ |
|
|
115 |
i = 1 |
|
|
116 |
res.pval <- matrix(ncol = 4, nrow = 4) %>% |
|
|
117 |
as.data.frame() %>% |
|
|
118 |
purrr::set_names("cluster", "median_inside", "median_outside", "Pvalue") |
|
|
119 |
for(clu in unique(res.phs.X$cluster1)){ |
|
|
120 |
inside <- res.phs.X %>% |
|
|
121 |
filter(cluster1 == clu) %>% |
|
|
122 |
dplyr::filter(cluster2==clu) %>% |
|
|
123 |
dplyr::pull(value) |
|
|
124 |
outside <- res.phs.X %>% |
|
|
125 |
filter(cluster1 == clu) %>% |
|
|
126 |
dplyr::filter(cluster2!=clu) %>% |
|
|
127 |
dplyr::pull(value) |
|
|
128 |
|
|
|
129 |
#ttest.pval <- t.test(inside, outside)$p.value |
|
|
130 |
utest.pval <- stats::wilcox.test(inside, outside)$p.value |
|
|
131 |
|
|
|
132 |
res.pval[i,] <- c(clu, round(median(inside), digits = 2), |
|
|
133 |
round(median(outside), digits = 2), utest.pval) |
|
|
134 |
i = i+1 |
|
|
135 |
} |
|
|
136 |
as.data.frame(res.pval) %>% |
|
|
137 |
dplyr::mutate("Adj.Pvalue" = stats::p.adjust(Pvalue, method = "fdr")) %>% |
|
|
138 |
na.omit |
|
|
139 |
return(res.pval) |
|
|
140 |
} |
|
|
141 |
|
|
|
142 |
#' @export |
|
|
143 |
#' @import ggplot2 |
|
|
144 |
#' @importFrom mixOmics color.mixo |
|
|
145 |
plot.proportionality <- function(x, ...){ |
|
|
146 |
ggplot2::ggplot(data = x$propr.distance.w.cluster, |
|
|
147 |
aes(x=as.factor(cluster1), y=value, col=insideout)) + |
|
|
148 |
geom_boxplot() + theme_bw() + |
|
|
149 |
xlab("Cluster ID") + |
|
|
150 |
ylab("Proportionality distance") + |
|
|
151 |
labs(color = "Proportionality distance") + |
|
|
152 |
scale_color_manual(values = mixOmics::color.mixo(1:2)) |
|
|
153 |
} |
|
|
154 |
|
|
|
155 |
# based on the works of Thom Quinn (https://github.com/tpq/propr) |
|
|
156 |
get_phi_s <- function(counts){ |
|
|
157 |
ct <- counts |
|
|
158 |
|
|
|
159 |
if (any(as.matrix(counts) == 0)) { |
|
|
160 |
#message("Alert: Replacing 0s with next smallest value.") |
|
|
161 |
zeros <- ct == 0 |
|
|
162 |
ct[zeros] <- min(ct[!zeros]) |
|
|
163 |
} |
|
|
164 |
#ivar = "clr" |
|
|
165 |
#use <- propr::ivar2index(ct, ivar) |
|
|
166 |
use <- 1:ncol(counts) |
|
|
167 |
|
|
|
168 |
|
|
|
169 |
logX <- log(ct) |
|
|
170 |
logSet <- logX[, use, drop = FALSE] |
|
|
171 |
ref <- rowMeans(logSet) |
|
|
172 |
lr <- sweep(logX, 1, ref, "-") |
|
|
173 |
|
|
|
174 |
mat <- as.data.frame(to_lr2phs(lr)) |
|
|
175 |
colnames(mat) <- colnames(lr) |
|
|
176 |
rownames(mat) <- colnames(lr) |
|
|
177 |
return(mat) |
|
|
178 |
} |
|
|
179 |
|
|
|
180 |
|
|
|
181 |
to_lr2phs <- function(lr){ |
|
|
182 |
# Calculate phs = var(a-b)/var(a+b) |
|
|
183 |
nfeats <- ncol(lr); |
|
|
184 |
mat <- matrix(nrow = nfeats, ncol = nfeats) |
|
|
185 |
for(i in 1:nfeats){ |
|
|
186 |
for(j in 1:nfeats){ |
|
|
187 |
if(i == j){ |
|
|
188 |
mat[i,j] <- 0 |
|
|
189 |
} else { |
|
|
190 |
a <- lr[,i]; b <- lr[,j] |
|
|
191 |
mat[i,j] <- var(a-b)/var(a+b) |
|
|
192 |
} |
|
|
193 |
} |
|
|
194 |
} |
|
|
195 |
# return the same matrix as propr |
|
|
196 |
return(mat) |
|
|
197 |
} |
|
|
198 |
|