[d79ff0]: / R / proprotionality.R

Download this file

198 lines (179 with data), 7.1 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
#' Proportionality Distance
#'
#' \code{proportionality} is a wrapper that compute proportionality distance for
#' a clustering result (\code{pca}, \code{spca}, \code{pls}, \code{spls}, \code{block.pls}, \code{block.spls}).
#' and it performs a u-test to compare the median within a cluster to the median of the entire background set.
#'
#' @param X an object of the class: \code{pca}, \code{spca}, \code{pls}, \code{spls}, \code{block.pls} or \code{block.spls}
#'
#' @return
#' Return a list containing the following components:
#' \item{propr.distance}{Square matrix with proportionality distance between pairs of features}
#' \item{propr.distance.w.cluster}{distance between pairs with cluster label}
#' \item{pvalue}{Wilcoxon U-test p-value comparing the medians within clusters and with the entire background set}
#'
#'
#' @references
#' Lovell, D., Pawlowsky-Glahn, V., Egozcue, J. J., Marguerat, S., Bähler, J. (2015). Proportionality: a valid alternative to correlation for relative data. PLoS Comput. Biol. 11, e1004075. doi: 10.1371/journal.pcbi.1004075
#'
#' Quinn, T. P., Richardson, M. F., Lovell, D., Crowley, T. M. (2017). propr: an r-package for identifying proportionally abundant features using compositional data analysis. Sci. Rep. 7, 16252. doi: 10.1038/s41598-017-16520-0
#'
#' @examples
#' demo <- suppressWarnings(get_demo_cluster())
#'
#' # pca
#' X <- demo$pca
#' propr.res <- proportionality(X)
#' plot(propr.res)
#'
#' # pls
#' X <- demo$spls
#' propr.res <- proportionality(X)
#' plot(propr.res)
#'
#' # block.pls
#' X <- demo$block.spls
#' propr.res <- proportionality(X)
#' plot(propr.res)
#'
#' @importFrom dplyr mutate filter rename left_join
#' @importFrom magrittr %>%
#' @importFrom tibble rownames_to_column
#' @importFrom tidyr pivot_longer
#'
#' @export
proportionality <- function(X){
#stopifnot(is(X, c("pca", "spca", "mixo_pls", "mixo_spls", "block.pls", "block.spls")))
# modif bioc 3.16
stopifnot(any(class(X) %in% c("pca", "spca", "mixo_pls", "mixo_spls", "block.pls", "block.spls")))
# 1. get cluster
cluster.info <- getCluster(X) %>%
dplyr::select(molecule, cluster)
# 2. extract data and add cluster
# pca / spca
if(any(class(X) %in% c("pca", "spca"))){
# unscaling + positive value
data <- unscale(X$X) %>% `+`(abs(min(.)))
} else if(any(class(X) %in% c("mixo_pls", "mixo_spls"))){
# unscale X, unscale Y, cat
data.X <- unscale(X$X) %>%
`+`(abs(min(.)))
data.Y <- unscale(X$Y) %>%
`+`(abs(min(.)))
data <- cbind(data.X, data.Y)
} else if(any(class(X) %in% c("block.pls", "block.spls"))){
# if(is.null(X$Y)){ ## no need: Y is passed to X
data <- lapply(X$X, function(x) x %>%
unscale %>%
`+`(abs(min(.)))) %>%
do.call(what="cbind")
# } else {
# data.X <- lapply(X$X, function(x) x %>% unscale %>% `+`(abs(min(.)))) %>%
# do.call(what="cbind")
# data.Y <- unscale(X$Y) %>% `+`(abs(min(.)))
# data <- cbind(data.X, data.Y)
# }
}
# 3. compute phi_s
# update 16 march 2023: propr deprecated;
# implementation of phi_s only based on Quinn paper
# data.propr <- suppressMessages(as.data.frame(propr::propr(data, metric = "phs")@matrix))
# gather, add cluster info, compute basic stats
data.propr <- get_phi_s(data)
data.propr.gather <- data.propr %>%
tibble::rownames_to_column("feature1") %>%
tidyr::pivot_longer(-feature1, names_to = "feature2", values_to = 'value') %>%
dplyr::filter(feature1 %in% cluster.info$molecule) %>%
dplyr::left_join(cluster.info, by = c("feature1"="molecule")) %>%
dplyr::rename("cluster1"="cluster") %>%
dplyr::filter(feature2 %in% cluster.info$molecule) %>%
dplyr::left_join(cluster.info, by = c("feature2"="molecule")) %>%
dplyr::rename("cluster2"="cluster") %>%
dplyr::mutate(insideout = ifelse(cluster1 == cluster2, "inside", "outside"))
# compute stat (median, u-test pval, adj.pval)
res.stat <- stat_median(data.propr.gather) %>%
na.omit()
res <- list()
res[["propr.distance"]] <- data.propr
res[["propr.distance.w.cluster"]] <- data.propr.gather
res[["pvalue"]] <- res.stat
class(res) <- "proportionality"
return(res)
}
#' @importFrom dplyr mutate filter pull
#' @importFrom purrr set_names
#' @importFrom magrittr %>%
stat_median <- function(res.phs.X){
i = 1
res.pval <- matrix(ncol = 4, nrow = 4) %>%
as.data.frame() %>%
purrr::set_names("cluster", "median_inside", "median_outside", "Pvalue")
for(clu in unique(res.phs.X$cluster1)){
inside <- res.phs.X %>%
filter(cluster1 == clu) %>%
dplyr::filter(cluster2==clu) %>%
dplyr::pull(value)
outside <- res.phs.X %>%
filter(cluster1 == clu) %>%
dplyr::filter(cluster2!=clu) %>%
dplyr::pull(value)
#ttest.pval <- t.test(inside, outside)$p.value
utest.pval <- stats::wilcox.test(inside, outside)$p.value
res.pval[i,] <- c(clu, round(median(inside), digits = 2),
round(median(outside), digits = 2), utest.pval)
i = i+1
}
as.data.frame(res.pval) %>%
dplyr::mutate("Adj.Pvalue" = stats::p.adjust(Pvalue, method = "fdr")) %>%
na.omit
return(res.pval)
}
#' @export
#' @import ggplot2
#' @importFrom mixOmics color.mixo
plot.proportionality <- function(x, ...){
ggplot2::ggplot(data = x$propr.distance.w.cluster,
aes(x=as.factor(cluster1), y=value, col=insideout)) +
geom_boxplot() + theme_bw() +
xlab("Cluster ID") +
ylab("Proportionality distance") +
labs(color = "Proportionality distance") +
scale_color_manual(values = mixOmics::color.mixo(1:2))
}
# based on the works of Thom Quinn (https://github.com/tpq/propr)
get_phi_s <- function(counts){
ct <- counts
if (any(as.matrix(counts) == 0)) {
#message("Alert: Replacing 0s with next smallest value.")
zeros <- ct == 0
ct[zeros] <- min(ct[!zeros])
}
#ivar = "clr"
#use <- propr::ivar2index(ct, ivar)
use <- 1:ncol(counts)
logX <- log(ct)
logSet <- logX[, use, drop = FALSE]
ref <- rowMeans(logSet)
lr <- sweep(logX, 1, ref, "-")
mat <- as.data.frame(to_lr2phs(lr))
colnames(mat) <- colnames(lr)
rownames(mat) <- colnames(lr)
return(mat)
}
to_lr2phs <- function(lr){
# Calculate phs = var(a-b)/var(a+b)
nfeats <- ncol(lr);
mat <- matrix(nrow = nfeats, ncol = nfeats)
for(i in 1:nfeats){
for(j in 1:nfeats){
if(i == j){
mat[i,j] <- 0
} else {
a <- lr[,i]; b <- lr[,j]
mat[i,j] <- var(a-b)/var(a+b)
}
}
}
# return the same matrix as propr
return(mat)
}