a b/R/proprotionality.R
1
#' Proportionality Distance
2
#' 
3
#' \code{proportionality} is a wrapper that compute proportionality distance for 
4
#' a clustering result (\code{pca}, \code{spca}, \code{pls}, \code{spls}, \code{block.pls}, \code{block.spls}).
5
#' and it performs a u-test to compare the median within a cluster to the median of the entire background set.
6
#' 
7
#' @param X an object of the class: \code{pca}, \code{spca}, \code{pls}, \code{spls}, \code{block.pls} or \code{block.spls}
8
#' 
9
#' @return 
10
#' Return a list containing the following components:
11
#'   \item{propr.distance}{Square matrix with proportionality distance between pairs of features}
12
#'   \item{propr.distance.w.cluster}{distance between pairs with cluster label}
13
#'   \item{pvalue}{Wilcoxon U-test p-value comparing the medians within clusters and with the entire background set}
14
#' 
15
#' 
16
#' @references 
17
#' Lovell, D., Pawlowsky-Glahn, V., Egozcue, J. J., Marguerat, S., Bähler, J. (2015). Proportionality: a valid alternative to correlation for relative data. PLoS Comput. Biol. 11, e1004075. doi: 10.1371/journal.pcbi.1004075
18
#' 
19
#' Quinn, T. P., Richardson, M. F., Lovell, D., Crowley, T. M. (2017). propr: an r-package for identifying proportionally abundant features using compositional data analysis. Sci. Rep. 7, 16252. doi: 10.1038/s41598-017-16520-0
20
#' 
21
#' @examples 
22
#' demo <- suppressWarnings(get_demo_cluster())
23
#' 
24
#' # pca
25
#' X <- demo$pca
26
#' propr.res <- proportionality(X)
27
#' plot(propr.res)
28
#' 
29
#' # pls
30
#' X <- demo$spls
31
#' propr.res <- proportionality(X)
32
#' plot(propr.res)
33
#' 
34
#' # block.pls
35
#' X <- demo$block.spls
36
#' propr.res <- proportionality(X)
37
#' plot(propr.res)
38
#' 
39
#' @importFrom dplyr mutate filter rename left_join
40
#' @importFrom magrittr %>%
41
#' @importFrom tibble rownames_to_column
42
#' @importFrom tidyr pivot_longer
43
#' 
44
#' @export
45
proportionality <- function(X){
46
    #stopifnot(is(X, c("pca", "spca", "mixo_pls", "mixo_spls", "block.pls", "block.spls")))
47
    # modif bioc 3.16
48
    stopifnot(any(class(X) %in% c("pca", "spca", "mixo_pls", "mixo_spls", "block.pls", "block.spls")))
49
    # 1. get cluster
50
    cluster.info <- getCluster(X) %>%
51
        dplyr::select(molecule, cluster)
52
    
53
    # 2. extract data and add cluster
54
    
55
    # pca / spca
56
    if(any(class(X) %in% c("pca", "spca"))){
57
        # unscaling + positive value
58
        data <- unscale(X$X) %>% `+`(abs(min(.))) 
59
    } else if(any(class(X) %in% c("mixo_pls", "mixo_spls"))){
60
        # unscale X, unscale Y, cat
61
        data.X <- unscale(X$X) %>%
62
            `+`(abs(min(.)))
63
        data.Y <- unscale(X$Y) %>%
64
            `+`(abs(min(.)))
65
        data <- cbind(data.X, data.Y)
66
    } else if(any(class(X) %in% c("block.pls", "block.spls"))){
67
        # if(is.null(X$Y)){  ## no need: Y is passed to X
68
        data <- lapply(X$X, function(x) x %>%
69
                           unscale %>%
70
                           `+`(abs(min(.)))) %>%
71
            do.call(what="cbind")
72
        # } else {
73
        #     data.X <- lapply(X$X, function(x) x %>% unscale %>% `+`(abs(min(.)))) %>%
74
        #         do.call(what="cbind")
75
        #     data.Y <- unscale(X$Y) %>% `+`(abs(min(.)))
76
        #     data <- cbind(data.X, data.Y)
77
        # }
78
    }
79
    
80
    # 3. compute phi_s
81
    
82
    # update 16 march 2023: propr deprecated;
83
    # implementation of phi_s only based on Quinn paper
84
    # data.propr <- suppressMessages(as.data.frame(propr::propr(data, metric = "phs")@matrix))
85
    
86
    # gather, add cluster info, compute basic stats
87
    data.propr <- get_phi_s(data)
88
    data.propr.gather <- data.propr %>%
89
        tibble::rownames_to_column("feature1") %>%
90
        tidyr::pivot_longer(-feature1, names_to = "feature2", values_to = 'value') %>%
91
        dplyr::filter(feature1 %in% cluster.info$molecule) %>%
92
        dplyr::left_join(cluster.info, by = c("feature1"="molecule")) %>%
93
        dplyr::rename("cluster1"="cluster") %>%
94
        dplyr::filter(feature2 %in% cluster.info$molecule) %>%
95
        dplyr::left_join(cluster.info, by = c("feature2"="molecule")) %>%
96
        dplyr::rename("cluster2"="cluster") %>%
97
        dplyr::mutate(insideout = ifelse(cluster1 == cluster2, "inside", "outside"))
98
        
99
    # compute stat (median, u-test pval, adj.pval)
100
    res.stat <- stat_median(data.propr.gather) %>%
101
        na.omit()
102
    
103
    res <- list()
104
    res[["propr.distance"]] <- data.propr
105
    res[["propr.distance.w.cluster"]] <- data.propr.gather
106
    res[["pvalue"]] <- res.stat
107
    class(res) <- "proportionality"
108
    return(res)
109
}
110
111
#' @importFrom dplyr mutate filter pull
112
#' @importFrom purrr set_names
113
#' @importFrom magrittr %>%
114
stat_median <- function(res.phs.X){
115
    i = 1
116
    res.pval <- matrix(ncol = 4, nrow = 4) %>%
117
        as.data.frame() %>%
118
        purrr::set_names("cluster", "median_inside", "median_outside", "Pvalue")
119
    for(clu in unique(res.phs.X$cluster1)){
120
        inside <- res.phs.X %>%
121
            filter(cluster1 == clu) %>% 
122
            dplyr::filter(cluster2==clu) %>% 
123
            dplyr::pull(value)
124
        outside <- res.phs.X %>% 
125
            filter(cluster1 == clu) %>% 
126
            dplyr::filter(cluster2!=clu) %>% 
127
            dplyr::pull(value)
128
        
129
        #ttest.pval <- t.test(inside, outside)$p.value
130
        utest.pval <- stats::wilcox.test(inside, outside)$p.value
131
        
132
        res.pval[i,] <- c(clu, round(median(inside), digits = 2),
133
                          round(median(outside), digits = 2), utest.pval)
134
        i = i+1
135
    }
136
    as.data.frame(res.pval) %>% 
137
        dplyr::mutate("Adj.Pvalue" = stats::p.adjust(Pvalue, method = "fdr")) %>%
138
        na.omit
139
    return(res.pval)
140
}
141
142
#' @export
143
#' @import ggplot2
144
#' @importFrom mixOmics color.mixo
145
plot.proportionality <- function(x, ...){
146
    ggplot2::ggplot(data = x$propr.distance.w.cluster, 
147
                    aes(x=as.factor(cluster1), y=value, col=insideout)) + 
148
        geom_boxplot() + theme_bw() + 
149
        xlab("Cluster ID") + 
150
        ylab("Proportionality distance") + 
151
        labs(color = "Proportionality distance") +
152
        scale_color_manual(values = mixOmics::color.mixo(1:2))
153
}
154
155
# based on the works of Thom Quinn (https://github.com/tpq/propr)
156
get_phi_s <- function(counts){
157
    ct <- counts
158
    
159
    if (any(as.matrix(counts) == 0)) {
160
        #message("Alert: Replacing 0s with next smallest value.")
161
        zeros <- ct == 0
162
        ct[zeros] <- min(ct[!zeros])
163
    }
164
    #ivar = "clr"
165
    #use <- propr::ivar2index(ct, ivar)
166
    use <- 1:ncol(counts)
167
    
168
    
169
    logX <- log(ct)
170
    logSet <- logX[, use, drop = FALSE]
171
    ref <- rowMeans(logSet)
172
    lr <- sweep(logX, 1, ref, "-")
173
174
    mat <- as.data.frame(to_lr2phs(lr))
175
    colnames(mat) <- colnames(lr)
176
    rownames(mat) <- colnames(lr)
177
    return(mat)
178
}
179
180
181
to_lr2phs <- function(lr){
182
    # Calculate phs = var(a-b)/var(a+b)
183
    nfeats <- ncol(lr);
184
    mat <- matrix(nrow = nfeats, ncol = nfeats)
185
    for(i in 1:nfeats){
186
        for(j in 1:nfeats){
187
            if(i == j){
188
                mat[i,j] <- 0
189
            } else {
190
                a <- lr[,i]; b <- lr[,j]
191
                mat[i,j] <- var(a-b)/var(a+b)
192
            }
193
        }
194
    }
195
    # return the same matrix as propr
196
    return(mat)
197
}
198