# =======1.calculate========
#' Calculate correlation for one or two t(otutab), or distance for one t(otutab).
#'
#' @param totu t(otutab); rows are samples, columns are features.
#' @param totu2 t(otutab2) or NULL; rows are samples, columns are features.
#' @param method "spearman" (default), "pearson", "sparcc", or a distance index from \code{\link[vegan]{vegdist}}.
#' @param filename the prefix of the saved .corr file, or FALSE to skip saving.
#' @param p.adjust.method see \code{\link[stats]{p.adjust}}
#' @param p.adjust.mode see \code{\link{p.adjust.table}}
#' @param threads threads, default: 1.
#' @param verbose verbose, default: TRUE.
#'
#' @return a corr object with 3 elements:
#' \item{r}{correlation (default: spearman) or similarity matrix}
#' \item{p.value}{corresponding p-value matrix}
#' \item{p.adjust}{adjusted p-values, only present when p.adjust.method is not NULL}
#' @family calculate
#' @export
#' @aliases c_net_cal
#' @examples
#' data("otutab", package = "pcutils")
#' t(otutab) -> totu
#' c_net_calculate(totu) -> corr
#' metadata[, 3:10] -> env
#' c_net_calculate(totu, env) -> corr2
c_net_calculate <- function(totu, totu2 = NULL, method = "spearman", filename = FALSE,
                            p.adjust.method = NULL, p.adjust.mode = "all", threads = 1, verbose = TRUE) {
  if (!is.null(totu2)) {
    tls <- check_tabs(totu, totu2)
    totu <- tls[[1]]
    totu2 <- tls[[2]]
  } else {
    totu <- check_tabs(totu)
  }

  if (method %in% c("spearman", "pearson")) {
    corr <- fast_cor(totu, totu2, method = method)
  } else if (method == "sparcc") {
    if (!is.null(totu2)) warning("sparcc only takes totu; totu2 is ignored.")
    message("sparcc is not supported in the CRAN version. Please install `zdk123/SpiecEasi` from github and use the function `sparcc`.")
    # corr <- par_sparcc(totu, threads = threads, verbose = verbose)
    corr <- NULL
  } else if (method %in% c(
    "manhattan", "euclidean", "canberra", "bray",
    "kulczynski", "gower", "morisita", "horn", "mountford",
    "jaccard", "raup", "binomial", "chao", "altGower", "cao",
    "mahalanobis", "clark", "chisq", "chord", "hellinger",
    "aitchison", "robust.aitchison"
  )) {
    corr <- cal_sim(totu, totu2, method = method)
  } else {
    stop("method should be one of 'spearman', 'pearson', 'sparcc', or a distance index from vegan::vegdist.")
  }

  if (!is.null(p.adjust.method)) {
    p.adjust <- p.adjust.table(corr$p.value, method = p.adjust.method, mode = p.adjust.mode)
    res <- list(r = corr$r, p.value = corr$p.value, p.adjust = p.adjust)
  } else {
    res <- list(r = corr$r, p.value = corr$p.value)
  }

  class(res) <- "corr"
  attributes(res)$method <- method

  # save the correlation result
  if (is.logical(filename)) {
    if (filename) filename <- paste0("c_net_", date())
  }
  if (is.character(filename)) {
    save_corr(res, filename)
  }
  return(res)
}
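
# A minimal usage sketch (kept as a comment, assuming the pcutils example data
# used in the roxygen examples above): the same call with p-value adjustment,
# so the returned object also carries `p.adjust`.
# data("otutab", package = "pcutils")
# corr_adj <- c_net_calculate(t(otutab), method = "spearman", p.adjust.method = "BH")
# names(corr_adj) # "r" "p.value" "p.adjust"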

#' Check tables and extract common samples
#'
#' @param ... one or more tables (or a single list of tables); rows are samples.
#' @return a single table, or a list of tables subset to their common samples
#' @export
#' @examples
#' data("otutab", package = "pcutils")
#' check_tabs(otutab)
check_tabs <- function(...) {
  tables <- list(...)
  if (all(class(tables[[1]]) == "list")) tables <- tables[[1]]

  names(tables) <- NULL

  if (length(tables) > 1) {
    comm <- Reduce(intersect, lapply(tables, rownames))
    if (length(comm) < 2) stop("Only ", length(comm), " common sample(s) found! Cannot calculate correlation.")
    if (all(lapply(tables, \(i) identical(rownames(i), comm)) %>% unlist())) {
      message("All samples matched.")
    } else {
      message("Extracted ", length(comm), " common samples.")
    }
    dup <- lapply(tables, colnames) %>% do.call(c, .)
    dup <- dup[duplicated(dup)]
    if (length(dup) > 0) {
      stop("Duplicated colnames found: ", paste0(dup, collapse = ", "), "\nPlease check colnames of input tables.")
    } else {
      message("All features are OK.")
    }
    tables <- lapply(tables, \(i) i[comm, ])
  }

  if (length(tables) == 1) tables <- tables[[1]]
  return(tables)
}
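
# Sketch (comment only, assuming the pcutils example data): with two tables
# that share samples, check_tabs() returns both of them restricted to the
# common samples, as a list.
# data("otutab", package = "pcutils")
# tls <- check_tabs(t(otutab), metadata[, 3:10])
# lapply(tls, dim)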

#' Save a corr object
#'
#' @param corr a corr object
#' @param filename filename without extension, default: "corr"
#'
#' @return a .corr file (RDS) written to disk
#' @export
#'
save_corr <- function(corr, filename = "corr") {
  stopifnot(inherits(corr, "corr"))
  if (!grepl("\\.corr$", filename)) filename <- paste0(filename, ".corr")
  if (t_flag(corr$r)) {
    # store only the upper triangle to save half the storage space
    corr$r[upper.tri(corr$r)] -> r
    corr$p.value[upper.tri(corr$p.value)] -> p.value
    if (!is.null(corr$p.adjust)) {
      corr$p.adjust[upper.tri(corr$p.adjust)] -> p.adj
      if (all(p.value == p.adj)) {
        # p.adjust is identical to p.value; store a flag instead of duplicating it
        p.adj <- FALSE
      }
    } else {
      p.adj <- NULL
    }

    corr_names <- rownames(corr$r)
    saveRDS(
      list(
        r = r, p.value = p.value, p.adjust = p.adj,
        corr_names = corr_names, corr_attr = attributes(corr)
      ),
      file = filename
    )
  } else {
    saveRDS(corr, file = filename)
  }
}

#' Read a corr object
#'
#' @param filename filename of the .corr file (the .corr extension is added if missing)
#'
#' @return a corr object
#' @export
#' @family calculate
read_corr <- function(filename) {
  # r <- read.csv(paste0(filename, "_r.csv"), row.names = 1, check.names = FALSE)
  # p.value <- read.csv(paste0(filename, "_p.csv"), row.names = 1, check.names = FALSE)
  # p.adjust <- read.csv(paste0(filename, "_p_adj.csv"), row.names = 1, check.names = FALSE)
  if (!grepl("\\.corr$", filename)) filename <- paste0(filename, ".corr")
  in_corr <- readRDS(filename)
  if ("corr_names" %in% names(in_corr)) {
    corr_names <- in_corr$corr_names
    r <- in_corr$r
    p.value <- in_corr$p.value
    p.adj <- in_corr$p.adjust
    corr_attr <- in_corr$corr_attr

    # rebuild the full symmetric matrices from the stored upper triangles
    new_p.adjust <- new_p <- new_r <- matrix(0,
      nrow = length(corr_names), ncol = length(corr_names),
      dimnames = list(corr_names, corr_names)
    )
    new_r[upper.tri(new_r)] <- r
    new_r <- new_r + t(new_r)
    diag(new_r) <- 1
    new_p[upper.tri(new_p)] <- p.value
    new_p <- new_p + t(new_p)
    if (is.null(p.adj)) {
      new_p.adjust <- NULL
    } else if (is.logical(p.adj)) {
      # p.adjust was identical to p.value when saved
      new_p.adjust <- new_p
    } else {
      new_p.adjust[upper.tri(new_p.adjust)] <- p.adj
      new_p.adjust <- new_p.adjust + t(new_p.adjust)
      diag(new_p.adjust) <- 1
    }
    in_corr <- list()
    in_corr$r <- new_r
    in_corr$p.value <- new_p
    if (!is.null(new_p.adjust)) in_corr$p.adjust <- new_p.adjust
    attributes(in_corr) <- corr_attr
  }
  return(in_corr)
}
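
# Round-trip sketch (comment only; `tmp_prefix` is just an illustrative name):
# save a corr object and read it back; the symmetric matrices are rebuilt from
# the stored upper triangles.
# data("otutab", package = "pcutils")
# corr <- c_net_calculate(t(otutab))
# tmp_prefix <- tempfile("c_net_")
# save_corr(corr, tmp_prefix)
# corr_back <- read_corr(tmp_prefix)
# all.equal(corr$r, corr_back$r)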

#' Fast correlation calculation
#'
#' @param totu t(otutab); rows are samples, columns are features.
#' @param totu2 t(otutab) or NULL; rows are samples, columns are features.
#' @param method "pearson" (default) or "spearman"
#'
#' @export
#'
#' @return a list with 2 elements:
#' \item{r}{correlation matrix}
#' \item{p.value}{corresponding p-value matrix}
#' @family calculate
#' @examples
#' data("otutab", package = "pcutils")
#' t(otutab[1:100, ]) -> totu
#' fast_cor(totu, method = "spearman") -> corr
fast_cor <- function(totu, totu2 = NULL, method = c("pearson", "spearman")) {
  method <- match.arg(method, c("pearson", "spearman"))
  totu <- as.matrix(totu)
  if (!is.null(totu2)) {
    totu2 <- as.matrix(totu2)
    r <- stats::cor(totu, totu2, method = method)
    df <- dim(totu)[1] - 2
    # two-sided p-value from the t statistic
    t <- r * sqrt(df / (1 - r^2))
    p <- -2 * expm1(stats::pt(abs(t), df, log.p = TRUE))
  } else {
    r <- stats::cor(totu, method = method)
    p <- r
    p[p != 0] <- 0
    r_tri <- r[upper.tri(r)]
    df <- dim(totu)[1] - 2
    t <- r_tri * sqrt(df / (1 - r_tri^2))
    p[upper.tri(p)] <- -2 * expm1(stats::pt(abs(t), df, log.p = TRUE))
    p <- p + t(p)
  }
  return(list(r = r, p.value = p))
}
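
# Sanity-check sketch (comment only, base R): for "pearson" the t-based
# p-value above should agree closely with stats::cor.test().
# set.seed(1)
# x <- matrix(rnorm(200), nrow = 20)
# res <- fast_cor(x, method = "pearson")
# stats::cor.test(x[, 1], x[, 2])$p.value # compare with res$p.value[1, 2]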

# Alternative correlation backend based on Hmisc::rcorr (not exported)
Hmisc_cor <- function(totu, totu2 = NULL, method = c("spearman", "pearson")[1]) {
  lib_ps("Hmisc", library = FALSE)
  totu <- as.matrix(totu)
  if (!is.null(totu2)) {
    totu2 <- as.matrix(totu2)
    tmp <- Hmisc::rcorr(totu, totu2, type = method)
    r <- tmp$r[1:ncol(totu), (ncol(totu) + 1):ncol(tmp$r)]
    p <- tmp$P[1:ncol(totu), (ncol(totu) + 1):ncol(tmp$r)]
    return(list(r = r, p.value = p))
  }
  tmp <- Hmisc::rcorr(totu, type = method)
  p <- tmp$P
  p[is.na(p)] <- 0
  return(list(r = tmp$r, p.value = p))
}

#' Calculate similarity for one or two t(otutab)
#'
#' @param totu t(otutab); rows are samples, columns are features.
#' @param totu2 t(otutab) or NULL; rows are samples, columns are features.
#' @param method Dissimilarity index, see \code{\link[vegan]{vegdist}}.
#'
#' @family calculate
#' @return similarity = 1 - distance
#' @export
#' @seealso \code{\link[vegan]{vegdist}}
#' @examples
#' if (requireNamespace("vegan")) {
#'   data("otutab", package = "pcutils")
#'   t(otutab) -> totu
#'   cal_sim(totu) -> sim_corr
#' }
cal_sim <- function(totu, totu2 = NULL, method = "bray") {
  lib_ps("vegan", library = FALSE)
  if (is.null(totu2)) {
    vegan::vegdist(t(totu), method = method) %>% as.matrix() -> dist
  } else {
    # pairwise feature-feature distances between the two tables
    n1 <- ncol(totu)
    n2 <- ncol(totu2)
    dist <- matrix(NA, nrow = n1, ncol = n2, dimnames = list(colnames(totu), colnames(totu2)))
    for (i in seq_len(n1)) {
      for (j in seq_len(n2)) {
        dist[i, j] <- vegan::vegdist(rbind(totu[, i], totu2[, j]), method = method)
      }
    }
  }

  sim <- 1 - dist
  p <- dist
  message("p-value is not supported for a distance index; all p-values are set to 0.")
  p[p != 0] <- 0

  return(list(r = sim, p.value = p))
}
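
# Sketch (comment only, needs vegan and the pcutils example data): similarity
# between two feature subsets measured across the same samples.
# data("otutab", package = "pcutils")
# totu <- t(otutab)
# sim2 <- cal_sim(totu[, 1:20], totu[, 21:40], method = "bray")
# dim(sim2$r) # 20 x 20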

cal_KLD <- function(totu, totu2 = NULL, method = "KLD") {
  # Kullback-Leibler divergence
  # KLD = function(p, q) {
  #   sum(p * log(p / q))
  # }
  # p = c(0.1, 0.2, 0.3, 0.4)
  # q = c(0.2, 0.3, 0.2, 0.3)
  # KLD(p, q)
  lib_ps("philentropy", library = FALSE)
  dat <- t(totu) / rowSums(t(totu))
  philentropy::KL(dat, unit = "log")
}

#' Apply p.adjust to a correlation p-value table (matrix or data.frame)
#'
#' @param pp table of p-values
#' @param method see \code{\link[stats]{p.adjust}}, default: "BH".
#' @param mode "all" adjusts all values together; "rows" adjusts each row separately; "columns" adjusts each column separately. Default: "all".
#'
#' @return a table of adjusted p-values
#' @export
#'
#' @family calculate
#' @examples
#' matrix(abs(rnorm(100, 0.01, 0.1)), 10, 10) -> pp
#' p.adjust.table(pp, method = "BH", mode = "all") -> pp_adj
p.adjust.table <- function(pp, method = "BH", mode = "all") {
  mode <- match.arg(mode, c("all", "rows", "columns"))
  pp <- as.matrix(pp)
  if (mode == "all") {
    if (t_flag(pp)) {
      # symmetric matrix: adjust the lower triangle once, then mirror it
      lp <- lower.tri(pp)
      pa <- pp[lp]
      pa <- stats::p.adjust(pa, method)
      pp[lower.tri(pp, diag = FALSE)] <- pa
      pp[upper.tri(pp, diag = FALSE)] <- 0
      pp <- pp + t(pp)
    } else {
      pp1 <- stats::p.adjust(pp, method)
      pp <- matrix(pp1, nrow(pp), ncol(pp), dimnames = list(rownames(pp), colnames(pp)))
    }
  } else if (mode == "rows") {
    pp <- t(apply(pp, 1, stats::p.adjust, method = method))
  } else if (mode == "columns") {
    pp <- apply(t(pp), 1, stats::p.adjust, method = method)
  }
  return(pp)
}
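
# Sketch (comment only): the three modes generally give different values,
# because the pool of p-values used for each adjustment differs.
# set.seed(1)
# pp <- matrix(runif(100), 10, 10)
# range(p.adjust.table(pp, mode = "all") - p.adjust.table(pp, mode = "rows"))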