--- /dev/null
+++ b/R/1.calculate.R
@@ -0,0 +1,334 @@
+# =======1.calculate========
+#' Calculate correlation for one or two t(otutab), or distance for one t(otutab).
+#'
+#' @param totu t(otutab), rows are samples, columns are features.
+#' @param totu2 t(otutab2) or NULL, rows are samples, columns are features.
+#' @param method "spearman" (default), "pearson", "sparcc", or a dissimilarity index from \code{\link[vegan]{vegdist}}.
+#' @param filename the prefix of the saved .corr file, or FALSE to skip saving.
+#' @param p.adjust.method see \code{\link[stats]{p.adjust}}.
+#' @param p.adjust.mode see \code{\link{p.adjust.table}}.
+#' @param threads threads, default: 1.
+#' @param verbose verbose, default: TRUE.
+#'
+#' @return a corr object with 2 or 3 elements:
+#' \item{r}{correlation matrix (spearman by default)}
+#' \item{p.value}{matrix of p-values}
+#' \item{p.adjust}{adjusted p-values, only present when p.adjust.method is not NULL}
+#' @family calculate
+#' @export
+#' @aliases c_net_cal
+#' @examples
+#' data("otutab", package = "pcutils")
+#' t(otutab) -> totu
+#' c_net_calculate(totu) -> corr
+#' metadata[, 3:10] -> env
+#' c_net_calculate(totu, env) -> corr2
+c_net_calculate <- function(totu, totu2 = NULL, method = "spearman", filename = FALSE,
+                            p.adjust.method = NULL, p.adjust.mode = "all", threads = 1, verbose = TRUE) {
+  if (!is.null(totu2)) {
+    tls <- check_tabs(totu, totu2)
+    totu <- tls[[1]]
+    totu2 <- tls[[2]]
+  } else {
+    totu <- check_tabs(totu)
+  }
+
+  if (method %in% c("spearman", "pearson")) {
+    corr <- fast_cor(totu, totu2, method = method)
+  } else if (method == "sparcc") {
+    if (!is.null(totu2)) warning("sparcc only takes totu; totu2 will be ignored.")
+    message("sparcc is not supported in the CRAN version. Please install `zdk123/SpiecEasi` from GitHub and use its `sparcc` function.")
+    # corr <- par_sparcc(totu, threads = threads, verbose = verbose)
+    corr <- NULL
+  } else if (method %in% c(
+    "manhattan", "euclidean", "canberra", "bray",
+    "kulczynski", "gower", "morisita", "horn", "mountford",
+    "jaccard", "raup", "binomial", "chao", "altGower", "cao",
+    "mahalanobis", "clark", "chisq", "chord", "hellinger",
+    "aitchison", "robust.aitchison"
+  )) {
+    corr <- cal_sim(totu, totu2, method = method)
+  } else {
+    stop("method should be one of 'spearman', 'pearson', 'sparcc', or a dissimilarity index from vegan::vegdist.")
+  }
+
+  if (!is.null(p.adjust.method)) {
+    p.adjust <- p.adjust.table(corr$p.value, method = p.adjust.method, mode = p.adjust.mode)
+    res <- list(r = corr$r, p.value = corr$p.value, p.adjust = p.adjust)
+  } else {
+    res <- list(r = corr$r, p.value = corr$p.value)
+  }
+
+  class(res) <- "corr"
+  attributes(res)$method <- method
+
+  # save the correlation result
+  if (is.logical(filename)) {
+    if (filename) filename <- paste0("c_net_", format(Sys.time(), "%Y%m%d_%H%M%S"))
+  }
+  if (is.character(filename)) {
+    save_corr(res, filename)
+  }
+  return(res)
+}
+
+#' Check tables and extract common samples
+#'
+#' @param ... tables
+#' @return formatted tables
+#' @export
+#' @examples
+#' data("otutab", package = "pcutils")
+#' check_tabs(otutab)
+check_tabs <- function(...) {
+  tables <- list(...)
+  if (all(class(tables[[1]]) == "list")) tables <- tables[[1]]
+
+  names(tables) <- NULL
+
+  if (length(tables) > 1) {
+    comm <- Reduce(intersect, lapply(tables, rownames))
+    if (length(comm) < 2) stop("Only ", length(comm), " common sample(s)! Cannot calculate correlation.")
+    if (all(lapply(tables, \(i) identical(rownames(i), comm)) %>% unlist())) {
+      message("All samples matched.")
+    } else {
+      message("Extracted ", length(comm), " common samples.")
+    }
+    dup <- lapply(tables, colnames) %>% do.call(c, .)
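+    # any column name that appears in more than one table counts as a duplicated feature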
+    dup <- dup[duplicated(dup)]
+    if (length(dup) > 0) {
+      stop("Duplicated colnames found: ", paste0(dup, collapse = ", "), "\nPlease check colnames of input tables.")
+    } else {
+      message("All features are OK.")
+    }
+    tables <- lapply(tables, \(i) i[comm, ])
+  }
+
+  if (length(tables) == 1) tables <- tables[[1]]
+  return(tables)
+}
+
+
+#' Save a corr object
+#'
+#' @param corr a corr object
+#' @param filename filename without the .corr extension, default: "corr"
+#'
+#' @return No return value; the corr object is written to a .corr file.
+#' @export
+#'
+save_corr <- function(corr, filename = "corr") {
+  stopifnot(inherits(corr, "corr"))
+  if (!grepl("\\.corr$", filename)) filename <- paste0(filename, ".corr")
+  if (t_flag(corr$r)) {
+    # store only the upper triangle to save half of the space
+    corr$r[upper.tri(corr$r)] -> r
+    corr$p.value[upper.tri(corr$p.value)] -> p.value
+    if (!is.null(corr$p.adjust)) {
+      corr$p.adjust[upper.tri(corr$p.adjust)] -> p.adj
+      if (all(p.value == p.adj)) {
+        p.adj <- FALSE
+      }
+    } else {
+      p.adj <- NULL
+    }
+
+    corr_names <- rownames(corr$r)
+    saveRDS(
+      list(
+        r = r, p.value = p.value, p.adjust = p.adj,
+        corr_names = corr_names, corr_attr = attributes(corr)
+      ),
+      file = filename
+    )
+  } else {
+    saveRDS(corr, file = filename)
+  }
+}
+
+#' Read a corr object
+#'
+#' @param filename filename of the .corr file
+#'
+#' @return a corr object
+#' @export
+#' @family calculate
+read_corr <- function(filename) {
+  # r <- read.csv(paste0(filename, "_r.csv"), row.names = 1, check.names = FALSE)
+  # p.value <- read.csv(paste0(filename, "_p.csv"), row.names = 1, check.names = FALSE)
+  # p.adjust <- read.csv(paste0(filename, "_p_adj.csv"), row.names = 1, check.names = FALSE)
+  if (!grepl("\\.corr$", filename)) filename <- paste0(filename, ".corr")
+  in_corr <- readRDS(filename)
+  if ("corr_names" %in% names(in_corr)) {
+    corr_names <- in_corr$corr_names
+    r <- in_corr$r
+    p.value <- in_corr$p.value
+    p.adj <- in_corr$p.adjust
+    corr_attr <- in_corr$corr_attr
+
+    new_p.adjust <- new_p <- new_r <- matrix(0,
+      nrow = length(corr_names), ncol = length(corr_names),
+      dimnames = list(corr_names, corr_names)
+    )
+    new_r[upper.tri(new_r)] <- r
+    new_r <- new_r + t(new_r)
+    diag(new_r) <- 1
+    new_p[upper.tri(new_p)] <- p.value
+    new_p <- new_p + t(new_p)
+    if (is.null(p.adj)) {
+      new_p.adjust <- NULL
+    } else if (is.logical(p.adj)) {
+      new_p.adjust <- new_p
+    } else {
+      new_p.adjust[upper.tri(new_p.adjust)] <- p.adj
+      new_p.adjust <- new_p.adjust + t(new_p.adjust)
+      diag(new_p.adjust) <- 1
+    }
+    in_corr <- list()
+    in_corr$r <- new_r
+    in_corr$p.value <- new_p
+    if (!is.null(new_p.adjust)) in_corr$p.adjust <- new_p.adjust
+    attributes(in_corr) <- corr_attr
+  }
+  return(in_corr)
+}
+
+
+#' Fast correlation calculation
+#'
+#' @param totu t(otutab), rows are samples, columns are features.
+#' @param totu2 t(otutab) or NULL, rows are samples, columns are features.
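+#' @details P-values come from the two-sided t-test on the correlation
+#'   coefficient, t = r * sqrt((n - 2) / (1 - r^2)) with n - 2 degrees of
+#'   freedom, where n is the number of samples (rows); for Spearman this is a
+#'   large-sample approximation.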
+#' @param method "spearman" or "pearson", default: "pearson".
+#'
+#' @export
+#'
+#' @return a list with 2 elements:
+#' \item{r}{correlation matrix}
+#' \item{p.value}{matrix of two-sided p-values}
+#' @family calculate
+#' @examples
+#' data("otutab", package = "pcutils")
+#' t(otutab[1:100, ]) -> totu
+#' fast_cor(totu, method = "spearman") -> corr
+fast_cor <- function(totu, totu2 = NULL, method = c("pearson", "spearman")) {
+  method <- match.arg(method, c("pearson", "spearman"))
+  totu <- as.matrix(totu)
+  if (!is.null(totu2)) {
+    totu2 <- as.matrix(totu2)
+    r <- stats::cor(totu, totu2, method = method)
+    df <- dim(totu)[1] - 2
+    t <- r * sqrt(df / (1 - r^2))
+    p <- -2 * expm1(stats::pt(abs(t), df, log.p = TRUE))
+  } else {
+    r <- stats::cor(totu, method = method)
+    p <- r
+    p[p != 0] <- 0
+    r_tri <- r[upper.tri(r)]
+    df <- dim(totu)[1] - 2
+    t <- r_tri * sqrt(df / (1 - r_tri^2))
+    p[upper.tri(p)] <- -2 * expm1(stats::pt(abs(t), df, log.p = TRUE))
+    p <- p + t(p)
+  }
+  return(list(r = r, p.value = p))
+}
+
+Hmisc_cor <- function(totu, totu2 = NULL, method = c("spearman", "pearson")[1]) {
+  lib_ps("Hmisc", library = FALSE)
+  totu <- as.matrix(totu)
+  if (!is.null(totu2)) {
+    totu2 <- as.matrix(totu2)
+    tmp <- Hmisc::rcorr(totu, totu2, type = method)
+    r <- tmp$r[1:ncol(totu), (ncol(totu) + 1):ncol(tmp$r)]
+    p <- tmp$P[1:ncol(totu), (ncol(totu) + 1):ncol(tmp$r)]
+    return(list(r = r, p.value = p))
+  }
+  tmp <- Hmisc::rcorr(totu, type = method)
+  p <- tmp$P
+  p[is.na(p)] <- 0
+  return(list(r = tmp$r, p.value = p))
+}
+
+#' Calculate similarity (1 - distance) for one or two t(otutab)
+#'
+#' @param totu t(otutab), rows are samples, columns are features.
+#' @param method Dissimilarity index, see \code{\link[vegan]{vegdist}}.
+#' @param totu2 t(otutab) or NULL, rows are samples, columns are features.
+#'
+#' @family calculate
+#' @return a list with r (similarity = 1 - distance) and p.value (all 0, as p-values are not defined for distance indices)
+#' @export
+#' @seealso \code{\link[vegan]{vegdist}}
+#' @examples
+#' if (requireNamespace("vegan")) {
+#'   data("otutab", package = "pcutils")
+#'   t(otutab) -> totu
+#'   cal_sim(totu) -> sim_corr
+#' }
+cal_sim <- function(totu, totu2 = NULL, method = "bray") {
+  lib_ps("vegan", library = FALSE)
+  if (is.null(totu2)) {
+    vegan::vegdist(t(totu), method = method) %>% as.matrix() -> dist
+  } else {
+    n1 <- ncol(totu)
+    n2 <- ncol(totu2)
+    dist <- matrix(NA, nrow = n1, ncol = n2, dimnames = list(colnames(totu), colnames(totu2)))
+    for (i in seq_len(n1)) {
+      for (j in seq_len(n2)) {
+        dist[i, j] <- vegan::vegdist(rbind(totu[, i], totu2[, j]), method = method)
+      }
+    }
+  }
+
+  sim <- 1 - dist
+  p <- dist
+  message("p-values are not supported for distance indices; all set to 0.")
+  p[p != 0] <- 0
+
+  return(list(r = sim, p.value = p))
+}
+
+cal_KLD <- function(totu, totu2 = NULL, method = "KLD") {
+  # Kullback-Leibler divergence
+  # KLD = function(p, q) {
+  #   sum(p * log(p / q))
+  # }
+  # p = c(0.1, 0.2, 0.3, 0.4)
+  # q = c(0.2, 0.3, 0.2, 0.3)
+  # KLD(p, q)
+  lib_ps("philentropy", library = FALSE)
+  dat <- t(totu) / rowSums(t(totu))
+  philentropy::KL(dat, unit = "log")
+}
+
+#' Apply p.adjust to a correlation p-value table (matrix or data.frame)
+#'
+#' @param pp table of p-values
+#' @param method see \code{\link[stats]{p.adjust}}, default: "BH".
+#' @param mode "all" for all values; "rows" adjust each row one by one; "columns" adjust each column one by one. Default: "all".
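+#' @details When mode = "all" and pp is a symmetric square matrix (e.g. from a
+#'   single-table correlation), only one triangle is adjusted and the result is
+#'   mirrored, so each feature pair enters the correction once.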
+#'
+#' @return a table of adjusted p-values
+#' @export
+#'
+#' @family calculate
+#' @examples
+#' matrix(abs(rnorm(100, 0.01, 0.1)), 10, 10) -> pp
+#' p.adjust.table(pp, method = "BH", mode = "all") -> pp_adj
+p.adjust.table <- function(pp, method = "BH", mode = "all") {
+  mode <- match.arg(mode, c("all", "rows", "columns"))
+  pp <- as.matrix(pp)
+  if (mode == "all") {
+    if (t_flag(pp)) {
+      lp <- lower.tri(pp)
+      pa <- pp[lp]
+      pa <- p.adjust(pa, method)
+      pp[lower.tri(pp, diag = FALSE)] <- pa
+      pp[upper.tri(pp, diag = FALSE)] <- 0
+      pp <- pp + t(pp)
+    } else {
+      pp1 <- p.adjust(pp, method)
+      pp <- matrix(pp1, nrow(pp), ncol(pp), dimnames = list(rownames(pp), colnames(pp)))
+    }
+  } else if (mode == "rows") {
+    pp <- t(apply(pp, 1, p.adjust, method = method))
+  } else if (mode == "columns") {
+    pp <- apply(t(pp), 1, p.adjust, method = method)
+  }
+  return(pp)
+}
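+
+# Example workflow (not run): a minimal sketch of the calculate/save/read round
+# trip; "my_corr" is just an illustrative file prefix.
+# data("otutab", package = "pcutils")
+# totu <- t(otutab)
+# corr <- c_net_calculate(totu, method = "spearman", p.adjust.method = "BH")
+# save_corr(corr, "my_corr")
+# corr2 <- read_corr("my_corr")
+# all.equal(corr$r, corr2$r)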