Diff of /R/1.calculate.R [000000] .. [13df9a]

# =======1.calculate========
#' Calculate correlation for one or two t(otutab), or distance for one or two t(otutab).
#'
#' @param totu t(otutab), rows are samples, columns are features.
#' @param totu2 t(otutab2) or NULL, rows are samples, columns are features.
#' @param method "spearman" (default), "pearson", "sparcc", or a distance index from \code{\link[vegan]{vegdist}}.
#' @param filename the prefix of the saved .corr file, or FALSE to skip saving.
#' @param p.adjust.method see \code{\link[stats]{p.adjust}}
#' @param p.adjust.mode see \code{\link{p.adjust.table}}
#' @param threads number of threads, default: 1.
#' @param verbose verbose, default: TRUE.
#'
#' @return a corr object with 2 or 3 elements:
#' \item{r}{default: spearman correlation}
#' \item{p.value}{default: p-value of spearman correlation}
#' \item{p.adjust}{adjusted p-values, only included when \code{p.adjust.method} is not NULL}
#' @family calculate
#' @export
#' @aliases c_net_cal
#' @examples
#' data("otutab", package = "pcutils")
#' t(otutab) -> totu
#' c_net_calculate(totu) -> corr
#' metadata[, 3:10] -> env
#' c_net_calculate(totu, env) -> corr2
c_net_calculate <- function(totu, totu2 = NULL, method = "spearman", filename = FALSE,
                            p.adjust.method = NULL, p.adjust.mode = "all", threads = 1, verbose = TRUE) {
  if (!is.null(totu2)) {
    tls <- check_tabs(totu, totu2)
    totu <- tls[[1]]
    totu2 <- tls[[2]]
  } else {
    totu <- check_tabs(totu)
  }

  if (method %in% c("spearman", "pearson")) {
    corr <- fast_cor(totu, totu2, method = method)
  } else if (method == "sparcc") {
    if (!is.null(totu2)) warning("sparcc only takes totu; totu2 is ignored.")
    message("sparcc is not supported in the CRAN version. Please install `zdk123/SpiecEasi` from GitHub and use its `sparcc` function.")
    # corr <- par_sparcc(totu, threads = threads, verbose = verbose)
    corr <- NULL
  } else if (method %in% c(
    "manhattan", "euclidean", "canberra", "bray",
    "kulczynski", "gower", "morisita", "horn", "mountford",
    "jaccard", "raup", "binomial", "chao", "altGower", "cao",
    "mahalanobis", "clark", "chisq", "chord", "hellinger",
    "aitchison", "robust.aitchison"
  )) {
    corr <- cal_sim(totu, totu2, method = method)
  } else {
    stop("method should be one of 'spearman', 'pearson', 'sparcc', or a distance index from vegan.")
  }

  if (!is.null(p.adjust.method)) {
    p.adjust <- p.adjust.table(corr$p.value, method = p.adjust.method, mode = p.adjust.mode)
    res <- list(r = corr$r, p.value = corr$p.value, p.adjust = p.adjust)
  } else {
    res <- list(r = corr$r, p.value = corr$p.value)
  }

  class(res) <- "corr"
  attributes(res)$method <- method

  # save the correlation result
  if (is.logical(filename)) {
    if (filename) filename <- paste0("c_net_", date())
  }
  if (is.character(filename)) {
    save_corr(res, filename)
  }
  return(res)
}
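
# A minimal sketch of the p-value adjustment path (assumes the pcutils example
# data used in the roxygen examples above); the returned corr object then
# carries a `p.adjust` element alongside `r` and `p.value`.
data("otutab", package = "pcutils")
totu <- t(otutab)
corr_adj <- c_net_calculate(totu, method = "spearman", p.adjust.method = "BH")
names(corr_adj)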

#' Check tables and extract common samples
#'
#' @param ... one or more tables, or a single list of tables
#' @return a single formatted table, or a list of tables restricted to their common samples
#' @export
#' @examples
#' data("otutab", package = "pcutils")
#' check_tabs(otutab)
check_tabs <- function(...) {
  tables <- list(...)
  if (all(class(tables[[1]]) == "list")) tables <- tables[[1]]

  names(tables) <- NULL

  if (length(tables) > 1) {
    comm <- Reduce(intersect, lapply(tables, rownames))
    if (length(comm) < 2) stop("Only ", length(comm), " common sample(s) found! Cannot calculate correlation.")
    if (all(lapply(tables, \(i)identical(rownames(i), comm)) %>% unlist())) {
      message("All samples matched.")
    } else {
      message("Extract ", length(comm), " common samples.")
    }
    dup <- lapply(tables, colnames) %>% do.call(c, .)
    dup <- dup[duplicated(dup)]
    if (length(dup) > 0) {
      stop("Duplicated colnames found: ", paste0(dup, collapse = ", "), "\nPlease check colnames of input tables.")
    } else {
      message("All features are OK.")
    }
    tables <- lapply(tables, \(i)i[comm, ])
  }

  if (length(tables) == 1) tables <- tables[[1]]
  return(tables)
}
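
# A short sketch of the two-table case (assumes the pcutils example data used
# above): both tables are trimmed to their common samples and returned as a list.
data("otutab", package = "pcutils")
tabs <- check_tabs(t(otutab), metadata[, 3:10])
lapply(tabs, dim)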


#' Save a corr object
#'
#' @param corr a corr object
#' @param filename filename without extension, default: "corr"
#'
#' @return writes a .corr file
#' @export
#'
save_corr <- function(corr, filename = "corr") {
  stopifnot(inherits(corr, "corr"))
  if (!grepl("\\.corr$", filename)) filename <- paste0(filename, ".corr")
  if (t_flag(corr$r)) {
    # keep only the upper triangle of the symmetric matrices to halve the storage space
    corr$r[upper.tri(corr$r)] -> r
    corr$p.value[upper.tri(corr$p.value)] -> p.value
    if (!is.null(corr$p.adjust)) {
      corr$p.adjust[upper.tri(corr$p.adjust)] -> p.adj
      if (all(p.value == p.adj)) {
        # p.adjust is identical to p.value: store a flag instead of the values
        p.adj <- FALSE
      }
    } else {
      p.adj <- NULL
    }

    corr_names <- rownames(corr$r)
    saveRDS(
      list(
        r = r, p.value = p.value, p.adjust = p.adj,
        corr_names = corr_names, corr_attr = attributes(corr)
      ),
      file = filename
    )
  } else {
    saveRDS(corr, file = filename)
  }
}

#' Read a corr object
#'
#' @param filename filename of the .corr file
#'
#' @return a corr object
#' @export
#' @family calculate
read_corr <- function(filename) {
  # r <- read.csv(paste0(filename, "_r.csv"), row.names = 1, check.names = FALSE)
  # p.value <- read.csv(paste0(filename, "_p.csv"), row.names = 1, check.names = FALSE)
  # p.adjust <- read.csv(paste0(filename, "_p_adj.csv"), row.names = 1, check.names = FALSE)
  if (!grepl("\\.corr$", filename)) filename <- paste0(filename, ".corr")
  in_corr <- readRDS(filename)
  if ("corr_names" %in% names(in_corr)) {
    # compact format written by save_corr(): rebuild the full symmetric matrices
    corr_names <- in_corr$corr_names
    r <- in_corr$r
    p.value <- in_corr$p.value
    p.adj <- in_corr$p.adjust
    corr_attr <- in_corr$corr_attr

    new_p.adjust <- new_p <- new_r <- matrix(0, nrow = length(corr_names), ncol = length(corr_names), dimnames = list(corr_names, corr_names))
    new_r[upper.tri(new_r)] <- r
    new_r <- new_r + t(new_r)
    diag(new_r) <- 1
    new_p[upper.tri(new_p)] <- p.value
    new_p <- new_p + t(new_p)
    if (is.null(p.adj)) {
      new_p.adjust <- NULL
    } else if (is.logical(p.adj)) {
      # p.adjust was identical to p.value when saved
      new_p.adjust <- new_p
    } else {
      new_p.adjust[upper.tri(new_p.adjust)] <- p.adj
      new_p.adjust <- new_p.adjust + t(new_p.adjust)
      diag(new_p.adjust) <- 1
    }
    in_corr <- list()
    in_corr$r <- new_r
    in_corr$p.value <- new_p
    if (!is.null(new_p.adjust)) in_corr$p.adjust <- new_p.adjust
    attributes(in_corr) <- corr_attr
  }
  return(in_corr)
}
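
# A round-trip sketch for the compact .corr format (assumes the `corr` object
# from the c_net_calculate() example above and a writable temporary directory).
f <- file.path(tempdir(), "my_corr")
save_corr(corr, f)
corr2 <- read_corr(f)
all.equal(corr$r, corr2$r)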


#' Fast correlation calculation
#'
#' @param totu t(otutab), rows are samples, columns are features.
#' @param totu2 t(otutab) or NULL, rows are samples, columns are features.
#' @param method "pearson" (default) or "spearman"
#'
#' @export
#'
#' @return a list with 2 elements:
#' \item{r}{correlation matrix}
#' \item{p.value}{p-values of the correlations}
#' @family calculate
#' @examples
#' data("otutab", package = "pcutils")
#' t(otutab[1:100, ]) -> totu
#' fast_cor(totu, method = "spearman") -> corr
fast_cor <- function(totu, totu2 = NULL, method = c("pearson", "spearman")) {
  method <- match.arg(method, c("pearson", "spearman"))
  totu <- as.matrix(totu)
  if (!is.null(totu2)) {
    totu2 <- as.matrix(totu2)
    r <- stats::cor(totu, totu2, method = method)
    # two-sided p-value from the t statistic t = r * sqrt((n - 2) / (1 - r^2)),
    # computed on the log scale via expm1() for numerical stability
    df <- dim(totu)[1] - 2
    t <- r * sqrt(df / (1 - r^2))
    p <- -2 * expm1(stats::pt(abs(t), df, log.p = TRUE))
  } else {
    r <- stats::cor(totu, method = method)
    # initialize a zero p-value matrix with the same dimnames as r
    p <- r
    p[p != 0] <- 0
    # compute p-values for the upper triangle only, then mirror to the lower triangle
    r_tri <- r[upper.tri(r)]
    df <- dim(totu)[1] - 2
    t <- r_tri * sqrt(df / (1 - r_tri^2))
    p[upper.tri(p)] <- -2 * expm1(stats::pt(abs(t), df, log.p = TRUE))
    p <- p + t(p)
  }
  return(list(r = r, p.value = p))
}
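
# A minimal cross-check sketch on toy data: the t-based p-values above are exact
# for Pearson correlation, so they should agree with stats::cor.test().
set.seed(1)
x <- matrix(rnorm(200), nrow = 20, dimnames = list(NULL, paste0("f", 1:10)))
res <- fast_cor(x, method = "pearson")
ct <- stats::cor.test(x[, 1], x[, 2], method = "pearson")
all.equal(unname(ct$p.value), res$p.value["f1", "f2"])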

# Correlation via Hmisc::rcorr() (not exported)
Hmisc_cor <- function(totu, totu2 = NULL, method = c("spearman", "pearson")[1]) {
  lib_ps("Hmisc", library = FALSE)
  totu <- as.matrix(totu)
  if (!is.null(totu2)) {
    totu2 <- as.matrix(totu2)
    tmp <- Hmisc::rcorr(totu, totu2, type = method)
    # keep only the cross-correlation block between totu and totu2
    r <- tmp$r[1:ncol(totu), (ncol(totu) + 1):ncol(tmp$r)]
    p <- tmp$P[1:ncol(totu), (ncol(totu) + 1):ncol(tmp$r)]
    return(list(r = r, p.value = p))
  }
  tmp <- Hmisc::rcorr(totu, type = method)
  p <- tmp$P
  # rcorr() returns NA on the diagonal; set it to 0
  p[is.na(p)] <- 0
  return(list(r = tmp$r, p.value = p))
}

#' Calculate similarity for one or two t(otutab)
#'
#' @param totu t(otutab), rows are samples, columns are features.
#' @param method Dissimilarity index, see \code{\link[vegan]{vegdist}}.
#' @param totu2 t(otutab) or NULL, rows are samples, columns are features.
#'
#' @family calculate
#' @return similarity matrix, where similarity = 1 - distance
#' @export
#' @seealso \code{\link[vegan]{vegdist}}
#' @examples
#' if (requireNamespace("vegan")) {
#'   data("otutab", package = "pcutils")
#'   t(otutab) -> totu
#'   cal_sim(totu) -> sim_corr
#' }
cal_sim <- function(totu, totu2 = NULL, method = "bray") {
  lib_ps("vegan", library = FALSE)
  if (is.null(totu2)) {
    # distance between features: vegdist() works on rows, so transpose back
    vegan::vegdist(t(totu), method = method) %>% as.matrix() -> dist
  } else {
    # pairwise distance between each feature of totu and each feature of totu2
    n1 <- ncol(totu)
    n2 <- ncol(totu2)
    dist <- matrix(NA, nrow = n1, ncol = n2, dimnames = list(colnames(totu), colnames(totu2)))
    for (i in seq_len(n1)) {
      for (j in seq_len(n2)) {
        dist[i, j] <- vegan::vegdist(rbind(totu[, i], totu2[, j]), method = method)
      }
    }
  }

  sim <- 1 - dist
  p <- dist
  message("p-values are not available for distance indices; all are set to 0.")
  p[p != 0] <- 0

  return(list(r = sim, p.value = p))
}
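
# A sketch of the two-table branch (assumes the `totu` object from the roxygen
# example above): distances are computed pairwise between each feature of the
# first table and each feature of the second.
sim2 <- cal_sim(totu[, 1:5], totu[, 6:10], method = "bray")
dim(sim2$r)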

cal_KLD <- function(totu, totu2 = NULL, method = "KLD") {
  # Kullback-Leibler divergence (not exported; totu2 and method are currently unused)
  # KLD = function(p, q) {
  #   sum(p * log(p / q))
  # }
  # p = c(0.1, 0.2, 0.3, 0.4)
  # q = c(0.2, 0.3, 0.2, 0.3)
  # KLD(p, q)
  lib_ps("philentropy", library = FALSE)
  # normalize each feature to a probability distribution over samples
  dat <- t(totu) / rowSums(t(totu))
  philentropy::KL(dat, unit = "log")
}

#' Apply p.adjust to a correlation table (matrix or data.frame)
#'
#' @param pp table of p-values
#' @param method see \code{\link[stats]{p.adjust}}, default: "BH".
#' @param mode "all" adjusts all values together; "rows" adjusts each row separately; "columns" adjusts each column separately. Default: "all".
#'
#' @return a table of adjusted p-values
#' @export
#'
#' @family calculate
#' @examples
#' matrix(abs(rnorm(100, 0.01, 0.1)), 10, 10) -> pp
#' p.adjust.table(pp, method = "BH", mode = "all") -> pp_adj
p.adjust.table <- function(pp, method = "BH", mode = "all") {
  mode <- match.arg(mode, c("all", "rows", "columns"))
  pp <- as.matrix(pp)
  if (mode == "all") {
    if (t_flag(pp)) {
      # symmetric matrix: adjust the lower triangle only, then mirror
      lp <- lower.tri(pp)
      pa <- pp[lp]
      pa <- p.adjust(pa, method)
      pp[lower.tri(pp, diag = FALSE)] <- pa
      pp[upper.tri(pp, diag = FALSE)] <- 0
      pp <- pp + t(pp)
    } else {
      pp1 <- p.adjust(pp, method)
      pp <- matrix(pp1, nrow(pp), ncol(pp), dimnames = list(rownames(pp), colnames(pp)))
    }
  } else if (mode == "rows") {
    pp <- t(apply(pp, 1, p.adjust, method = method))
  } else if (mode == "columns") {
    pp <- apply(t(pp), 1, p.adjust, method = method)
  }
  return(pp)
}
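
# A small sketch contrasting the three modes on a non-symmetric toy table:
# "all" pools every value, while "rows"/"columns" adjust each row/column on its own.
set.seed(1)
pp2 <- matrix(runif(12, 0, 0.1),
  nrow = 3, ncol = 4,
  dimnames = list(paste0("r", 1:3), paste0("c", 1:4))
)
p.adjust.table(pp2, method = "BH", mode = "all")
p.adjust.table(pp2, method = "BH", mode = "rows")
p.adjust.table(pp2, method = "BH", mode = "columns")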