Diff of /R/2-3.RMT.R [000000] .. [13df9a]

Switch to unified view

a b/R/2-3.RMT.R
1
# =========2.2RMT optimize=====
2
3
#' Get RMT threshold for a correlation matrix
4
#'
5
#' @param occor.r a corr object or a correlation matrix
6
#' @param min_threshold min_threshold
7
#' @param max_threshold max_threshold
8
#' @param step step
9
#' @param gif render a .gif file?
10
#' @param verbose verbose
11
#' @param out_dir output dir
12
#'
13
#' @return a r-threshold
14
#' @export
15
#' @references
16
#' J. Zhou, Y. Deng, FALSE. Luo, Z. He, Q. Tu, X. Zhi, (2010) Functional Molecular Ecological Networks, doi:10.1128/mBio.00169-10.
17
#' <https://matstat.org/content_en/RMT/RMThreshold_Intro.pdf>
18
#' @examples
19
#' \donttest{
20
#' data(otutab, package = "pcutils")
21
#' t(otutab) -> totu
22
#' c_net_calculate(totu) -> corr
23
#' rmt(corr)
24
#' # recommend: 0.69
25
#' c_net_build(corr, r_threshold = 0.69) -> co_net_rmt
26
#' }
27
RMT_threshold <- function(occor.r, out_dir, min_threshold = 0.5, max_threshold = 0.8,
28
                          step = 0.02, gif = FALSE, verbose = FALSE) {
29
  nwd <- getwd()
30
  on.exit(setwd(nwd))
31
32
  setwd(out_dir)
33
  if (inherits(occor.r, "corr")) occor.r <- occor.r$r
34
  if (!dir.exists("./RMT_temp")) dir.create("./RMT_temp")
35
  diag(occor.r) <- 0
36
37
  if (max_threshold >= max(abs(occor.r))) max_threshold <- (max(abs(occor.r)) - step)
38
  if (min_threshold >= max_threshold) min_threshold <- max_threshold - 10 * step
39
40
  thres_seq <- seq(min_threshold, max_threshold, step)
41
42
  res <- data.frame()
43
  for (i in seq_len(length(thres_seq))) {
44
    threshold <- thres_seq[i]
45
    if (!verbose) pcutils::dabiao(paste0("Calculating", i, ":  threshold =", signif(threshold, 3)), print = TRUE)
46
    corr_r1 <- occor.r
47
    corr_r1[abs(corr_r1) < threshold] <- 0
48
    # calculate eigenvalues
49
    rand.mat <- corr_r1
50
    eigenvalues <- eigen(rand.mat, only.values = TRUE)$values
51
    eigenvalues <- eigenvalues[order(eigenvalues)] / max(abs(eigenvalues))
52
    eigenvalues <- pcutils::remove.outliers(unique(eigenvalues))
53
54
    # get the NNDS
55
    { # uf <- rm.unfold.gauss(eigenvalues,pop.up = TRUE)
56
      dens <- density(eigenvalues, kernel = "gaussian")
57
      midpoints <- \(x)(x[-length(x)] + 0.5 * diff(x))
58
      scale.function <- approx(dens$x, dens$y, xout = midpoints(eigenvalues))
59
      ev.spacing <- diff(eigenvalues)
60
      ev.spacing <- ev.spacing * scale.function$y
61
      ev.spacing <- ev.spacing / mean(ev.spacing)
62
    }
63
64
    ev.spacing <- ev.spacing[ev.spacing <= 3]
65
    # test whether fit possion?
66
    p_ks_test <- ks.test(unique(ev.spacing), "pexp", 1)$p.value
67
    # get sse
68
    # sse = rm.sse(ev.spacing)
69
    sse <- get_sse(ev.spacing)
70
    log_sse <- log(sse)
71
72
    # maximum likelihood
73
    evs <- ev.spacing[ev.spacing != 0]
74
    N <- length(evs)
75
    log_LE <- -sum(evs) / N
76
    log_LW <- log(pi / 2) + sum(log(evs)) / N - 0.25 * pi * sum(evs^2) / N
77
78
    # save png
79
    {
80
      histo <- hist(ev.spacing, breaks = seq(min(ev.spacing), max(ev.spacing), len = 51), plot = FALSE)
81
      grDevices::png(paste0("RMT_temp/rmt_nnsd", i, ".png"), height = 600, width = 700, res = 130)
82
      nnsd_plot(
83
        histo = histo, title = "Eigenvalue spacing distribution (NNSD)", threshold = threshold,
84
        dis_GOE = log_LW, dis_possion = log_LE, p_ks_test = p_ks_test
85
      )
86
      grDevices::dev.off()
87
    }
88
    res <- rbind(res, data.frame(threshold, p_ks_test, log_sse, log_LW, log_LE))
89
  }
90
  message("The Intermediate files saved in ", out_dir, "/RMT_temp/.")
91
  # transfer to gif
92
  if (gif) {
93
    lib_ps("gifski", library = FALSE)
94
    gifski::gifski(paste0("RMT_temp/rmt_nnsd", seq_len(length(thres_seq)), ".png"),
95
      gif_file = "RMT_temp/rmt_nnsd.gif"
96
    )
97
  }
98
  r_threshold <- (res[which(res$log_LW == min(res$log_LW)), "threshold"] +
99
    res[which(res$log_LE == max(res$log_LE)), "threshold"]) / 2
100
  res <- list(res = res, r_threshold = r_threshold)
101
  class(res) <- c("rmt_res", class(res))
102
  return(res)
103
}
104
105
#' Plot a rmt_res
106
#'
107
#' @param x rmt_res
108
#' @param ... Additional arguments
109
#'
110
#' @return ggplot
111
#' @exportS3Method
112
#' @method plot rmt_res
113
plot.rmt_res <- function(x, ...) {
114
  threshold <- value <- variable <- xi <- y <- NULL
115
  res <- x$res
116
  linedf <- data.frame(
117
    variable = c("p_ks_test", "log_sse", "log_LW", "log_LE"),
118
    xi = c(
119
      res[which(res$p_ks_test == max(res$p_ks_test))[1], "threshold"],
120
      res[which(res$log_sse == min(res$log_sse))[1], "threshold"],
121
      res[which(res$log_LW == min(res$log_LW))[1], "threshold"],
122
      res[which(res$log_LE == max(res$log_LE))[1], "threshold"]
123
    ),
124
    x = max(res$threshold) - min(res$threshold),
125
    y = apply(res[, -1], 2, max)
126
  )
127
128
  reshape2::melt(res, "threshold") -> md
129
130
  # filter(threshold<0.77)%>%
131
  p <- ggplot(md, aes(threshold, value)) +
132
    geom_point(aes(col = variable)) +
133
    geom_line(aes(col = variable)) +
134
    scale_color_manual(values = get_cols(4, "col1")) +
135
    facet_wrap(. ~ variable, scales = "free_y") +
136
    theme_bw() +
137
    xlab(NULL) +
138
    geom_text(data = linedf, aes(x = xi - 0.02 * x, y = 0.5 * y, label = xi)) +
139
    geom_vline(data = linedf, aes(xintercept = xi), linetype = 2, col = "red") +
140
    theme(legend.position = "none")
141
142
  message(paste("recommend r_threshold: ", mean(linedf$xi)))
143
  return(p)
144
}
145
146
nnsd_plot <- \(histo = histo, title = title, threshold = threshold,
147
  dis_GOE = dis_GOE, dis_possion = dis_possion, p_ks_test = p_ks_test) {
148
  plot(histo, freq = FALSE, col = "#F4FCA1", main = title, font.main = 1, xlab = "eigenvalue spacing", ylab = "PDF of eigenvalue spacing")
149
  {
150
    actual.ymax <- par("yaxp")[2]
151
    x0 <- -log(actual.ymax * 0.98)
152
    possion_dis <- \(x)exp(-x)
153
    graphics::curve(possion_dis,
154
      from = max(x0, min(histo$breaks)),
155
      to = max(histo$breaks), n = 1001, add = TRUE, type = "l", lty = 1, col = "#EB34FF", lwd = 2
156
    )
157
  }
158
  {
159
    GOE <- function(x) pi / 2 * x * exp(-pi / 4 * x^2)
160
    graphics::curve(GOE,
161
      from = min(histo$breaks),
162
      to = max(histo$breaks), n = 1001, add = TRUE, type = "l",
163
      lty = 1, col = "blue", lwd = 2
164
    )
165
  }
166
167
  if ((!is.na(dis_GOE)) && (!is.na(dis_possion))) {
168
    graphics::mtext(side = 3, paste(
169
      "Distance to GOE =", signif(dis_GOE, 3),
170
      "\nDistance to Possion =", signif(dis_possion, 3), "; ks_test p.value for possion =", signif(p_ks_test, 3)
171
    ), col = "#878787", cex = 0.6)
172
  }
173
174
  if (!is.na(threshold)) graphics::mtext(side = 4, paste("threshold =", signif(threshold, 4)))
175
176
  graphics::legend("topright", inset = 0.05, c("Possion", "GOE"), col = c("#EB34FF", "blue"), lty = 1, lwd = 2, cex = 0.8)
177
}
178
179
trapez <- \(x, y){
180
  ind <- 2:length(x)
181
  as.double((x[ind] - x[ind - 1]) %*% (y[ind] + y[ind - 1])) / 2
182
}
183
184
get_sse <- \(ev.spacing){
185
  dens <- density(ev.spacing)
186
  N <- 20
187
  x <- seq(min(ev.spacing), max(ev.spacing), len = 1000)
188
  A <- exp(-min(ev.spacing)) - exp(-max(ev.spacing))
189
  xs <- numeric(N + 1)
190
  xs[1] <- min(ev.spacing)
191
  for (i in 1:N) xs[i + 1] <- -log(exp(-xs[i]) - A / N)
192
  area <- numeric(N)
193
  for (i in 1:N) {
194
    xsec <- x[(x > xs[i]) & (x < xs[i + 1])]
195
    xsec <- c(xs[i], xsec, xs[i + 1])
196
    ysec <- approx(dens$x, dens$y, xout = xsec)$y
197
    area[i] <- trapez(xsec, ysec)
198
  }
199
  sse <- sum((area[i] - A / N)^2)
200
  sse
201
}
202
203
#' Get RMT threshold for a correlation matrix roughly
204
#'
205
#' @export
206
#' @return recommend threshold
207
#' @rdname RMT_threshold
208
rmt <- function(occor.r, min_threshold = 0.5, max_threshold = 0.85, step = 0.01) {
209
  if (inherits(occor.r, "corr")) occor.r <- occor.r$r
210
  NNSD <- \(x)abs(diff(x))
211
212
  s <- seq(0, 3, 0.1)
213
  poisson_d <- exp(-s)
214
  nnsdpois <- density(NNSD(poisson_d))
215
216
  ps <- c()
217
  threshold <- c()
218
219
  for (i in seq(min_threshold, max_threshold, step)) {
220
    corr_r1 <- occor.r
221
    corr_r1[abs(corr_r1) < i] <- 0
222
    {
223
      eigen_res <- sort(eigen(corr_r1)$value)
224
      # spline to eigen_res
225
      check <- tryCatch(ssp <- smooth.spline(eigen_res, control.spar = list(low = 0, high = 3)),
226
        error = \(e) {
227
          TRUE
228
        }
229
      )
230
      if (rlang::is_true(check)) next
231
      nnsdw <- density(NNSD(ssp$y))
232
      chival <- sum((nnsdw$y - nnsdpois$y)^2 / nnsdpois$y / 1e3)
233
    }
234
235
    ps <- c(ps, chival)
236
    threshold <- c(threshold, i)
237
    if (((i * 100) %% 5 == 0)) {
238
      message(paste0("Calculating: ", i))
239
    }
240
  }
241
242
  res <- data.frame(threshold, ps)
243
  recommend_thres <- res[which.min(res[, 2]), 1]
244
  p <- ggplot(res, aes(threshold, ps)) +
245
    geom_point() +
246
    geom_vline(xintercept = recommend_thres, linetype = 2, col = "red") +
247
    geom_text(x = recommend_thres + 0.01, y = 0.5 * max(res$ps), label = recommend_thres) +
248
    theme_bw(base_size = 15)
249
  print(p)
250
251
  res1 <- res[(res$threshold < (recommend_thres + 0.05)) & (res$threshold > (recommend_thres - 0.05)), ]
252
  p <- ggplot(res1, aes(threshold, ps)) +
253
    geom_point() +
254
    geom_vline(xintercept = recommend_thres, linetype = 2, col = "red") +
255
    geom_text(x = recommend_thres + 0.01, y = 0.5 * max(res1$ps), label = recommend_thres) +
256
    theme_bw(base_size = 15)
257
  print(p)
258
259
  message("We recommend r-threshold: ", recommend_thres, ", you can calculate again in a smaller region")
260
  recommend_thres
261
}