# =========2.2RMT optimize=====
#' Get RMT threshold for a correlation matrix
#'
#' @param occor.r a corr object or a correlation matrix
#' @param min_threshold min_threshold
#' @param max_threshold max_threshold
#' @param step step
#' @param gif render a .gif file?
#' @param verbose verbose
#' @param out_dir output dir
#'
#' @return a r-threshold
#' @export
#' @references
#' J. Zhou, Y. Deng, FALSE. Luo, Z. He, Q. Tu, X. Zhi, (2010) Functional Molecular Ecological Networks, doi:10.1128/mBio.00169-10.
#' <https://matstat.org/content_en/RMT/RMThreshold_Intro.pdf>
#' @examples
#' \donttest{
#' data(otutab, package = "pcutils")
#' t(otutab) -> totu
#' c_net_calculate(totu) -> corr
#' rmt(corr)
#' # recommend: 0.69
#' c_net_build(corr, r_threshold = 0.69) -> co_net_rmt
#' }
RMT_threshold <- function(occor.r, out_dir, min_threshold = 0.5, max_threshold = 0.8,
step = 0.02, gif = FALSE, verbose = FALSE) {
nwd <- getwd()
on.exit(setwd(nwd))
setwd(out_dir)
if (inherits(occor.r, "corr")) occor.r <- occor.r$r
if (!dir.exists("./RMT_temp")) dir.create("./RMT_temp")
diag(occor.r) <- 0
if (max_threshold >= max(abs(occor.r))) max_threshold <- (max(abs(occor.r)) - step)
if (min_threshold >= max_threshold) min_threshold <- max_threshold - 10 * step
thres_seq <- seq(min_threshold, max_threshold, step)
res <- data.frame()
for (i in seq_len(length(thres_seq))) {
threshold <- thres_seq[i]
if (!verbose) pcutils::dabiao(paste0("Calculating", i, ": threshold =", signif(threshold, 3)), print = TRUE)
corr_r1 <- occor.r
corr_r1[abs(corr_r1) < threshold] <- 0
# calculate eigenvalues
rand.mat <- corr_r1
eigenvalues <- eigen(rand.mat, only.values = TRUE)$values
eigenvalues <- eigenvalues[order(eigenvalues)] / max(abs(eigenvalues))
eigenvalues <- pcutils::remove.outliers(unique(eigenvalues))
# get the NNDS
{ # uf <- rm.unfold.gauss(eigenvalues,pop.up = TRUE)
dens <- density(eigenvalues, kernel = "gaussian")
midpoints <- \(x)(x[-length(x)] + 0.5 * diff(x))
scale.function <- approx(dens$x, dens$y, xout = midpoints(eigenvalues))
ev.spacing <- diff(eigenvalues)
ev.spacing <- ev.spacing * scale.function$y
ev.spacing <- ev.spacing / mean(ev.spacing)
}
ev.spacing <- ev.spacing[ev.spacing <= 3]
# test whether fit possion?
p_ks_test <- ks.test(unique(ev.spacing), "pexp", 1)$p.value
# get sse
# sse = rm.sse(ev.spacing)
sse <- get_sse(ev.spacing)
log_sse <- log(sse)
# maximum likelihood
evs <- ev.spacing[ev.spacing != 0]
N <- length(evs)
log_LE <- -sum(evs) / N
log_LW <- log(pi / 2) + sum(log(evs)) / N - 0.25 * pi * sum(evs^2) / N
# save png
{
histo <- hist(ev.spacing, breaks = seq(min(ev.spacing), max(ev.spacing), len = 51), plot = FALSE)
grDevices::png(paste0("RMT_temp/rmt_nnsd", i, ".png"), height = 600, width = 700, res = 130)
nnsd_plot(
histo = histo, title = "Eigenvalue spacing distribution (NNSD)", threshold = threshold,
dis_GOE = log_LW, dis_possion = log_LE, p_ks_test = p_ks_test
)
grDevices::dev.off()
}
res <- rbind(res, data.frame(threshold, p_ks_test, log_sse, log_LW, log_LE))
}
message("The Intermediate files saved in ", out_dir, "/RMT_temp/.")
# transfer to gif
if (gif) {
lib_ps("gifski", library = FALSE)
gifski::gifski(paste0("RMT_temp/rmt_nnsd", seq_len(length(thres_seq)), ".png"),
gif_file = "RMT_temp/rmt_nnsd.gif"
)
}
r_threshold <- (res[which(res$log_LW == min(res$log_LW)), "threshold"] +
res[which(res$log_LE == max(res$log_LE)), "threshold"]) / 2
res <- list(res = res, r_threshold = r_threshold)
class(res) <- c("rmt_res", class(res))
return(res)
}
#' Plot a rmt_res
#'
#' @param x rmt_res
#' @param ... Additional arguments
#'
#' @return ggplot
#' @exportS3Method
#' @method plot rmt_res
plot.rmt_res <- function(x, ...) {
threshold <- value <- variable <- xi <- y <- NULL
res <- x$res
linedf <- data.frame(
variable = c("p_ks_test", "log_sse", "log_LW", "log_LE"),
xi = c(
res[which(res$p_ks_test == max(res$p_ks_test))[1], "threshold"],
res[which(res$log_sse == min(res$log_sse))[1], "threshold"],
res[which(res$log_LW == min(res$log_LW))[1], "threshold"],
res[which(res$log_LE == max(res$log_LE))[1], "threshold"]
),
x = max(res$threshold) - min(res$threshold),
y = apply(res[, -1], 2, max)
)
reshape2::melt(res, "threshold") -> md
# filter(threshold<0.77)%>%
p <- ggplot(md, aes(threshold, value)) +
geom_point(aes(col = variable)) +
geom_line(aes(col = variable)) +
scale_color_manual(values = get_cols(4, "col1")) +
facet_wrap(. ~ variable, scales = "free_y") +
theme_bw() +
xlab(NULL) +
geom_text(data = linedf, aes(x = xi - 0.02 * x, y = 0.5 * y, label = xi)) +
geom_vline(data = linedf, aes(xintercept = xi), linetype = 2, col = "red") +
theme(legend.position = "none")
message(paste("recommend r_threshold: ", mean(linedf$xi)))
return(p)
}
nnsd_plot <- \(histo = histo, title = title, threshold = threshold,
dis_GOE = dis_GOE, dis_possion = dis_possion, p_ks_test = p_ks_test) {
plot(histo, freq = FALSE, col = "#F4FCA1", main = title, font.main = 1, xlab = "eigenvalue spacing", ylab = "PDF of eigenvalue spacing")
{
actual.ymax <- par("yaxp")[2]
x0 <- -log(actual.ymax * 0.98)
possion_dis <- \(x)exp(-x)
graphics::curve(possion_dis,
from = max(x0, min(histo$breaks)),
to = max(histo$breaks), n = 1001, add = TRUE, type = "l", lty = 1, col = "#EB34FF", lwd = 2
)
}
{
GOE <- function(x) pi / 2 * x * exp(-pi / 4 * x^2)
graphics::curve(GOE,
from = min(histo$breaks),
to = max(histo$breaks), n = 1001, add = TRUE, type = "l",
lty = 1, col = "blue", lwd = 2
)
}
if ((!is.na(dis_GOE)) && (!is.na(dis_possion))) {
graphics::mtext(side = 3, paste(
"Distance to GOE =", signif(dis_GOE, 3),
"\nDistance to Possion =", signif(dis_possion, 3), "; ks_test p.value for possion =", signif(p_ks_test, 3)
), col = "#878787", cex = 0.6)
}
if (!is.na(threshold)) graphics::mtext(side = 4, paste("threshold =", signif(threshold, 4)))
graphics::legend("topright", inset = 0.05, c("Possion", "GOE"), col = c("#EB34FF", "blue"), lty = 1, lwd = 2, cex = 0.8)
}
trapez <- \(x, y){
ind <- 2:length(x)
as.double((x[ind] - x[ind - 1]) %*% (y[ind] + y[ind - 1])) / 2
}
get_sse <- \(ev.spacing){
dens <- density(ev.spacing)
N <- 20
x <- seq(min(ev.spacing), max(ev.spacing), len = 1000)
A <- exp(-min(ev.spacing)) - exp(-max(ev.spacing))
xs <- numeric(N + 1)
xs[1] <- min(ev.spacing)
for (i in 1:N) xs[i + 1] <- -log(exp(-xs[i]) - A / N)
area <- numeric(N)
for (i in 1:N) {
xsec <- x[(x > xs[i]) & (x < xs[i + 1])]
xsec <- c(xs[i], xsec, xs[i + 1])
ysec <- approx(dens$x, dens$y, xout = xsec)$y
area[i] <- trapez(xsec, ysec)
}
sse <- sum((area[i] - A / N)^2)
sse
}
#' Get RMT threshold for a correlation matrix roughly
#'
#' @export
#' @return recommend threshold
#' @rdname RMT_threshold
rmt <- function(occor.r, min_threshold = 0.5, max_threshold = 0.85, step = 0.01) {
if (inherits(occor.r, "corr")) occor.r <- occor.r$r
NNSD <- \(x)abs(diff(x))
s <- seq(0, 3, 0.1)
poisson_d <- exp(-s)
nnsdpois <- density(NNSD(poisson_d))
ps <- c()
threshold <- c()
for (i in seq(min_threshold, max_threshold, step)) {
corr_r1 <- occor.r
corr_r1[abs(corr_r1) < i] <- 0
{
eigen_res <- sort(eigen(corr_r1)$value)
# spline to eigen_res
check <- tryCatch(ssp <- smooth.spline(eigen_res, control.spar = list(low = 0, high = 3)),
error = \(e) {
TRUE
}
)
if (rlang::is_true(check)) next
nnsdw <- density(NNSD(ssp$y))
chival <- sum((nnsdw$y - nnsdpois$y)^2 / nnsdpois$y / 1e3)
}
ps <- c(ps, chival)
threshold <- c(threshold, i)
if (((i * 100) %% 5 == 0)) {
message(paste0("Calculating: ", i))
}
}
res <- data.frame(threshold, ps)
recommend_thres <- res[which.min(res[, 2]), 1]
p <- ggplot(res, aes(threshold, ps)) +
geom_point() +
geom_vline(xintercept = recommend_thres, linetype = 2, col = "red") +
geom_text(x = recommend_thres + 0.01, y = 0.5 * max(res$ps), label = recommend_thres) +
theme_bw(base_size = 15)
print(p)
res1 <- res[(res$threshold < (recommend_thres + 0.05)) & (res$threshold > (recommend_thres - 0.05)), ]
p <- ggplot(res1, aes(threshold, ps)) +
geom_point() +
geom_vline(xintercept = recommend_thres, linetype = 2, col = "red") +
geom_text(x = recommend_thres + 0.01, y = 0.5 * max(res1$ps), label = recommend_thres) +
theme_bw(base_size = 15)
print(p)
message("We recommend r-threshold: ", recommend_thres, ", you can calculate again in a smaller region")
recommend_thres
}