#' @title ClustDiffGenes
#' @description Creates a table of cluster differences
#' @param object \code{DISCBIO} class object.
#' @param K A numeric value of the number of clusters.
#' @param pValue A numeric value of the p-value. Default is 0.05.
#' @param fdr A numeric value of the false discovery rate. Default is 0.01.
#' @param export A logical vector that allows writing the final gene list in
#' excel file. Default is TRUE.
#' @param quiet if `TRUE`, suppresses intermediate text output
#' @param filename_up Name of the exported "up" file (if `export=TRUE`)
#' @param filename_down Name of the exported "down" file (if `export=TRUE`)
#' @param filename_binom Name of the exported binomial file
#' @param filename_sigdeg Name of the exported sigDEG file
#' @importFrom stats pbinom median
#' @import org.Hs.eg.db
#' @rdname ClustDiffGenes
#' @return A list containing two tables.
#' @export
#' @examples
#' sc <- DISCBIO(valuesG1msTest)
#' sc <- Clustexp(sc, cln = 3, quiet = TRUE)
#' cdiff <- ClustDiffGenes(sc, K = 3, fdr = .3, export = FALSE)
#' str(cdiff)
#' cdiff[[2]]
setGeneric(
"ClustDiffGenes",
function(
object, K, pValue = 0.05, fdr = .01, export = FALSE, quiet = FALSE,
filename_up = "Up-DEG-cluster",
filename_down = "Down-DEG-cluster",
filename_binom = "binomial-DEGsTable",
filename_sigdeg = "binomial-sigDEG") {
standardGeneric("ClustDiffGenes")
}
)
#' @export
#' @rdname ClustDiffGenes
setMethod(
"ClustDiffGenes",
signature = "DISCBIO",
definition = function(
object, K, pValue, fdr, export, quiet, filename_up, filename_down,
filename_binom, filename_sigdeg) {
# ======================================================================
# Validating
# ======================================================================
ran_k <- length(object@kmeans$kpart) > 0
ran_m <- length(object@MBclusters) > 0
if (!is.numeric(fdr)) {
stop("fdr has to be a number between 0 and 1")
} else if (fdr < 0 | fdr > 1) {
stop("fdr has to be a number between 0 and 1")
}
if (!is.numeric(pValue)) {
stop("pValue has to be a number between 0 and 1")
} else if (pValue < 0 | pValue > 1) {
stop("pValue has to be a number between 0 and 1")
}
cdiff <- list()
x <- object@ndata
y <- object@expdata[, names(object@ndata)]
if (ran_k) {
part <- object@kmeans$kpart
} else if (ran_m) {
part <- object@MBclusters$clusterid
} else {
stop("Run clustering before running this function")
}
# ======================================================================
# Operating
# ======================================================================
for (i in 1:max(part)) {
if (sum(part == i) == 0) {
next
}
m <- apply(x, 1, mean)
n <-
if (sum(part == i) > 1) {
apply(x[, part == i], 1, mean)
} else {
x[, part == i]
}
no <-
if (sum(part == i) > 1) {
median(apply(y[, part == i], 2, sum)) /
median(apply(x[, part == i], 2, sum))
} else {
sum(y[, part == i]) / sum(x[, part == i])
}
m <- m * no
n <- n * no
pv <- binompval(m / sum(m), sum(n), n)
d <-
data.frame(
mean.all = m,
mean.cl = n,
fc = n / m,
pv = pv
)[order(pv, decreasing = FALSE), ]
cdiff[[i]] <- d[d$pv < pValue, ]
}
DEGsE <- vector()
DEGsS <- vector()
DEGsTable <- data.frame()
for (n in 1:K) {
if (length(cdiff[[n]][, 1]) == 0) {
next
}
if (length(cdiff[[n]][, 1]) > 0) {
p.adj <- p.adjust(cdiff[[n]][, 4], method = "bonferroni")
out <- cbind(cdiff[[n]], p.adj)
out <- subset(out, out[, 5] < fdr)
if (length(out[, 1]) > 0) {
Regulation <- vector()
for (i in seq_len(length(out[, 1]))) {
if (out[i, 1] > out[i, 2]) {
Regulation[i] <- "Down"
} else {
Regulation[i] <- "Up"
}
}
out <- cbind(out, Regulation)
if (quiet) {
suppressMessages(
geneList <-
AnnotationDbi::select(
org.Hs.eg.db,
keys = keys(org.Hs.eg.db),
columns = c("SYMBOL", "ENSEMBL")
)
)
GL <- c(1, "MTRNR2", "ENSG00000210082")
GL1 <- c(1, "MTRNR1", "ENSG00000211459")
geneList <- rbind(geneList, GL, GL1)
} else {
geneList <-
AnnotationDbi::select(
org.Hs.eg.db,
keys = keys(org.Hs.eg.db),
columns = c("SYMBOL", "ENSEMBL")
)
GL <- c(1, "MTRNR2", "ENSG00000210082")
GL1 <- c(1, "MTRNR1", "ENSG00000211459")
geneList <- rbind(geneList, GL, GL1)
}
genes <- rownames(out)
gene_list <- geneList[, 3]
idx_genes <- is.element(gene_list, genes)
genes2 <- geneList[idx_genes, ]
Final <- cbind(genes, out)
Final <-
merge(
Final,
genes2,
by.x = "genes",
by.y = "ENSEMBL",
all.x = TRUE
)
Final <- Final[!duplicated(Final[, 1]), ]
Final[is.na(Final[, 9]), c(1, 9)] <-
Final[is.na(Final[, 9]), 1]
rownames(Final) <- Final[, 1]
Final[, 1] <- Final[, 9]
Final <- Final[, -9]
DEGsS <- c(DEGsS, Final[, 1])
DEGsE <-
c(DEGsE, as.character(rownames(Final)))
Up <- subset(Final, Final[, 7] == "Up")
cols_to_keep <- c(
"Regulation", "genes", "pv", "mean.all", "mean.cl", "fc", "p.adj"
)
Up <- Up[, cols_to_keep]
Up[, 3] <- rownames(Up)
Up[, 6] <- log2(Up[, 6])
Up[, 1] <- Up[, 2]
colnames(Up) <- c(
"Genes",
"genes",
"E.genes",
"mean.all",
"mean.cl",
"log2.fc",
"p.adj"
)
if (export) {
write.csv(
Up,
file = paste0(filename_up, n, ".csv")
)
}
Down <- subset(Final, Final[, 7] == "Down")
cols_to_keep <- c(
"Regulation", "genes", "pv", "mean.all", "mean.cl", "fc", "p.adj"
)
Down <- Down[, cols_to_keep]
Down[, 3] <- rownames(Down)
Down[, 6] <- log2(Down[, 6])
Down[, 1] <- Down[, 2]
colnames(Down) <- c(
"Genes",
"genes",
"E.genes",
"mean.all",
"mean.cl",
"log2.fc",
"p.adj"
)
if (export) {
write.csv(
Down,
file = paste0(filename_down, n, ".csv")
)
}
sigDEG <- cbind(DEGsE, DEGsS)
if (export) {
write.csv(
sigDEG,
file = paste0(filename_sigdeg, ".csv")
)
}
DEGsTable[n, 1] <- paste0("Cluster ", n)
DEGsTable[n, 2] <- "Remaining Clusters"
DEGsTable[n, 3] <- length(Up[, 1])
DEGsTable[n, 4] <- paste0(filename_up, n, ".csv")
DEGsTable[n, 5] <- length(Down[, 1])
DEGsTable[n, 6] <- paste0(filename_down, n, ".csv")
}
}
}
if (length(DEGsTable) > 0) {
colnames(DEGsTable) <- c(
"Target Cluster", "VS", "Gene number", "File name",
"Gene number", "File name"
)
if (export) {
write.csv(DEGsTable, file = paste0(filename_binom, ".csv"))
}
return(list(sigDEG, DEGsTable))
} else {
print(paste("There are no DEGs with fdr =", fdr))
}
}
)