--- a +++ b/R/DIscBIO-generic-DEGanalysis2clust.R @@ -0,0 +1,338 @@ +#' @title Determining differentially expressed genes (DEGs) between two +#' particular clusters. +#' @description This function defines DEGs between particular clusters generated +#' by either K-means or model based clustering. +#' @param object \code{DISCBIO} class object. +#' @param Clustering Clustering has to be one of the following: +#' ["K-means","MB"]. Default is "K-means" +#' @param K A numeric value of the number of clusters. +#' @param fdr A numeric value of the false discovery rate. Default is 0.05. +#' @param name A string vector showing the name to be used to save the resulted +#' tables. +#' @param First A string vector showing the first target cluster. Default is +#' "CL1" +#' @param Second A string vector showing the second target cluster. Default is +#' "CL2" +#' @param export A logical vector that allows writing the final gene list in +#' excel file. Default is TRUE. +#' @param quiet if `TRUE`, suppresses intermediate text output +#' @param plot if `TRUE`, plots are generated +#' @param filename_deg Name of the exported DEG table +#' @param filename_sigdeg Name of the exported sigDEG table +#' @param ... additional parameters to be passed to samr() +#' @importFrom graphics title +#' @importFrom utils write.csv capture.output +#' @importFrom AnnotationDbi keys +#' @return A list containing two tables. +setGeneric( + "DEGanalysis2clust", + function(object, K, Clustering = "K-means", fdr = 0.05, name = "Name", + First = "CL1", Second = "CL2", export = FALSE, quiet = FALSE, + plot = TRUE, filename_deg = "DEGsTable", filename_sigdeg = "sigDEG", + ...) { + standardGeneric("DEGanalysis2clust") + } +) + +#' @export +#' @rdname DEGanalysis2clust +setMethod( + "DEGanalysis2clust", + signature = "DISCBIO", + definition = function( + object, K, Clustering, fdr, name, First, Second, export, quiet, plot, + filename_deg, filename_sigdeg, ... + ) { + if (!(Clustering %in% c("K-means", "MB"))) { + stop("Clustering has to be either K-means or MB") + } + gene_list <- object@FinalGeneList + gene_names <- rownames(object@expdata) + idx_genes <- is.element(gene_names, gene_list) + gene_names2 <- gene_names[idx_genes] + dataset <- object@expdata[gene_names2, ] + Nam <- colnames(dataset) + if (Clustering == "K-means") { + Cluster_ID <- object@cpart + if (length(object@cpart) < 1) { + stop("run Clustexp before running DEGanalysis2clust") + } + } + + if (Clustering == "MB") { + Cluster_ID <- object@MBclusters$clusterid + if (length(object@MBclusters$clusterid) < 1) { + stop("run ExprmclustMB before running DEGanalysis2clust") + } + } + num <- seq_len(K) + num1 <- paste("CL", num, sep = "") + for (n in num) { + Nam <- ifelse((Cluster_ID == n), num1[n], Nam) + } + colnames(dataset) <- Nam + sg1 <- dataset[, which(colnames(dataset) == First)] + sg2 <- dataset[, which(colnames(dataset) == Second)] + sg <- cbind(sg1, sg2) + + sg3 <- factor( + gsub(paste0("(", First, "|", Second, ").*"), "\\1", colnames(sg)), + levels = c(paste0(First), paste0(Second)) + ) + sg3 <- sg3[!is.na(sg3)] + + colnames(sg) <- sg3 + len <- c( + length(sg[, which(colnames(sg) == First)]), + length(sg[, which(colnames(sg) == Second)]) + ) + y <- c(rep(1:2, len)) + L <- as.matrix(sg) + gname <- rownames(sg) + x <- L + data <- list(x = x, y = y, geneid = gname) + if (quiet) { + suppressMessages( + invisible(capture.output({ + samr.obj <- sammy( + data, + resp.type = "Two class unpaired", + assay.type = "seq", + testStatistic = "wilcoxon", + random.seed = 15, + ... + ) + delta.table <- samr.compute.delta.table(samr.obj) + })) + ) + } else { + samr.obj <- sammy( + data, + resp.type = "Two class unpaired", + assay.type = "seq", + testStatistic = "wilcoxon", + random.seed = 15, + ... + ) + delta.table <- samr.compute.delta.table(samr.obj) + } + DEGsTable <- data.frame() + DEGsE <- vector() + DEGsS <- vector() + wm <- which.min(delta.table[, 5]) + if (delta.table[wm, 5] <= fdr) { + w <- which(delta.table[, 5] <= fdr) + if (is.null(w)) stop("No suitable deltas. Try a lower FDR value.") + delta <- delta.table[w[1], 1] - 0.001 + if (plot) { + samr.plot(samr.obj, delta) + title(paste("DEGs in the", Second, "in", First, "VS", Second)) + } + siggenes.table <- samr.compute.siggenes.table( + samr.obj, delta, data, delta.table + ) + # ------------------------------------------------------------------ + # Reformat siggenes.table as data.frame + # ------------------------------------------------------------------ + siggenes.table$genes.lo <- reformatSiggenes(siggenes.table$genes.lo) + siggenes.table$genes.up <- reformatSiggenes(siggenes.table$genes.up) + + FDRl <- as.numeric(siggenes.table$genes.lo[, 8]) / 100 + FDRu <- as.numeric(siggenes.table$genes.up[, 8]) / 100 + + siggenes.table$genes.lo[, 8] <- FDRl + siggenes.table$genes.up[, 8] <- FDRu + + DEGsTable[1, 1] <- paste0(First, " VS ", Second) + DEGsTable[1, 2] <- Second + DEGsTable[1, 3] <- length(FDRu) + DEGsTable[1, 4] <- paste0( + "Up-regulated-", name, Second, "in", First, "VS", Second, + ".csv" + ) + DEGsTable[1, 5] <- length(FDRl) + DEGsTable[1, 6] <- paste0( + "Low-regulated-", name, Second, "in", First, "VS", Second, + ".csv" + ) + DEGsTable[2, 1] <- paste0(First, " VS ", Second) + DEGsTable[2, 2] <- First + DEGsTable[2, 3] <- length(FDRu) + DEGsTable[2, 4] <- paste0( + "Low-regulated-", name, First, "in", First, "VS", Second, + ".csv" + ) + DEGsTable[2, 5] <- length(FDRl) + DEGsTable[2, 6] <- paste0( + "Up-regulated-", name, First, "in", First, "VS", Second, + ".csv" + ) + FinalDEGsL <- data.frame() + if (length(FDRl) > 0) { + genes <- siggenes.table$genes.lo[, 3] + if (quiet) { + suppressMessages( + geneList <- AnnotationDbi::select( + org.Hs.eg.db, + keys = keys(org.Hs.eg.db), + columns = c("SYMBOL", "ENSEMBL") + ) + ) + GL <- c(1, "MTRNR2", "ENSG00000210082") + GL1 <- c(1, "MTRNR1", "ENSG00000211459") + geneList <- rbind(geneList, GL, GL1) + } else { + geneList <- AnnotationDbi::select( + org.Hs.eg.db, + keys = keys(org.Hs.eg.db), + columns = c("SYMBOL", "ENSEMBL") + ) + GL <- c(1, "MTRNR2", "ENSG00000210082") + GL1 <- c(1, "MTRNR1", "ENSG00000211459") + geneList <- rbind(geneList, GL, GL1) + } + FinalDEGsL <- cbind(genes, siggenes.table$genes.lo) + gene_list <- geneList[, 3] + idx_genes <- is.element(gene_list, genes) + genes2 <- geneList[idx_genes, ] + if (!is.null(FinalDEGsL)) { + FinalDEGsL <- merge( + FinalDEGsL, + genes2, + by.x = "genes", + by.y = "ENSEMBL", + all.x = TRUE + ) + FinalDEGsL[, 3] <- FinalDEGsL[, 11] + FinalDEGsL <- FinalDEGsL[, c(-1, -10, -11)] + FinalDEGsL <- FinalDEGsL[order(FinalDEGsL[, 8]), ] + FinalDEGsL[is.na(FinalDEGsL[, 2]), c(2, 3)] <- + FinalDEGsL[is.na(FinalDEGsL[, 2]), 3] + } + if (export) { + message("The results of DEGs are saved in your directory") + message( + "Low-regulated genes in the ", Second, " in ", + First, " VS ", Second, "\n" + ) + write.csv( + FinalDEGsL, + file = paste0( + "Low-regulated-", name, Second, "in", First, + "VS", Second, ".csv" + ) + ) + write.csv( + FinalDEGsL, + file = paste0( + "Up-regulated-", name, First, "in", First, "VS", + Second, ".csv" + ) + ) + } + DEGsS <- c(DEGsS, FinalDEGsL[, 2]) + DEGsE <- c(DEGsE, as.character(FinalDEGsL[, 3])) + } + FinalDEGsU <- data.frame() + if (length(FDRu) > 0) { + genes <- siggenes.table$genes.up[, 3] + if (quiet) { + suppressMessages( + geneList <- AnnotationDbi::select( + org.Hs.eg.db, + keys = keys(org.Hs.eg.db), + columns = c("SYMBOL", "ENSEMBL") + ) + ) + GL <- c(1, "MTRNR2", "ENSG00000210082") + geneList <- rbind(geneList, GL) + } else { + geneList <- AnnotationDbi::select( + org.Hs.eg.db, + keys = keys(org.Hs.eg.db), + columns = c("SYMBOL", "ENSEMBL") + ) + GL <- c(1, "MTRNR2", "ENSG00000210082") + geneList <- rbind(geneList, GL) + } + FinalDEGsU <- cbind(genes, siggenes.table$genes.up) + gene_list <- geneList[, 3] + idx_genes <- is.element(gene_list, genes) + genes2 <- geneList[idx_genes, ] + if (!is.null(FinalDEGsU)) { + FinalDEGsU <- merge( + FinalDEGsU, + genes2, + by.x = "genes", + by.y = "ENSEMBL", + all.x = TRUE + ) + FinalDEGsU[, 3] <- FinalDEGsU[, 11] + FinalDEGsU <- FinalDEGsU[, c(-1, -10, -11)] + FinalDEGsU <- FinalDEGsU[order(FinalDEGsU[, 8]), ] + FinalDEGsU[is.na(FinalDEGsU[, 2]), c(2, 3)] <- + FinalDEGsU[is.na(FinalDEGsU[, 2]), 3] + } + if (export) { + message("The results of DEGs are saved in your directory") + message( + "Up-regulated genes in the ", Second, " in ", First, + " VS ", Second, "\n" + ) + write.csv( + FinalDEGsU, + file = paste0( + "Up-regulated-", name, Second, "in", First, "VS", + Second, ".csv" + ) + ) + write.csv( + FinalDEGsU, + file = paste0( + "Low-regulated-", name, First, "in", First, "VS", + Second, ".csv" + ) + ) + } + DEGsS <- c(DEGsS, FinalDEGsU[, 2]) + DEGsE <- c(DEGsE, as.character(FinalDEGsU[, 3])) + } + } else { + DEGsTable[1, 1] <- paste0(First, " VS ", Second) + DEGsTable[1, 2] <- Second + DEGsTable[1, 3] <- NA + DEGsTable[1, 4] <- NA + DEGsTable[1, 5] <- NA + DEGsTable[1, 6] <- NA + + DEGsTable[2, 1] <- paste0(First, " VS ", Second) + DEGsTable[2, 2] <- First + DEGsTable[2, 3] <- NA + DEGsTable[2, 4] <- NA + DEGsTable[2, 5] <- NA + DEGsTable[2, 6] <- NA + } + colnames(DEGsTable) <- c( + "Comparisons", + "Target cluster", + "Gene number", + "File name", + "Gene number", + "File name" + ) + if (!quiet) print(DEGsTable) + sigDEG <- cbind(DEGsE, DEGsS) + if (export) { + write.csv(DEGsTable, file = paste0(filename_deg, ".csv")) + write.csv(sigDEG, file = paste0(filename_sigdeg, ".csv")) + } + return( + list( + sigDEG = sigDEG, + DEGsTable = DEGsTable, + FinalDEGsL = FinalDEGsL, + FinalDEGsU = FinalDEGsU + ) + ) + } +)