|
a |
|
b/R/DIscBIO-generic-ClassVectoringDT.R |
|
|
1 |
#' @title Generating a class vector to be used for the decision tree analysis.
#' @description Relabels the cells of the input dataset with the cluster they
#' belong to, keeping only the two requested clusters and the supplied
#' differentially expressed genes, so that the decision tree analysis can be
#' run on the result.
#' @param object \code{DISCBIO} class object.
#' @param Clustering Which clustering result to read the cluster assignment
#' from. Has to be one of the following: ["K-means", "MB"]. Default is
#' "K-means"
#' @param K A numeric value of the number of clusters.
#' @param First A string naming the first target cluster. Default is "CL1"
#' @param Second A string naming the second target cluster. Default is "CL2"
#' @param sigDEG A data frame of the differentially expressed genes (DEGs)
#' generated by running "DEGanalysis()" or "DEGanalysisM()".
#' @param quiet If `TRUE`, suppresses intermediary output
#' @return A data frame.
setGeneric(
  name = "ClassVectoringDT",
  def = function(
    object,
    Clustering = "K-means",
    K,
    First = "CL1",
    Second = "CL2",
    sigDEG,
    quiet = FALSE
  ) {
    standardGeneric("ClassVectoringDT")
  }
)
|
|
23 |
|
|
|
24 |
#' @rdname ClassVectoringDT
#' @export
setMethod(
  "ClassVectoringDT",
  signature = "DISCBIO",
  definition = function(
    object, Clustering = "K-means", K, First = "CL1", Second = "CL2", sigDEG,
    quiet = FALSE
  ) {
    # ---- Argument validation ----
    if (!(Clustering %in% c("K-means", "MB"))) {
      stop("Clustering has to be either K-means or MB")
    }
    if (length(sigDEG[, 1]) < 1) {
      stop(
        "run DEGanalysis or DEGanalysis2clust ",
        "before running ClassVectoringDT"
      )
    }

    # ---- Per-cell cluster assignment for the requested clustering ----
    Cluster_ID <- switch(Clustering,
      "K-means" = object@cpart,
      "MB" = object@MBclusters$clusterid
    )

    # ---- Rebuild the dataset from the raw expression data ----
    # A fresh DISCBIO object is created from `expdata`, passed through
    # Normalizedata(), and its `fdata` slot is used from here on.
    SC <- Normalizedata(DISCBIO(object@expdata))
    DatasetForDT <- SC@fdata

    # ---- Rename every cell (column) to its cluster label "CL<n>" ----
    # seq_len() keeps the loop empty for K = 0 (1:K would iterate over 1, 0).
    Nam <- colnames(DatasetForDT)
    cluster_labels <- paste0("CL", seq_len(K))
    for (n in seq_len(K)) {
      Nam <- ifelse(Cluster_ID == n, cluster_labels[n], Nam)
    }
    colnames(DatasetForDT) <- Nam

    # ---- Keep only the cells of the two target clusters ----
    chosenColumns <- which(colnames(DatasetForDT) %in% c(First, Second))
    # drop = FALSE: a single selected cell must stay a one-column table,
    # otherwise the row subsetting below fails on a plain vector.
    sg1 <- DatasetForDT[, chosenColumns, drop = FALSE]

    # ---- Creating a dataset that includes only the DEGs ----
    gene_names <- rownames(DatasetForDT)
    gene_names2 <- gene_names[gene_names %in% sigDEG[, 1]]
    DEGsfilteredDataset <- sg1[gene_names2, , drop = FALSE]
    if (!quiet) {
      message(
        "The DEGs filtered normalized dataset contains:\n",
        "Genes: ", nrow(DEGsfilteredDataset), "\n",
        "cells: ", ncol(DEGsfilteredDataset)
      )
    }

    # ---- Attach the DEG table and re-key the rows ----
    genes <- rownames(DEGsfilteredDataset)
    DATAforDT <- cbind(genes, DEGsfilteredDataset)
    DATAforDT <- merge(DATAforDT, sigDEG, by.x = "genes", by.y = "DEGsE")

    # The last column of the merged table (the last column of sigDEG) becomes
    # the row identifier; rows with a repeated identifier are discarded.
    last_col <- ncol(DATAforDT)
    DATAforDT[, 1] <- DATAforDT[, last_col]
    DATAforDT <- DATAforDT[!duplicated(DATAforDT[, 1]), ]
    rownames(DATAforDT) <- DATAforDT[, 1]

    # Remove the identifier column and the (now redundant) last column.
    # drop = FALSE keeps a data frame even when a single cell column is left.
    DATAforDT <- DATAforDT[, c(-1, -ncol(DATAforDT)), drop = FALSE]

    # ---- Collapse column names back to the bare cluster label ----
    # Column names may carry a suffix after the cluster label; anything after
    # the label is stripped, and names that are neither `First` nor `Second`
    # become NA.
    sg <- factor(
      gsub(
        paste0("(", First, "|", Second, ").*"),
        "\\1",
        colnames(DATAforDT)
      ),
      levels = c(paste0(First), paste0(Second))
    )

    # Columns that do not belong to either target cluster are removed together
    # with their NA label. Dropping only the labels (and not the columns) would
    # assign a shorter name vector and mislabel the remaining columns.
    keep <- !is.na(sg)
    DATAforDT <- DATAforDT[, keep, drop = FALSE]
    colnames(DATAforDT) <- as.character(sg[keep])

    return(DATAforDT)
  }
)