# Source: R/DIscBIO-generic-ClassVectoringDT.R (revision 381c22)
# 101 lines (95 with data), 3.3 kB
#' @title Generating a class vector to be used for the decision tree analysis.
#' @description Builds the input table for a decision tree analysis: the
#' normalized expression data is restricted to the cells of two target
#' clusters and to the differentially expressed genes (DEGs), and every cell
#' column is labelled with the cluster it belongs to.
#' @param object \code{DISCBIO} class object.
#' @param Clustering Which clustering result to read the cell assignments
#' from. Must be either "K-means" or "MB". Default is "K-means"
#' @param K A numeric value of the number of clusters. Cluster IDs
#' \code{1..K} are labelled "CL1" ... "CL<K>".
#' @param First A string naming the first target cluster. Default is "CL1"
#' @param Second A string naming the second target cluster. Default is "CL2"
#' @param sigDEG A data frame of DEGs, as produced by the DEG analysis step
#' (e.g. "DEGanalysis()"). Its first column must hold the gene identifiers
#' used as row names of the expression data, it must contain a column named
#' \code{DEGsE} (used as merge key), and its last column supplies the row
#' names of the result.
#' @param quiet If `TRUE`, suppresses intermediary output
#' @return A data frame with one row per DEG and one column per selected
#' cell; column names are the cluster labels.
setGeneric(
  name = "ClassVectoringDT",
  def = function(object,
                 Clustering = "K-means",
                 K,
                 First = "CL1",
                 Second = "CL2",
                 sigDEG,
                 quiet = FALSE) {
    standardGeneric("ClassVectoringDT")
  }
)
#' @rdname ClassVectoringDT
#' @export
setMethod(
  "ClassVectoringDT",
  signature = "DISCBIO",
  definition = function(
    object, Clustering = "K-means", K, First = "CL1", Second = "CL2", sigDEG,
    quiet = FALSE
  ) {
    # Purpose: build the decision-tree input table. The expression data is
    # re-normalized, cell columns are renamed to their cluster label
    # ("CL1" ... "CL<K>"), only cells of `First`/`Second` and genes listed in
    # `sigDEG` are kept, and rows are renamed using the last column of
    # `sigDEG`. Returns a data frame whose column names are the cluster
    # labels.

    # ---- Argument validation ----
    if (!(Clustering %in% c("K-means", "MB"))) {
      stop("Clustering has to be either K-means or MB")
    }
    if (nrow(sigDEG) < 1) {
      stop(
        "run DEGanalysis or DEGanalysis2clust ",
        "before running ClassVectoringDT"
      )
    }

    # ---- Cluster assignment of every cell ----
    # `Clustering` was validated above, so anything that is not "K-means"
    # is "MB".
    Cluster_ID <- if (Clustering == "K-means") {
      object@cpart
    } else {
      object@MBclusters$clusterid
    }

    # ---- Re-normalize the raw expression data ----
    # NOTE(review): Normalizedata() is called with its defaults; this assumes
    # the resulting columns line up with `Cluster_ID` - confirm.
    SC <- Normalizedata(DISCBIO(object@expdata))
    DatasetForDT <- SC@fdata

    # ---- Rename each cell column to its cluster label ----
    # seq_len() instead of 1:K so that K = 0 yields no iteration rather than
    # the sequence c(1, 0).
    cluster_ids <- seq_len(K)
    cluster_labels <- paste0("CL", cluster_ids)
    Nam <- colnames(DatasetForDT)
    for (n in cluster_ids) {
      Nam <- ifelse(Cluster_ID == n, cluster_labels[n], Nam)
    }
    colnames(DatasetForDT) <- Nam

    # ---- Keep only the cells of the two target clusters ----
    chosenColumns <- which(
      colnames(DatasetForDT) == First |
        colnames(DatasetForDT) == Second
    )
    if (length(chosenColumns) == 0) {
      # Without this check a mistyped cluster label silently produced an
      # empty result.
      stop(
        "No cells are labelled ", First, " or ", Second,
        "; check K, First and Second"
      )
    }
    # drop = FALSE keeps a two-dimensional object even if a single cell is
    # selected.
    sg1 <- DatasetForDT[, chosenColumns, drop = FALSE]

    # ---- Keep only the DEGs that are present in the dataset ----
    gene_names <- rownames(DatasetForDT)
    deg_in_data <- gene_names[gene_names %in% sigDEG[, 1]]
    DEGsfilteredDataset <- sg1[deg_in_data, , drop = FALSE]
    if (!quiet) {
      message(
        "The DEGs filtered normalized dataset contains:\n",
        "Genes: ", nrow(DEGsfilteredDataset), "\n",
        "cells: ", ncol(DEGsfilteredDataset)
      )
    }

    # ---- Attach the DEG annotation and rename the rows ----
    # The variable must be called `genes`: cbind() derives the column name
    # from it and merge() then joins on that column.
    genes <- rownames(DEGsfilteredDataset)
    DATAforDT <- cbind(genes, DEGsfilteredDataset)
    DATAforDT <- merge(DATAforDT, sigDEG, by.x = "genes", by.y = "DEGsE")

    # The last column of the merged table (i.e. the last column of `sigDEG`)
    # becomes the row name; rows with a repeated name are dropped (first
    # occurrence wins) because row names must be unique.
    last_col <- ncol(DATAforDT)
    DATAforDT[, 1] <- DATAforDT[, last_col]
    DATAforDT <- DATAforDT[!duplicated(DATAforDT[, 1]), , drop = FALSE]
    rownames(DATAforDT) <- DATAforDT[, 1]
    # Remove the helper key column and the annotation column again.
    DATAforDT <- DATAforDT[, c(-1, -last_col), drop = FALSE]

    # ---- Final column labels ----
    # Strip whatever follows the cluster label in the column names (e.g. a
    # de-duplication suffix such as "CL1.3"), leaving just First / Second.
    sg <- factor(
      gsub(
        paste0("(", First, "|", Second, ").*"),
        "\\1",
        colnames(DATAforDT)
      ),
      levels = c(as.character(First), as.character(Second))
    )
    sg <- sg[!is.na(sg)]
    colnames(DATAforDT) <- sg
    DATAforDT
  }
)