# File: R/DIscBIO-generic-ClassVectoringDT.R
#' @title Generating a class vector to be used for the decision tree analysis.
#' @description This function generates a class vector for the input dataset so
#'   the decision tree analysis can be implemented afterwards.
#' @param object \code{DISCBIO} class object.
#' @param Clustering Clustering has to be one of the following: ["K-means",
#'   "MB"]. Default is "K-means"
#' @param K A numeric value of the number of clusters.
#' @param First A string vector showing the first target cluster.  Default is
#'   "CL1"
#' @param Second A string vector showing the second target cluster.  Default is
#'   "CL2"
#' @param sigDEG A data frame of the differentially expressed genes (DEGs)
#'   generated by running "DEGanalysis()" or "DEGanalysisM()". It is merged
#'   with the expression data on its \code{DEGsE} column, so that column must
#'   be present.
#' @param quiet If `TRUE`, suppresses intermediary output
#' @return A data frame restricted to the genes listed in \code{sigDEG} and to
#'   the cells of the two target clusters, with columns labelled by
#'   \code{First} or \code{Second}.
setGeneric(
  "ClassVectoringDT",
  function(object, Clustering = "K-means", K, First = "CL1", Second = "CL2",
           sigDEG, quiet = FALSE) {
    standardGeneric("ClassVectoringDT")
  }
)

#' @rdname ClassVectoringDT
#' @export
setMethod(
  "ClassVectoringDT",
  signature = "DISCBIO",
  definition = function(
    object, Clustering = "K-means", K, First = "CL1", Second = "CL2", sigDEG,
    quiet = FALSE
  ) {
    # ---- Argument validation ----
    if (!(Clustering %in% c("K-means", "MB"))) {
      stop("Clustering has to be either K-means or MB")
    }
    if (NROW(sigDEG) < 1) {
      stop(
        "run DEGanalysis or DEGanalysis2clust ",
        "before running ClassVectoringDT"
      )
    }

    # ---- Pick the cluster assignment matching the clustering method ----
    Cluster_ID <- switch(Clustering,
      "K-means" = object@cpart,
      "MB" = object@MBclusters$clusterid
    )

    # ---- Re-normalize the raw expression data in a fresh object ----
    SC <- Normalizedata(DISCBIO(object@expdata))
    DatasetForDT <- SC@fdata

    # ---- Rename every cell (column) after its cluster: CL1, CL2, ... ----
    # Cells whose cluster id is not in 1..K keep their original column name.
    Nam <- colnames(DatasetForDT)
    cluster_labels <- paste0("CL", seq_len(K))
    for (n in seq_len(K)) {
      Nam <- ifelse(Cluster_ID == n, cluster_labels[n], Nam)
    }
    colnames(DatasetForDT) <- Nam

    # ---- Keep only the cells of the two target clusters ----
    chosenColumns <- which(
      colnames(DatasetForDT) == First |
        colnames(DatasetForDT) == Second
    )
    # drop = FALSE keeps a two-dimensional object even if one cell matches.
    sg1 <- DatasetForDT[, chosenColumns, drop = FALSE]

    # ---- Keep only the rows (genes) listed in the first sigDEG column ----
    gene_names <- rownames(DatasetForDT)
    kept_genes <- gene_names[is.element(gene_names, sigDEG[, 1])]
    DEGsfilteredDataset <- sg1[kept_genes, , drop = FALSE]
    if (!quiet) {
      message(
        "The DEGs filtered normalized dataset contains:\n",
        "Genes: ", nrow(DEGsfilteredDataset), "\n",
        "cells: ", ncol(DEGsfilteredDataset)
      )
    }

    # ---- Attach the DEG annotation and switch to its identifiers ----
    # NOTE: the variable must stay named `genes`; cbind() derives the column
    # name from it and merge() joins on that name.
    genes <- rownames(DEGsfilteredDataset)
    DATAforDT <- cbind(genes, DEGsfilteredDataset)
    DATAforDT <- merge(DATAforDT, sigDEG, by.x = "genes", by.y = "DEGsE")

    # The last merged column (the last non-key column of sigDEG) replaces the
    # gene ids and becomes the row names; duplicates keep their first row.
    # NOTE(review): presumably this column holds gene symbols, and sigDEG is
    # expected to have exactly two columns - confirm against DEGanalysis().
    DATAforDT[, 1] <- DATAforDT[, ncol(DATAforDT)]
    DATAforDT <- DATAforDT[!duplicated(DATAforDT[, 1]), ]
    rownames(DATAforDT) <- DATAforDT[, 1]
    # Drop the id column and the annotation column; only cells remain.
    DATAforDT <- DATAforDT[, c(-1, -ncol(DATAforDT)), drop = FALSE]

    # ---- Relabel columns with their bare class label ----
    # Data-frame subsetting de-duplicates repeated column names by appending
    # ".<n>" (CL1, CL1.1, CL1.2, ...). Strip only that numeric suffix and
    # match the labels exactly, so that e.g. "CL10.3" is never mistaken for
    # "CL1" when one label is a prefix of the other.
    base_labels <- sub("\\.[0-9]+$", "", colnames(DATAforDT))
    sg <- factor(base_labels, levels = c(paste0(First), paste0(Second)))
    sg <- sg[!is.na(sg)]
    colnames(DATAforDT) <- sg
    return(DATAforDT)
  }
)