Switch to unified view

a b/R/DIscBIO-generic-ClustDiffGenes.R
1
#' @title ClustDiffGenes
2
#' @description Creates a table of cluster differences
3
#' @param object \code{DISCBIO} class object.
4
#' @param K A numeric value of the number of clusters.
5
#' @param pValue A numeric value of the p-value. Default is 0.05.
6
#' @param fdr A numeric value of the false discovery rate. Default is 0.01.
7
#' @param export A logical vector that allows writing the final gene list in
8
#'   excel file. Default is TRUE.
9
#' @param quiet if `TRUE`, suppresses intermediate text output
10
#' @param filename_up Name of the exported "up" file (if `export=TRUE`)
11
#' @param filename_down Name of the exported "down" file (if `export=TRUE`)
12
#' @param filename_binom Name of the exported binomial file
13
#' @param filename_sigdeg Name of the exported sigDEG file
14
#' @importFrom stats pbinom median
15
#' @import org.Hs.eg.db
16
#' @rdname ClustDiffGenes
17
#' @return A list containing two tables.
18
#' @export
19
#' @examples
20
#' sc <- DISCBIO(valuesG1msTest)
21
#' sc <- Clustexp(sc, cln = 3, quiet = TRUE)
22
#' cdiff <- ClustDiffGenes(sc, K = 3, fdr = .3, export = FALSE)
23
#' str(cdiff)
24
#' cdiff[[2]]
25
setGeneric(
26
  "ClustDiffGenes",
27
  function(
28
      object, K, pValue = 0.05, fdr = .01, export = FALSE, quiet = FALSE,
29
      filename_up = "Up-DEG-cluster",
30
      filename_down = "Down-DEG-cluster",
31
      filename_binom = "binomial-DEGsTable",
32
      filename_sigdeg = "binomial-sigDEG") {
33
    standardGeneric("ClustDiffGenes")
34
  }
35
)
36
#' @export
37
#' @rdname ClustDiffGenes
38
setMethod(
39
  "ClustDiffGenes",
40
  signature = "DISCBIO",
41
  definition = function(
42
      object, K, pValue, fdr, export, quiet, filename_up, filename_down,
43
      filename_binom, filename_sigdeg) {
44
    # ======================================================================
45
    # Validating
46
    # ======================================================================
47
    ran_k <- length(object@kmeans$kpart) > 0
48
    ran_m <- length(object@MBclusters) > 0
49
    if (!is.numeric(fdr)) {
50
      stop("fdr has to be a number between 0 and 1")
51
    } else if (fdr < 0 | fdr > 1) {
52
      stop("fdr has to be a number between 0 and 1")
53
    }
54
    if (!is.numeric(pValue)) {
55
      stop("pValue has to be a number between 0 and 1")
56
    } else if (pValue < 0 | pValue > 1) {
57
      stop("pValue has to be a number between 0 and 1")
58
    }
59
    cdiff <- list()
60
    x <- object@ndata
61
    y <- object@expdata[, names(object@ndata)]
62
    if (ran_k) {
63
      part <- object@kmeans$kpart
64
    } else if (ran_m) {
65
      part <- object@MBclusters$clusterid
66
    } else {
67
      stop("Run clustering before running this function")
68
    }
69
    # ======================================================================
70
    # Operating
71
    # ======================================================================
72
    for (i in 1:max(part)) {
73
      if (sum(part == i) == 0) {
74
        next
75
      }
76
      m <- apply(x, 1, mean)
77
      n <-
78
        if (sum(part == i) > 1) {
79
          apply(x[, part == i], 1, mean)
80
        } else {
81
          x[, part == i]
82
        }
83
      no <-
84
        if (sum(part == i) > 1) {
85
          median(apply(y[, part == i], 2, sum)) /
86
            median(apply(x[, part == i], 2, sum))
87
        } else {
88
          sum(y[, part == i]) / sum(x[, part == i])
89
        }
90
      m <- m * no
91
      n <- n * no
92
      pv <- binompval(m / sum(m), sum(n), n)
93
      d <-
94
        data.frame(
95
          mean.all = m,
96
          mean.cl = n,
97
          fc = n / m,
98
          pv = pv
99
        )[order(pv, decreasing = FALSE), ]
100
      cdiff[[i]] <- d[d$pv < pValue, ]
101
    }
102
    DEGsE <- vector()
103
    DEGsS <- vector()
104
    DEGsTable <- data.frame()
105
106
    for (n in 1:K) {
107
      if (length(cdiff[[n]][, 1]) == 0) {
108
        next
109
      }
110
111
      if (length(cdiff[[n]][, 1]) > 0) {
112
        p.adj <- p.adjust(cdiff[[n]][, 4], method = "bonferroni")
113
        out <- cbind(cdiff[[n]], p.adj)
114
        out <- subset(out, out[, 5] < fdr)
115
        if (length(out[, 1]) > 0) {
116
          Regulation <- vector()
117
          for (i in seq_len(length(out[, 1]))) {
118
            if (out[i, 1] > out[i, 2]) {
119
              Regulation[i] <- "Down"
120
            } else {
121
              Regulation[i] <- "Up"
122
            }
123
          }
124
          out <- cbind(out, Regulation)
125
          if (quiet) {
126
            suppressMessages(
127
              geneList <-
128
                AnnotationDbi::select(
129
                  org.Hs.eg.db,
130
                  keys = keys(org.Hs.eg.db),
131
                  columns = c("SYMBOL", "ENSEMBL")
132
                )
133
            )
134
            GL <- c(1, "MTRNR2", "ENSG00000210082")
135
            GL1 <- c(1, "MTRNR1", "ENSG00000211459")
136
            geneList <- rbind(geneList, GL, GL1)
137
          } else {
138
            geneList <-
139
              AnnotationDbi::select(
140
                org.Hs.eg.db,
141
                keys = keys(org.Hs.eg.db),
142
                columns = c("SYMBOL", "ENSEMBL")
143
              )
144
            GL <- c(1, "MTRNR2", "ENSG00000210082")
145
            GL1 <- c(1, "MTRNR1", "ENSG00000211459")
146
            geneList <- rbind(geneList, GL, GL1)
147
          }
148
          genes <- rownames(out)
149
          gene_list <- geneList[, 3]
150
          idx_genes <- is.element(gene_list, genes)
151
          genes2 <- geneList[idx_genes, ]
152
          Final <- cbind(genes, out)
153
154
          Final <-
155
            merge(
156
              Final,
157
              genes2,
158
              by.x = "genes",
159
              by.y = "ENSEMBL",
160
              all.x = TRUE
161
            )
162
          Final <- Final[!duplicated(Final[, 1]), ]
163
          Final[is.na(Final[, 9]), c(1, 9)] <-
164
            Final[is.na(Final[, 9]), 1]
165
          rownames(Final) <- Final[, 1]
166
          Final[, 1] <- Final[, 9]
167
          Final <- Final[, -9]
168
          DEGsS <- c(DEGsS, Final[, 1])
169
          DEGsE <-
170
            c(DEGsE, as.character(rownames(Final)))
171
          Up <- subset(Final, Final[, 7] == "Up")
172
          cols_to_keep <- c(
173
            "Regulation", "genes", "pv", "mean.all", "mean.cl", "fc", "p.adj"
174
          )
175
          Up <- Up[, cols_to_keep]
176
          Up[, 3] <- rownames(Up)
177
          Up[, 6] <- log2(Up[, 6])
178
          Up[, 1] <- Up[, 2]
179
          colnames(Up) <- c(
180
            "Genes",
181
            "genes",
182
            "E.genes",
183
            "mean.all",
184
            "mean.cl",
185
            "log2.fc",
186
            "p.adj"
187
          )
188
          if (export) {
189
            write.csv(
190
              Up,
191
              file = paste0(filename_up, n, ".csv")
192
            )
193
          }
194
195
          Down <- subset(Final, Final[, 7] == "Down")
196
          cols_to_keep <- c(
197
            "Regulation", "genes", "pv", "mean.all", "mean.cl", "fc", "p.adj"
198
          )
199
          Down <- Down[, cols_to_keep]
200
          Down[, 3] <- rownames(Down)
201
          Down[, 6] <- log2(Down[, 6])
202
          Down[, 1] <- Down[, 2]
203
          colnames(Down) <- c(
204
            "Genes",
205
            "genes",
206
            "E.genes",
207
            "mean.all",
208
            "mean.cl",
209
            "log2.fc",
210
            "p.adj"
211
          )
212
          if (export) {
213
            write.csv(
214
              Down,
215
              file = paste0(filename_down, n, ".csv")
216
            )
217
          }
218
219
          sigDEG <- cbind(DEGsE, DEGsS)
220
          if (export) {
221
            write.csv(
222
              sigDEG,
223
              file = paste0(filename_sigdeg, ".csv")
224
            )
225
          }
226
227
          DEGsTable[n, 1] <- paste0("Cluster ", n)
228
          DEGsTable[n, 2] <- "Remaining Clusters"
229
          DEGsTable[n, 3] <- length(Up[, 1])
230
          DEGsTable[n, 4] <- paste0(filename_up, n, ".csv")
231
          DEGsTable[n, 5] <- length(Down[, 1])
232
          DEGsTable[n, 6] <- paste0(filename_down, n, ".csv")
233
        }
234
      }
235
    }
236
    if (length(DEGsTable) > 0) {
237
      colnames(DEGsTable) <- c(
238
        "Target Cluster", "VS", "Gene number", "File name",
239
        "Gene number", "File name"
240
      )
241
      if (export) {
242
        write.csv(DEGsTable, file = paste0(filename_binom, ".csv"))
243
      }
244
      return(list(sigDEG, DEGsTable))
245
    } else {
246
      print(paste("There are no DEGs with fdr =", fdr))
247
    }
248
  }
249
)