Switch to unified view

a b/R/DIscBIO-generic-DEGanalysis.R
1
#' @title Determining differentially expressed genes (DEGs) between all
2
#'   individual clusters.
3
#' @description This function defines DEGs between all individual clusters
4
#'   generated by either K-means or model based clustering.
5
#' @param object \code{DISCBIO} class object.
6
#' @param Clustering Clustering has to be one of the following:
7
#'   ["K-means","MB"]. Default is "K-means"
8
#' @param K A numeric value of the number of clusters.
9
#' @param fdr A numeric value of the false discovery rate. Default is 0.05.
10
#' @param name A string vector showing the name to be used to save the resulted
11
#'   tables.
12
#' @param export A logical vector that allows writing the final gene list in
13
#'   excel file. Default is TRUE.
14
#' @param quiet if `TRUE`, suppresses intermediate text output
15
#' @param plot if `TRUE`, plots are generated
16
#' @param filename_deg Name of the exported DEG table
17
#' @param filename_sigdeg Name of the exported sigDEG table
18
#' @importFrom graphics title
19
#' @importFrom utils write.csv capture.output
20
#' @param ... additional parameters to be passed to samr()
21
#' @return A list containing two tables.
22
setGeneric(
23
  name = "DEGanalysis",
24
  def = function(object,
25
                 K,
26
                 Clustering = "K-means",
27
                 fdr = 0.05,
28
                 name = "Name",
29
                 export = FALSE,
30
                 quiet = FALSE,
31
                 plot = TRUE,
32
                 filename_deg = "DEGsTable",
33
                 filename_sigdeg = "sigDEG",
34
                 ...) {
35
    standardGeneric("DEGanalysis")
36
  }
37
)
38
39
#' @export
40
#' @rdname DEGanalysis
41
setMethod(
42
  f = "DEGanalysis",
43
  signature = "DISCBIO",
44
  definition = function(object,
45
                        K,
46
                        Clustering = "K-means",
47
                        fdr = 0.05,
48
                        name = "Name",
49
                        export = FALSE,
50
                        quiet = FALSE,
51
                        plot = TRUE,
52
                        filename_deg,
53
                        filename_sigdeg,
54
                        ...) {
55
    # Validation
56
    if (!(Clustering %in% c("K-means", "MB"))) {
57
      stop("Clustering has to be either K-means or MB")
58
    }
59
    if (Clustering == "K-means") {
60
      Cluster_ID <- object@cpart
61
      if (length(object@cpart) < 1) {
62
        stop("run Clustexp before running DEGanalysisM")
63
      }
64
    }
65
    if (Clustering == "MB") {
66
      Cluster_ID <- object@MBclusters$clusterid
67
      if (length(object@MBclusters$clusterid) < 1) {
68
        stop("run ExprmclustMB before running DEGanalysisM")
69
      }
70
    }
71
72
    # Defining initial objects
73
    gene_list <- object@FinalGeneList
74
    gene_names <- rownames(object@expdata)
75
    idx_genes <- is.element(gene_names, gene_list)
76
    gene_names2 <- gene_names[idx_genes]
77
    dataset <- object@expdata[gene_names2, ]
78
    Nam <- colnames(dataset)
79
    num <- 1:K
80
    num1 <- paste("CL", num, sep = "")
81
82
    for (n in num) {
83
      Nam <- ifelse((Cluster_ID == n), num1[n], Nam)
84
    }
85
    colnames(dataset) <- Nam
86
87
    if (!quiet) {
88
      message("The dataset is ready for differential expression analysis")
89
    }
90
    num1 <- paste("CL", num, sep = "")
91
    clustName <- paste("Cl", num, sep = "")
92
    ClusterLength <- vector()
93
    for (i in num) {
94
      d1 <- clustName[i]
95
      d2 <- dataset[, which(names(dataset) == num1[i])]
96
      ClusterLength[i] <- length(d2)
97
      assign(d1, d2)
98
    }
99
    ccdf <- data.frame(clustName, ClusterLength)
100
    ccdff <- ccdf[order(ClusterLength), ]
101
    clustName <- ccdff[, 1]
102
    if (!quiet) {
103
      print(clustName)
104
    }
105
    first <- vector()
106
    second <- vector()
107
    if (K < 2) {
108
      stop("K has to be at least 2")
109
    } else if (K == 2) {
110
      first <- c(paste0(clustName[1]))
111
      second <- c(paste0(clustName[2]))
112
    } else if (K == 3) {
113
      first <- c(
114
        rep(paste0(clustName[1]), 2), rep(paste0(clustName[2]), 1)
115
      )
116
      second <- c(
117
        paste0(clustName[2]),
118
        paste0(clustName[3]),
119
        paste0(clustName[3])
120
      )
121
    } else if (K == 4) {
122
      first <- c(
123
        rep(paste0(clustName[1]), 3),
124
        rep(paste0(clustName[2]), 1),
125
        rep(paste0(clustName[4]), 1),
126
        rep(paste0(clustName[3]), 1)
127
      )
128
      second <- c(
129
        paste0(clustName[2]),
130
        paste0(clustName[3]),
131
        paste0(clustName[4]),
132
        paste0(clustName[3]),
133
        paste0(clustName[2]),
134
        paste0(clustName[4])
135
      )
136
    } else if (K == 5) {
137
      first <- c(
138
        rep(paste0(clustName[1]), 4),
139
        rep(paste0(clustName[2]), 3),
140
        rep(paste0(clustName[3]), 2),
141
        rep(paste0(clustName[5]), 1)
142
      )
143
      second <- c(
144
        paste0(clustName[2]),
145
        paste0(clustName[3]),
146
        paste0(clustName[4]),
147
        paste0(clustName[5]),
148
        paste0(clustName[3]),
149
        paste0(clustName[4]),
150
        paste0(clustName[5]),
151
        paste0(clustName[4]),
152
        paste0(clustName[5]),
153
        paste0(clustName[4])
154
      )
155
    } else if (K == 6) {
156
      first <- c(
157
        rep(paste0(clustName[1]), 3),
158
        rep(paste0(clustName[5]), 1),
159
        rep(paste0(clustName[1]), 1),
160
        rep(paste0(clustName[2]), 2),
161
        rep(paste0(clustName[5]), 1),
162
        rep(paste0(clustName[2]), 1),
163
        rep(paste0(clustName[3]), 1),
164
        rep(paste0(clustName[5]), 1),
165
        rep(paste0(clustName[3]), 1),
166
        rep(paste0(clustName[5]), 1),
167
        rep(paste0(clustName[4]), 1),
168
        rep(paste0(clustName[5]), 1)
169
      )
170
      second <- c(
171
        paste0(clustName[2]),
172
        paste0(clustName[3]),
173
        paste0(clustName[4]),
174
        paste0(clustName[1]),
175
        paste0(clustName[6]),
176
        paste0(clustName[3]),
177
        paste0(clustName[4]),
178
        paste0(clustName[2]),
179
        paste0(clustName[6]),
180
        paste0(clustName[4]),
181
        paste0(clustName[3]),
182
        paste0(clustName[6]),
183
        paste0(clustName[4]),
184
        paste0(clustName[6]),
185
        paste0(clustName[6])
186
      )
187
    }
188
    o <- 1:K
189
    oo <- o[-length(o)]
190
    com <- sum(oo)
191
    if (!quiet) message("Number of comparisons: ", com * 2, "\n")
192
    comNUM <- paste("comp", seq_len(com), sep = "")
193
    DEGsTable <- data.frame()
194
    DEGsE <- vector()
195
    DEGsS <- vector()
196
    for (i in seq_len(com)) {
197
      FinalDEGsL <- data.frame()
198
      FinalDEGsU <- data.frame()
199
      FDRl <- vector()
200
      FDRu <- vector()
201
202
      d1 <- comNUM[i]
203
      d2 <- cbind(get(first[i]), get(second[i]))
204
      assign(d1, d2)
205
      len <-
206
        c(length(get(first[i])[1, ]), length(get(second[i])[1, ]))
207
      y <- c(rep(1:2, len))
208
      L <- as.matrix(get(comNUM[i]))
209
      gname <- rownames(get(comNUM[i]))
210
      x <- L
211
      data <- list(x = x, y = y, geneid = gname)
212
      if (quiet) {
213
        invisible(capture.output(
214
          samr.obj <- sammy(
215
            data,
216
            resp.type = "Two class unpaired",
217
            assay.type = "seq",
218
            testStatistic = "wilcoxon",
219
            random.seed = 15,
220
            ...
221
          )
222
        ))
223
      } else {
224
        samr.obj <- sammy(
225
          data,
226
          resp.type = "Two class unpaired",
227
          assay.type = "seq",
228
          testStatistic = "wilcoxon",
229
          random.seed = 15,
230
          ...
231
        )
232
      }
233
      if (quiet) {
234
        invisible(capture.output(
235
          delta.table <- samr.compute.delta.table(samr.obj)
236
        ))
237
      } else {
238
        delta.table <- samr.compute.delta.table(samr.obj)
239
      }
240
      wm <- which.min(delta.table[, 5])
241
      if (delta.table[wm, 5] <= fdr) {
242
        w <- which(delta.table[, 5] <= fdr)
243
        delta <- delta.table[w[1], 1] - 0.001
244
245
        if (plot) {
246
          samr.plot(samr.obj, delta)
247
          title(paste0(
248
            "DEGs in the ", second[i], " in ", first[i], " VS ",
249
            second[i]
250
          ))
251
        }
252
        siggenes.table <- samr.compute.siggenes.table(
253
          samr.obj, delta, data, delta.table
254
        )
255
        FDRl <-
256
          as.numeric(siggenes.table$genes.lo[, 8]) / 100
257
        FDRu <-
258
          as.numeric(siggenes.table$genes.up[, 8]) / 100
259
260
        siggenes.table$genes.lo[, 8] <- FDRl
261
        siggenes.table$genes.up[, 8] <- FDRu
262
263
        DEGsTable[i, 1] <-
264
          paste0(first[i], " VS ", second[i])
265
        DEGsTable[i, 2] <- second[i]
266
        DEGsTable[i, 3] <- length(FDRu)
267
        DEGsTable[i, 4] <- paste0(
268
          "Up-regulated-", name, second[i], "in", first[i], "VS",
269
          second[i], ".csv"
270
        )
271
        DEGsTable[i, 5] <- length(FDRl)
272
        DEGsTable[i, 6] <- paste0(
273
          "Low-regulated-", name, second[i], "in", first[i], "VS",
274
          second[i], ".csv"
275
        )
276
        DEGsTable[i + com, 1] <-
277
          paste0(first[i], " VS ", second[i])
278
        DEGsTable[i + com, 2] <- first[i]
279
        DEGsTable[i + com, 3] <- length(FDRu)
280
        DEGsTable[i + com, 4] <- paste0(
281
          "Low-regulated-", name, first[i], "in", first[i], "VS",
282
          second[i], ".csv"
283
        )
284
        DEGsTable[i + com, 5] <- length(FDRl)
285
        DEGsTable[i + com, 6] <- paste0(
286
          "Up-regulated-", name, first[i], "in", first[i], "VS",
287
          second[i], ".csv"
288
        )
289
290
        if (length(FDRl) > 0) {
291
          genes <- siggenes.table$genes.lo[, 3]
292
          if (quiet) {
293
            suppressMessages(
294
              geneList <-
295
                AnnotationDbi::select(
296
                  org.Hs.eg.db,
297
                  keys = keys(org.Hs.eg.db),
298
                  columns = c("SYMBOL", "ENSEMBL")
299
                )
300
            )
301
            GL <- c(1, "MTRNR2", "ENSG00000210082")
302
            geneList <- rbind(geneList, GL)
303
          } else {
304
            geneList <-
305
              AnnotationDbi::select(
306
                org.Hs.eg.db,
307
                keys = keys(org.Hs.eg.db),
308
                columns = c("SYMBOL", "ENSEMBL")
309
              )
310
            GL <- c(1, "MTRNR2", "ENSG00000210082")
311
            geneList <- rbind(geneList, GL)
312
          }
313
          FinalDEGsL <- cbind(genes, siggenes.table$genes.lo)
314
          gene_list <- geneList[, 3]
315
          idx_genes <- is.element(gene_list, genes)
316
          genes2 <- geneList[idx_genes, ]
317
          FinalDEGsL <- merge(
318
            FinalDEGsL,
319
            genes2,
320
            by.x = "genes",
321
            by.y = "ENSEMBL",
322
            all.x = TRUE
323
          )
324
          FinalDEGsL[, 3] <- FinalDEGsL[, 11]
325
          FinalDEGsL <- FinalDEGsL[, c(-1, -10, -11)]
326
          FinalDEGsL <- FinalDEGsL[order(FinalDEGsL[, 8]), ]
327
          FinalDEGsL[is.na(FinalDEGsL[, 2]), c(2, 3)] <-
328
            FinalDEGsL[is.na(FinalDEGsL[, 2]), 3]
329
          if (!quiet) {
330
            message(
331
              "Low-regulated genes in the ", second[i],
332
              " in ", first[i], " VS ", second[i], "\n"
333
            )
334
          }
335
          if (export) {
336
            write.csv(
337
              FinalDEGsL,
338
              file = paste0(
339
                "Low-regulated-", name, second[i], "in",
340
                first[i], "VS", second[i], ".csv"
341
              )
342
            )
343
            write.csv(
344
              FinalDEGsL,
345
              file = paste0(
346
                "Up-regulated-", name, first[i], "in", first[i],
347
                "VS", second[i], ".csv"
348
              )
349
            )
350
          }
351
          DEGsS <- c(DEGsS, FinalDEGsL[, 2])
352
          DEGsE <- c(DEGsE, as.character(FinalDEGsL[, 3]))
353
        }
354
        if (length(FDRu) > 0) {
355
          genes <- siggenes.table$genes.up[, 3]
356
          if (quiet) {
357
            suppressMessages(
358
              geneList <-
359
                AnnotationDbi::select(
360
                  org.Hs.eg.db,
361
                  keys = keys(org.Hs.eg.db),
362
                  columns = c("SYMBOL", "ENSEMBL")
363
                )
364
            )
365
            GL <- c(1, "MTRNR2", "ENSG00000210082")
366
            GL1 <- c(1, "MTRNR1", "ENSG00000211459")
367
            geneList <- rbind(geneList, GL, GL1)
368
          } else {
369
            geneList <-
370
              AnnotationDbi::select(
371
                org.Hs.eg.db,
372
                keys = keys(org.Hs.eg.db),
373
                columns = c("SYMBOL", "ENSEMBL")
374
              )
375
            GL <- c(1, "MTRNR2", "ENSG00000210082")
376
            GL1 <- c(1, "MTRNR1", "ENSG00000211459")
377
            geneList <- rbind(geneList, GL, GL1)
378
          }
379
          FinalDEGsU <- cbind(genes, siggenes.table$genes.up)
380
          gene_list <- geneList[, 3]
381
          idx_genes <- is.element(gene_list, genes)
382
          genes2 <- geneList[idx_genes, ]
383
          FinalDEGsU <- merge(
384
            FinalDEGsU, genes2,
385
            by.x = "genes",
386
            by.y = "ENSEMBL", all.x = TRUE
387
          )
388
          FinalDEGsU[, 3] <- FinalDEGsU[, 11]
389
          FinalDEGsU <- FinalDEGsU[, c(-1, -10, -11)]
390
          FinalDEGsU <- FinalDEGsU[order(FinalDEGsU[, 8]), ]
391
          FinalDEGsU[is.na(FinalDEGsU[, 2]), c(2, 3)] <-
392
            FinalDEGsU[is.na(FinalDEGsU[, 2]), 3]
393
          if (!quiet) {
394
            message(
395
              "Up-regulated genes in the ", second[i], " in ",
396
              first[i], " VS ", second[i], "\n"
397
            )
398
          }
399
          if (export) {
400
            write.csv(
401
              FinalDEGsU,
402
              file = paste0(
403
                "Up-regulated-", name, second[i], "in",
404
                first[i], "VS", second[i], ".csv"
405
              )
406
            )
407
            write.csv(
408
              FinalDEGsU,
409
              file = paste0(
410
                "Low-regulated-", name, first[i], "in",
411
                first[i], "VS", second[i], ".csv"
412
              )
413
            )
414
          }
415
          DEGsS <- c(DEGsS, FinalDEGsU[, 2])
416
          DEGsE <- c(DEGsE, as.character(FinalDEGsU[, 3]))
417
        }
418
      } else {
419
        DEGsTable[i, 1] <- paste0(first[i], " VS ", second[i])
420
        DEGsTable[i, 2] <- second[i]
421
        DEGsTable[i, 3] <- NA
422
        DEGsTable[i, 4] <- NA
423
        DEGsTable[i, 5] <- NA
424
        DEGsTable[i, 6] <- NA
425
426
        DEGsTable[i + com, 1] <- paste0(first[i], " VS ", second[i])
427
        DEGsTable[i + com, 2] <- first[i]
428
        DEGsTable[i + com, 3] <- NA
429
        DEGsTable[i + com, 4] <- NA
430
        DEGsTable[i + com, 5] <- NA
431
        DEGsTable[i + com, 6] <- NA
432
      }
433
    }
434
435
    if (!quiet & export) {
436
      message("The results of DEGs are saved in your directory")
437
    }
438
    colnames(DEGsTable) <- c(
439
      "Comparisons", "Target cluster", "Gene number", "File name",
440
      "Gene number", "File name"
441
    )
442
    if (export) write.csv(DEGsTable, file = paste0(filename_deg, ".csv"))
443
    if (!quiet) print(DEGsTable)
444
    sigDEG <- cbind(DEGsE, DEGsS)
445
    if (export) write.csv(sigDEG, file = paste0(filename_sigdeg, ".csv"))
446
    return(list(sigDEG, DEGsTable))
447
  }
448
)