|
a |
|
b/R/DIscBIO-generic-DEGanalysis2clust.R |
|
|
1 |
#' @title Determining differentially expressed genes (DEGs) between two |
|
|
2 |
#' particular clusters. |
|
|
3 |
#' @description This function defines DEGs between particular clusters generated |
|
|
4 |
#' by either K-means or model based clustering. |
|
|
5 |
#' @param object \code{DISCBIO} class object. |
|
|
6 |
#' @param Clustering Clustering has to be one of the following: |
|
|
7 |
#' ["K-means","MB"]. Default is "K-means" |
|
|
8 |
#' @param K A numeric value of the number of clusters. |
|
|
9 |
#' @param fdr A numeric value of the false discovery rate. Default is 0.05. |
|
|
10 |
#' @param name A string vector showing the name to be used to save the resulted |
|
|
11 |
#' tables. |
|
|
12 |
#' @param First A string vector showing the first target cluster. Default is |
|
|
13 |
#' "CL1" |
|
|
14 |
#' @param Second A string vector showing the second target cluster. Default is |
|
|
15 |
#' "CL2" |
|
|
16 |
#' @param export A logical vector that allows writing the final gene list in |
|
|
17 |
#' excel file. Default is TRUE. |
|
|
18 |
#' @param quiet if `TRUE`, suppresses intermediate text output |
|
|
19 |
#' @param plot if `TRUE`, plots are generated |
|
|
20 |
#' @param filename_deg Name of the exported DEG table |
|
|
21 |
#' @param filename_sigdeg Name of the exported sigDEG table |
|
|
22 |
#' @param ... additional parameters to be passed to samr() |
|
|
23 |
#' @importFrom graphics title |
|
|
24 |
#' @importFrom utils write.csv capture.output |
|
|
25 |
#' @importFrom AnnotationDbi keys |
|
|
26 |
#' @return A list containing two tables. |
|
|
27 |
setGeneric( |
|
|
28 |
"DEGanalysis2clust", |
|
|
29 |
function(object, K, Clustering = "K-means", fdr = 0.05, name = "Name", |
|
|
30 |
First = "CL1", Second = "CL2", export = FALSE, quiet = FALSE, |
|
|
31 |
plot = TRUE, filename_deg = "DEGsTable", filename_sigdeg = "sigDEG", |
|
|
32 |
...) { |
|
|
33 |
standardGeneric("DEGanalysis2clust") |
|
|
34 |
} |
|
|
35 |
) |
|
|
36 |
|
|
|
37 |
#' @export |
|
|
38 |
#' @rdname DEGanalysis2clust |
|
|
39 |
setMethod( |
|
|
40 |
"DEGanalysis2clust", |
|
|
41 |
signature = "DISCBIO", |
|
|
42 |
definition = function( |
|
|
43 |
object, K, Clustering, fdr, name, First, Second, export, quiet, plot, |
|
|
44 |
filename_deg, filename_sigdeg, ... |
|
|
45 |
) { |
|
|
46 |
if (!(Clustering %in% c("K-means", "MB"))) { |
|
|
47 |
stop("Clustering has to be either K-means or MB") |
|
|
48 |
} |
|
|
49 |
gene_list <- object@FinalGeneList |
|
|
50 |
gene_names <- rownames(object@expdata) |
|
|
51 |
idx_genes <- is.element(gene_names, gene_list) |
|
|
52 |
gene_names2 <- gene_names[idx_genes] |
|
|
53 |
dataset <- object@expdata[gene_names2, ] |
|
|
54 |
Nam <- colnames(dataset) |
|
|
55 |
if (Clustering == "K-means") { |
|
|
56 |
Cluster_ID <- object@cpart |
|
|
57 |
if (length(object@cpart) < 1) { |
|
|
58 |
stop("run Clustexp before running DEGanalysis2clust") |
|
|
59 |
} |
|
|
60 |
} |
|
|
61 |
|
|
|
62 |
if (Clustering == "MB") { |
|
|
63 |
Cluster_ID <- object@MBclusters$clusterid |
|
|
64 |
if (length(object@MBclusters$clusterid) < 1) { |
|
|
65 |
stop("run ExprmclustMB before running DEGanalysis2clust") |
|
|
66 |
} |
|
|
67 |
} |
|
|
68 |
num <- seq_len(K) |
|
|
69 |
num1 <- paste("CL", num, sep = "") |
|
|
70 |
for (n in num) { |
|
|
71 |
Nam <- ifelse((Cluster_ID == n), num1[n], Nam) |
|
|
72 |
} |
|
|
73 |
colnames(dataset) <- Nam |
|
|
74 |
sg1 <- dataset[, which(colnames(dataset) == First)] |
|
|
75 |
sg2 <- dataset[, which(colnames(dataset) == Second)] |
|
|
76 |
sg <- cbind(sg1, sg2) |
|
|
77 |
|
|
|
78 |
sg3 <- factor( |
|
|
79 |
gsub(paste0("(", First, "|", Second, ").*"), "\\1", colnames(sg)), |
|
|
80 |
levels = c(paste0(First), paste0(Second)) |
|
|
81 |
) |
|
|
82 |
sg3 <- sg3[!is.na(sg3)] |
|
|
83 |
|
|
|
84 |
colnames(sg) <- sg3 |
|
|
85 |
len <- c( |
|
|
86 |
length(sg[, which(colnames(sg) == First)]), |
|
|
87 |
length(sg[, which(colnames(sg) == Second)]) |
|
|
88 |
) |
|
|
89 |
y <- c(rep(1:2, len)) |
|
|
90 |
L <- as.matrix(sg) |
|
|
91 |
gname <- rownames(sg) |
|
|
92 |
x <- L |
|
|
93 |
data <- list(x = x, y = y, geneid = gname) |
|
|
94 |
if (quiet) { |
|
|
95 |
suppressMessages( |
|
|
96 |
invisible(capture.output({ |
|
|
97 |
samr.obj <- sammy( |
|
|
98 |
data, |
|
|
99 |
resp.type = "Two class unpaired", |
|
|
100 |
assay.type = "seq", |
|
|
101 |
testStatistic = "wilcoxon", |
|
|
102 |
random.seed = 15, |
|
|
103 |
... |
|
|
104 |
) |
|
|
105 |
delta.table <- samr.compute.delta.table(samr.obj) |
|
|
106 |
})) |
|
|
107 |
) |
|
|
108 |
} else { |
|
|
109 |
samr.obj <- sammy( |
|
|
110 |
data, |
|
|
111 |
resp.type = "Two class unpaired", |
|
|
112 |
assay.type = "seq", |
|
|
113 |
testStatistic = "wilcoxon", |
|
|
114 |
random.seed = 15, |
|
|
115 |
... |
|
|
116 |
) |
|
|
117 |
delta.table <- samr.compute.delta.table(samr.obj) |
|
|
118 |
} |
|
|
119 |
DEGsTable <- data.frame() |
|
|
120 |
DEGsE <- vector() |
|
|
121 |
DEGsS <- vector() |
|
|
122 |
wm <- which.min(delta.table[, 5]) |
|
|
123 |
if (delta.table[wm, 5] <= fdr) { |
|
|
124 |
w <- which(delta.table[, 5] <= fdr) |
|
|
125 |
if (is.null(w)) stop("No suitable deltas. Try a lower FDR value.") |
|
|
126 |
delta <- delta.table[w[1], 1] - 0.001 |
|
|
127 |
if (plot) { |
|
|
128 |
samr.plot(samr.obj, delta) |
|
|
129 |
title(paste("DEGs in the", Second, "in", First, "VS", Second)) |
|
|
130 |
} |
|
|
131 |
siggenes.table <- samr.compute.siggenes.table( |
|
|
132 |
samr.obj, delta, data, delta.table |
|
|
133 |
) |
|
|
134 |
# ------------------------------------------------------------------ |
|
|
135 |
# Reformat siggenes.table as data.frame |
|
|
136 |
# ------------------------------------------------------------------ |
|
|
137 |
siggenes.table$genes.lo <- reformatSiggenes(siggenes.table$genes.lo) |
|
|
138 |
siggenes.table$genes.up <- reformatSiggenes(siggenes.table$genes.up) |
|
|
139 |
|
|
|
140 |
FDRl <- as.numeric(siggenes.table$genes.lo[, 8]) / 100 |
|
|
141 |
FDRu <- as.numeric(siggenes.table$genes.up[, 8]) / 100 |
|
|
142 |
|
|
|
143 |
siggenes.table$genes.lo[, 8] <- FDRl |
|
|
144 |
siggenes.table$genes.up[, 8] <- FDRu |
|
|
145 |
|
|
|
146 |
DEGsTable[1, 1] <- paste0(First, " VS ", Second) |
|
|
147 |
DEGsTable[1, 2] <- Second |
|
|
148 |
DEGsTable[1, 3] <- length(FDRu) |
|
|
149 |
DEGsTable[1, 4] <- paste0( |
|
|
150 |
"Up-regulated-", name, Second, "in", First, "VS", Second, |
|
|
151 |
".csv" |
|
|
152 |
) |
|
|
153 |
DEGsTable[1, 5] <- length(FDRl) |
|
|
154 |
DEGsTable[1, 6] <- paste0( |
|
|
155 |
"Low-regulated-", name, Second, "in", First, "VS", Second, |
|
|
156 |
".csv" |
|
|
157 |
) |
|
|
158 |
DEGsTable[2, 1] <- paste0(First, " VS ", Second) |
|
|
159 |
DEGsTable[2, 2] <- First |
|
|
160 |
DEGsTable[2, 3] <- length(FDRu) |
|
|
161 |
DEGsTable[2, 4] <- paste0( |
|
|
162 |
"Low-regulated-", name, First, "in", First, "VS", Second, |
|
|
163 |
".csv" |
|
|
164 |
) |
|
|
165 |
DEGsTable[2, 5] <- length(FDRl) |
|
|
166 |
DEGsTable[2, 6] <- paste0( |
|
|
167 |
"Up-regulated-", name, First, "in", First, "VS", Second, |
|
|
168 |
".csv" |
|
|
169 |
) |
|
|
170 |
FinalDEGsL <- data.frame() |
|
|
171 |
if (length(FDRl) > 0) { |
|
|
172 |
genes <- siggenes.table$genes.lo[, 3] |
|
|
173 |
if (quiet) { |
|
|
174 |
suppressMessages( |
|
|
175 |
geneList <- AnnotationDbi::select( |
|
|
176 |
org.Hs.eg.db, |
|
|
177 |
keys = keys(org.Hs.eg.db), |
|
|
178 |
columns = c("SYMBOL", "ENSEMBL") |
|
|
179 |
) |
|
|
180 |
) |
|
|
181 |
GL <- c(1, "MTRNR2", "ENSG00000210082") |
|
|
182 |
GL1 <- c(1, "MTRNR1", "ENSG00000211459") |
|
|
183 |
geneList <- rbind(geneList, GL, GL1) |
|
|
184 |
} else { |
|
|
185 |
geneList <- AnnotationDbi::select( |
|
|
186 |
org.Hs.eg.db, |
|
|
187 |
keys = keys(org.Hs.eg.db), |
|
|
188 |
columns = c("SYMBOL", "ENSEMBL") |
|
|
189 |
) |
|
|
190 |
GL <- c(1, "MTRNR2", "ENSG00000210082") |
|
|
191 |
GL1 <- c(1, "MTRNR1", "ENSG00000211459") |
|
|
192 |
geneList <- rbind(geneList, GL, GL1) |
|
|
193 |
} |
|
|
194 |
FinalDEGsL <- cbind(genes, siggenes.table$genes.lo) |
|
|
195 |
gene_list <- geneList[, 3] |
|
|
196 |
idx_genes <- is.element(gene_list, genes) |
|
|
197 |
genes2 <- geneList[idx_genes, ] |
|
|
198 |
if (!is.null(FinalDEGsL)) { |
|
|
199 |
FinalDEGsL <- merge( |
|
|
200 |
FinalDEGsL, |
|
|
201 |
genes2, |
|
|
202 |
by.x = "genes", |
|
|
203 |
by.y = "ENSEMBL", |
|
|
204 |
all.x = TRUE |
|
|
205 |
) |
|
|
206 |
FinalDEGsL[, 3] <- FinalDEGsL[, 11] |
|
|
207 |
FinalDEGsL <- FinalDEGsL[, c(-1, -10, -11)] |
|
|
208 |
FinalDEGsL <- FinalDEGsL[order(FinalDEGsL[, 8]), ] |
|
|
209 |
FinalDEGsL[is.na(FinalDEGsL[, 2]), c(2, 3)] <- |
|
|
210 |
FinalDEGsL[is.na(FinalDEGsL[, 2]), 3] |
|
|
211 |
} |
|
|
212 |
if (export) { |
|
|
213 |
message("The results of DEGs are saved in your directory") |
|
|
214 |
message( |
|
|
215 |
"Low-regulated genes in the ", Second, " in ", |
|
|
216 |
First, " VS ", Second, "\n" |
|
|
217 |
) |
|
|
218 |
write.csv( |
|
|
219 |
FinalDEGsL, |
|
|
220 |
file = paste0( |
|
|
221 |
"Low-regulated-", name, Second, "in", First, |
|
|
222 |
"VS", Second, ".csv" |
|
|
223 |
) |
|
|
224 |
) |
|
|
225 |
write.csv( |
|
|
226 |
FinalDEGsL, |
|
|
227 |
file = paste0( |
|
|
228 |
"Up-regulated-", name, First, "in", First, "VS", |
|
|
229 |
Second, ".csv" |
|
|
230 |
) |
|
|
231 |
) |
|
|
232 |
} |
|
|
233 |
DEGsS <- c(DEGsS, FinalDEGsL[, 2]) |
|
|
234 |
DEGsE <- c(DEGsE, as.character(FinalDEGsL[, 3])) |
|
|
235 |
} |
|
|
236 |
FinalDEGsU <- data.frame() |
|
|
237 |
if (length(FDRu) > 0) { |
|
|
238 |
genes <- siggenes.table$genes.up[, 3] |
|
|
239 |
if (quiet) { |
|
|
240 |
suppressMessages( |
|
|
241 |
geneList <- AnnotationDbi::select( |
|
|
242 |
org.Hs.eg.db, |
|
|
243 |
keys = keys(org.Hs.eg.db), |
|
|
244 |
columns = c("SYMBOL", "ENSEMBL") |
|
|
245 |
) |
|
|
246 |
) |
|
|
247 |
GL <- c(1, "MTRNR2", "ENSG00000210082") |
|
|
248 |
geneList <- rbind(geneList, GL) |
|
|
249 |
} else { |
|
|
250 |
geneList <- AnnotationDbi::select( |
|
|
251 |
org.Hs.eg.db, |
|
|
252 |
keys = keys(org.Hs.eg.db), |
|
|
253 |
columns = c("SYMBOL", "ENSEMBL") |
|
|
254 |
) |
|
|
255 |
GL <- c(1, "MTRNR2", "ENSG00000210082") |
|
|
256 |
geneList <- rbind(geneList, GL) |
|
|
257 |
} |
|
|
258 |
FinalDEGsU <- cbind(genes, siggenes.table$genes.up) |
|
|
259 |
gene_list <- geneList[, 3] |
|
|
260 |
idx_genes <- is.element(gene_list, genes) |
|
|
261 |
genes2 <- geneList[idx_genes, ] |
|
|
262 |
if (!is.null(FinalDEGsU)) { |
|
|
263 |
FinalDEGsU <- merge( |
|
|
264 |
FinalDEGsU, |
|
|
265 |
genes2, |
|
|
266 |
by.x = "genes", |
|
|
267 |
by.y = "ENSEMBL", |
|
|
268 |
all.x = TRUE |
|
|
269 |
) |
|
|
270 |
FinalDEGsU[, 3] <- FinalDEGsU[, 11] |
|
|
271 |
FinalDEGsU <- FinalDEGsU[, c(-1, -10, -11)] |
|
|
272 |
FinalDEGsU <- FinalDEGsU[order(FinalDEGsU[, 8]), ] |
|
|
273 |
FinalDEGsU[is.na(FinalDEGsU[, 2]), c(2, 3)] <- |
|
|
274 |
FinalDEGsU[is.na(FinalDEGsU[, 2]), 3] |
|
|
275 |
} |
|
|
276 |
if (export) { |
|
|
277 |
message("The results of DEGs are saved in your directory") |
|
|
278 |
message( |
|
|
279 |
"Up-regulated genes in the ", Second, " in ", First, |
|
|
280 |
" VS ", Second, "\n" |
|
|
281 |
) |
|
|
282 |
write.csv( |
|
|
283 |
FinalDEGsU, |
|
|
284 |
file = paste0( |
|
|
285 |
"Up-regulated-", name, Second, "in", First, "VS", |
|
|
286 |
Second, ".csv" |
|
|
287 |
) |
|
|
288 |
) |
|
|
289 |
write.csv( |
|
|
290 |
FinalDEGsU, |
|
|
291 |
file = paste0( |
|
|
292 |
"Low-regulated-", name, First, "in", First, "VS", |
|
|
293 |
Second, ".csv" |
|
|
294 |
) |
|
|
295 |
) |
|
|
296 |
} |
|
|
297 |
DEGsS <- c(DEGsS, FinalDEGsU[, 2]) |
|
|
298 |
DEGsE <- c(DEGsE, as.character(FinalDEGsU[, 3])) |
|
|
299 |
} |
|
|
300 |
} else { |
|
|
301 |
DEGsTable[1, 1] <- paste0(First, " VS ", Second) |
|
|
302 |
DEGsTable[1, 2] <- Second |
|
|
303 |
DEGsTable[1, 3] <- NA |
|
|
304 |
DEGsTable[1, 4] <- NA |
|
|
305 |
DEGsTable[1, 5] <- NA |
|
|
306 |
DEGsTable[1, 6] <- NA |
|
|
307 |
|
|
|
308 |
DEGsTable[2, 1] <- paste0(First, " VS ", Second) |
|
|
309 |
DEGsTable[2, 2] <- First |
|
|
310 |
DEGsTable[2, 3] <- NA |
|
|
311 |
DEGsTable[2, 4] <- NA |
|
|
312 |
DEGsTable[2, 5] <- NA |
|
|
313 |
DEGsTable[2, 6] <- NA |
|
|
314 |
} |
|
|
315 |
colnames(DEGsTable) <- c( |
|
|
316 |
"Comparisons", |
|
|
317 |
"Target cluster", |
|
|
318 |
"Gene number", |
|
|
319 |
"File name", |
|
|
320 |
"Gene number", |
|
|
321 |
"File name" |
|
|
322 |
) |
|
|
323 |
if (!quiet) print(DEGsTable) |
|
|
324 |
sigDEG <- cbind(DEGsE, DEGsS) |
|
|
325 |
if (export) { |
|
|
326 |
write.csv(DEGsTable, file = paste0(filename_deg, ".csv")) |
|
|
327 |
write.csv(sigDEG, file = paste0(filename_sigdeg, ".csv")) |
|
|
328 |
} |
|
|
329 |
return( |
|
|
330 |
list( |
|
|
331 |
sigDEG = sigDEG, |
|
|
332 |
DEGsTable = DEGsTable, |
|
|
333 |
FinalDEGsL = FinalDEGsL, |
|
|
334 |
FinalDEGsU = FinalDEGsU |
|
|
335 |
) |
|
|
336 |
) |
|
|
337 |
} |
|
|
338 |
) |