|
a |
|
b/R/DIscBIO-generic-ClustDiffGenes.R |
|
|
1 |
#' @title ClustDiffGenes |
|
|
2 |
#' @description Creates a table of cluster differences |
|
|
3 |
#' @param object \code{DISCBIO} class object. |
|
|
4 |
#' @param K A numeric value of the number of clusters. |
|
|
5 |
#' @param pValue A numeric value of the p-value. Default is 0.05. |
|
|
6 |
#' @param fdr A numeric value of the false discovery rate. Default is 0.01. |
|
|
7 |
#' @param export A logical vector that allows writing the final gene list in |
|
|
8 |
#' excel file. Default is TRUE. |
|
|
9 |
#' @param quiet if `TRUE`, suppresses intermediate text output |
|
|
10 |
#' @param filename_up Name of the exported "up" file (if `export=TRUE`) |
|
|
11 |
#' @param filename_down Name of the exported "down" file (if `export=TRUE`) |
|
|
12 |
#' @param filename_binom Name of the exported binomial file |
|
|
13 |
#' @param filename_sigdeg Name of the exported sigDEG file |
|
|
14 |
#' @importFrom stats pbinom median |
|
|
15 |
#' @import org.Hs.eg.db |
|
|
16 |
#' @rdname ClustDiffGenes |
|
|
17 |
#' @return A list containing two tables. |
|
|
18 |
#' @export |
|
|
19 |
#' @examples |
|
|
20 |
#' sc <- DISCBIO(valuesG1msTest) |
|
|
21 |
#' sc <- Clustexp(sc, cln = 3, quiet = TRUE) |
|
|
22 |
#' cdiff <- ClustDiffGenes(sc, K = 3, fdr = .3, export = FALSE) |
|
|
23 |
#' str(cdiff) |
|
|
24 |
#' cdiff[[2]] |
|
|
25 |
setGeneric( |
|
|
26 |
"ClustDiffGenes", |
|
|
27 |
function( |
|
|
28 |
object, K, pValue = 0.05, fdr = .01, export = FALSE, quiet = FALSE, |
|
|
29 |
filename_up = "Up-DEG-cluster", |
|
|
30 |
filename_down = "Down-DEG-cluster", |
|
|
31 |
filename_binom = "binomial-DEGsTable", |
|
|
32 |
filename_sigdeg = "binomial-sigDEG") { |
|
|
33 |
standardGeneric("ClustDiffGenes") |
|
|
34 |
} |
|
|
35 |
) |
|
|
36 |
#' @export |
|
|
37 |
#' @rdname ClustDiffGenes |
|
|
38 |
setMethod( |
|
|
39 |
"ClustDiffGenes", |
|
|
40 |
signature = "DISCBIO", |
|
|
41 |
definition = function( |
|
|
42 |
object, K, pValue, fdr, export, quiet, filename_up, filename_down, |
|
|
43 |
filename_binom, filename_sigdeg) { |
|
|
44 |
# ====================================================================== |
|
|
45 |
# Validating |
|
|
46 |
# ====================================================================== |
|
|
47 |
ran_k <- length(object@kmeans$kpart) > 0 |
|
|
48 |
ran_m <- length(object@MBclusters) > 0 |
|
|
49 |
if (!is.numeric(fdr)) { |
|
|
50 |
stop("fdr has to be a number between 0 and 1") |
|
|
51 |
} else if (fdr < 0 | fdr > 1) { |
|
|
52 |
stop("fdr has to be a number between 0 and 1") |
|
|
53 |
} |
|
|
54 |
if (!is.numeric(pValue)) { |
|
|
55 |
stop("pValue has to be a number between 0 and 1") |
|
|
56 |
} else if (pValue < 0 | pValue > 1) { |
|
|
57 |
stop("pValue has to be a number between 0 and 1") |
|
|
58 |
} |
|
|
59 |
cdiff <- list() |
|
|
60 |
x <- object@ndata |
|
|
61 |
y <- object@expdata[, names(object@ndata)] |
|
|
62 |
if (ran_k) { |
|
|
63 |
part <- object@kmeans$kpart |
|
|
64 |
} else if (ran_m) { |
|
|
65 |
part <- object@MBclusters$clusterid |
|
|
66 |
} else { |
|
|
67 |
stop("Run clustering before running this function") |
|
|
68 |
} |
|
|
69 |
# ====================================================================== |
|
|
70 |
# Operating |
|
|
71 |
# ====================================================================== |
|
|
72 |
for (i in 1:max(part)) { |
|
|
73 |
if (sum(part == i) == 0) { |
|
|
74 |
next |
|
|
75 |
} |
|
|
76 |
m <- apply(x, 1, mean) |
|
|
77 |
n <- |
|
|
78 |
if (sum(part == i) > 1) { |
|
|
79 |
apply(x[, part == i], 1, mean) |
|
|
80 |
} else { |
|
|
81 |
x[, part == i] |
|
|
82 |
} |
|
|
83 |
no <- |
|
|
84 |
if (sum(part == i) > 1) { |
|
|
85 |
median(apply(y[, part == i], 2, sum)) / |
|
|
86 |
median(apply(x[, part == i], 2, sum)) |
|
|
87 |
} else { |
|
|
88 |
sum(y[, part == i]) / sum(x[, part == i]) |
|
|
89 |
} |
|
|
90 |
m <- m * no |
|
|
91 |
n <- n * no |
|
|
92 |
pv <- binompval(m / sum(m), sum(n), n) |
|
|
93 |
d <- |
|
|
94 |
data.frame( |
|
|
95 |
mean.all = m, |
|
|
96 |
mean.cl = n, |
|
|
97 |
fc = n / m, |
|
|
98 |
pv = pv |
|
|
99 |
)[order(pv, decreasing = FALSE), ] |
|
|
100 |
cdiff[[i]] <- d[d$pv < pValue, ] |
|
|
101 |
} |
|
|
102 |
DEGsE <- vector() |
|
|
103 |
DEGsS <- vector() |
|
|
104 |
DEGsTable <- data.frame() |
|
|
105 |
|
|
|
106 |
for (n in 1:K) { |
|
|
107 |
if (length(cdiff[[n]][, 1]) == 0) { |
|
|
108 |
next |
|
|
109 |
} |
|
|
110 |
|
|
|
111 |
if (length(cdiff[[n]][, 1]) > 0) { |
|
|
112 |
p.adj <- p.adjust(cdiff[[n]][, 4], method = "bonferroni") |
|
|
113 |
out <- cbind(cdiff[[n]], p.adj) |
|
|
114 |
out <- subset(out, out[, 5] < fdr) |
|
|
115 |
if (length(out[, 1]) > 0) { |
|
|
116 |
Regulation <- vector() |
|
|
117 |
for (i in seq_len(length(out[, 1]))) { |
|
|
118 |
if (out[i, 1] > out[i, 2]) { |
|
|
119 |
Regulation[i] <- "Down" |
|
|
120 |
} else { |
|
|
121 |
Regulation[i] <- "Up" |
|
|
122 |
} |
|
|
123 |
} |
|
|
124 |
out <- cbind(out, Regulation) |
|
|
125 |
if (quiet) { |
|
|
126 |
suppressMessages( |
|
|
127 |
geneList <- |
|
|
128 |
AnnotationDbi::select( |
|
|
129 |
org.Hs.eg.db, |
|
|
130 |
keys = keys(org.Hs.eg.db), |
|
|
131 |
columns = c("SYMBOL", "ENSEMBL") |
|
|
132 |
) |
|
|
133 |
) |
|
|
134 |
GL <- c(1, "MTRNR2", "ENSG00000210082") |
|
|
135 |
GL1 <- c(1, "MTRNR1", "ENSG00000211459") |
|
|
136 |
geneList <- rbind(geneList, GL, GL1) |
|
|
137 |
} else { |
|
|
138 |
geneList <- |
|
|
139 |
AnnotationDbi::select( |
|
|
140 |
org.Hs.eg.db, |
|
|
141 |
keys = keys(org.Hs.eg.db), |
|
|
142 |
columns = c("SYMBOL", "ENSEMBL") |
|
|
143 |
) |
|
|
144 |
GL <- c(1, "MTRNR2", "ENSG00000210082") |
|
|
145 |
GL1 <- c(1, "MTRNR1", "ENSG00000211459") |
|
|
146 |
geneList <- rbind(geneList, GL, GL1) |
|
|
147 |
} |
|
|
148 |
genes <- rownames(out) |
|
|
149 |
gene_list <- geneList[, 3] |
|
|
150 |
idx_genes <- is.element(gene_list, genes) |
|
|
151 |
genes2 <- geneList[idx_genes, ] |
|
|
152 |
Final <- cbind(genes, out) |
|
|
153 |
|
|
|
154 |
Final <- |
|
|
155 |
merge( |
|
|
156 |
Final, |
|
|
157 |
genes2, |
|
|
158 |
by.x = "genes", |
|
|
159 |
by.y = "ENSEMBL", |
|
|
160 |
all.x = TRUE |
|
|
161 |
) |
|
|
162 |
Final <- Final[!duplicated(Final[, 1]), ] |
|
|
163 |
Final[is.na(Final[, 9]), c(1, 9)] <- |
|
|
164 |
Final[is.na(Final[, 9]), 1] |
|
|
165 |
rownames(Final) <- Final[, 1] |
|
|
166 |
Final[, 1] <- Final[, 9] |
|
|
167 |
Final <- Final[, -9] |
|
|
168 |
DEGsS <- c(DEGsS, Final[, 1]) |
|
|
169 |
DEGsE <- |
|
|
170 |
c(DEGsE, as.character(rownames(Final))) |
|
|
171 |
Up <- subset(Final, Final[, 7] == "Up") |
|
|
172 |
cols_to_keep <- c( |
|
|
173 |
"Regulation", "genes", "pv", "mean.all", "mean.cl", "fc", "p.adj" |
|
|
174 |
) |
|
|
175 |
Up <- Up[, cols_to_keep] |
|
|
176 |
Up[, 3] <- rownames(Up) |
|
|
177 |
Up[, 6] <- log2(Up[, 6]) |
|
|
178 |
Up[, 1] <- Up[, 2] |
|
|
179 |
colnames(Up) <- c( |
|
|
180 |
"Genes", |
|
|
181 |
"genes", |
|
|
182 |
"E.genes", |
|
|
183 |
"mean.all", |
|
|
184 |
"mean.cl", |
|
|
185 |
"log2.fc", |
|
|
186 |
"p.adj" |
|
|
187 |
) |
|
|
188 |
if (export) { |
|
|
189 |
write.csv( |
|
|
190 |
Up, |
|
|
191 |
file = paste0(filename_up, n, ".csv") |
|
|
192 |
) |
|
|
193 |
} |
|
|
194 |
|
|
|
195 |
Down <- subset(Final, Final[, 7] == "Down") |
|
|
196 |
cols_to_keep <- c( |
|
|
197 |
"Regulation", "genes", "pv", "mean.all", "mean.cl", "fc", "p.adj" |
|
|
198 |
) |
|
|
199 |
Down <- Down[, cols_to_keep] |
|
|
200 |
Down[, 3] <- rownames(Down) |
|
|
201 |
Down[, 6] <- log2(Down[, 6]) |
|
|
202 |
Down[, 1] <- Down[, 2] |
|
|
203 |
colnames(Down) <- c( |
|
|
204 |
"Genes", |
|
|
205 |
"genes", |
|
|
206 |
"E.genes", |
|
|
207 |
"mean.all", |
|
|
208 |
"mean.cl", |
|
|
209 |
"log2.fc", |
|
|
210 |
"p.adj" |
|
|
211 |
) |
|
|
212 |
if (export) { |
|
|
213 |
write.csv( |
|
|
214 |
Down, |
|
|
215 |
file = paste0(filename_down, n, ".csv") |
|
|
216 |
) |
|
|
217 |
} |
|
|
218 |
|
|
|
219 |
sigDEG <- cbind(DEGsE, DEGsS) |
|
|
220 |
if (export) { |
|
|
221 |
write.csv( |
|
|
222 |
sigDEG, |
|
|
223 |
file = paste0(filename_sigdeg, ".csv") |
|
|
224 |
) |
|
|
225 |
} |
|
|
226 |
|
|
|
227 |
DEGsTable[n, 1] <- paste0("Cluster ", n) |
|
|
228 |
DEGsTable[n, 2] <- "Remaining Clusters" |
|
|
229 |
DEGsTable[n, 3] <- length(Up[, 1]) |
|
|
230 |
DEGsTable[n, 4] <- paste0(filename_up, n, ".csv") |
|
|
231 |
DEGsTable[n, 5] <- length(Down[, 1]) |
|
|
232 |
DEGsTable[n, 6] <- paste0(filename_down, n, ".csv") |
|
|
233 |
} |
|
|
234 |
} |
|
|
235 |
} |
|
|
236 |
if (length(DEGsTable) > 0) { |
|
|
237 |
colnames(DEGsTable) <- c( |
|
|
238 |
"Target Cluster", "VS", "Gene number", "File name", |
|
|
239 |
"Gene number", "File name" |
|
|
240 |
) |
|
|
241 |
if (export) { |
|
|
242 |
write.csv(DEGsTable, file = paste0(filename_binom, ".csv")) |
|
|
243 |
} |
|
|
244 |
return(list(sigDEG, DEGsTable)) |
|
|
245 |
} else { |
|
|
246 |
print(paste("There are no DEGs with fdr =", fdr)) |
|
|
247 |
} |
|
|
248 |
} |
|
|
249 |
) |