|
a |
|
b/rna/differential/utils.R |
|
|
1 |
|
|
|
2 |
# Function to differential expression |
|
|
3 |
# - sce: SingleCellExperiment object with the column "group" in the colData |
|
|
4 |
# - groups: the names of the two groups |
|
|
5 |
# - min_detection_rate_per_group: minimum detection rate per group |
|
|
6 |
doDiffExpr <- function(sce, groups, min_detection_rate_per_group = 0.50) { |
|
|
7 |
|
|
|
8 |
# Sanity checks |
|
|
9 |
if (!is(sce, "SingleCellExperiment")) stop("'sce' has to be an instance of SingleCellExperiment") |
|
|
10 |
stopifnot(length(groups)==2) |
|
|
11 |
|
|
|
12 |
# Filter genes by detection rate per group |
|
|
13 |
cdr_A <- rowMeans(logcounts(sce[,sce$group==groups[1]])>0) >= min_detection_rate_per_group |
|
|
14 |
cdr_B <- rowMeans(logcounts(sce[,sce$group==groups[2]])>0) >= min_detection_rate_per_group |
|
|
15 |
out <- .edgeR(sce[cdr_B | cdr_A,]) |
|
|
16 |
|
|
|
17 |
return(out) |
|
|
18 |
} |
|
|
19 |
|
|
|
20 |
|
|
|
21 |
.edgeR <- function(sce) { |
|
|
22 |
|
|
|
23 |
# Convert SCE to DGEList |
|
|
24 |
sce_edger <- scran::convertTo(sce, type="edgeR") |
|
|
25 |
|
|
|
26 |
# Define design matrix (with intercept) |
|
|
27 |
cdr <- colMeans(logcounts(sce)>0) |
|
|
28 |
design <- model.matrix(~cdr+sce$group) |
|
|
29 |
|
|
|
30 |
# Estimate dispersions |
|
|
31 |
sce_edger <- estimateDisp(sce_edger,design) |
|
|
32 |
|
|
|
33 |
# Fit GLM |
|
|
34 |
fit <- glmQLFit(sce_edger,design) |
|
|
35 |
|
|
|
36 |
# Likelihood ratio test |
|
|
37 |
lrt <- glmQLFTest(fit) |
|
|
38 |
|
|
|
39 |
# Construct output data.frame |
|
|
40 |
out <- topTags(lrt, n=nrow(lrt))$table %>% as.data.table(keep.rownames=T) %>% |
|
|
41 |
setnames(c("gene","logFC","logCPM","LR","p.value","padj_fdr")) %>% |
|
|
42 |
.[,c("logCPM","LR","p.value"):=NULL] |
|
|
43 |
|
|
|
44 |
return(out) |
|
|
45 |
} |
|
|
46 |
|
|
|
47 |
################ |
|
|
48 |
## Plot utils ## |
|
|
49 |
################ |
|
|
50 |
|
|
|
51 |
|
|
|
52 |
gg_volcano_plot <- function(to.plot, top_genes=10, xlim=NULL, ylim=NULL, label_groups = NULL) { |
|
|
53 |
|
|
|
54 |
negative_hits <- to.plot[sig==TRUE & logFC<0,gene] |
|
|
55 |
positive_hits <- to.plot[sig==TRUE & logFC>0,gene] |
|
|
56 |
all <- nrow(to.plot) |
|
|
57 |
|
|
|
58 |
# if (is.null(xlim)) |
|
|
59 |
# xlim <- max(abs(to.plot$logFC), na.rm=T) |
|
|
60 |
# if (is.null(ylim)) |
|
|
61 |
# ylim <- max(-log10(to.plot$padj_fdr+1e-100), na.rm=T) |
|
|
62 |
|
|
|
63 |
to.plot <- to.plot[!is.na(logFC) & !is.na(padj_fdr)] |
|
|
64 |
|
|
|
65 |
p <- ggplot(to.plot, aes(x=logFC, y=-log10(padj_fdr+1e-100))) + |
|
|
66 |
labs(x="Log fold change", y=expression(paste("-log"[10],"(q.value)"))) + |
|
|
67 |
ggrastr::geom_point_rast(aes(color=sig, size=sig)) + |
|
|
68 |
# geom_hline(yintercept = -log10(opts$threshold_fdr), color="blue") + |
|
|
69 |
geom_segment(aes(x=0, xend=0, y=0, yend=105), color="orange", size=0.5) + |
|
|
70 |
scale_color_manual(values=c("black","red")) + |
|
|
71 |
scale_size_manual(values=c(0.5,1)) + |
|
|
72 |
scale_x_continuous(limits=c(-6,6)) + |
|
|
73 |
scale_y_continuous(limits=c(0,115)) + |
|
|
74 |
annotate("text", x=0, y=115, size=4, label=sprintf("(%d)", all)) + |
|
|
75 |
annotate("text", x=-5, y=115, size=4, label=sprintf("%d (-)",length(negative_hits))) + |
|
|
76 |
annotate("text", x=5, y=115, size=4, label=sprintf("%d (+)",length(positive_hits))) + |
|
|
77 |
ggrepel::geom_text_repel(data=head(to.plot[sig==T],n=top_genes), aes(x=logFC, y=-log10(padj_fdr+1e-100), label=gene), max.overlaps=Inf, size=4) + |
|
|
78 |
theme_classic() + |
|
|
79 |
theme( |
|
|
80 |
axis.text = element_text(size=rel(0.75), color='black'), |
|
|
81 |
axis.title = element_text(size=rel(1.0), color='black'), |
|
|
82 |
legend.position="none" |
|
|
83 |
) |
|
|
84 |
|
|
|
85 |
|
|
|
86 |
if (length(label_groups)>0) { |
|
|
87 |
p <- p + |
|
|
88 |
annotate("text", x=-4, y=0, size=4, label=sprintf("Up in %s",label_groups[2])) + |
|
|
89 |
annotate("text", x=4, y=0, size=4, label=sprintf("Up in %s",label_groups[1])) |
|
|
90 |
} |
|
|
91 |
|
|
|
92 |
return(p) |
|
|
93 |
} |
|
|
94 |
|