# Function to differential expression
# - sce: SingleCellExperiment object with the column "group" in the colData
# - groups: the names of the two groups
# - min_detection_rate_per_group: minimum detection rate per group
doDiffExpr <- function(sce, groups, min_detection_rate_per_group = 0.50) {
# Sanity checks
if (!is(sce, "SingleCellExperiment")) stop("'sce' has to be an instance of SingleCellExperiment")
stopifnot(length(groups)==2)
# Filter genes by detection rate per group
cdr_A <- rowMeans(logcounts(sce[,sce$group==groups[1]])>0) >= min_detection_rate_per_group
cdr_B <- rowMeans(logcounts(sce[,sce$group==groups[2]])>0) >= min_detection_rate_per_group
out <- .edgeR(sce[cdr_B | cdr_A,])
return(out)
}
.edgeR <- function(sce) {
# Convert SCE to DGEList
sce_edger <- scran::convertTo(sce, type="edgeR")
# Define design matrix (with intercept)
cdr <- colMeans(logcounts(sce)>0)
design <- model.matrix(~cdr+sce$group)
# Estimate dispersions
sce_edger <- estimateDisp(sce_edger,design)
# Fit GLM
fit <- glmQLFit(sce_edger,design)
# Likelihood ratio test
lrt <- glmQLFTest(fit)
# Construct output data.frame
out <- topTags(lrt, n=nrow(lrt))$table %>% as.data.table(keep.rownames=T) %>%
setnames(c("gene","logFC","logCPM","LR","p.value","padj_fdr")) %>%
.[,c("logCPM","LR","p.value"):=NULL]
return(out)
}
################
## Plot utils ##
################
gg_volcano_plot <- function(to.plot, top_genes=10, xlim=NULL, ylim=NULL, label_groups = NULL) {
negative_hits <- to.plot[sig==TRUE & logFC<0,gene]
positive_hits <- to.plot[sig==TRUE & logFC>0,gene]
all <- nrow(to.plot)
# if (is.null(xlim))
# xlim <- max(abs(to.plot$logFC), na.rm=T)
# if (is.null(ylim))
# ylim <- max(-log10(to.plot$padj_fdr+1e-100), na.rm=T)
to.plot <- to.plot[!is.na(logFC) & !is.na(padj_fdr)]
p <- ggplot(to.plot, aes(x=logFC, y=-log10(padj_fdr+1e-100))) +
labs(x="Log fold change", y=expression(paste("-log"[10],"(q.value)"))) +
ggrastr::geom_point_rast(aes(color=sig, size=sig)) +
# geom_hline(yintercept = -log10(opts$threshold_fdr), color="blue") +
geom_segment(aes(x=0, xend=0, y=0, yend=105), color="orange", size=0.5) +
scale_color_manual(values=c("black","red")) +
scale_size_manual(values=c(0.5,1)) +
scale_x_continuous(limits=c(-6,6)) +
scale_y_continuous(limits=c(0,115)) +
annotate("text", x=0, y=115, size=4, label=sprintf("(%d)", all)) +
annotate("text", x=-5, y=115, size=4, label=sprintf("%d (-)",length(negative_hits))) +
annotate("text", x=5, y=115, size=4, label=sprintf("%d (+)",length(positive_hits))) +
ggrepel::geom_text_repel(data=head(to.plot[sig==T],n=top_genes), aes(x=logFC, y=-log10(padj_fdr+1e-100), label=gene), max.overlaps=Inf, size=4) +
theme_classic() +
theme(
axis.text = element_text(size=rel(0.75), color='black'),
axis.title = element_text(size=rel(1.0), color='black'),
legend.position="none"
)
if (length(label_groups)>0) {
p <- p +
annotate("text", x=-4, y=0, size=4, label=sprintf("Up in %s",label_groups[2])) +
annotate("text", x=4, y=0, size=4, label=sprintf("Up in %s",label_groups[1]))
}
return(p)
}