--- a
+++ b/rna/differential/utils.R
@@ -0,0 +1,94 @@
+
+# Function to differential expression
+# - sce: SingleCellExperiment object with the column "group" in the colData
+# - groups: the names of the two groups
+# - min_detection_rate_per_group: minimum detection rate per group
+doDiffExpr <- function(sce, groups, min_detection_rate_per_group = 0.50) {
+    
+  # Sanity checks
+  if (!is(sce, "SingleCellExperiment")) stop("'sce' has to be an instance of SingleCellExperiment")
+  stopifnot(length(groups)==2)
+
+  # Filter genes by detection rate per group
+  cdr_A <- rowMeans(logcounts(sce[,sce$group==groups[1]])>0) >= min_detection_rate_per_group
+  cdr_B <- rowMeans(logcounts(sce[,sce$group==groups[2]])>0) >= min_detection_rate_per_group
+  out <- .edgeR(sce[cdr_B | cdr_A,])
+  
+  return(out)
+}
+
+
+.edgeR <- function(sce) {
+  
+  # Convert SCE to DGEList
+  sce_edger <- scran::convertTo(sce, type="edgeR")
+  
+  # Define design matrix (with intercept)
+  cdr <- colMeans(logcounts(sce)>0)
+  design <- model.matrix(~cdr+sce$group)
+  
+  # Estimate dispersions
+  sce_edger  <- estimateDisp(sce_edger,design)
+  
+  # Fit GLM
+  fit <- glmQLFit(sce_edger,design)
+  
+  # Likelihood ratio test
+  lrt <- glmQLFTest(fit)
+  
+  # Construct output data.frame
+  out <- topTags(lrt, n=nrow(lrt))$table %>% as.data.table(keep.rownames=T) %>%
+    setnames(c("gene","logFC","logCPM","LR","p.value","padj_fdr")) %>%
+    .[,c("logCPM","LR","p.value"):=NULL]
+  
+  return(out)
+}
+
+################
+## Plot utils ##
+################
+
+
+gg_volcano_plot <- function(to.plot, top_genes=10, xlim=NULL, ylim=NULL, label_groups = NULL) {
+  
+  negative_hits <- to.plot[sig==TRUE & logFC<0,gene]
+  positive_hits <- to.plot[sig==TRUE & logFC>0,gene]
+  all <- nrow(to.plot)
+  
+  # if (is.null(xlim))
+  #   xlim <- max(abs(to.plot$logFC), na.rm=T)
+  # if (is.null(ylim))
+  #   ylim <- max(-log10(to.plot$padj_fdr+1e-100), na.rm=T)
+  
+  to.plot <- to.plot[!is.na(logFC) & !is.na(padj_fdr)]
+  
+  p <- ggplot(to.plot, aes(x=logFC, y=-log10(padj_fdr+1e-100))) +
+    labs(x="Log fold change", y=expression(paste("-log"[10],"(q.value)"))) +
+    ggrastr::geom_point_rast(aes(color=sig, size=sig)) +
+    # geom_hline(yintercept = -log10(opts$threshold_fdr), color="blue") +
+    geom_segment(aes(x=0, xend=0, y=0, yend=105), color="orange", size=0.5) +
+    scale_color_manual(values=c("black","red")) +
+    scale_size_manual(values=c(0.5,1)) +
+    scale_x_continuous(limits=c(-6,6)) +
+    scale_y_continuous(limits=c(0,115)) +
+    annotate("text", x=0, y=115, size=4, label=sprintf("(%d)", all)) +
+    annotate("text", x=-5, y=115, size=4, label=sprintf("%d (-)",length(negative_hits))) +
+    annotate("text", x=5, y=115, size=4, label=sprintf("%d (+)",length(positive_hits))) +
+    ggrepel::geom_text_repel(data=head(to.plot[sig==T],n=top_genes), aes(x=logFC, y=-log10(padj_fdr+1e-100), label=gene), max.overlaps=Inf, size=4) +
+    theme_classic() +
+    theme(
+      axis.text = element_text(size=rel(0.75), color='black'),
+      axis.title = element_text(size=rel(1.0), color='black'),
+      legend.position="none"
+    )
+  
+  
+  if (length(label_groups)>0) {
+    p <- p +
+      annotate("text", x=-4, y=0, size=4, label=sprintf("Up in %s",label_groups[2])) +
+      annotate("text", x=4, y=0, size=4, label=sprintf("Up in %s",label_groups[1]))
+  }
+  
+  return(p)
+}
+