Diff of /rna/differential/utils.R [000000] .. [fe0e8b]

Switch to unified view

a b/rna/differential/utils.R
1
2
# Function to differential expression
3
# - sce: SingleCellExperiment object with the column "group" in the colData
4
# - groups: the names of the two groups
5
# - min_detection_rate_per_group: minimum detection rate per group
6
doDiffExpr <- function(sce, groups, min_detection_rate_per_group = 0.50) {
7
    
8
  # Sanity checks
9
  if (!is(sce, "SingleCellExperiment")) stop("'sce' has to be an instance of SingleCellExperiment")
10
  stopifnot(length(groups)==2)
11
12
  # Filter genes by detection rate per group
13
  cdr_A <- rowMeans(logcounts(sce[,sce$group==groups[1]])>0) >= min_detection_rate_per_group
14
  cdr_B <- rowMeans(logcounts(sce[,sce$group==groups[2]])>0) >= min_detection_rate_per_group
15
  out <- .edgeR(sce[cdr_B | cdr_A,])
16
  
17
  return(out)
18
}
19
20
21
.edgeR <- function(sce) {
22
  
23
  # Convert SCE to DGEList
24
  sce_edger <- scran::convertTo(sce, type="edgeR")
25
  
26
  # Define design matrix (with intercept)
27
  cdr <- colMeans(logcounts(sce)>0)
28
  design <- model.matrix(~cdr+sce$group)
29
  
30
  # Estimate dispersions
31
  sce_edger  <- estimateDisp(sce_edger,design)
32
  
33
  # Fit GLM
34
  fit <- glmQLFit(sce_edger,design)
35
  
36
  # Likelihood ratio test
37
  lrt <- glmQLFTest(fit)
38
  
39
  # Construct output data.frame
40
  out <- topTags(lrt, n=nrow(lrt))$table %>% as.data.table(keep.rownames=T) %>%
41
    setnames(c("gene","logFC","logCPM","LR","p.value","padj_fdr")) %>%
42
    .[,c("logCPM","LR","p.value"):=NULL]
43
  
44
  return(out)
45
}
46
47
################
48
## Plot utils ##
49
################
50
51
52
gg_volcano_plot <- function(to.plot, top_genes=10, xlim=NULL, ylim=NULL, label_groups = NULL) {
53
  
54
  negative_hits <- to.plot[sig==TRUE & logFC<0,gene]
55
  positive_hits <- to.plot[sig==TRUE & logFC>0,gene]
56
  all <- nrow(to.plot)
57
  
58
  # if (is.null(xlim))
59
  #   xlim <- max(abs(to.plot$logFC), na.rm=T)
60
  # if (is.null(ylim))
61
  #   ylim <- max(-log10(to.plot$padj_fdr+1e-100), na.rm=T)
62
  
63
  to.plot <- to.plot[!is.na(logFC) & !is.na(padj_fdr)]
64
  
65
  p <- ggplot(to.plot, aes(x=logFC, y=-log10(padj_fdr+1e-100))) +
66
    labs(x="Log fold change", y=expression(paste("-log"[10],"(q.value)"))) +
67
    ggrastr::geom_point_rast(aes(color=sig, size=sig)) +
68
    # geom_hline(yintercept = -log10(opts$threshold_fdr), color="blue") +
69
    geom_segment(aes(x=0, xend=0, y=0, yend=105), color="orange", size=0.5) +
70
    scale_color_manual(values=c("black","red")) +
71
    scale_size_manual(values=c(0.5,1)) +
72
    scale_x_continuous(limits=c(-6,6)) +
73
    scale_y_continuous(limits=c(0,115)) +
74
    annotate("text", x=0, y=115, size=4, label=sprintf("(%d)", all)) +
75
    annotate("text", x=-5, y=115, size=4, label=sprintf("%d (-)",length(negative_hits))) +
76
    annotate("text", x=5, y=115, size=4, label=sprintf("%d (+)",length(positive_hits))) +
77
    ggrepel::geom_text_repel(data=head(to.plot[sig==T],n=top_genes), aes(x=logFC, y=-log10(padj_fdr+1e-100), label=gene), max.overlaps=Inf, size=4) +
78
    theme_classic() +
79
    theme(
80
      axis.text = element_text(size=rel(0.75), color='black'),
81
      axis.title = element_text(size=rel(1.0), color='black'),
82
      legend.position="none"
83
    )
84
  
85
  
86
  if (length(label_groups)>0) {
87
    p <- p +
88
      annotate("text", x=-4, y=0, size=4, label=sprintf("Up in %s",label_groups[2])) +
89
      annotate("text", x=4, y=0, size=4, label=sprintf("Up in %s",label_groups[1]))
90
  }
91
  
92
  return(p)
93
}
94