# Function to differential expression
# - sce: SingleCellExperiment object with the column "group" in the colData
# - groups: the names of the two groups
# - min_detection_rate_per_group: minimum detection rate per group
calculate_diff_acc_edgeR <- function(sce, groups, min_detection_rate_per_group = 0.50) {
# Sanity checks
if (!is(sce, "SingleCellExperiment")) stop("'sce' has to be an instance of SingleCellExperiment")
stopifnot(length(groups)==2)
# Filter genes by detection rate per group
cdr_A <- rowMeans(logcounts(sce[,sce$group==groups[1]])>0) >= min_detection_rate_per_group
cdr_B <- rowMeans(logcounts(sce[,sce$group==groups[2]])>0) >= min_detection_rate_per_group
out <- .edgeR(sce[cdr_B | cdr_A,]) %>% .[,log_padj_fdr:= -log10(padj_fdr)]
return(out)
}
.edgeR <- function(sce) {
# Convert SCE to DGEList
sce_edger <- scran::convertTo(sce, type="edgeR")
# Define design matrix (with intercept)
cdr <- colMeans(logcounts(sce)>0)
design <- model.matrix(~cdr+sce$group)
# Estimate dispersions
sce_edger <- estimateDisp(sce_edger,design)
# Fit GLM
fit <- glmQLFit(sce_edger,design)
# Likelihood ratio test
lrt <- glmQLFTest(fit)
# Construct output data.frame
out <- topTags(lrt, n=nrow(lrt))$table %>% as.data.table(keep.rownames=T) %>%
setnames(c("gene","logFC","logCPM","LR","p.value","padj_fdr")) %>%
.[,c("logCPM","LR"):=NULL]
return(out)
}