[dbb3ea]: / rna / pseudobulk / old / pseudobulk_rna.R

Download this file

86 lines (66 with data), 2.3 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
library(muscat)
library(DESeq2)
#####################
## Define settings ##
#####################
if (grepl("ricard",Sys.info()['nodename'])) {
source("/Users/ricard/gastrulation_multiome_10x/settings.R")
} else if (grepl("ebi",Sys.info()['nodename'])) {
source("/homes/ricard/gastrulation_multiome_10x/settings.R")
} else {
stop("Computer not recognised")
}
# I/O
io$outdir <- paste0(io$basedir,"/results/rna/pseudobulk")
# Options
opts$samples <- c(
"E7.5_rep1",
"E7.5_rep2",
"E8.0_rep1",
"E8.0_rep2",
"E8.5_rep1",
"E8.5_rep2"
)
###############
## Load data ##
###############
# Load cell metadata
# io$metadata <- "/Users/ricard/data/gastrulation_multiome_10x/results/rna/doublets/sample_metadata_after_doublets.txt.gz"
sample_metadata <- fread(io$metadata) %>%
.[pass_rnaQC==TRUE & doublet_call==FALSE & sample%in%opts$samples & !is.na(celltype.mapped)]
# Load SingleCellExperiment
sce <- load_SingleCellExperiment(io$rna.sce, cells=sample_metadata$cell)
colData(sce) <- sample_metadata %>% tibble::column_to_rownames("cell") %>% DataFrame
###################################
## Aggregate counts per celltype ##
###################################
# assays(sce)$cpm <- edgeR::cpm(assay(sce), normalized.lib.sizes = FALSE, log = FALSE)
sce_pseudobulk <- aggregateData(
sce,
assay = "counts",
by = c("celltype.mapped"),
fun = c("sum"),
scale = FALSE # Should pseudo-bulks be scaled with the effective library size & multiplied by 1M?
)
assayNames(sce_pseudobulk) <- "counts"
###############
## Normalise ##
###############
# create DESeq object
dds <- DESeqDataSet(sce_pseudobulk, design=~1)
# This function calculates a variance stabilizing transformation (VST) from the fitted dispersion-mean relation(s)
# and then transforms the count data (normalized by division by the size factors or normalization factors),
# yielding a matrix of values which are now approximately homoskedastic
dds <- varianceStabilizingTransformation(dds)
logcounts(sce_pseudobulk) <- assay(dds)
###################
## Sanity checks ##
###################
# cor(
# colMeans(logcounts(sce_pseudobulk)),
# metadata(sce_pseudobulk)$n_cells
# )
##########
## Save ##
##########
saveRDS(sce_pseudobulk, paste0(io$outdir,"/SingleCellExperiment.rds"))