[dbb3ea]: / rna / pseudobulk / pseudobulk_rna.R

Download this file

110 lines (80 with data), 4.0 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
here::i_am("rna/pseudobulk/pseudobulk_rna.R")
source(here::here("settings.R"))
source(here::here("utils.R"))
# suppressPackageStartupMessages(library(Seurat))
######################
## Define arguments ##
######################
p <- ArgumentParser(description='')
p$add_argument('--sce', type="character", help='SingleCellExperiment object')
p$add_argument('--metadata', type="character", help='metadata file')
p$add_argument('--group_by', type="character", help='Metadata column to group cells by')
p$add_argument('--normalisation_method', type="character", help='Metadata column to group cells by')
p$add_argument('--outdir', type="character", help='Output directory')
args <- p$parse_args(commandArgs(TRUE))
## START TEST ##
# args$metadata <- file.path(io$basedir,"results/rna/mapping/sample_metadata_after_mapping.txt.gz")
# args$sce <- file.path(io$basedir,"processed/rna/SingleCellExperiment.rds")
# args$group_by <- "celltype_genotype"
# args$normalisation_method <- "cpm"
# args$outdir <- file.path(io$basedir,sprintf("results/rna/pseudobulk/%s",args$group_by))
## END TEST ##
dir.create(args$outdir, showWarnings = F, recursive = T)
###############
## Load data ##
###############
# Load cell metadata
sample_metadata <- fread(args$metadata) %>%
.[,celltype_genotype:=as.character(NA)] %>% .[!is.na(genotype),celltype_genotype:=sprintf("%s-%s",celltype,genotype)] %>%
.[pass_rnaQC==TRUE & doublet_call==FALSE & !is.na(eval(as.name(args$group_by)))]
# Load SingleCellExperiment
sce <- load_SingleCellExperiment(args$sce, cells=sample_metadata$cell)
colData(sce) <- sample_metadata %>% tibble::column_to_rownames("cell") %>% DataFrame
################
## Pseudobulk ##
################
# assays(sce)$cpm <- edgeR::cpm(assay(sce), normalized.lib.sizes = FALSE, log = FALSE)
sce_pseudobulk <- pseudobulk_sce_fn(
x = sce,
assay = "counts",
by = args$group_by,
fun = "sum",
scale = FALSE # Should pseudo-bulks be scaled with the effective library size & multiplied by 1M?
)
assayNames(sce_pseudobulk) <- "counts"
###################
## Normalisation ##
###################
if (args$normalisation_method=="deseq2") {
suppressPackageStartupMessages(library(DESeq2))
dds <- DESeqDataSet(sce_pseudobulk, design=~1)
dds <- varianceStabilizingTransformation(dds)
logcounts(sce_pseudobulk) <- assay(dds)
} else if (args$normalisation_method=="cpm") {
logcounts(sce_pseudobulk) <- log2(1e6*(sweep(counts(sce_pseudobulk),2,colSums(counts(sce_pseudobulk)),"/"))+1)
# logcounts(sce_pseudobulk) <- edgeR::cpm(counts(sce_pseudobulk), log=TRUE, prior.count = 1)
} else {
stop("Normalisation method not recognised")
}
# test
# plot(x=logcounts(sce_pseudobulk)[1:1000,1:10], foo[1:1000,1:10])
# cor(colSums(logcounts(sce_pseudobulk)),sce_pseudobulk@metadata$n_cells)
# plot(colSums(logcounts(sce_pseudobulk)),sce_pseudobulk@metadata$n_cells)
# Save
# saveRDS(sce_pseudobulk, file.path(args$outdir,sprintf("SingleCellExperiment_pseudobulk_%s.rds",args$group_by)))
saveRDS(sce_pseudobulk, file.path(args$outdir,"SingleCellExperiment_pseudobulk.rds"))
########################################################
## Create Seurat object from the SingleCellExperiment ##
########################################################
# sce_pseudobulk@colData$sample <- rownames(sce_pseudobulk@colData) # at least one metadata column is needed
# seurat_pseudobulk <- as.Seurat(sce_pseudobulk)
# seurat_pseudobulk <- RenameAssays(seurat_pseudobulk, originalexp="RNA")
# Save
# saveRDS(seurat_pseudobulk, file.path(args$outdir,sprintf("Seurat_pseudobulk_%s.rds",args$group_by)))
# saveRDS(seurat_pseudobulk, file.path(args$outdir,"Seurat_pseudobulk.rds"))
################
## Save stats ##
################
to_save.dt <- data.table(table(sample_metadata[[args$group_by]])) %>%
setnames(c("group","N")) %>% setorder(group)
fwrite(to_save.dt, file.path(args$outdir,"stats.txt"), sep="\t", quote = F)