[6c3e5b]: / rna / processing / 6_plot_stats.R

Download this file

125 lines (97 with data), 4.3 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
here::i_am("rna/processing/6_plot_stats.R")
source(here::here("settings.R"))
source(here::here("utils.R"))
################################
## Initialize argument parser ##
################################
p <- ArgumentParser(description='')
p$add_argument('--sce', type="character", help='SingleCellExperiment file')
p$add_argument('--metadata', type="character", help='Cell metadata file')
p$add_argument('--samples', type="character", default="all", nargs='+', help='Samples to plot')
p$add_argument('--outdir', type="character", help='Output directory')
args <- p$parse_args(commandArgs(TRUE))
## START TEST ##
args <- list()
args$metadata <- file.path(io$basedir,"results/rna/doublet_detection/sample_metadata_after_doublets.txt.gz")
args$sce <- file.path(io$basedir,"processed/rna/SingleCellExperiment.rds")
args$samples <- c("E8.5_CRISPR_T_KO", "E8.5_CRISPR_T_WT") # "all"
args$outdir <- file.path(io$basedir,"results/rna/stats")
## END TEST ##
#####################
## Parse arguments ##
#####################
# I/O
dir.create(args$outdir, showWarnings = T)
# Options
if (args$samples[1]=="all") {
args$samples <- opts$samples
} else {
stopifnot(args$samples%in%opts$samples)
}
###################
## Load metadata ##
###################
sample_metadata <- fread(args$metadata) %>%
.[pass_rnaQC==TRUE & doublet_call==FALSE & sample%in%args$samples] %>%
.[,sample:=factor(sample,levels=opts$samples[opts$samples%in%args$samples])]
# sample_metadata <- fread(io$metadata) %>% .[(pass_rnaQC==TRUE | pass_atacQC==TRUE)]
# table(sample_metadata$sample)
###############
## Load data ##
###############
# Load RNA expression data as SingleCellExperiment object
sce <- load_SingleCellExperiment(args$sce, cells=sample_metadata$cell, normalise = FALSE)
# Add sample metadata as colData
colData(sce) <- sample_metadata %>% tibble::column_to_rownames("cell") %>% DataFrame
################################################
## Plot distribution of UMI counts per sample ##
################################################
to.plot <- args$samples %>% map(function(i) {
mtx <- as.matrix(counts(sce[,sce$sample==i])[,1:100])
data.table(table(mtx[mtx<=10])) %>% setnames(c("value","N")) %>% .[,sample:=i]
}) %>% rbindlist
to.plot[N>=1e6,N:=1e6]
p <- ggbarplot(to.plot, x="value", y="N", fill="gray70") +
facet_wrap(~sample) +
theme(
axis.text = element_text(size=rel(0.65)),
axis.title.x = element_blank()
)
pdf(paste0(args$outdir,"/umi_distribution_per_sample.pdf"), width=8, height=5)
print(p)
dev.off()
#########################################
## Barplots number of cells per sample ##
#########################################
to.plot <- sample_metadata[,.N,by=c("sample","stage")]
fwrite(to.plot[,c("sample","N")], file.path(args$outdir,"ncells_per_sample.txt.gz"), sep="\t", quote=F)
p <- ggbarplot(to.plot, x = "sample", y = "N", fill="stage") +
scale_fill_manual(values=opts$stage.colors) +
labs(x="", y="Number of cells (after QC)") +
theme(
legend.position = "none",
legend.title = element_blank(),
axis.text.y = element_text(colour="black",size=rel(0.7)),
axis.text.x = element_text(colour="black",size=rel(0.8), angle=40, hjust=1)
)
pdf(paste0(args$outdir,"/ncells_per_sample.pdf"), width=8, height=5)
print(p)
dev.off()
#########################################
## Barplots number of reads per sample ##
#########################################
to.plot <- data.table(cell=colnames(sce), sample=sce$sample, stage=sce$stage, N=colSums(counts(sce))) %>%
.[,.(N=sum(N)),by=c("sample","stage")]
fwrite(to.plot[,c("sample","N")], file.path(args$outdir,"nreads_per_sample.txt.gz"), sep="\t", quote=F)
p <- ggbarplot(to.plot, x = "sample", y = "N", fill="stage") +
scale_fill_manual(values=opts$stage.colors) +
labs(x="", y="Number of reads (after QC)") +
theme(
legend.position = "none",
legend.title = element_blank(),
axis.text.y = element_text(colour="black",size=rel(0.7)),
axis.text.x = element_text(colour="black",size=rel(0.8), angle=40, hjust=1)
)
pdf(paste0(args$outdir,"/ncells_per_sample.pdf"), width=8, height=5)
print(p)
dev.off()