[fe0e8b]: / rna_atac / mofa / prepare_mofa.R

Download this file

168 lines (129 with data), 6.2 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
here::i_am("rna_atac/mofa/prepare_mofa.R")
source(here::here("settings.R"))
source(here::here("utils.R"))
suppressPackageStartupMessages(library(scran))
suppressPackageStartupMessages(library(scater))
######################
## Define arguments ##
######################
p <- ArgumentParser(description='')
p$add_argument('--metadata', type="character", help='Cell metadata file')
p$add_argument('--atac_matrix', type="character", help='ATAC Matrix to use')
p$add_argument('--atac_matrix_file', type="character", help='ATAC Matrix to use')
p$add_argument('--atac_feature_stats', type="character", help='ATAC feature stats')
p$add_argument('--sce', type="character", help='RNA SingleCellExperiment')
p$add_argument('--stages', type="character", default="all", nargs='+', help='Stages to plot')
p$add_argument('--samples', type="character", default="all", nargs='+', help='Stages to plot')
p$add_argument('--nfeatures_atac', type="integer", default=25000, help='Number of ATAC features')
p$add_argument('--nfeatures_rna', type="integer", default=5000, help='Number of RNA features')
p$add_argument('--remove_ExE_cells', type="character", default="True", help='Remove ExE cells?')
p$add_argument('--outdir', type="character", help='Output directory')
p$add_argument('--test', action="store_true", help='Test mode?')
args <- p$parse_args(commandArgs(TRUE))
## START TEST ##
# io$basedir <- file.path(io$basedir,"test")
# args$metadata <- file.path(io$basedir,"results/atac/archR/celltype_assignment/sample_metadata_after_celltype_assignment.txt.gz")
# args$atac_matrix <- "PeakMatrix"
# args$atac_matrix_file <- file.path(io$basedir,"processed/atac/archR/Matrices/PeakMatrix_summarized_experiment.rds")
# args$sce <- file.path(io$basedir,"processed/rna/SingleCellExperiment.rds")
# args$atac_feature_stats <- file.path(io$basedir, sprintf("results/atac/archR/feature_stats/%s/%s_celltype_stats.txt.gz",args$atac_matrix,args$atac_matrix))
# args$nfeatures_atac <- 25000
# args$nfeatures_rna <- 5000
# args$stages <- "E8.75"
# args$samples <- c("E8.75_rep2")
# args$remove_ExE_cells <- "True"
# args$outdir <- file.path(io$basedir, "results/rna_atac/mofa/all_cells")
## END TEST ##
#####################
## Parse arguments ##
#####################
if (args$stages[1]=="all") {
args$stages <- opts$stages
} else {
stopifnot(args$stages%in%opts$stages)
}
if (args$samples[1]=="all") {
args$samples <- opts$samples
} else {
stopifnot(args$samples%in%opts$samples)
}
if (args$remove_ExE_cells=="True") {
args$remove_ExE_cells <- TRUE
} else if (args$remove_ExE_cells=="False") {
args$remove_ExE_cells <- FALSE
} else {
stop('remove_ExE_cells should be "True" or "False"')
}
dir.create(args$outdir, showWarnings=F, recursive = T)
##########################
## Load sample metadata ##
##########################
sample_metadata <- fread(args$metadata) %>%
.[pass_atacQC==TRUE & pass_rnaQC==TRUE & doublet_call==FALSE & sample%in%args$samples & stage%in%args$stages]
if (args$remove_ExE_cells) {
print("Removing ExE cells...")
sample_metadata <- sample_metadata %>% .[!celltype%in%c("Visceral_endoderm","ExE_endoderm","ExE_ectoderm","Parietal_endoderm")]
}
# print stats
table(sample_metadata$stage)
table(sample_metadata$celltype)
####################
## Load ATAC data ##
####################
print(sprintf("Fetching single-cell ATAC %s...",args$matrix))
atac.se <- readRDS(args$atac_matrix_file)[,sample_metadata$cell]
############################
## Feature selection ATAC ##
############################
# Load feature stats
atac_feature_stats.dt <- fread(args$atac_feature_stats)
# Define highly variable features
atac_features <- atac_feature_stats.dt %>%
setorder(-var_pseudobulk) %>%
head(n=args$nfeatures_atac) %>% .$feature
if (args$test) {
print("Test mode activated, subsetting number of ATAC features...")
atac_features <- atac_features %>% head(n=500)
}
#############################
## ATAC data normalisation ##
#############################
# TFIDF normalisation
atac_tfidf.mtx <- tfidf(assay(atac.se[atac_features,]), method=1, scale.factor=1e4) %>% round(2)
# hist(atac_tfidf.mtx[1:10000,1:1000] %>% as.matrix)
##############################
## Load RNA expression data ##
##############################
rna.sce <- load_SingleCellExperiment(args$sce, normalise = TRUE, cells = sample_metadata$cell)
# Add sample metadata to the colData of the SingleCellExperiment
colData(rna.sce) <- sample_metadata %>% as.data.frame %>% tibble::column_to_rownames("cell") %>%
.[colnames(rna.sce),] %>% DataFrame()
# Filter features manually
rna.sce <- rna.sce[grep("*Rik|^Gm|^Mt-|^Rps|^Rpl|^Olfr",rownames(rna.sce), invert=T),]
# Feature selection
rna_features <- modelGeneVar(rna.sce) %>% as.data.table(keep.rownames = T) %>%
setnames("rn","gene") %>%
.[mean>=0.001] %>% setorder(-p.value) %>% head(args$nfeatures_rna) %>% .$gene
if (args$test) {
print("Test mode activated, subsetting number of RNA features...")
rna_features <- rna_features %>% head(n=500)
}
# Fetch matrix
rna.mtx <- assay(rna.sce[rna_features,],"logcounts") %>% round(2)
dim(rna.mtx)
# hist(rna.mtx[1:5000,1:1000] %>% as.matrix)
###########################
## Prepare data for MOFA ##
###########################
cells <- intersect(colnames(rna.mtx),colnames(atac_tfidf.mtx))
rna.mtx <- rna.mtx[,cells]
atac_tfidf.mtx <- atac_tfidf.mtx[,cells]
###############
## Save data ##
###############
Matrix::writeMM(rna.mtx, file.path(args$outdir,"rna.mtx"))
Matrix::writeMM(atac_tfidf.mtx, file.path(args$outdir,"atac_tfidf.mtx"))
write.table(rownames(rna.mtx), file.path(args$outdir,"rna_features.txt"), quote=F, row.names=F, col.names=F)
write.table(rownames(atac_tfidf.mtx), file.path(args$outdir,"atac_features.txt"), quote=F, row.names=F, col.names=F)
write.table(cells, file.path(args$outdir,"cells.txt"), quote=F, row.names=F, col.names=F)
fwrite(sample_metadata, file.path(args$outdir,"sample_metadata.txt.gz"))