Diff of /settings.R [000000] .. [fe0e8b]

Switch to unified view

a b/settings.R
1
suppressPackageStartupMessages(library(SingleCellExperiment))
2
suppressPackageStartupMessages(library(data.table))
3
suppressPackageStartupMessages(library(purrr))
4
suppressPackageStartupMessages(library(ggplot2))
5
suppressPackageStartupMessages(library(ggpubr))
6
suppressPackageStartupMessages(library(stringr))
7
suppressPackageStartupMessages(library(argparse))
8
9
#########
10
## I/O ##
11
#########
12
13
io <- list()
14
if (grepl("ricard",Sys.info()['nodename'])) {
15
  io$basedir <- "/Users/ricard/data/gastrulation_multiome_10x"
16
  io$atlas.basedir <- "/Users/ricard/data/gastrulation10x"
17
  io$gene_metadata <- "/Users/ricard/data/ensembl/mouse/v87/BioMart/all_genes/Mmusculus_genes_BioMart.87.txt"
18
} else if (Sys.info()[['nodename']]=="BI2404M") {
19
  io$basedir <- "/Users/argelagr/data/gastrulation_multiome_10x/test"
20
  io$atlas.basedir <- "/Users/argelagr/data/gastrulation10x"
21
  io$gene_metadata <- "/Users/argelagr/data/ensembl/mouse/v87/BioMart/mRNA/Mmusculus_genes_BioMart.87.txt"
22
  io$TFs_file <- "/Users/argelagr/data/mm10_regulation/TFs/TFs.txt"
23
} else if (grepl("pebble|headstone", Sys.info()['nodename'])) {
24
  if (grepl("Clark", Sys.info()['effective_user'])) {
25
    io$basedir       <- "/bi/scratch/Stephen_Clark/multiome/resilio"
26
    io$rawdata       <- "/bi/scratch/Stephen_Clark/multiome/raw"
27
    io$gene_metadata <- "/bi/scratch/Stephen_Clark/annotations/Mmusculus_genes_BioMart.87.txt"
28
  } else if (grepl("argelag", Sys.info()['effective_user'])) {
29
    io$basedir <-'/bi/group/reik/ricard/data/gastrulation_multiome_10x/test'
30
    io$archR.directory <- file.path(io$basedir,"processed/atac/archR")
31
    io$gene_metadata <- "/bi/group/reik/ricard/data/ensembl/mouse/v87/BioMart/all_genes/Mmusculus_genes_BioMart.87.txt"
32
    io$atlas.basedir <- "/bi/group/reik/ricard/data/pijuansala2019_gastrulation10x"
33
    io$TFs_file <- "/bi/group/reik/ricard/data/mm10_regulation/TFs/TFs.txt" 
34
  }
35
} else if (grepl("bi2228m", Sys.info()['nodename'])) {
36
  io$basedir      <- "/Users/clarks/data/multiome/"
37
  io$rawdata      <- "/Users/clarks/data/multiome_raw_sub/"
38
} else if(grepl('LAPTOP',Sys.info()['nodename'])){
39
  io$basedir      <- 'D:/OneDrive - zju.edu.cn/research/bioin/lab/Ricard/data/gastrulation_multiome_10x'
40
} else if(grepl('Workstation',Sys.info()['nodename'])){
41
  io$basedir      <-'/home/lijingyu/gastrulation/data/gastrulation_multiome_10x'
42
  io$gene_metadata <- "/home/lijingyu/gastrulation/data/gastrulation_multiome_10x/Mmusculus_genes_BioMart.87.txt"
43
  io$atlas.basedir <- "/home/lijingyu/gastrulation/data/gastrulation10x"
44
} else {
45
  stop("Computer not recognised")
46
}
47
48
io$metadata <- file.path(io$basedir,"sample_metadata.txt.gz")
49
50
# TFs
51
# io$TFs <- file.path(io$basedir,"results/TFs.txt")
52
53
# RNA
54
io$rna.anndata <- file.path(io$basedir,"processed/rna/anndata.h5ad")
55
io$rna.seurat <- file.path(io$basedir,"processed/rna/seurat.rds")
56
io$rna.sce <- file.path(io$basedir,"processed/rna/SingleCellExperiment.rds")
57
io$rna.tfs.sce <- file.path(io$basedir,"processed/rna/SingleCellExperiment_TFs.rds")
58
io$rna.pseudobulk.sce <- file.path(io$basedir,"results/rna/pseudobulk/celltype/SingleCellExperiment_pseudobulk.rds")
59
io$rna.tfs.pseudobulk.sce <- file.path(io$basedir,"results/rna/pseudobulk/celltype/SingleCellExperiment_TFs_pseudobulk.rds")
60
io$rna.metacells.sce <- file.path(io$basedir,"results/rna/metacells/all_cells/SingleCellExperiment_metacells.rds")
61
62
# Differential RNA expression and marker genes
63
# io$rna.differential <- file.path(io$basedir,"results/rna/differential")
64
io$rna.celltype.marker_genes.all <- file.path(io$basedir,"results/rna/differential/pseudobulk/celltype/parsed/marker_genes_all.txt.gz")
65
io$rna.celltype.marker_genes.filt <- file.path(io$basedir,"results/rna/differential/pseudobulk/celltype/parsed/marker_genes_filtered.txt.gz")
66
io$rna.celltype.marker_tfs.all <- file.path(io$basedir,"results/rna/differential/pseudobulk/celltype/parsed/marker_tfs_all.txt.gz")
67
io$rna.celltype.marker_tfs.filt <- file.path(io$basedir,"results/rna/differential/pseudobulk/celltype/parsed/marker_tfs_filtered.txt.gz")
68
69
# RNA atlas (Pijuan-Sala2019)
70
io$rna.atlas.metadata <- file.path(io$atlas.basedir,"sample_metadata.txt.gz")
71
io$rna.atlas.sce <- file.path(io$atlas.basedir,"processed/SingleCellExperiment.rds")
72
io$rna.atlas.marker_genes.up <- file.path(io$atlas.basedir,"results/differential/celltypes/genes/all_stages/marker_genes/marker_genes_upregulated_filtered.txt.gz")
73
io$rna.atlas.marker_genes.all <- file.path(io$atlas.basedir,"results/differential/celltypes/genes/all_stages/marker_genes/marker_genes_upregulated_all.txt.gz")
74
io$rna.atlas.marker_TFs.up <- file.path(io$atlas.basedir,"results/differential/celltypes/TFs/TF_markers/marker_TFs_upregulated_filtered.txt.gz")
75
io$rna.atlas.marker_TFs.all <- file.path(io$atlas.basedir,"results/differential/celltypes/TFs/TF_markers/marker_TFs_upregulated_all.txt.gz")
76
io$rna.atlas.differential <- file.path(io$atlas.basedir,"results/differential")
77
# io$rna.atlas.average_expression_per_celltype <- file.path(io$atlas.basedir,"results/marker_genes/all_stages/avg_expr_per_celltype_and_gene.txt.gz")
78
io$rna.atlas.sce.pseudobulk <- file.path(io$atlas.basedir,"results/pseudobulk/SingleCellExperiment_pseudobulk.rds")
79
io$rna.atlas.celltype_proportions <- file.path(io$atlas.basedir,"results/celltype_proportions/celltype_proportions.txt.gz")
80
81
# ATAC
82
# io$signac.directory <- file.path(io$basedir,"processed/atac/signac")
83
# io$signac <- file.path(io$signac.directory,"/signac.rds")
84
# io$cistopic.model <- file.path(io$basedir,"results/atac/cistopic/cistopic_warpLDA_bestModel.rds")
85
io$archR.directory <- file.path(io$basedir,"processed/atac/archR")
86
io$motifmatcher_scores.se <- file.path(io$archR.directory,"Annotations/CISBP-Scores.rds")
87
io$motifmatcher_positions.se <-file.path(io$archR.directory,"Annotations/CISBP-Positions.rds")
88
io$archR.projectMetadata <- file.path(io$archR.directory,"projectMetadata.rds")
89
io$archR.peakSet.granges <- file.path(io$archR.directory,"PeakSet.rds")
90
io$archR.peakSet.stats <- file.path(io$basedir,"results/atac/archR/feature_stats/PeakMatrix/PeakMatrix_celltype_stats.txt.gz")
91
io$archR.bgdPeaks <- file.path(io$archR.directory,"Background-Peaks.rds")
92
io$archR.peakSet.bed <- file.path(io$archR.directory,"PeakCalls/peaks_archR_macs2.bed.gz")
93
io$archR.peakMatrix.cells <- file.path(io$archR.directory,"Matrices/PeakMatrix_summarized_experiment.rds")
94
io$archR.peakMatrix.metacells <- file.path(io$basedir,"results/atac/archR/metacells/all_cells/PeakMatrix/PeakMatrix_summarized_experiment_metacells.rds")
95
io$archR.peakMatrix.pseudobulk <- file.path(io$basedir,"results/atac/archR/pseudobulk/celltype/PeakMatrix/pseudobulk_PeakMatrix_summarized_experiment.rds")
96
io$archR.peak.metadata <- file.path(io$archR.directory,"PeakCalls/peak_metadata.tsv.gz")
97
io$archR.peak.stats <- file.path(io$basedir,"results/atac/archR/feature_stats/PeakMatrix_celltype.mapped_stats.txt.gz")
98
io$archR.peak2gene.all <- file.path(io$basedir,"results/atac/archR/peak_calling/peaks2genes/peaks2genes_all.txt.gz")
99
io$archR.peak2gene.nearest <- file.path(io$basedir,"results/atac/archR/peak_calling/peaks2genes/peaks2genes_nearest.txt.gz")
100
io$archR.GeneScoreMatrix.cells <- file.path(io$basedir,"processed/atac/archR/Matrices/GeneScoreMatrix_TSS_summarized_experiment.rds")
101
io$archR.GeneScoreMatrix.pseudobulk <- file.path(io$basedir,"results/atac/archR/pseudobulk/celltype/GeneScoreMatrix_TSS/pseudobulk_GeneScoreMatrix_TSS_summarized_experiment.rds")
102
io$archR.chromvar.cells <- file.path(io$basedir,"results/atac/archR/chromvar/cells/chromVAR_deviations_CISBP_archr.rds")
103
io$archR.chromvar.pseudobulk <- file.path(io$basedir,"results/atac/archR/chromvar/pseudobulk/celltype/chromVAR_deviations_CISBP_pseudobulk_archr.rds")
104
io$archR.feature_stats <- file.path(io$basedir,"results/atac/archR/chromvar_chip/pseudobulk/chromVAR_chip_CISBP_archr.rds")
105
io$archR.motif2gene <- file.path(io$basedir,"processed/atac/archR/Annotations/CISBP_motif2gene.txt.gz")
106
# io$archR.chromvar_chip.cells <- file.path(io$basedir,"results/atac/archR/chromvar/cells/celltype/chromVAR_deviations_CISBP_pseudobulk_archr.rds")
107
108
# io$archR.pseudobulk.genes.se <- file.path(io$basedir,"processed/atac/archR/pseudobulk/celltype.predicted/pseudobulk_GeneScoreMatrix_summarized_experiment.rds")
109
110
# Differential ATAC and marker peaks
111
io$atac.markers_peaks.pseudobulk.all <- file.path(io$basedir,"results/atac/archR/differential/pseudobulk/celltype/PeakMatrix/parsed/markers_all.txt.gz")
112
io$atac.markers_peaks.pseudobulk.filt <- file.path(io$basedir,"results/atac/archR/differential/pseudobulk/celltype/PeakMatrix/parsed/markers_filt.txt.gz")
113
# io$archR.markers_chromvar <- file.path(io$basedir,"results/atac/archR/chromvar/differential/markers/marker_TFs.txt.gz")
114
115
# ATAC public data (Pijuan-Sala2020)
116
io$pijuansala.basedir <- file.path(io$basedir,"public_datasets/Pijuan-Sala_2020")
117
io$pijuansala.archR.directory <- file.path(io$pijuansala.basedir,"data/processed/archR")
118
119
# paga
120
io$paga.connectivity <- file.path(io$atlas.basedir,"results/paga/paga_connectivity.csv")
121
io$paga.coordinates <- file.path(io$atlas.basedir,"results/paga/paga_coordinates.csv")
122
123
# Virtual ChIP-seq
124
# io$tf2peak_cor.se <- file.path(io$basedir,"results/rna_atac/rna_vs_acc/pseudobulk/TFexpr_vs_peakAcc/cor_TFexpr_vs_peakAcc_SummarizedExperiment.rds")
125
# io$tf.activities <- file.path(io$atlas.basedir,"results/rna_atac/rna_vs_chromvar/TF_activities/tf_activities.txt.gz")
126
io$virtual_chip.dir <- file.path(io$basedir,"results/rna_atac/virtual_chipseq")
127
io$virtual_chip.mtx <- file.path(io$basedir,"results/rna_atac/virtual_chipseq/virtual_chip_mtx.rds")
128
io$archR.chromvar_chip.pseudobulk <- file.path(io$basedir,"results/atac/archR/chromvar_chip/pseudobulk/chromVAR_chip_CISBP_archr.rds")
129
130
# Coexpression
131
# io$tf2tf_cor.mtx <- file.path(io$basedir,"results/rna/coexpression/correlation_matrix_tf2tf.rds")
132
# io$tf2gene_cor.mtx <- file.path(io$basedir,"results/rna/coexpression/correlation_matrix_tf2gene.rds")
133
134
# Dimensionality reduction
135
io$rna.dimred.pca <- file.path(io$basedir,"results/rna/dimensionality_reduction/sce/batch_correction_sample_remove_ExE_cells_False/pca_features2500_pcs50.txt.gz")
136
io$atac.dimred.lsi <- file.path(io$basedir,"results/atac/archR/dimensionality_reduction/cells/PeakMatrix/remove_ExE_cells_False/batch_correction_None/lsi_nfeatures25000_ndims50.txt.gz")
137
# io$rna.dimred.umap <- file.path(io$basedir,"results/rna/dimensionality_reduction/all_cells/E7.5_rep1-E7.5_rep2-E8.0_rep1-E8.0_rep2-E8.5_rep1-E8.5_rep2_umap_features2500_pcs30_neigh25_dist0.3.txt.gz")
138
# io$atac.dimred.umap <- file.path(io$basedir,"results/atac/archR/dimensionality_reduction/PeakMatrix/all_cells/E7.5_rep1-E7.5_rep2-E8.0_rep1-E8.0_rep2-E8.5_rep1-E8.5_rep2_umap_nfeatures50000_ndims50_neigh30_dist0.45.txt.gz")
139
140
#############
141
## Options ##
142
#############
143
144
opts <- list()
145
146
opts$stages <- c(
147
  "E7.5",
148
  "E7.75",
149
  "E8.0",
150
  "E8.5",
151
  "E8.75"
152
)
153
154
opts$samples <- c(
155
  "E7.5_rep1",
156
  "E7.5_rep2",
157
  "E7.75_rep1",
158
  "E8.0_rep1",
159
  "E8.0_rep2",
160
  "E8.5_rep1",
161
  "E8.5_rep2",
162
  "E8.5_CRISPR_T_WT",
163
  "E8.5_CRISPR_T_KO",
164
  "E8.75_rep1",
165
  "E8.75_rep2"
166
)
167
168
# opts$rename.samples <- c(
169
#   "E8.75_rep1" = "e8_75_1_L002",
170
#   "E8.75_rep2" = "e8_75_2_L002",
171
#   "E8.5_rep1" = "multiome1",
172
#   "E8.5_rep2" = "multiome2",
173
#   "E8.0_rep1" = "E8_0_rep1_multiome",
174
#   "E8.0_rep2" = "E8_0_rep2_multiome",
175
#   "E7.5_rep1" = "rep1_L001_multiome",
176
#   "E7.5_rep2" = "rep2_L002_multiome"
177
#   # E7.75_rep1
178
#   # E8.5_CRISPR_T_WT
179
#   # E8.5_CRISPR_T_KO
180
# )
181
182
opts$sample2stage <- c(
183
  "E7.5_rep1" = "E7.5",
184
  "E7.5_rep2" = "E7.5",
185
  "E7.75_rep1" = "E7.75",
186
  "E8.0_rep1" = "E8.0",
187
  "E8.0_rep2" = "E8.0",
188
  "E8.5_rep1" = "E8.5",
189
  "E8.5_rep2" = "E8.5",
190
  "E8.75_rep1" = "E8.75",
191
  "E8.75_rep2" = "E8.75",
192
  "E8.5_CRISPR_T_KO" = "E8.5",
193
  "E8.5_CRISPR_T_WT" = "E8.5"
194
)
195
196
197
opts$celltypes <- c(
198
  "Epiblast",
199
  "Primitive_Streak",
200
  "Caudal_epiblast",
201
  "PGC",
202
  "Anterior_Primitive_Streak",
203
  "Notochord",
204
  "Def._endoderm",
205
  "Gut",
206
  "Nascent_mesoderm",
207
  "Mixed_mesoderm",
208
  "Intermediate_mesoderm",
209
  "Caudal_Mesoderm",
210
  "Paraxial_mesoderm",
211
  "Somitic_mesoderm",
212
  "Pharyngeal_mesoderm",
213
  "Cardiomyocytes",
214
  "Allantois",
215
  "ExE_mesoderm",
216
  "Mesenchyme",
217
  "Haematoendothelial_progenitors",
218
  "Endothelium",
219
  "Blood_progenitors_1",
220
  "Blood_progenitors_2",
221
  "Erythroid1",
222
  "Erythroid2",
223
  "Erythroid3",
224
  "NMP",
225
  "Rostral_neurectoderm",
226
  "Caudal_neurectoderm",
227
  "Neural_crest",
228
  "Forebrain_Midbrain_Hindbrain",
229
  "Spinal_cord",
230
  "Surface_ectoderm",
231
  "Visceral_endoderm",
232
  "ExE_endoderm",
233
  "ExE_ectoderm",
234
  "Parietal_endoderm"
235
)
236
237
opts$stage.colors = c(
238
  "E7.5" = "#edf8b1",
239
  "E7.75" = "#c7e9b4",
240
  "E8.0" = "#7fcdbb",
241
  "E8.25" = "#41b6c4",
242
  "E8.5" = "#1d91c0",
243
  "E8.75" = "#225ea8"
244
)
245
# opts$stage.colors <- viridis::viridis(n=length(opts$stages))
246
# names(opts$stage.colors) <- rev(opts$stages)
247
248
opts$celltype.colors = c(
249
  "Epiblast" = "#635547",
250
  "Primitive_Streak" = "#DABE99",
251
  "Caudal_epiblast" = "#9e6762",
252
  "PGC" = "#FACB12",
253
  "Anterior_Primitive_Streak" = "#c19f70",
254
  "Notochord" = "#0F4A9C",
255
  "Def._endoderm" = "#F397C0",
256
  "Gut" = "#EF5A9D",
257
  "Nascent_mesoderm" = "#C594BF",
258
  "Mixed_mesoderm" = "#DFCDE4",
259
  "Intermediate_mesoderm" = "#139992",
260
  "Caudal_Mesoderm" = "#3F84AA",
261
  "Paraxial_mesoderm" = "#8DB5CE",
262
  "Somitic_mesoderm" = "#005579",
263
  "Pharyngeal_mesoderm" = "#C9EBFB",
264
  "Cardiomyocytes" = "#B51D8D",
265
  "Allantois" = "#532C8A",
266
  "ExE_mesoderm" = "#8870ad",
267
  "Mesenchyme" = "#cc7818",
268
  "Haematoendothelial_progenitors" = "#FBBE92",
269
  "Endothelium" = "#ff891c",
270
  "Blood_progenitors" = "#c9a997",
271
  "Blood_progenitors_1" = "#f9decf",
272
  "Blood_progenitors_2" = "#c9a997",
273
  "Erythroid" = "#EF4E22",
274
  "Erythroid1" = "#C72228",
275
  "Erythroid2" = "#f79083",
276
  "Erythroid3" = "#EF4E22",
277
  "NMP" = "#8EC792",
278
  "Neurectoderm" = "#65A83E",
279
  "Rostral_neurectoderm" = "#65A83E",
280
  "Caudal_neurectoderm" = "#354E23",
281
  "Neural_crest" = "#C3C388",
282
  "Forebrain_Midbrain_Hindbrain" = "#647a4f",
283
  "Spinal_cord" = "#CDE088",
284
  "Surface_ectoderm" = "#f7f79e",
285
  "Visceral_endoderm" = "#F6BFCB",
286
  "ExE_endoderm" = "#7F6874",
287
  "ExE_ectoderm" = "#989898",
288
  "Parietal_endoderm" = "#1A1A1A"
289
)
290
291
opts$chr <- paste0("chr",c(1:19,"X","Y"))
292
293
###################
294
## Load metadata ##
295
###################
296
297
# sample_metadata <- fread(io$metadata)
298
# .[pass_QC==T] %>% 
299
# .[batch%in%opts$batches] %>%
300
# .[,celltype.mapped:=stringr::str_replace_all(celltype.mapped," ","_")] %>%
301
# .[,celltype.mapped:=stringr::str_replace_all(celltype.mapped,"/","_")] %>%
302
# .[,celltype.mapped:=factor(celltype.mapped, levels=names(opts$celltype.colors))]
303
304
305
###################
306
## Edit metadata ##
307
###################
308
309
# io$metadata <- file.path(io$basedir,"results/atac/archR/qc/sample_metadata_after_qc.txt.gz")
310
# io$metadata <- file.path(io$basedir,"results/rna/mapping/sample_metadata_after_mapping.txt.gz")
311
# sample_metadata <- fread(io$metadata)
312
# sample_metadata[pass_rnaQC==FALSE & nFrags_atac<=1e4,pass_atacQC:=FALSE]
313
# sample_metadata[sample=="E7.75_rep1" & ribosomal_percent_RNA>=8,pass_rnaQC:=FALSE]
314
# sample_metadata[sample=="E7.75_rep1" & ribosomal_percent_RNA>=4.5 & nFeature_RNA<=3500,pass_rnaQC:=FALSE]
315
# fwrite(sample_metadata, io$metadata, sep="\t", quote=F, na="NA")
316
317