Diff of /R/ReadData.R [000000] .. [9905a0]

Switch to unified view

a b/R/ReadData.R
1
# Reference CITE-seq data ----
# RNA-seq, epitope and TCR raw and processed data have been deposited in the
# Gene Expression Omnibus (GEO) under accession codes GSE252608 and GSE252455.
# Alternatively, the object can be reproduced with IntegrateTcells.Rmd.
Combined_T <- readRDS("output/Tcells_Integrated.rds")
5
6
# 5' scRNA data mapped to the CITE-seq reference ----
# List of objects, one entry per mapped data set.
# Create the file with MapTcells_5scRNA.Rmd before running this script.
sobjs_T_5prime <- readRDS("output/SeuratObjects_T_5'scRNA.rds")
9
10
# Pseudotime from trajectory analysis ----
# load() restores the saved objects under their stored names in the calling
# environment. `ttox_pseudotime`, which is used when the TTOX subset is
# annotated further down, is presumably provided by this file -- TODO confirm.
load("output/ttox_pseudotime.RData")
12
13
# Gradient boosting results ----
# Available in the data folder.
# Reproducible with GB_surface.Rmd and GB_surfaceplus.Rmd.
# Two variants ("surfaceplus" and "surface"), each with a results object and
# a classes object.
GBresults_surfaceplus <- readRDS("output/GBresults_Combined_T_surfaceplus.rds")
GBclasses_surfaceplus <- readRDS("output/GBclasses_Combined_T_surfaceplus.rds")
GBresults_surface <- readRDS("output/GBresults_Combined_T_surface.rds")
GBclasses_surface <- readRDS("output/GBclasses_Combined_T_surface.rds")
20
21
# ADT thresholds based on denoised protein expression (TotalVI) ----
# Reproducible with thresholds_denoised.Rmd.
# NOTE(review): the file name is spelled "threshholds"; it is kept as is
# because it has to match the file on disk -- confirm before renaming.
thresh <- read.csv("output/threshholds_denProtein.csv")
24
25
# Sample list ----
# One metadata table for the samples, read as a plain data.frame.
df_meta <- read.csv("data/metaData.csv")
27
28
# Flow cytometry data ----
# read.csv() turns characters that are not valid in R names into "."; every
# "." in the column names is replaced by "/" here (presumably to restore the
# original population labels -- confirm against the CSV header).
# Columns 3 to the last one are then stacked into long format: the former
# column name goes to `Population`, the cell value to `FACS`.
df_facs <- read.csv("data/FlowCytometryData.csv") %>%
  `colnames<-`(gsub(colnames(.), pattern = ".", replacement = "/", fixed = TRUE)) %>%
  pivot_longer(cols = 3:ncol(.), names_to = "Population", values_to = "FACS")
32
# Additional IKZF3 table; no `delim` is given, so read_delim() guesses it.
# Column-type messages and the progress bar are switched off.
df_ikzf3 <- read_delim("data/IKZF3_add.csv", show_col_types = FALSE, progress = FALSE)
33
34
# Subset to TTOX and add pseudotime ----
# Make "IdentI" the active identity of the reference object so that subset()
# can select cells by it. Note that this modifies Combined_T itself.
Idents(Combined_T) <- "IdentI"

# Keep identities 3, 5, 12 and 16 (presumably the TTOX clusters -- confirm).
ttox <- subset(Combined_T, idents = c(3, 5, 12, 16))

# Attach the pseudotime values as the metadata column "Pseudotime".
# `ttox_pseudotime` is not defined in this section and must already exist.
ttox <- AddMetaData(ttox, metadata = ttox_pseudotime, col.name = "Pseudotime")
38
39
# CIBERSORTx output (https://cibersortx.stanford.edu/) ----
# One TTOX table and one TFH+TREG table for each external cohort
# (Schmitz, Chapuy).
df_ttoxcompl_schmitz <- read.csv("data/Cibersortx_Schmitz_FreqTTOX.csv")
df_tfhtreg_schmitz <- read.csv("data/Cibersortx_Schmitz_FreqTFH+TREG.csv")
df_ttoxcompl_chapuy <- read.csv("data/Cibersortx_Chapuy_FreqTTOX.csv")
df_tfhtreg_chapuy <- read.csv("data/Cibersortx_Chapuy_FreqTFH+TREG.csv")
44
45
# External survival data (Schmitz et al. 2018, Chapuy et al. 2018) ----
# Available at https://www.nejm.org/doi/10.1056/NEJMoa1801445
# Available at https://www.nature.com/articles/s41591-018-0016-8
# Empty fields are read as NA in both tables.
df_surv_schmitz <- read.delim("data/MetaData_Schmitz_2018.txt", na.strings = "")

# For the Chapuy cohort keep only rows where both time_pfs and time_os are
# present.
df_surv_chapuy <- read.delim("data/MetaData_Chapuy_2018.txt", na.strings = "") %>%
  filter(!is.na(time_pfs) & !is.na(time_os))
51
52
# External SNV data (Chapuy et al. 2018) ----
# Available at https://www.nature.com/articles/s41591-018-0016-8
# Empty fields are read as NA. Columns 3 to the last one are stacked into
# long format with the former column name in `PatientID` (the values land in
# the default `value` column). The CIBERSORTx TTOX table of the same cohort is
# then joined on `PatientID`, and every row containing an NA in any column is
# dropped.
df_snvs_chapuy <- read.delim("data/SomaticVariants_Chapuy2018.txt", na.strings = "") %>%
  pivot_longer(cols = 3:ncol(.), names_to = "PatientID") %>%
  left_join(., df_ttoxcompl_chapuy, by = "PatientID") %>%
  drop_na()
58
59
# Single cell T-cell receptor data ----
# RNA-seq, epitope and TCR raw and processed data have been deposited in the
# Gene Expression Omnibus (GEO) under accession codes GSE252608 and GSE252455.
# Every entry of "countMatrices" whose name matches "TCRrep" is passed, with
# its path, to readTCR() (a helper defined outside this section).
DF_TCRrep <- readTCR(
  list.files(path = "countMatrices", pattern = "TCRrep", full.names = TRUE)
)
62
63
# CODEX data (metadata only) ----
# Available at the BioStudies database (https://www.ebi.ac.uk/biostudies/)
# under accession number S-BIAD565.
# The annotation table is read with fread(), converted to a tibble, and rows
# whose `Merged_final` is the literal string "na" are removed. add_entity()
# is a helper defined outside this section.
codex_annotation <- data.table::fread("data/cells_annotation.csv") %>%
  tibble() %>%
  filter(Merged_final != "na") %>%
  add_entity()