|
a |
|
b/R/ReadData.R |
|
|
1 |
# Reference CITE-seq data.
# Raw and processed RNA-seq, epitope and TCR data are deposited in the Gene
# Expression Omnibus (GEO) under accession codes GSE252608 and GSE252455.
# The object can alternatively be reproduced with IntegrateTcells.Rmd.
Combined_T <- readRDS(file = "output/Tcells_Integrated.rds")
|
|
5 |
|
|
|
6 |
# 5' scRNA data mapped to the CITE-seq reference, stored as a list of objects.
# The file has to be created beforehand with MapTcells_5scRNA.Rmd.
sobjs_T_5prime <- readRDS(file = "output/SeuratObjects_T_5'scRNA.rds")
|
|
9 |
|
|
|
10 |
# Pseudotime from the trajectory analysis.
# load() restores every object saved in the .RData file under its original
# name; `ttox_pseudotime`, used further down in this script, is presumably
# one of them (verify against the script that wrote the file).
load(file = "output/ttox_pseudotime.RData")
|
|
12 |
|
|
|
13 |
# Gradient boosting results and classes.
# The files are available in the data folder and can be reproduced with
# GB_surface.Rmd and GB_surfaceplus.Rmd.

# "surface" set
GBresults_surface <- readRDS(file = "output/GBresults_Combined_T_surface.rds")
GBclasses_surface <- readRDS(file = "output/GBclasses_Combined_T_surface.rds")

# "surfaceplus" set
GBresults_surfaceplus <- readRDS(
  file = "output/GBresults_Combined_T_surfaceplus.rds"
)
GBclasses_surfaceplus <- readRDS(
  file = "output/GBclasses_Combined_T_surfaceplus.rds"
)
|
|
20 |
|
|
|
21 |
# ADT thresholds derived from denoised protein expression (TotalVI).
# Reproducible with thresholds_denoised.Rmd.
# NOTE(review): the file name is spelled "threshholds" (double "h"); the
# path must match the file on disk, so it is left exactly as is.
thresh <- read.csv(file = "output/threshholds_denProtein.csv")
|
|
24 |
|
|
|
25 |
# Sample list (one metadata table for the whole script).
df_meta <- read.csv(file = "data/metaData.csv")
|
|
27 |
|
|
|
28 |
# Flow cytometry data.
# Every "." in the column names is replaced by "/" (literal match, not a
# regex) -- presumably to undo the "." that read.csv() substitutes for
# characters that are not valid in R names. All columns from the third
# onwards are then stacked into Population / FACS pairs.
df_facs <- read.csv("data/FlowCytometryData.csv") %>%
  `colnames<-`(gsub(".", "/", colnames(.), fixed = TRUE)) %>%
  pivot_longer(cols = 3:ncol(.), names_to = "Population", values_to = "FACS")

# IKZF3 data. No delimiter is passed, so read_delim() has to detect it from
# the file; column-type messages and the progress bar are switched off.
df_ikzf3 <- read_delim(
  "data/IKZF3_add.csv",
  show_col_types = FALSE,
  progress = FALSE
)
|
|
33 |
|
|
|
34 |
# Subset to TTOX and add pseudotime.
# The active identities are switched to "IdentI" first, so the ids passed to
# subset() below are matched against that identity.
Idents(Combined_T) <- "IdentI"

# Keep identities 3, 5, 12 and 16 only.
ttox <- subset(Combined_T, idents = c(3, 5, 12, 16))

# Attach the pseudotime values as metadata column "Pseudotime".
# `ttox_pseudotime` is not created in this script by assignment; it is
# expected to come from the load() of output/ttox_pseudotime.RData.
ttox <- AddMetaData(
  ttox,
  metadata = ttox_pseudotime,
  col.name = "Pseudotime"
)
|
|
38 |
|
|
|
39 |
# CIBERSORTx output (https://cibersortx.stanford.edu/).
# Two tables per external cohort: TTOX frequencies and TFH+TREG frequencies.

# Schmitz cohort
df_ttoxcompl_schmitz <- read.csv(file = "data/Cibersortx_Schmitz_FreqTTOX.csv")
df_tfhtreg_schmitz <- read.csv(
  file = "data/Cibersortx_Schmitz_FreqTFH+TREG.csv"
)

# Chapuy cohort
df_ttoxcompl_chapuy <- read.csv(file = "data/Cibersortx_Chapuy_FreqTTOX.csv")
df_tfhtreg_chapuy <- read.csv(
  file = "data/Cibersortx_Chapuy_FreqTFH+TREG.csv"
)
|
|
44 |
|
|
|
45 |
# External survival data.
#   Schmitz et al. 2018: https://www.nejm.org/doi/10.1056/NEJMoa1801445
#   Chapuy et al. 2018:  https://www.nature.com/articles/s41591-018-0016-8
# Empty fields are read as NA in both tables.
df_surv_schmitz <- read.delim(
  "data/MetaData_Schmitz_2018.txt",
  na.strings = ""
)

# For the Chapuy cohort only rows with both a PFS and an OS time are kept.
df_surv_chapuy <- read.delim(
  "data/MetaData_Chapuy_2018.txt",
  na.strings = ""
) %>%
  filter(!is.na(time_pfs), !is.na(time_os))
|
|
51 |
|
|
|
52 |
# External SNV data (Chapuy et al. 2018).
# Available at https://www.nature.com/articles/s41591-018-0016-8
# Columns from the third onwards are stacked into PatientID / value pairs
# and joined to the Chapuy CIBERSORTx TTOX table by PatientID.
# drop_na() is called without column arguments, so a row with an NA in any
# column is removed -- this includes patients without a match in
# df_ttoxcompl_chapuy.
df_snvs_chapuy <- read.delim(
  "data/SomaticVariants_Chapuy2018.txt",
  na.strings = ""
) %>%
  pivot_longer(cols = 3:ncol(.), names_to = "PatientID") %>%
  left_join(df_ttoxcompl_chapuy, by = "PatientID") %>%
  drop_na()
|
|
58 |
|
|
|
59 |
# Single cell T-cell receptor data.
# Raw and processed RNA-seq, epitope and TCR data are deposited in the Gene
# Expression Omnibus (GEO) under accession codes GSE252608 and GSE252455.
# Every entry of countMatrices/ whose name matches "TCRrep" is passed, with
# its path, to readTCR() (defined outside this section).
DF_TCRrep <- readTCR(
  list.files(path = "countMatrices", pattern = "TCRrep", full.names = TRUE)
)
|
|
62 |
|
|
|
63 |
# CODEX data (metadata only).
# Available at the BioStudies database (https://www.ebi.ac.uk/biostudies/)
# under accession number S-BIAD565.
# Rows whose Merged_final equals the string "na" are discarded; because
# filter() keeps only rows where the condition is TRUE, rows with a missing
# Merged_final are discarded as well. add_entity() is defined outside this
# section.
codex_annotation <- data.table::fread("data/cells_annotation.csv") %>%
  tibble() %>%
  filter(Merged_final != "na") %>%
  add_entity()