446 lines (311 with data), 12.0 kB
---
title: "Integrate T cells"
author: Tobias Roider
date: "Last compiled on `r format(Sys.time(), '%d %B, %Y, %X')`"
output:
rmdformats::readthedown:
editor_options:
chunk_output_type: console
---
```{r options, include=FALSE, warning = FALSE}
library(knitr)
opts_chunk$set(echo=TRUE, tidy=FALSE, include=TRUE, message=FALSE,
dpi = 100, cache = FALSE, warning = FALSE)
opts_knit$set(root.dir = "../")
options(bitmapType = "cairo")
```
# Functions and packages
```{r read data}
require(future)
require(future.apply)
source("R/ReadPackages.R")
source("R/Functions.R")
source("R/ThemesColors.R")
#source("R/Helpers.R")
```
# Read meta data
```{r read meta}
df_meta <- read.csv("data/metaData.csv", sep = ",") %>%
filter(!is.na(Run))
df_meta %>% select(PatientID, Entity, Run) %>%
DT::datatable(., options = list(pageLength = 5, autoWidth = TRUE))
```
## Read count tables
```{r read count}
sobjs_T <- lapply(df_meta$PatientID, function(x){
# Read count matrices
rna <- read.delim(paste0("countMatrices/", x, "_Tcells_3'RNA.txt"), sep = " ")
adt <- read.delim(paste0("countMatrices/", x, "_Tcells_ADT.txt"), sep = " ")
# Create Seurat Object
sobj <- CreateSeuratObject(counts = rna)
# Add ADT data
sobj[["ADT"]] <- CreateAssayObject(counts = adt)
DefaultAssay(sobj) <- "RNA"
# Add Percentage of mitochondrial genes and some meta data
sobj$percent.mt <- PercentageFeatureSet(sobj, pattern = "^MT-")
sobj$Barcode_full <- colnames(sobj)
sobj$PatientID <- x
meta_tmp <- df_meta %>% filter(PatientID==x)
# Add more meta data
sobj$Entity <- meta_tmp$Entity
sobj$Subtype <- meta_tmp$Subtype
sobj$Age <- meta_tmp$Age
sobj$Sex <- meta_tmp$Sex
sobj$Status <- meta_tmp$Status
sobj$Run <- meta_tmp$Run
return(sobj)
}) %>% `names<-`(df_meta$PatientID)
```
## Process CITE-seq data
```{r process}
sobjs_T <- lapply(sobjs_T, function(sobj){
# Please note that low quality cells (e.g. high mito counts), doublets,
# and non T-cells were already filtered and are not contained in the provided count matrices.
# Thus further filtering is not necessary here.
# In case you need unfiltered raw data, please contact tobias.roider@embl.de
# Normalize data
sobj <- NormalizeData(sobj, normalization.method = "LogNormalize", scale.factor = 10000)
# Run Seurat Processing
sobj <- SeuratProc_T(sobj, verbose=FALSE,
dims.clustering=1:14,
resolution.clustering = 0.4,
dims.umap=1:13)
# Run Processing for ADT data
DefaultAssay(sobj) <- "ADT"
sobj <- NormalizeData(sobj, assay = "ADT", normalization.method = "CLR")
# Run Seurat Processing for ADT part
sobj <- SeuratProcADT_T(sobj, verbose=FALSE,
dims.clustering=1:14,
resolution.clustering = 0.4,
dims.umap=1:13)
DefaultAssay(sobj) <- "RNA"
Idents(sobj) <- "RNA_snn_res.0.4"
return(sobj)
})
```
# Integrate data
## Merge data
```{r merge}
# Merge objects
for(i in 1:length(sobjs_T)) {
if(i==1){
Combined_T <- merge(sobjs_T[[1]], sobjs_T[[2]])
}
if(i>2){
Combined_T <- merge(Combined_T, sobjs_T[[i]])
}
}
```
## Split objects by run
```{r split}
# Split objects again by run to account for most important batch factor
splitted_objects <- SplitObject(Combined_T, split.by = "Run")
```
## Integrate RNA
### Find anchors and integrate data
```{r anchors rna}
anchors <- FindIntegrationAnchors(object.list = splitted_objects,
dims = 1:20,
assay = rep("RNA", length(splitted_objects)))
Combined_T <- IntegrateData(anchorset = anchors,
new.assay.name = "integratedRNA")
DefaultAssay(Combined_T) <- "integratedRNA"
```
### Standard workflow for integrated object
```{r workflow rna}
Combined_T <- ScaleData(Combined_T)
Combined_T <- RunPCA(Combined_T,
reduction.name = "pcaRNA", reduction.key = "pcaRNA_")
Combined_T <- RunUMAP(Combined_T, dims = 1:20, reduction.key = "umapRNA_",
reduction.name = "umapRNA", reduction = "pcaRNA")
Combined_T <- FindNeighbors(Combined_T, reduction = "pcaRNA", dims = 1:20)
Combined_T <- FindClusters(Combined_T, resolution = 0.6)
```
### Visualization
#### Cluster
```{r vis cluster rna, fig.height=5, fig.width=5.5}
DimPlot(Combined_T, reduction = "umapRNA", label = T, raster = T)+
NoLegend()
```
#### PatientID
```{r vis cluster pat, include=F, eval=F}
DimPlot(Combined_T, reduction = "umapRNA", label = F, raster = T, group.by = "PatientID")+
NoLegend()
```
#### Run
```{r vis run run, include=F, eval=F}
DimPlot(Combined_T, reduction = "umapRNA", label = F, raster = T, group.by = "Run")+
NoLegend()
```
## Integrate ADT
### Find anchors and integrate data
```{r anchors adt}
anchors <- FindIntegrationAnchors(object.list = splitted_objects,
dims = 1:20,
assay = rep("ADT", length(splitted_objects)))
Combined_T_ADT <- IntegrateData(anchorset = anchors,
new.assay.name = "integratedADT")
Combined_T[["integratedADT"]] <- Combined_T_ADT[["integratedADT"]]
```
### Standard workflow for integrated object
```{r workflow adt}
DefaultAssay(Combined_T) <- "integratedADT"
# Run the standard workflow for visualization and clustering
Combined_T <- ScaleData(Combined_T)
Combined_T <- RunPCA(Combined_T, npcs = 30, nfeatures.print = 5,
reduction.name = "pcaADT", reduction.key = "pcaADT_")
Combined_T <- RunUMAP(Combined_T, reduction = "pcaADT", dims = 1:20,
reduction.name = "umapADT",
reduction.key = "umapADT_")
Combined_T <- FindNeighbors(Combined_T, reduction = "pcaADT", dims = 1:20)
Combined_T <- FindClusters(Combined_T, resolution = 0.4)
```
### Visualization
#### Cluster
```{r vis cluster adt, fig.height=5, fig.width=5.5}
DimPlot(Combined_T, reduction = "umapADT", label = TRUE, raster = T)+NoLegend()
```
#### PatientID
```{r vis pat adt, include=F, eval=F}
DimPlot(Combined_T, reduction = "umapADT", label = FALSE, raster = T,
group.by = "PatientID")+NoLegend()
```
#### Run
```{r vis run adt, include=F, eval=F}
DimPlot(Combined_T, reduction = "umapADT", label = FALSE, raster = T,
group.by = "Run")+NoLegend()
```
# Identify and refine clusters
## Repeat clutering with higher resolution
```{r clustering high res, fig.height=5, fig.width=5.5}
DefaultAssay(Combined_T) <- "integratedRNA"
Combined_T <- FindClusters(Combined_T, resolution = 1.4)
DimPlot(Combined_T, reduction = "umapRNA", label = T, raster = T,
group.by = "integratedRNA_snn_res.1.4")+NoLegend()
Idents(Combined_T) <- "integratedRNA_snn_res.1.4"
```
# Identify clusters
## Find Markers
```{r identify markers}
Idents(Combined_T) <- "integratedRNA_snn_res.1.4"
clusters <- paste0("cluster_", 0:(length(unique(Idents(Combined_T)))-1))
# Marker
markers_rna <- lapply(clusters, function(x){
z <- as.numeric(gsub(x, pattern = "cluster_", replacement = ""))
df_mark <- FindMarkers(Combined_T, ident.1 = z, assay = "integratedRNA", test.use = "roc") %>%
mutate(avg_diff=round(avg_diff, 3),
avg_log2FC=round(avg_log2FC, 3)) %>%
select(-avg_diff, -pct.1, -pct.2) %>%
rownames_to_column("Gene")
return(df_mark)
}) %>% `names<-`(clusters) %>%
bind_rows(., .id = "Cluster") %>%
mutate(Cluster=substr(Cluster,9,10))
# Marker
markers_adt <- lapply(clusters, function(x){
z <- as.numeric(gsub(x, pattern = "cluster_", replacement = ""))
df_mark <- FindMarkers(Combined_T, ident.1 = z, assay = "integratedADT", test.use = "roc") %>%
mutate(avg_diff=round(avg_diff, 3),
avg_log2FC=round(avg_log2FC, 3)) %>%
select(-avg_diff, -pct.1, -pct.2) %>%
rownames_to_column("Protein")
return(df_mark)
}) %>% `names<-`(clusters) %>%
bind_rows(., .id = "Cluster") %>%
mutate(Cluster=substr(Cluster,9,10))
```
### Show Markers
#### RNA
```{r show markers rna}
DT::datatable(markers_rna, rownames = FALSE, filter = "top",
options = list(pageLength = 10, autoWidth = TRUE))
```
#### ADT
```{r show markers adt}
DT::datatable(markers_adt, rownames = FALSE, filter = "top",
options = list(pageLength = 10, autoWidth = TRUE))
```
# Refine object
## Identify unwanted clusters
```{r refine clusters}
# Remove cluster of with levels of LYZ (--> myeloid cells)
c1 <- markers_rna %>% filter(Gene=="LYZ", myAUC>0.8) %>% pull(Cluster)
# Remove cluster with high levels of mito genes
c2 <- markers_rna %>% group_by(Cluster) %>%
top_n(5, myAUC) %>%
filter(Gene=="MT-CO3") %>%
pull(Cluster)
# Remove cluster with high levels of interferon induced genes
c3 <- markers_rna %>% group_by(Cluster) %>%
top_n(5, myAUC) %>%
filter(Gene=="IFI44L") %>%
pull(Cluster)
# Remove cluster with high levels of interferon induced genes
c4 <- markers_rna %>% group_by(Cluster) %>%
top_n(1, myAUC) %>%
filter(Gene=="ISG15") %>%
pull(Cluster)
# Remove cluster with high levels of heat shock proteins
c5 <- markers_rna %>% group_by(Cluster) %>%
top_n(5, myAUC) %>%
filter(Gene=="HSP90AB1") %>%
pull(Cluster)
clusters_keep <- setdiff(levels(Combined_T$integratedRNA_snn_res.1.4), c(c1,c2,c3,c4,c5))
clusters_keep
```
## Subset object
```{r subset object, fig.height=5, fig.width=5.5}
Combined_T <- subset(Combined_T, idents = clusters_keep)
DimPlot(Combined_T, label = T)+
NoLegend()
Combined_T <- RunUMAP(Combined_T, dims = 1:30, reduction = "pcaRNA",
reduction.name = "umapRNA", reduction.key = "umapRNA_")
Combined_T <- FindNeighbors(Combined_T, reduction = "pcaRNA", dims = 1:20)
Combined_T <- FindClusters(Combined_T, resolution = 1.4)
DimPlot(Combined_T, label = T, group.by = "integratedRNA_snn_res.1.4", raster = T)+
NoLegend()
Idents(Combined_T) <- "IdentI"
```
# Run WNN pipeline
```{r run wnn, fig.height=5, fig.width=5.5}
Combined_T <- FindMultiModalNeighbors(
Combined_T, reduction.list = list("pcaRNA", "pcaADT"), k.nn = 30,
dims.list = list(1:12, 1:20), modality.weight.name = c("RNA.weight", "ADT.weight")
)
Combined_T <- RunUMAP(Combined_T, nn.name = "weighted.nn", reduction.name = "wnn.umap",
reduction.key = "wnnUMAP_", return.model = TRUE)
Combined_T <- FindClusters(Combined_T, graph.name = "wsnn", algorithm = 3, resolution = 0.7)
DimPlot(Combined_T, reduction = 'wnn.umap', group.by = "wsnn_res.0.7", label = T, raster = T)+
NoLegend()
```
# Remove singletons
```{r remove singletons}
Combined_T <- subset(Combined_T, idents = c(0:14))
```
# Re-run WNN pipeline
```{r rerun wnn}
Combined_T <- FindMultiModalNeighbors(
Combined_T, reduction.list = list("pcaRNA", "pcaADT"), k.nn = 30,
dims.list = list(1:15, 1:20), modality.weight.name = c("RNA.weight", "ADT.weight")
)
Combined_T <- RunUMAP(Combined_T, nn.name = "weighted.nn", reduction.name = "wnn.umap",
reduction.key = "wnnUMAP_", return.model = TRUE)
Combined_T <- FindClusters(Combined_T, graph.name = "wsnn", algorithm = 3, resolution = 0.7)
```
# Compare with original object
```{r compare with original, fig.height=5, fig.width=5.5}
Combined_T_or <- readRDS("output/Tcells_Integrated.rds")
DimPlot(AddMetaData(Combined_T, metadata = Idents(Combined_T_or),
col.name = "IdentI_original"),
reduction = 'wnn.umap', group.by = "IdentI_original", label = T, raster = T)+
NoLegend()
```
# Save object
```{r save, eval=F}
saveRDS(Combined_T, file = "output/Tcells_Integrated.rds")
# Output might slightly differ depending on the version and system you use. For exact reproduction of figures please use Seurat Object provided at HeiData.
```
# Session Info
```{r}
sessionInfo()
```