Switch to unified view

a b/analysis/IntegrateTcells.Rmd
1
---
2
title: "Integrate T cells"
3
author: Tobias Roider
4
date: "Last compiled on `r format(Sys.time(), '%d %B, %Y, %X')`"
5
output: 
6
  
7
  rmdformats::readthedown: 
8
editor_options: 
9
  chunk_output_type: console
10
---
11
12
```{r options, include=FALSE, warning = FALSE}
13
14
library(knitr)
15
opts_chunk$set(echo=TRUE, tidy=FALSE, include=TRUE, message=FALSE,
16
               dpi = 100, cache = FALSE, warning = FALSE)
17
opts_knit$set(root.dir = "../")
18
options(bitmapType = "cairo")
19
20
```
21
22
# Functions and packages
23
```{r read data}
24
25
require(future)
26
require(future.apply)
27
source("R/ReadPackages.R")
28
source("R/Functions.R")
29
source("R/ThemesColors.R")
30
#source("R/Helpers.R")
31
32
```
33
34
# Read meta data
35
```{r read meta}
36
37
df_meta <- read.csv("data/metaData.csv", sep = ",") %>% 
38
  filter(!is.na(Run))
39
40
df_meta %>% select(PatientID, Entity, Run) %>% 
41
  DT::datatable(., options = list(pageLength = 5, autoWidth = TRUE))
42
43
```
44
45
## Read count tables
46
```{r read count}
47
48
sobjs_T <- lapply(df_meta$PatientID, function(x){
49
  
50
  # Read count matrices
51
  rna <- read.delim(paste0("countMatrices/", x, "_Tcells_3'RNA.txt"), sep = " ")
52
  adt <- read.delim(paste0("countMatrices/", x, "_Tcells_ADT.txt"), sep = " ")
53
  
54
  # Create Seurat Object
55
  sobj <- CreateSeuratObject(counts = rna)
56
  
57
  # Add ADT data
58
  sobj[["ADT"]] <- CreateAssayObject(counts = adt)
59
  DefaultAssay(sobj) <- "RNA"
60
  
61
  # Add Percentage of mitochondrial genes and some meta data
62
  sobj$percent.mt <- PercentageFeatureSet(sobj, pattern = "^MT-")
63
  sobj$Barcode_full <- colnames(sobj)
64
  sobj$PatientID <- x
65
  
66
  meta_tmp <- df_meta %>% filter(PatientID==x)
67
  
68
  # Add more meta data
69
  sobj$Entity <- meta_tmp$Entity
70
  sobj$Subtype <- meta_tmp$Subtype
71
  sobj$Age <- meta_tmp$Age
72
  sobj$Sex <- meta_tmp$Sex
73
  sobj$Status <- meta_tmp$Status
74
  sobj$Run <- meta_tmp$Run
75
  
76
  return(sobj)
77
  
78
}) %>% `names<-`(df_meta$PatientID)
79
80
```
81
82
## Process CITE-seq data
83
```{r process}
84
85
sobjs_T <- lapply(sobjs_T, function(sobj){
86
  
87
  # Please note that low quality cells (e.g. high mito counts), doublets, 
88
  # and non T-cells were already filtered and are not contained in the provided count matrices. 
89
  # Thus further filtering is not necessary here.
90
  # In case you need unfiltered raw data, please contact tobias.roider@embl.de
91
  
92
  # Normalize data
93
  sobj <- NormalizeData(sobj, normalization.method = "LogNormalize", scale.factor = 10000)
94
  
95
  # Run Seurat Processing
96
  sobj <- SeuratProc_T(sobj, verbose=FALSE, 
97
                       dims.clustering=1:14, 
98
                       resolution.clustering = 0.4, 
99
                       dims.umap=1:13)
100
  
101
  # Run Processing for ADT data
102
  DefaultAssay(sobj) <- "ADT"
103
  sobj <- NormalizeData(sobj, assay = "ADT", normalization.method = "CLR")
104
  
105
  # Run Seurat Processing for ADT part
106
  sobj <- SeuratProcADT_T(sobj, verbose=FALSE, 
107
                          dims.clustering=1:14, 
108
                          resolution.clustering = 0.4, 
109
                          dims.umap=1:13)
110
  
111
  DefaultAssay(sobj) <- "RNA"
112
  Idents(sobj) <- "RNA_snn_res.0.4"
113
  
114
  return(sobj)
115
  
116
})
117
118
```
119
120
# Integrate data
121
## Merge data
122
```{r merge}
123
124
# Merge objects
125
for(i in 1:length(sobjs_T)) {
126
  if(i==1){
127
    Combined_T <- merge(sobjs_T[[1]], sobjs_T[[2]])
128
  }
129
  if(i>2){
130
    Combined_T <- merge(Combined_T, sobjs_T[[i]])
131
  }
132
}
133
134
```
135
136
## Split objects by run
137
```{r split}
138
139
# Split objects again by run to account for most important batch factor
140
splitted_objects <- SplitObject(Combined_T, split.by = "Run")
141
142
```
143
144
## Integrate RNA
145
### Find anchors and integrate data
146
```{r anchors rna}
147
148
anchors <- FindIntegrationAnchors(object.list = splitted_objects, 
149
                                  dims = 1:20, 
150
                                  assay = rep("RNA", length(splitted_objects)))
151
152
Combined_T <- IntegrateData(anchorset = anchors, 
153
                            new.assay.name = "integratedRNA")
154
155
DefaultAssay(Combined_T) <- "integratedRNA"
156
157
```
158
159
### Standard workflow for integrated object
160
```{r workflow rna}
161
162
Combined_T <- ScaleData(Combined_T) 
163
Combined_T <- RunPCA(Combined_T, 
164
                     reduction.name = "pcaRNA", reduction.key = "pcaRNA_")
165
166
Combined_T <- RunUMAP(Combined_T, dims = 1:20, reduction.key = "umapRNA_",
167
                      reduction.name = "umapRNA", reduction = "pcaRNA")
168
169
Combined_T <- FindNeighbors(Combined_T, reduction = "pcaRNA", dims = 1:20)
170
Combined_T <- FindClusters(Combined_T, resolution = 0.6)
171
172
```
173
174
### Visualization
175
#### Cluster
176
```{r vis cluster rna, fig.height=5, fig.width=5.5}
177
178
DimPlot(Combined_T, reduction = "umapRNA", label = T, raster = T)+
179
  NoLegend()
180
181
```
182
183
#### PatientID
184
```{r vis cluster pat, include=F, eval=F}
185
186
DimPlot(Combined_T, reduction = "umapRNA", label = F, raster = T, group.by = "PatientID")+
187
  NoLegend()
188
189
```
190
191
#### Run
192
```{r vis run run, include=F, eval=F}
193
194
DimPlot(Combined_T, reduction = "umapRNA", label = F, raster = T, group.by = "Run")+
195
  NoLegend()
196
197
```
198
199
## Integrate ADT
200
### Find anchors and integrate data
201
```{r anchors adt}
202
203
anchors <- FindIntegrationAnchors(object.list = splitted_objects, 
204
                                  dims = 1:20, 
205
                                  assay = rep("ADT", length(splitted_objects)))
206
207
Combined_T_ADT <- IntegrateData(anchorset = anchors,
208
                                new.assay.name = "integratedADT")
209
210
Combined_T[["integratedADT"]] <- Combined_T_ADT[["integratedADT"]]
211
212
```
213
214
### Standard workflow for integrated object
215
```{r workflow adt}
216
217
DefaultAssay(Combined_T) <- "integratedADT"
218
219
# Run the standard workflow for visualization and clustering
220
Combined_T <- ScaleData(Combined_T)
221
Combined_T <- RunPCA(Combined_T, npcs = 30, nfeatures.print = 5,
222
                         reduction.name = "pcaADT", reduction.key = "pcaADT_")
223
224
Combined_T <- RunUMAP(Combined_T, reduction = "pcaADT", dims = 1:20, 
225
                          reduction.name = "umapADT", 
226
                          reduction.key = "umapADT_")
227
228
Combined_T <- FindNeighbors(Combined_T, reduction = "pcaADT", dims = 1:20)
229
Combined_T <- FindClusters(Combined_T, resolution = 0.4)
230
231
```
232
233
### Visualization
234
#### Cluster
235
```{r vis cluster adt, fig.height=5, fig.width=5.5}
236
237
DimPlot(Combined_T, reduction = "umapADT", label = TRUE, raster = T)+NoLegend()
238
239
```
240
241
#### PatientID
242
```{r vis pat adt, include=F, eval=F}
243
244
DimPlot(Combined_T, reduction = "umapADT", label = FALSE, raster = T,
245
        group.by = "PatientID")+NoLegend()
246
247
```
248
249
#### Run
250
```{r vis run adt, include=F, eval=F}
251
252
DimPlot(Combined_T, reduction = "umapADT", label = FALSE, raster = T,
253
        group.by = "Run")+NoLegend()
254
255
```
256
257
# Identify and refine clusters
258
## Repeat clutering with higher resolution
259
```{r clustering high res, fig.height=5, fig.width=5.5}
260
261
DefaultAssay(Combined_T) <- "integratedRNA"
262
Combined_T <- FindClusters(Combined_T, resolution = 1.4)
263
DimPlot(Combined_T, reduction = "umapRNA", label = T, raster = T,
264
        group.by = "integratedRNA_snn_res.1.4")+NoLegend()
265
266
Idents(Combined_T) <- "integratedRNA_snn_res.1.4"
267
268
```
269
270
# Identify clusters
271
## Find Markers
272
```{r identify markers}
273
274
Idents(Combined_T) <- "integratedRNA_snn_res.1.4"
275
276
clusters <- paste0("cluster_", 0:(length(unique(Idents(Combined_T)))-1))
277
278
# Marker
279
markers_rna <- lapply(clusters, function(x){
280
  z <- as.numeric(gsub(x, pattern = "cluster_", replacement = ""))
281
  
282
  df_mark <- FindMarkers(Combined_T, ident.1 = z, assay = "integratedRNA", test.use = "roc") %>% 
283
    mutate(avg_diff=round(avg_diff, 3),
284
           avg_log2FC=round(avg_log2FC, 3)) %>% 
285
    select(-avg_diff, -pct.1, -pct.2) %>%
286
    rownames_to_column("Gene")
287
  
288
  return(df_mark)
289
  }) %>% `names<-`(clusters) %>% 
290
  bind_rows(., .id = "Cluster") %>% 
291
  mutate(Cluster=substr(Cluster,9,10))
292
293
# Marker
294
markers_adt <- lapply(clusters, function(x){
295
  z <- as.numeric(gsub(x, pattern = "cluster_", replacement = ""))
296
  
297
  df_mark <- FindMarkers(Combined_T, ident.1 = z, assay = "integratedADT", test.use = "roc") %>% 
298
    mutate(avg_diff=round(avg_diff, 3),
299
           avg_log2FC=round(avg_log2FC, 3)) %>% 
300
    select(-avg_diff, -pct.1, -pct.2) %>% 
301
    rownames_to_column("Protein")
302
  
303
  return(df_mark)
304
  }) %>% `names<-`(clusters) %>% 
305
  bind_rows(., .id = "Cluster") %>% 
306
  mutate(Cluster=substr(Cluster,9,10))
307
308
```
309
310
### Show Markers
311
#### RNA
312
```{r show markers rna}
313
DT::datatable(markers_rna, rownames = FALSE, filter = "top", 
314
              options = list(pageLength = 10, autoWidth = TRUE))
315
316
317
```
318
319
#### ADT
320
```{r show markers adt}
321
322
DT::datatable(markers_adt, rownames = FALSE, filter = "top", 
323
              options = list(pageLength = 10, autoWidth = TRUE))
324
325
```
326
327
# Refine object
328
## Identify unwanted clusters
329
```{r refine clusters}
330
# Remove cluster of with levels of LYZ (--> myeloid cells)
331
c1 <- markers_rna %>% filter(Gene=="LYZ", myAUC>0.8) %>% pull(Cluster)
332
333
# Remove cluster with high levels of mito genes
334
c2 <- markers_rna %>% group_by(Cluster) %>%  
335
  top_n(5, myAUC) %>% 
336
  filter(Gene=="MT-CO3") %>% 
337
  pull(Cluster)
338
  
339
# Remove cluster with high levels of interferon induced genes
340
c3 <- markers_rna %>% group_by(Cluster) %>%  
341
  top_n(5, myAUC) %>% 
342
  filter(Gene=="IFI44L") %>% 
343
  pull(Cluster)
344
345
# Remove cluster with high levels of interferon induced genes
346
c4 <- markers_rna %>% group_by(Cluster) %>%  
347
  top_n(1, myAUC) %>% 
348
  filter(Gene=="ISG15") %>% 
349
  pull(Cluster)
350
351
# Remove cluster with high levels of heat shock proteins
352
c5 <- markers_rna %>% group_by(Cluster) %>%  
353
  top_n(5, myAUC) %>% 
354
  filter(Gene=="HSP90AB1") %>% 
355
  pull(Cluster)
356
357
clusters_keep <- setdiff(levels(Combined_T$integratedRNA_snn_res.1.4), c(c1,c2,c3,c4,c5))
358
clusters_keep
359
360
```
361
362
## Subset object
363
```{r subset object, fig.height=5, fig.width=5.5}
364
365
Combined_T <- subset(Combined_T, idents = clusters_keep)
366
DimPlot(Combined_T, label = T)+
367
  NoLegend()
368
369
Combined_T <- RunUMAP(Combined_T, dims = 1:30, reduction = "pcaRNA",  
370
                               reduction.name = "umapRNA", reduction.key = "umapRNA_")
371
Combined_T <- FindNeighbors(Combined_T, reduction = "pcaRNA", dims = 1:20)
372
Combined_T <- FindClusters(Combined_T, resolution = 1.4)
373
374
DimPlot(Combined_T, label = T, group.by = "integratedRNA_snn_res.1.4", raster = T)+
375
  NoLegend()
376
377
Idents(Combined_T) <- "IdentI"
378
379
```
380
381
# Run WNN pipeline
382
```{r run wnn, fig.height=5, fig.width=5.5}
383
384
Combined_T <- FindMultiModalNeighbors(
385
  Combined_T, reduction.list = list("pcaRNA", "pcaADT"), k.nn = 30,
386
  dims.list = list(1:12, 1:20), modality.weight.name = c("RNA.weight", "ADT.weight")
387
)
388
389
Combined_T <- RunUMAP(Combined_T, nn.name = "weighted.nn", reduction.name = "wnn.umap", 
390
                      reduction.key = "wnnUMAP_", return.model = TRUE)
391
392
Combined_T <- FindClusters(Combined_T, graph.name = "wsnn", algorithm = 3, resolution = 0.7)
393
394
DimPlot(Combined_T, reduction = 'wnn.umap', group.by = "wsnn_res.0.7", label = T, raster = T)+
395
  NoLegend()
396
397
```
398
399
# Remove singletons
400
```{r remove singletons}
401
402
Combined_T <- subset(Combined_T, idents = c(0:14))
403
404
```
405
406
# Re-run WNN pipeline
407
```{r rerun wnn}
408
409
Combined_T <- FindMultiModalNeighbors(
410
  Combined_T, reduction.list = list("pcaRNA", "pcaADT"), k.nn = 30, 
411
  dims.list = list(1:15, 1:20), modality.weight.name = c("RNA.weight", "ADT.weight")
412
)
413
414
Combined_T <- RunUMAP(Combined_T, nn.name = "weighted.nn", reduction.name = "wnn.umap", 
415
                      reduction.key = "wnnUMAP_", return.model = TRUE)
416
Combined_T <- FindClusters(Combined_T, graph.name = "wsnn", algorithm = 3, resolution = 0.7)
417
418
```
419
420
# Compare with original object
421
```{r compare with original, fig.height=5, fig.width=5.5}
422
423
Combined_T_or <- readRDS("output/Tcells_Integrated.rds")
424
DimPlot(AddMetaData(Combined_T, metadata = Idents(Combined_T_or), 
425
                    col.name = "IdentI_original"), 
426
        reduction = 'wnn.umap', group.by = "IdentI_original", label = T, raster = T)+
427
  NoLegend()
428
429
```
430
431
# Save object
432
```{r save, eval=F}
433
434
saveRDS(Combined_T, file = "output/Tcells_Integrated.rds")
435
436
# Output might slightly differ depending on the version and system you use. For exact reproduction of figures please use Seurat Object provided at HeiData. 
437
438
```
439
440
# Session Info
441
```{r}
442
443
sessionInfo()
444
445
```