Switch to unified view

a b/TcellAnalysis/notebooks/COVID_Tcell_annotations.Rmd
1
---
2
title: "COVID19: T cell annotation"
3
output: html_notebook
4
---
5
6
Annotating T cells subtypes.
7
8
```{r, warning=FALSE, message=FALSE}
9
library(ggplot2)
10
library(scran)
11
library(ggsci)
12
library(cowplot)
13
library(scattermore)
14
library(ComplexHeatmap)
15
library(RColorBrewer)
16
library(pheatmap)
17
library(scater)
18
library(reshape2)
19
library(colorspace)
20
library(dplyr)
21
library(ggthemes)
22
library(edgeR)
23
library(scales)
24
library(ggrepel)
25
library(viridis)
26
library(MASS)
27
```
28
29
I've taken all of the full clusters and selected the ones that I think contain T cells: 1, 6, 7, 8, 10 & 13.
30
31
```{r, warning=FALSE, message=FALSE}
32
tcell.umap <- read.table("~/Dropbox/COVID19/Data/Cambridge_Sanger_Newcastle_UMAPs.tsv",
33
                         sep="\t", header=TRUE, stringsAsFactors=FALSE)
34
# fix the Sanger barcodes
35
tcell.umap$CellID[tcell.umap$DataSet %in% c("Sanger")] <- gsub(tcell.umap$CellID[tcell.umap$DataSet %in% c("Sanger")],
36
                                                               pattern="^(CV[0-9]+_[A-Z0-9]+-CV[0-9]+_[A-Z0-9]+)_", replacement="")
37
38
tcell.clusters <- read.table("~/Dropbox/COVID19/Data/Cambridge_Sanger_Newcastle_Tcell_clusters.tsv",
39
                             sep="\t", header=TRUE, stringsAsFactors=FALSE)
40
41
# tcell.meta <- merge(tcell.umap, tcell.clusters, by='CellID')
42
```
43
44
45
```{r, warning=FALSE, message=FALSE}
46
all.meta <- read.table("~/Dropbox/COVID19/Data/COVID19_scMeta-data.tsv",
47
                       sep="\t", header=TRUE, stringsAsFactors=FALSE)
48
rownames(all.meta) <- all.meta$CellID
49
50
# remove BGCV01_CV0209 and CV0198
51
all.meta <- all.meta[!all.meta$sample_id %in% c("BGCV01_CV0902"), ]
52
all.meta <- all.meta[!all.meta$patient_id %in% c("CV0198"), ]
53
```
54
55
To normalise the cell type proportions, I need to know how many cells were captured for each patient-sample combo.
56
57
```{r, warning=FALSE, message=FALSE}
58
n.cell.vecc <- table(all.meta$sample_id)
59
```
60
61
62
63
```{r, warning=FALSE, message=FALSE}
64
tcell.merge <- merge(tcell.clusters, all.meta, by='CellID')
65
tcell.merge <- merge(tcell.merge, tcell.umap, by='CellID')
66
```
67
68
69
```{r}
70
gene.clust <- read.table("~/Dropbox/COVID19/Data/Cambridge_Sanger_Newcastle-Tcells-cluster_gene-Ave.tsv",
71
                         sep="\t", header=TRUE, stringsAsFactors=FALSE)
72
73
protein.clust <- read.table("~/Dropbox/COVID19/Data/Cambridge_Sanger_Newcastle-Tcells-cluster_protein-Ave.tsv",
74
                            sep="\t", header=TRUE, stringsAsFactors=FALSE)
75
```
76
77
78
```{r, warning=FALSE, message=FALSE}
79
plot.proteins <- unique(c("CD19", "CD86", "CD11b", "CD56", "CD8", "CD4", "CD3", "CD16", "CD14", "CD335", "CD158", "SIGLEC7",
80
                          "FCER2", "TCR", "CTLA4", "CXCR5",
81
                          "CX3CR1", "CD79b", "CD20", "CD1c", "CD25", "CD45RO", "HLA-DR", "CD21", "CD5", "CD64", "CD127",
82
                          "CD34", "CD40", "CD123", "CD27", "CD1c", "TCR_Va7.2", "TCR_Vg2", "CD235ab", "CD10", "CD2", "CD71",
83
                          "TCR_Vg9", "TCR_Va24-Ja18", "CCR7", "CD45RA", "CD62L", "CD138", "CD38", "CD274", "CD279", "TIGIT"))
84
85
plot.genes <- unique(c("CD3G", "CD3E", "FCGR3A", "CD8A", "CD8B", "CD4", "CD80", "CD86", "CXCL10", "HLA.DRA", "HLA.DRB1", "VEGFA",
86
                       "CCR7", "CCR2", "CCR5", "CXCR3", "CD36", "FOXP3", "MS4A1", "LYZ", "GZMB", "CD68", "GATA3", "PECAM1", "RORC", 
87
                       "FCGR2A", "FCGR2B", "FCGR3B", "TPO", "HBB", "NCR1", "NCR2", "F8", "HIF1A", "CTLA4", "CD28", "CXCR1",
88
                       "PDCD1", "TIM3", "LAG3", "TYMS", "PITX1", "TIGIT", "CXCL13", "BCL6",
89
                       "ITGA2B", "ITGAX", "TOX", "FYN", "NCAM1", "LRRC32", "SELL", "CXCR5", "CSF1", "IL7R", "CD40LG", "MYC", "MKI67",
90
                       "IFNG", "IL6", "THRB", "CD19", "CD38", "CD27", "F2", "F5", "THRB", "CD44", "IKZF2", "IKZF4"))
91
```
92
93
94
```{r, warning=FALSE, message=FALSE, fig.height=10.95, fig.width=9.95}
95
gene.plot.hm <- t(apply(t(gene.clust[, intersect(colnames(gene.clust), plot.genes)]), 1,
96
                        FUN=function(XP) XP/max(XP)))
97
gene.plot.hm[is.na(gene.plot.hm)] <- 0
98
colnames(gene.plot.hm) <- gene.clust$GeneID
99
100
pdf("~/Dropbox/COVID19/plot.dir/Gene_heatmap-Tcells.pdf",
101
    height=10.95, width=9.95, useDingbats=FALSE)
102
Heatmap(gene.plot.hm, name="Z-score")
103
dev.off()
104
105
Heatmap(gene.plot.hm, name="Z-score")
106
```
107
108
I'll ignore the clusters that are clearly not T-cells, like any residual monocytes, B cell and NK cells, etc.
109
110
```{r, warning=FALSE, message=FALSE}
111
table(tcell.merge$Site, tcell.merge$Sub.Cluster)
112
```
113
114
115
```{r, warning=FALSE, message=FALSE, fig.height=10.95, fig.width=9.95}
116
rownames(protein.clust) <- protein.clust$Protein
117
protein.plot.hm <- protein.clust[intersect(protein.clust$Protein, plot.proteins), 
118
                                 setdiff(colnames(protein.clust), "Protein")]
119
#protein.plot.hm[protein.plot.hm < 0] <- 0
120
protein.plot.hm <- t(apply(protein.plot.hm, 1, FUN=function(XP) (XP - mean(XP))/sd(XP)))
121
122
protein.plot.hm[is.na(protein.plot.hm)] <- 0
123
124
rownames(protein.plot.hm) <- intersect(protein.clust$Protein, plot.proteins)
125
126
# pdf("~/Dropbox/COVID19/plot.dir/protein_heatmap-celltypes.pdf",
127
#     height=10.95, width=9.95, useDingbats=FALSE)
128
# Heatmap(protein.plot.hm, name="Z-score")
129
# dev.off()
130
131
Heatmap(protein.plot.hm, name="Z-score")
132
```
133
134
135
136
```{r}
137
tcell.merge$Annotation <- "Unknown"
138
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(11)] <- "PoorQC"
139
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(14, 19)] <- "NK"
140
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(8)] <- "Doublets:Bcell"
141
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(16)] <- "Doublets:Platelet"
142
143
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(17)] <- "ILCs"
144
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(12)] <- "gdT"
145
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(23, 24)] <- "CD8.TE"
146
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(7)] <- "CD8.EM"
147
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(22)] <- "Tfh-like"
148
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(3)] <- "Treg"
149
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(15)] <- "MAIT"
150
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(5, 6, 9)] <- "CD4.Naive"
151
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(1, 20)] <- "CD8.Naive"
152
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(2, 4, 10, 11, 21)] <- "CD4.CM"
153
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(13, 25)] <- "Exhausted"
154
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(18)] <- "Proliferating"
155
156
table(tcell.merge$Annotation, tcell.merge$Sub.Cluster)
157
```
158
159
```{r, warning=FALSE, message=FALSE}
160
table(tcell.merge$Annotation, tcell.merge$Site)
161
```
162
163
164
```{r, fig.height=9.75, fig.width=10.95}
165
n.cells <- length(unique(tcell.merge$Sub.Cluster))
166
cell.cols <- colorRampPalette(pal_futurama()(12))(n.cells)
167
names(cell.cols) <- unique(tcell.merge$Sub.Cluster)
168
169
tcell.merge$Sub.Cluster <- factor(tcell.merge$Sub.Cluster,
170
                                  levels=c(1:25))
171
172
ggplot(tcell.merge, 
173
       aes(x=MNN.UMAP1, y=MNN.UMAP2, colour=Sub.Cluster)) +
174
    geom_scattermore() +
175
    theme_cowplot() +
176
    facet_wrap(~Annotation) +
177
    scale_colour_manual(values=cell.cols) +
178
    guides(colour=guide_legend(override.aes = list(size=3)))
179
```
180
181
Plot the markers genes for these groups: CD4, CD8, TCR V$\alpha$7.2, TCR V$\gamma$9, TCR V$\gamma$2, CD45RA, CCR7, CD45RO, CD27, CD28,
182
CTLA4, HLA-DR, FOXP3, CXCR5, GZMB, IFNG, TOX, PDCD1.
183
184
I think these annotations look OK.
185
186
```{r, warning=FALSE, message=FALSE}
187
# read in the gene expression
188
goi.df <- read.table("~/Dropbox/COVID19/Data/Cambridge_Sanger_Newcastle-Tcells_GeneGOI.tsv",
189
                     sep="\t", header=TRUE, stringsAsFactors=FALSE)
190
191
tcell.goi.merge <- merge(tcell.merge, goi.df, by='CellID')
192
```
193
194
195
```{r, warning=FALSE, message=FALSE}
196
# re-classify CD8.TE with high NCAM1 * FCGR3A expression
197
tcell.goi.merge$Annotation[tcell.goi.merge$NCAM1 > 0 & tcell.goi.merge$Annotation %in% c("CD8.TE") &
198
                               tcell.goi.merge$TRAV1.2 > 0] <- "NKT"
199
tcell.goi.merge$Annotation[tcell.goi.merge$FCGR3A > 0 & tcell.goi.merge$Annotation %in% c("CD8.TE") &
200
                               tcell.goi.merge$TRAV1.2 > 0] <- "NKT"
201
```
202
203
Remove doublets based on expression of markers for different cell lineages.
204
205
```{r}
206
# re-classify CD8.TE with high NCR1 * FCGR3A expression
207
bcell.markers <- tcell.goi.merge$IGHM > 0 | tcell.goi.merge$IGHG3 > 0 | tcell.goi.merge$IGHD > 0 | tcell.goi.merge$IGHG4 > 0 |
208
    tcell.goi.merge$IGHG2 > 0 | tcell.goi.merge$IGHG1 > 0 | tcell.goi.merge$IGHA1 > 0
209
210
tcell.goi.merge$Annotation[bcell.markers] <- "Doublets:Bcell"
211
212
table(tcell.goi.merge$Annotation, tcell.goi.merge$Site)
213
```
214
215
216
217
```{r, warning=FALSE, message=FALSE}
218
clust.model.matrix <- model.matrix(~ 0 + Annotation, data=tcell.goi.merge)
219
rownames(clust.model.matrix) <- tcell.goi.merge$CellID
220
221
gene.goi.by.cluster <- t(clust.model.matrix) %*% as.matrix(tcell.goi.merge[, setdiff(colnames(goi.df), colnames(tcell.merge))])
222
gene.ave.by.label <- as.data.frame(apply(gene.goi.by.cluster, 2, function(X) X/colSums(clust.model.matrix)))
223
rownames(gene.ave.by.label) <- gsub(rownames(gene.ave.by.label), pattern="Annotation", replacement="")
224
225
gene.goi.by.cluster.bin <- t(clust.model.matrix) %*% as.matrix(tcell.goi.merge[, setdiff(colnames(goi.df), colnames(tcell.merge))] > 0)
226
gene.bin.by.label <- as.data.frame(apply(gene.goi.by.cluster.bin, 2, function(X) X/sum(X)))
227
gene.bin.by.label[is.na(gene.bin.by.label)] <- 0
228
rownames(gene.bin.by.label) <- gsub(rownames(gene.bin.by.label), pattern="Annotation", replacement="")
229
```
230
231
232
```{r, message=FALSE, warning=FALSE}
233
gene.ave.label.df <- as.data.frame(apply(gene.ave.by.label, 2, FUN=function(XP) XP/max(XP)))
234
gene.ave.label.df$Annotation <- rownames(gene.ave.label.df)
235
gene.ave.label.melt <- melt(gene.ave.label.df, id.vars='Annotation')
236
colnames(gene.ave.label.melt) <- c("Annotation", "Gene", "AveExprs")
237
238
gene.bin.label.df <- gene.bin.by.label
239
gene.bin.label.df$Annotation <- rownames(gene.bin.label.df)
240
gene.bin.label.melt <- melt(gene.bin.label.df, id.vars='Annotation')
241
colnames(gene.bin.label.melt) <- c("Annotation", "Gene", "PropExprs")
242
243
dot.plot.df <- merge(gene.ave.label.melt, gene.bin.label.melt, by=c('Annotation', 'Gene'))
244
dot.plot.df$Gene <- as.character(dot.plot.df$Gene)
245
```
246
247
248
```{r, message=FALSE, warning=FALSE, fig.height=5.15, fig.width=8.25}
249
dot.plot.genes <- c("CD3E", "CD8A", "CD8B", "CD4", "CCR7", "CD27", "CD28", "TOX", "CTLA4", "IFNG", "PDCD1", "GZMB", "IFNG",
250
                    "TRAV1.2", "TRDV2", "TRGV9", "CD40LG", "RORC", "CD38", "NCAM1",
251
                    "CXCR5", "HLA.DRB1", "LAG3", "CD44", "FOXP3", "MKI67", "NCR1", "GARP", "GZMH", "CXCR4", "FCGR3A", "TCF7")
252
253
ggplot(dot.plot.df[dot.plot.df$Gene %in% dot.plot.genes, ], 
254
       aes(x=Gene, y=Annotation, colour=AveExprs, size=PropExprs*100)) +
255
    geom_point() +
256
    theme_cowplot() +
257
    scale_colour_gradient(low='grey70', high='blue') +
258
    theme(axis.text.x=element_text(angle=90, vjust=0.5, hjust=1, colour='black'),
259
          axis.text.y=element_text(colour='black'),
260
          panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
261
          panel.background = element_blank(), axis.line = element_line(colour = "black")) +
262
    guides(colour=guide_colorbar(title='Mean Expression'),
263
           size=guide_legend(title='% Express')) +
264
    ggsave("~/Dropbox/COVID19/plot.dir/Tcell_dotplot-genes.pdf",
265
           height=5.15, width=8.25, useDingbats=FALSE)
266
```
267
268
Do the same for the proteins.
269
270
```{r, warning=FALSE, message=FALSE}
271
# read in the gene expression
272
prot.df <- read.table("~/Dropbox/COVID19/Data/Cambridge_Sanger_Newcastle-Tcells_ProteinGOI.tsv",
273
                     sep="\t", header=TRUE, stringsAsFactors=FALSE)
274
colnames(prot.df) <- c(paste0("Prot.", colnames(prot.df)[!colnames(prot.df) %in% c("CellID")]), "CellID")
275
276
prot.df$Site <- "Ncl"
277
prot.df$Site[grepl(prot.df$CellID, pattern="BGCV")] <- "Cambridge"
278
prot.df$Site[grepl(prot.df$CellID, pattern="^S[0-9]+")] <- "Sanger"
279
tcell.prot.merge <- merge(tcell.goi.merge, prot.df, by=c('CellID', 'Site'))
280
```
281
282
283
```{r, warning=FALSE, message=FALSE}
284
clust.model.matrix <- model.matrix(~ 0 + Annotation, data=tcell.prot.merge)
285
rownames(clust.model.matrix) <- tcell.prot.merge$CellID
286
287
goi.by.cluster <- t(clust.model.matrix) %*% as.matrix(tcell.prot.merge[, gsub(setdiff(colnames(prot.df), colnames(tcell.merge)),
288
                                                                              pattern="\\.x", replacement="")])
289
ave.by.label <- as.data.frame(apply(goi.by.cluster, 2, function(X) X/colSums(clust.model.matrix)))
290
rownames(ave.by.label) <- gsub(rownames(ave.by.label), pattern="Annotation", replacement="")
291
colnames(ave.by.label) <- gsub(colnames(ave.by.label), pattern="Prot\\.", replacement="")
292
293
goi.by.cluster.bin <- t(clust.model.matrix) %*% as.matrix(tcell.prot.merge[, gsub(setdiff(colnames(prot.df), colnames(tcell.merge)),
294
                                                                              pattern="\\.x", replacement="")] > 0)
295
bin.by.label <- as.data.frame(apply(goi.by.cluster.bin, 2, function(X) X/sum(X)))
296
bin.by.label[is.na(bin.by.label)] <- 0
297
rownames(bin.by.label) <- gsub(rownames(bin.by.label), pattern="Annotation", replacement="")
298
colnames(bin.by.label) <- gsub(colnames(bin.by.label), pattern="Prot\\.", replacement="")
299
```
300
301
302
```{r, message=FALSE, warning=FALSE}
303
ave.label.df <- as.data.frame(apply(ave.by.label, 2, FUN=function(XP) (XP-min(XP))/max(XP-min(XP))))
304
ave.label.df$Annotation <- rownames(ave.label.df)
305
ave.label.melt <- melt(ave.label.df, id.vars='Annotation')
306
colnames(ave.label.melt) <- c("Annotation", "Protein", "AveExprs")
307
308
bin.label.df <- bin.by.label
309
bin.label.df$Annotation <- rownames(bin.label.df)
310
bin.label.melt <- melt(bin.label.df, id.vars='Annotation')
311
colnames(bin.label.melt) <- c("Annotation", "Protein", "PropExprs")
312
313
protein.dot.plot.df <- merge(ave.label.melt, bin.label.melt, by=c('Annotation', 'Protein'))
314
protein.dot.plot.df$Protein <- as.character(protein.dot.plot.df$Protein)
315
```
316
317
318
```{r, message=FALSE, warning=FALSE, fig.height=5.15, fig.width=8.25}
319
dot.plot.prots <- c("CD3", "CD8", "CD4", "CCR7", "CD45RA", "CD45RO", "CD28", "CD27", "CTLA4", "CD56", "CD16", "ICOSL",
320
                    "CD274", "CXCR3", "CD40LG", "CD38", "ICOS", "PD1", "HAVCR2",
321
                    "CXCR5", "TCR_Va7.2", "TCR_Vg2", "TCR_Vg9", "TCR_Va24-Ja18", "CD62L", "CD44", "TCR_Va24.Ja18")
322
323
ggplot(protein.dot.plot.df[protein.dot.plot.df$Protein %in% dot.plot.prots, ], 
324
       aes(x=Protein, y=Annotation, colour=AveExprs, size=PropExprs*100)) +
325
    geom_point() +
326
    theme_cowplot() +
327
    scale_colour_gradient(low='grey70', high=darken('red')) +
328
    theme(axis.text.x=element_text(angle=90, vjust=0.5, hjust=1, colour='black'),
329
          axis.text.y=element_text(colour='black'),
330
          panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
331
          panel.background = element_blank(), axis.line = element_line(colour = "black")) +
332
    guides(colour=guide_colorbar(title='Mean Expression'),
333
           size=guide_legend(title='% Express')) +
334
    ggsave("~/Dropbox/COVID19/plot.dir/Tcell_dotplot-protein.pdf",
335
           height=5.15, width=8.25, useDingbats=FALSE)
336
```
337
338
339
Write out the exhausted, proliferating and Tfh-like clusters for sub-clustering.
340
341
```{r}
342
write.table(tcell.prot.merge$CellID[tcell.prot.merge$Annotation %in% c("Proliferating")],
343
            file="~/Dropbox/COVID19/Data/Proliferating_barcodes.tsv",
344
            quote=FALSE, col.names=FALSE, row.names=FALSE, sep="\t")
345
346
write.table(tcell.prot.merge$CellID[tcell.prot.merge$Annotation %in% c("Tfh-like")],
347
            file="~/Dropbox/COVID19/Data/Tfh_barcodes.tsv",
348
            quote=FALSE, col.names=FALSE, row.names=FALSE, sep="\t")
349
350
write.table(tcell.prot.merge$CellID[tcell.prot.merge$Annotation %in% c("Exhausted")],
351
            file="~/Dropbox/COVID19/Data/Exhausted_barcodes.tsv",
352
            quote=FALSE, col.names=FALSE, row.names=FALSE, sep="\t")
353
```
354
355
### Tfh sub-anntotating
356
357
Read in sub-clusters and annotate.
358
359
```{r, warning=FALSE, message=FALSE}
360
tfh.subcluster <- read.table("~/Dropbox/COVID19/Data/Cambridge_Sanger_Newcastle_Tfh_clusters.tsv",
361
                             sep="\t", header=TRUE, stringsAsFactors=FALSE)
362
tfh.subcluster.merge <- merge(tfh.subcluster, tcell.prot.merge[, !colnames(tcell.prot.merge) %in% c("Sub.Cluster")], by='CellID')
363
table(tfh.subcluster.merge$Sub.Cluster)
364
```
365
366
Plot average expression.
367
368
```{r, warning=FALSE, message=FALSE}
369
tfh.subcluster.merge$Sub.Cluster <- as.character(tfh.subcluster.merge$Sub.Cluster)
370
clust.model.matrix <- model.matrix(~ 0 + Sub.Cluster, data=tfh.subcluster.merge)
371
rownames(clust.model.matrix) <- tfh.subcluster.merge$CellID
372
373
goi.by.cluster <- t(clust.model.matrix) %*% as.matrix(tfh.subcluster.merge[, gsub(setdiff(colnames(goi.df), colnames(tcell.merge)),
374
                                                                              pattern="\\.x", replacement="")])
375
ave.by.label <- as.data.frame(apply(goi.by.cluster, 2, function(X) X/colSums(clust.model.matrix)))
376
rownames(ave.by.label) <- gsub(rownames(ave.by.label), pattern="Sub\\.Cluster", replacement="")
377
colnames(ave.by.label) <- gsub(colnames(ave.by.label), pattern="Prot\\.", replacement="")
378
379
goi.by.cluster.bin <- t(clust.model.matrix) %*% as.matrix(tfh.subcluster.merge[, gsub(setdiff(colnames(goi.df), colnames(tcell.merge)),
380
                                                                              pattern="\\.x", replacement="")] > 0)
381
bin.by.label <- as.data.frame(apply(goi.by.cluster.bin, 2, function(X) X/sum(X)))
382
bin.by.label[is.na(bin.by.label)] <- 0
383
rownames(bin.by.label) <- gsub(rownames(bin.by.label), pattern="Sub\\.Cluster", replacement="")
384
colnames(bin.by.label) <- gsub(colnames(bin.by.label), pattern="Prot\\.", replacement="")
385
```
386
387
388
```{r, warning=FALSE, message=FALSE, fig.height=10.95, fig.width=9.95}
389
dot.plot.genes <- c("CD3E", "CD8A", "CD8B", "CD4", "CCR7", "CD27", "CD28", "TOX", "CTLA4", "IFNG", "PDCD1", "GZMB", "IFNG",
390
                    "TRAV1.2", "TRDV2", "TRGV9", "CD40LG", "RORC", "CD38", "IGHA1", "IGHM", "IGHD", "IGHG1", "ICOS", "IKZF4",
391
                    "IKZF2", "CXCR3", "CD19", "MS4A1", "TYMS", "SELL",  "IGHE", "CD19", "MS4A1", "HBB", "PECAM1", "TPO", "THRB",
392
                    "CXCR5", "HLA.DRB1", "LAG3", "CD44", "FOXP3", "MKI67", "NCR1", "GARP", "GZMH", "CXCR4", "FCGR3A", "TCF7")
393
gene.plot.hm <- t(apply(t(ave.by.label[, intersect(colnames(ave.by.label), dot.plot.genes)]), 1,
394
                        FUN=function(XP) XP/max(XP)))
395
gene.plot.hm[is.na(gene.plot.hm)] <- 0
396
#colnames(gene.plot.hm) <- gene.clust$GeneID
397
398
Heatmap(gene.plot.hm, name="Z-score")
399
```
400
401
402
403
```{r, warning=FALSE, message=FALSE}
404
clust.model.matrix <- model.matrix(~ 0 + Sub.Cluster, data=tfh.subcluster.merge)
405
rownames(clust.model.matrix) <- tfh.subcluster.merge$CellID
406
407
goi.by.cluster <- t(clust.model.matrix) %*% as.matrix(tfh.subcluster.merge[, gsub(setdiff(colnames(prot.df), colnames(tcell.merge)),
408
                                                                              pattern="\\.x", replacement="")])
409
ave.by.label <- as.data.frame(apply(goi.by.cluster, 2, function(X) X/colSums(clust.model.matrix)))
410
rownames(ave.by.label) <- gsub(rownames(ave.by.label), pattern="Sub\\.Cluster", replacement="")
411
colnames(ave.by.label) <- gsub(colnames(ave.by.label), pattern="Prot\\.", replacement="")
412
413
goi.by.cluster.bin <- t(clust.model.matrix) %*% as.matrix(tfh.subcluster.merge[, gsub(setdiff(colnames(prot.df), colnames(tcell.merge)),
414
                                                                              pattern="\\.x", replacement="")] > 0)
415
bin.by.label <- as.data.frame(apply(goi.by.cluster.bin, 2, function(X) X/sum(X)))
416
bin.by.label[is.na(bin.by.label)] <- 0
417
rownames(bin.by.label) <- gsub(rownames(bin.by.label), pattern="Sub\\.Cluster", replacement="")
418
colnames(bin.by.label) <- gsub(colnames(bin.by.label), pattern="Prot\\.", replacement="")
419
```
420
421
422
```{r, warning=FALSE, message=FALSE, fig.height=10.95, fig.width=9.95}
423
dot.plot.prots <- c("CD3", "CD8", "CD4", "CCR7", "CD45RA", "CD45RO", "CD28", "CD27", "CTLA4", "CD56", "CD16", "ICOSL",
424
                    "CD274", "CXCR3", "CD40LG", "CD38", "ICOS", "PD1", "CD19", "CD20",
425
                    "CXCR5", "TCR_Va7.2", "TCR_Vg2", "TCR_Vg9", "TCR_Va24-Ja18", "CD62L", "CD44", "TCR_Va24.Ja18")
426
prot.plot.hm <- t(apply(ave.by.label[, intersect(colnames(ave.by.label), dot.plot.prots)], 2,
427
                        FUN=function(XP) (XP - min(XP))/max((XP - min(XP)))))
428
prot.plot.hm[is.na(prot.plot.hm)] <- 0
429
#colnames(gene.plot.hm) <- gene.clust$GeneID
430
431
Heatmap(prot.plot.hm, name="Z-score")
432
```
433
434
435
```{r}
436
# not Tfh
437
tfh.subcluster.merge$Sub.Annotation <- "Tfh-like"
438
tfh.subcluster.merge$Sub.Annotation[tfh.subcluster.merge$Sub.Cluster %in% c(9)] <- "Treg"
439
tfh.subcluster.merge$Sub.Annotation[tfh.subcluster.merge$Sub.Cluster %in% c(14)] <- "NKT"
440
tfh.subcluster.merge$Sub.Annotation[tfh.subcluster.merge$Sub.Cluster %in% c(1)] <- "gdT"
441
tfh.subcluster.merge$Sub.Annotation[tfh.subcluster.merge$Sub.Cluster %in% c(2, 3, 16)] <- "PD1-.Tfh-like"
442
tfh.subcluster.merge$Sub.Annotation[tfh.subcluster.merge$Sub.Cluster %in% c(11, 15)] <- "PD1+.Tfh-like"
443
tfh.subcluster.merge$Sub.Annotation[tfh.subcluster.merge$Sub.Cluster %in% c(6)] <- "CD8.EM"
444
tfh.subcluster.merge$Sub.Annotation[tfh.subcluster.merge$Sub.Cluster %in% c(7)] <- "CD4.EM"
445
tfh.subcluster.merge$Sub.Annotation[tfh.subcluster.merge$Sub.Cluster %in% c(8)] <- "CD4.CM"
446
tfh.subcluster.merge$Sub.Annotation[tfh.subcluster.merge$Sub.Cluster %in% c(13)] <- "NK"
447
448
449
450
tfh.subcluster.merge$Sub.Annotation[tfh.subcluster.merge$Sub.Cluster %in% c(4, 5, 10, 12)] <- "Doublets"
451
rownames(tfh.subcluster.merge) <- tfh.subcluster.merge$CellID
452
table(tfh.subcluster.merge$Sub.Annotation, tfh.subcluster.merge$Sub.Cluster)
453
```
454
455
456
### Proliferating sub-anntotating
457
458
Read in sub-clusters and annotate.
459
460
```{r, warning=FALSE, message=FALSE}
461
prolif.subcluster <- read.table("~/Dropbox/COVID19/Data/Cambridge_Sanger_Newcastle_ProlifTcell_clusters.tsv",
462
                             sep="\t", header=TRUE, stringsAsFactors=FALSE)
463
prolif.subcluster.merge <- merge(prolif.subcluster, tcell.prot.merge[, !colnames(tcell.prot.merge) %in% c("Sub.Cluster")], by='CellID')
464
table(prolif.subcluster.merge$Sub.Cluster)
465
```
466
467
Plot average expression.
468
469
```{r, warning=FALSE, message=FALSE}
470
prolif.subcluster.merge$Sub.Cluster <- as.character(prolif.subcluster.merge$Sub.Cluster)
471
clust.model.matrix <- model.matrix(~ 0 + Sub.Cluster, data=prolif.subcluster.merge)
472
rownames(clust.model.matrix) <- prolif.subcluster.merge$CellID
473
474
goi.by.cluster <- t(clust.model.matrix) %*% as.matrix(prolif.subcluster.merge[, gsub(setdiff(colnames(goi.df), colnames(tcell.merge)),
475
                                                                              pattern="\\.x", replacement="")])
476
ave.by.label <- as.data.frame(apply(goi.by.cluster, 2, function(X) X/colSums(clust.model.matrix)))
477
rownames(ave.by.label) <- gsub(rownames(ave.by.label), pattern="Sub\\.Cluster", replacement="")
478
colnames(ave.by.label) <- gsub(colnames(ave.by.label), pattern="Prot\\.", replacement="")
479
480
goi.by.cluster.bin <- t(clust.model.matrix) %*% as.matrix(prolif.subcluster.merge[, gsub(setdiff(colnames(goi.df), colnames(tcell.merge)),
481
                                                                              pattern="\\.x", replacement="")] > 0)
482
bin.by.label <- as.data.frame(apply(goi.by.cluster.bin, 2, function(X) X/sum(X)))
483
bin.by.label[is.na(bin.by.label)] <- 0
484
rownames(bin.by.label) <- gsub(rownames(bin.by.label), pattern="Sub\\.Cluster", replacement="")
485
colnames(bin.by.label) <- gsub(colnames(bin.by.label), pattern="Prot\\.", replacement="")
486
```
487
488
489
```{r, warning=FALSE, message=FALSE, fig.height=10.95, fig.width=9.95}
490
dot.plot.genes <- c("CD3E", "CD8A", "CD8B", "CD4", "CCR7", "CD27", "CD28", "TOX", "CTLA4", "IFNG", "PDCD1", "GZMB", "IFNG",
491
                    "TRAV1.2", "TRDV2", "TRGV9", "CD40LG", "RORC", "CD38", "IGHA1", "IGHM", "IGHD", "IGHG1", "ICOS", "IKZF4",
492
                    "IKZF2", "CXCR3", "CD19", "MS4A1", "TYMS", "SELL",  "IGHE", "CD19", "MS4A1", "HBB", "PECAM1", "TPO", "THRB",
493
                    "CXCR5", "HLA.DRB1", "LAG3", "CD44", "FOXP3", "MKI67", "NCR1", "GARP", "GZMH", "CXCR4", "FCGR3A", "TCF7")
494
gene.plot.hm <- t(apply(t(ave.by.label[, intersect(colnames(ave.by.label), dot.plot.genes)]), 1,
495
                        FUN=function(XP) XP/max(XP)))
496
gene.plot.hm[is.na(gene.plot.hm)] <- 0
497
#colnames(gene.plot.hm) <- gene.clust$GeneID
498
499
Heatmap(gene.plot.hm, name="Z-score")
500
```
501
502
503
504
```{r, warning=FALSE, message=FALSE}
505
clust.model.matrix <- model.matrix(~ 0 + Sub.Cluster, data=prolif.subcluster.merge)
506
rownames(clust.model.matrix) <- prolif.subcluster.merge$CellID
507
508
goi.by.cluster <- t(clust.model.matrix) %*% as.matrix(prolif.subcluster.merge[, gsub(setdiff(colnames(prot.df), colnames(tcell.merge)),
509
                                                                              pattern="\\.x", replacement="")])
510
ave.by.label <- as.data.frame(apply(goi.by.cluster, 2, function(X) X/colSums(clust.model.matrix)))
511
rownames(ave.by.label) <- gsub(rownames(ave.by.label), pattern="Sub\\.Cluster", replacement="")
512
colnames(ave.by.label) <- gsub(colnames(ave.by.label), pattern="Prot\\.", replacement="")
513
514
goi.by.cluster.bin <- t(clust.model.matrix) %*% as.matrix(prolif.subcluster.merge[, gsub(setdiff(colnames(prot.df), colnames(tcell.merge)),
515
                                                                              pattern="\\.x", replacement="")] > 0)
516
bin.by.label <- as.data.frame(apply(goi.by.cluster.bin, 2, function(X) X/sum(X)))
517
bin.by.label[is.na(bin.by.label)] <- 0
518
rownames(bin.by.label) <- gsub(rownames(bin.by.label), pattern="Sub\\.Cluster", replacement="")
519
colnames(bin.by.label) <- gsub(colnames(bin.by.label), pattern="Prot\\.", replacement="")
520
```
521
522
523
```{r, warning=FALSE, message=FALSE, fig.height=10.95, fig.width=9.95}
524
dot.plot.prots <- c("CD3", "CD8", "CD4", "CCR7", "CD45RA", "CD45RO", "CD28", "CD27", "CTLA4", "CD56", "CD16", "ICOSL",
525
                    "CD274", "CXCR3", "CD40LG", "CD38", "ICOS", "PD1", "CD19", "CD20",
526
                    "CXCR5", "TCR_Va7.2", "TCR_Vg2", "TCR_Vg9", "TCR_Va24-Ja18", "CD62L", "CD44", "TCR_Va24.Ja18")
527
prot.plot.hm <- t(apply(ave.by.label[, intersect(colnames(ave.by.label), dot.plot.prots)], 2,
528
                        FUN=function(XP) (XP - min(XP))/max((XP - min(XP)))))
529
prot.plot.hm[is.na(prot.plot.hm)] <- 0
530
#colnames(gene.plot.hm) <- gene.clust$GeneID
531
532
Heatmap(prot.plot.hm, name="Z-score")
533
```
534
535
536
```{r, warning=FALSE, message=FALSE}
537
# not Tfh
538
prolif.subcluster.merge$Sub.Annotation <- "Proliferating"
539
prolif.subcluster.merge$Sub.Annotation[prolif.subcluster.merge$Sub.Cluster %in% c(2, 12)] <- "Doublets:Bcell"
540
prolif.subcluster.merge$Sub.Annotation[prolif.subcluster.merge$Sub.Cluster %in% c(1)] <- "PD1-.Tfh-like"
541
prolif.subcluster.merge$Sub.Annotation[prolif.subcluster.merge$Sub.Cluster %in% c(4, 10)] <- "CD4.Naive"
542
prolif.subcluster.merge$Sub.Annotation[prolif.subcluster.merge$Sub.Cluster %in% c(5, 9)] <- "NK"
543
prolif.subcluster.merge$Sub.Annotation[prolif.subcluster.merge$Sub.Cluster %in% c(6)] <- "gdT"
544
prolif.subcluster.merge$Sub.Annotation[prolif.subcluster.merge$Sub.Cluster %in% c(7)] <- "MAIT"
545
prolif.subcluster.merge$Sub.Annotation[prolif.subcluster.merge$Sub.Cluster %in% c(11)] <- "CD4.CM"
546
prolif.subcluster.merge$Sub.Annotation[prolif.subcluster.merge$Sub.Cluster %in% c(3, 13)] <- "CD4.EM"
547
prolif.subcluster.merge$Sub.Annotation[prolif.subcluster.merge$Sub.Cluster %in% c(8)] <- "CD8.EM"
548
549
550
rownames(prolif.subcluster.merge) <- prolif.subcluster.merge$CellID
551
table(prolif.subcluster.merge$Sub.Annotation, prolif.subcluster.merge$Sub.Cluster)
552
```
553
554
### Exhausted sub-anntotating
555
556
Read in sub-clusters and annotate.
557
558
```{r, warning=FALSE, message=FALSE}
559
exhaust.subcluster <- read.table("~/Dropbox/COVID19/Data/Cambridge_Sanger_Newcastle_Exhausted_clusters.tsv",
560
                             sep="\t", header=TRUE, stringsAsFactors=FALSE)
561
exhaust.subcluster.merge <- merge(exhaust.subcluster, tcell.prot.merge[, !colnames(tcell.prot.merge) %in% c("Sub.Cluster")], by='CellID')
562
table(exhaust.subcluster.merge$Sub.Cluster)
563
```
564
565
Plot average expression.
566
567
```{r, warning=FALSE, message=FALSE}
568
exhaust.subcluster.merge$Sub.Cluster <- as.character(exhaust.subcluster.merge$Sub.Cluster)
569
clust.model.matrix <- model.matrix(~ 0 + Sub.Cluster, data=exhaust.subcluster.merge)
570
rownames(clust.model.matrix) <- exhaust.subcluster.merge$CellID
571
572
goi.by.cluster <- t(clust.model.matrix) %*% as.matrix(exhaust.subcluster.merge[, gsub(setdiff(colnames(goi.df), colnames(tcell.merge)),
573
                                                                              pattern="\\.x", replacement="")])
574
ave.by.label <- as.data.frame(apply(goi.by.cluster, 2, function(X) X/colSums(clust.model.matrix)))
575
rownames(ave.by.label) <- gsub(rownames(ave.by.label), pattern="Sub\\.Cluster", replacement="")
576
colnames(ave.by.label) <- gsub(colnames(ave.by.label), pattern="Prot\\.", replacement="")
577
578
goi.by.cluster.bin <- t(clust.model.matrix) %*% as.matrix(exhaust.subcluster.merge[, gsub(setdiff(colnames(goi.df), colnames(tcell.merge)),
579
                                                                              pattern="\\.x", replacement="")] > 0)
580
bin.by.label <- as.data.frame(apply(goi.by.cluster.bin, 2, function(X) X/sum(X)))
581
bin.by.label[is.na(bin.by.label)] <- 0
582
rownames(bin.by.label) <- gsub(rownames(bin.by.label), pattern="Sub\\.Cluster", replacement="")
583
colnames(bin.by.label) <- gsub(colnames(bin.by.label), pattern="Prot\\.", replacement="")
584
```
585
586
587
```{r, warning=FALSE, message=FALSE, fig.height=10.95, fig.width=9.95}
588
dot.plot.genes <- c("CD3E", "CD3G", "CD8A", "CD8B", "CD4", "CCR7", "CD27", "CD28", "TOX", "CTLA4", "IFNG", "PDCD1", "GZMB", "IFNG", "TIGIT",
589
                    "TRAV1.2", "TRDV2", "TRGV9", "CD40LG", "RORC", "CD38", "IGHA1", "IGHM", "IGHD", "IGHG1", "ICOS", "IKZF4", "GARP",
590
                    "IKZF2", "CXCR3", "CD19", "MS4A1", "TYMS", "SELL", "CD11b", "ITGAX", "ITGAM", "FAS", "PECAM1", "HBB", "IGHE",
591
                    "CXCR5", "HLA.DRB1", "LAG3", "CD44", "FOXP3", "MKI67", "NCR1", "GARP", "GZMH", "CXCR4", "FCGR3A", "TCF7", "IL17A")
592
gene.plot.hm <- t(apply(t(ave.by.label[, intersect(colnames(ave.by.label), dot.plot.genes)]), 1,
593
                        FUN=function(XP) XP/max(XP)))
594
gene.plot.hm[is.na(gene.plot.hm)] <- 0
595
#colnames(gene.plot.hm) <- gene.clust$GeneID
596
597
Heatmap(gene.plot.hm, name="Z-score")
598
```
599
600
601
602
```{r, warning=FALSE, message=FALSE}
603
clust.model.matrix <- model.matrix(~ 0 + Sub.Cluster, data=exhaust.subcluster.merge)
604
rownames(clust.model.matrix) <- exhaust.subcluster.merge$CellID
605
606
goi.by.cluster <- t(clust.model.matrix) %*% as.matrix(exhaust.subcluster.merge[, gsub(setdiff(colnames(prot.df), colnames(tcell.merge)),
607
                                                                              pattern="\\.x", replacement="")])
608
ave.by.label <- as.data.frame(apply(goi.by.cluster, 2, function(X) X/colSums(clust.model.matrix)))
609
rownames(ave.by.label) <- gsub(rownames(ave.by.label), pattern="Sub\\.Cluster", replacement="")
610
colnames(ave.by.label) <- gsub(colnames(ave.by.label), pattern="Prot\\.", replacement="")
611
612
goi.by.cluster.bin <- t(clust.model.matrix) %*% as.matrix(exhaust.subcluster.merge[, gsub(setdiff(colnames(prot.df), colnames(tcell.merge)),
613
                                                                              pattern="\\.x", replacement="")] > 0)
614
bin.by.label <- as.data.frame(apply(goi.by.cluster.bin, 2, function(X) X/sum(X)))
615
bin.by.label[is.na(bin.by.label)] <- 0
616
rownames(bin.by.label) <- gsub(rownames(bin.by.label), pattern="Sub\\.Cluster", replacement="")
617
colnames(bin.by.label) <- gsub(colnames(bin.by.label), pattern="Prot\\.", replacement="")
618
```
619
620
621
```{r, warning=FALSE, message=FALSE, fig.height=10.95, fig.width=9.95}
622
dot.plot.prots <- c("CD3", "CD8", "CD4", "CCR7", "CD45RA", "CD45RO", "CD28", "CD27", "CTLA4", "CD56", "CD16", "ICOSL",
623
                    "CD274", "CXCR3", "CD40LG", "CD38", "ICOS", "PD1", "CD19", "CD20", "CD11b", "CD14", "FAS",
624
                    "CXCR5", "TCR_Va7.2", "TCR_Vg2", "TCR_Vg9", "TCR_Va24-Ja18", "CD62L", "CD44", "TCR_Va24.Ja18")
625
prot.plot.hm <- t(apply(ave.by.label[, intersect(colnames(ave.by.label), dot.plot.prots)], 2,
626
                        FUN=function(XP) (XP - min(XP))/max((XP - min(XP)))))
627
prot.plot.hm[is.na(prot.plot.hm)] <- 0
628
#colnames(gene.plot.hm) <- gene.clust$GeneID
629
630
Heatmap(prot.plot.hm, name="Z-score")
631
```
632
633
634
```{r, warning=FALSE, message=FALSE}
635
# not Tfh
636
exhaust.subcluster.merge$Sub.Annotation <- "Exhausted"
637
exhaust.subcluster.merge$Sub.Annotation[exhaust.subcluster.merge$Sub.Cluster %in% c(2, 4, 9, 10)] <- "Doublets:Bcell"
638
exhaust.subcluster.merge$Sub.Annotation[exhaust.subcluster.merge$Sub.Cluster %in% c(14)] <- "Doublets"
639
exhaust.subcluster.merge$Sub.Annotation[exhaust.subcluster.merge$Sub.Cluster %in% c(15, 17, 20)] <- "NKT"
640
exhaust.subcluster.merge$Sub.Annotation[exhaust.subcluster.merge$Sub.Cluster %in% c(3, 4, 7, 11, 13, 27)] <- "CD4.Naive"
641
exhaust.subcluster.merge$Sub.Annotation[exhaust.subcluster.merge$Sub.Cluster %in% c(5, 16, 18, 23)] <- "PD1-.Tfh-like"
642
exhaust.subcluster.merge$Sub.Annotation[exhaust.subcluster.merge$Sub.Cluster %in% c(1)] <- "PD1+.Tfh-like"
643
exhaust.subcluster.merge$Sub.Annotation[exhaust.subcluster.merge$Sub.Cluster %in% c(6)] <- "CD4.CM"
644
exhaust.subcluster.merge$Sub.Annotation[exhaust.subcluster.merge$Sub.Cluster %in% c(25)] <- "CD4.EM"
645
exhaust.subcluster.merge$Sub.Annotation[exhaust.subcluster.merge$Sub.Cluster %in% c(8)] <- "Treg"
646
exhaust.subcluster.merge$Sub.Annotation[exhaust.subcluster.merge$Sub.Cluster %in% c(12, 19, 21, 22, 24)] <- "gdT"
647
exhaust.subcluster.merge$Sub.Annotation[exhaust.subcluster.merge$Sub.Cluster %in% c(26)] <- "NK"
648
649
650
rownames(exhaust.subcluster.merge) <- exhaust.subcluster.merge$CellID
651
table(exhaust.subcluster.merge$Sub.Annotation, exhaust.subcluster.merge$Sub.Cluster)
652
```
653
654
Fill in the sub-annotations.
655
656
```{r, warning=FALSE, message=FALSE}
657
rownames(tcell.prot.merge) <- tcell.prot.merge$CellID
658
tcell.prot.merge$Sub.Annotation <- tcell.prot.merge$Annotation
659
tcell.prot.merge[rownames(tfh.subcluster.merge), ]$Sub.Annotation <- tfh.subcluster.merge$Sub.Annotation
660
tcell.prot.merge[rownames(prolif.subcluster.merge), ]$Sub.Annotation <- prolif.subcluster.merge$Sub.Annotation
661
tcell.prot.merge[rownames(exhaust.subcluster.merge), ]$Sub.Annotation <- exhaust.subcluster.merge$Sub.Annotation
662
663
# If we use the mRNA, there are A LOT of DP & DN T cells.
664
665
# re-classify CD4s within CD8 clusters
666
tcell.prot.merge$Sub.Annotation[(tcell.prot.merge$CD8A > 0 | tcell.prot.merge$CD8B > 0) &
667
                                    tcell.prot.merge$CD4 <= 0 &
668
                                    tcell.prot.merge$CXCR5 <= 0 &
669
                                    tcell.prot.merge$Sub.Annotation %in% c("PD1-.Tfh-like", "PD1+.Tfh-like")] <- "CD8.TE"
670
671
# final doublet classification
672
tcell.prot.merge$Sub.Annotation[tcell.prot.merge$IGHA1 > 0 | tcell.prot.merge$IGHD > 0 | tcell.prot.merge$IGHE > 0 | tcell.prot.merge$IGHG1 > 0 |
673
                                    tcell.prot.merge$IGHG2 > 0 | tcell.prot.merge$IGHG3 > 0 | tcell.prot.merge$IGHG4 > 0] <- "Doublets:Bcell"
674
675
# final NKT classification
676
tcell.prot.merge$Sub.Annotation[tcell.prot.merge$FCGR3A > 0 & tcell.prot.merge$NCAM1 > 0 &  
677
                                    tcell.prot.merge$Sub.Annotation %in% c("CD8.EM", "CD8.TE", "CD8.Naive")] <- "NKT"
678
tcell.prot.merge$Sub.Annotation[tcell.prot.merge$FCGR3A > 0 & tcell.prot.merge$NCR1 > 0 & 
679
                                    tcell.prot.merge$Sub.Annotation %in% c("CD8.EM", "CD8.TE", "CD8.Naive")] <- "NKT"
680
681
# fix the Treg annotations
682
tcell.prot.merge$Sub.Annotation[tcell.prot.merge$Sub.Annotation %in% c("Treg") &
683
                                    !(tcell.prot.merge$FOXP3 > 0 & tcell.prot.merge$IKZF2 > 0)] <- "CD4.IL22"
684
685
# Add in a Th17 phenotype
686
tcell.prot.merge$Sub.Annotation[tcell.prot.merge$Sub.Annotation %in% c("Treg", "CD4.CM", "CD4.EM", "CD4.Naive", "CD4.IL22") &
687
                                   (tcell.prot.merge$CD8A <= 0 & tcell.prot.merge$CD8B <= 0) &
688
                                    tcell.prot.merge$RORC > 0 &
689
                                    (tcell.prot.merge$IL17A > 0 | 
690
                                         tcell.prot.merge$IL17F > 0 | tcell.prot.merge$IL21 > 0)] <- "CD4.Th17"
691
692
# Add in a Th1 phenotype
693
tcell.prot.merge$Sub.Annotation[tcell.prot.merge$Sub.Annotation %in% c("Treg", "CD4.CM", "CD4.EM", "CD4.Naive", "CD4.IL22") &
694
                                    (tcell.prot.merge$CD8A <= 0 & tcell.prot.merge$CD8B <= 0) &
695
                                    (tcell.prot.merge$IL2 > 0 | tcell.prot.merge$TNF > 0 | 
696
                                         tcell.prot.merge$LTA) &
697
                                    tcell.prot.merge$TBX21 > 0] <- "CD4.Th1"
698
699
# Add in a Th2 phenotype
700
tcell.prot.merge$Sub.Annotation[tcell.prot.merge$Sub.Annotation %in% c("Treg", "CD4.CM", "CD4.EM", "CD4.Naive", "CD4.IL22") &
701
                                    (tcell.prot.merge$CD8A <= 0 & tcell.prot.merge$CD8B <= 0) &
702
                                    tcell.prot.merge$GATA3 > 0 &
703
                                    (tcell.prot.merge$IL4 > 0 | tcell.prot.merge$IL5 > 0)] <- "CD4.Th2"
704
705
# merge the Tfh annotation
706
tcell.prot.merge$Sub.Annotation[tcell.prot.merge$Sub.Annotation %in% c("PD1-.Tfh-like", "PD1+.Tfh-like")] <- "CD4.Tfh"
707
708
# Add in the proliferating/activated/exhausted annotation
709
active.tcells <- read.csv("~/Dropbox/COVID19/Data/prolif_CD4CD8.csv",
710
                          header=FALSE, stringsAsFactors=FALSE)
711
tcell.prot.merge$Sub.Annotation[tcell.prot.merge$CellID %in% active.tcells$V1[active.tcells$V2 %in% c("prolif_CD4")]] <- "CD4.Prolif"
712
tcell.prot.merge$Sub.Annotation[tcell.prot.merge$CellID %in% active.tcells$V1[active.tcells$V2 %in% c("prolif_CD8")]] <- "CD8.Prolif"
713
714
table(tcell.prot.merge$Sub.Annotation, tcell.prot.merge$Site)
715
```
716
717
718
719
```{r, warning=FALSE, message=FALSE}
720
library(BiocNeighbors)
721
tcell.pcs <- read.table("~/Dropbox/COVID19/Data/Tcells_MNN.txt", sep="\t", header=TRUE, stringsAsFactors=FALSE)
722
tcell.pcs <- tcell.pcs[tcell.pcs$CellID %in% tcell.prot.merge$CellID, ]
723
rownames(tcell.pcs) <- tcell.pcs$CellID
724
```
725
726
727
```{r, message=FALSE, warning=FALSE}
728
tcell.harmony <- read.csv("~/Dropbox/COVID19/Data/COVID_Harmony_PCA_OnlyT_embeddings.csv",
729
                          header=TRUE, stringsAsFactors=FALSE, row.names=1)
730
tcell.harmony <- tcell.harmony[rownames(tcell.harmony) %in% tcell.prot.merge$CellID, ]
731
tcell.harmony$CellID <- rownames(tcell.harmony)
732
733
write.table(tcell.harmony,
734
            file="~/Dropbox/COVID19/Data/COVID_Tcells_harmony.tsv",
735
            sep="\t", row.names=FALSE, quote=FALSE)
736
```
737
738
739
```{r, warning=FALSE, message=FALSE}
740
# # keep.tcells <- unique(tcell.prot.merge$CellID)
741
# tcell.kNN <- findKNN(as.matrix(tcell.pcs[, 1:30]), k=7, get.distance=FALSE)$index
742
# rownames(tcell.kNN) <- tcell.pcs$CellID
743
```
744
745
746
```{r, warning=FALSE, message=FALSE}
747
# tcell.harm.kNN <- findKNN(as.matrix(tcell.harmony[, 1:30]), k=7, get.distance=FALSE)$index
748
# rownames(tcell.harm.kNN) <- rownames(tcell.harmony)
749
```
750
751
752
```{r, warning=FALSE, message=FALSE}
753
# keep.tcells <- intersect(rownames(tcell.harmony), 
754
#                          unique(tcell.prot.merge$CellID[tcell.prot.merge$Sub.Annotation %in% c("CD4.Tfh", "CD4.Th1", "CD4.Th2", "CD4.Th17")]))
755
# 
756
# refined.harmony.annot.list <- list()
757
# for(i in seq_along(keep.tcells)){
758
#     i.cell <- keep.tcells[i]
759
#     i.type <- tcell.prot.merge$Sub.Annotation[tcell.prot.merge$CellID == i.cell]
760
# 
761
#     i.nn <- tcell.pcs$CellID[tcell.harm.kNN[i.cell, ]]
762
#     i.labels <- tcell.prot.merge[tcell.prot.merge$CellID %in% i.nn, ]$Sub.Annotation
763
#     i.tab <- table(i.labels)
764
#     i.max <- max(i.tab)
765
#     i.max.type <- names(i.tab)[which(i.tab == i.max)]
766
# 
767
#     if((i.type == i.max.type) & (length(i.max.type) == 1)){
768
#         refined.harmony.annot.list[[i.cell]] <- i.type
769
#     } else if((i.type != i.max.type) & (length(i.max.type) == 1)){
770
#         refined.harmony.annot.list[[i.cell]] <- i.max.type
771
#     } else{
772
#         # keep the same annotation if it's ambiguous
773
#         refined.harmony.annot.list[[i.cell]] <- i.type
774
#     }
775
# 
776
# }
777
# 
778
# refined.harmony.df <- data.frame("CellID"=names(refined.harmony.annot.list), "Refined.Harmony.Annotation"=unlist(refined.harmony.annot.list))
779
# refined.harmony.merge <- merge(refined.harmony.df, tcell.prot.merge, by='CellID')
780
# 
781
# table(refined.harmony.merge$Refined.Harmony.Annotation, refined.harmony.merge$Sub.Annotation)
782
```
783
784
785
```{r, warning=FALSE, message=FALSE}
786
# refined.annot.list <- list()
787
# for(i in seq_along(keep.tcells)){
788
#     i.cell <- keep.tcells[i]
789
#     i.type <- tcell.prot.merge[tcell.prot.merge$CellID %in% i.cell, ]$Sub.Annotation
790
# 
791
#     i.nn <- tcell.pcs$CellID[tcell.kNN[i.cell, ]]
792
#     i.labels <- tcell.prot.merge[tcell.prot.merge$CellID %in% i.nn, ]$Sub.Annotation
793
#     i.tab <- table(i.labels)
794
#     i.max <- max(i.tab)
795
#     i.max.type <- names(i.tab)[which(i.tab == i.max)]
796
# 
797
#     if((i.type == i.max.type) & (length(i.max.type) == 1)){
798
#         refined.annot.list[[i.cell]] <- i.type
799
#     } else if((i.type != i.max.type) & (length(i.max.type) == 1)){
800
#         refined.annot.list[[i.cell]] <- i.max.type
801
#     } else{
802
#         # keep the same annotation if it's ambiguous
803
#         refined.annot.list[[i.cell]] <- i.type
804
#     }
805
# 
806
# }
807
# 
808
# refined.df <- data.frame("CellID"=names(refined.annot.list), "Refined.Annotation"=unlist(refined.annot.list))
809
# refined.merge <- merge(refined.df, tcell.prot.merge, by='CellID')
810
# 
811
# table(refined.merge$Refined.Annotation, refined.merge$Sub.Annotation)
812
```
813
814
815
```{r}
816
# write out the annotations, without the doublets, NK cells and ILCs
817
write.table(tcell.prot.merge[!tcell.prot.merge$Sub.Annotation %in% c("NK", "Prolif.NK", "ILCs", 
818
                                                                     "Doublets:Platelet", "Doublets:Bcell", "Doublets"),
819
                        c("CellID", "Sub.Annotation")],
820
            file="~/Dropbox/COVID19/Data/Tcell_annotations_ext.tsv",
821
            sep="\t", quote=FALSE, row.names=FALSE)
822
823
write.table(tcell.prot.merge[tcell.prot.merge$Sub.Annotation %in% c("Doublets:Platelet", "Doublets:Bcell", "Doublets"),
824
                        c("CellID")],
825
            file="~/Dropbox/COVID19/Data/Tcell_doublets_ext.tsv",
826
            sep="\t", quote=FALSE, row.names=FALSE, col.names=FALSE)
827
828
write.table(tcell.prot.merge[!tcell.prot.merge$Sub.Annotation %in% c("NK", "Prolif.NK", "ILCs",
829
                                                                     "Doublets:Platelet", "Doublets:Bcell", "Doublets"),
830
                             c("CellID")],
831
            file="~/Dropbox/COVID19/Data/Tcell_barcodes.tsv",
832
            sep="\t", quote=FALSE, row.names=FALSE, col.names=FALSE)
833
834
write.table(tcell.prot.merge[tcell.prot.merge$Sub.Annotation %in% c("CD4.Naive", "CD4.CM", "CD4.EM", "CD4.IL22", "CD4.Th1", "CD4.Th2", "CD4.Th17",
835
                                                                     "Treg", "CD4.Prolif", "CD4.Tfh"),
836
                             c("CellID")],
837
            file="~/Dropbox/COVID19/Data/Tcell-CD4_barcodes.tsv",
838
            sep="\t", quote=FALSE, row.names=FALSE, col.names=FALSE)
839
840
write.table(tcell.prot.merge[tcell.prot.merge$Sub.Annotation %in% c("CD8.Naive", "CD8.CM", "CD8.EM", "CD8.Prolif", "CD8.TE", "NKT", "MAIT", "gdT"),
841
                             c("CellID")],
842
            file="~/Dropbox/COVID19/Data/Tcell-CD8_barcodes.tsv",
843
            sep="\t", quote=FALSE, row.names=FALSE, col.names=FALSE)
844
```
845
846
Plot the proportions of T cells, normalised properly.
847
848
```{r}
849
cell.freq.tab <- table(tcell.prot.merge$sample_id,
850
                       tcell.prot.merge$Sub.Annotation)
851
852
cell.freq.df <- as.data.frame(sapply(rownames(cell.freq.tab), FUN=function(X) cell.freq.tab[X, ]/n.cell.vecc[X]))
853
cell.freq.df$CellType <- rownames(cell.freq.df)
854
cell.freq.df <- melt(cell.freq.df, id.vars=c("CellType"))
855
cell.freq.df$CellType <- as.character(cell.freq.df$CellType)
856
colnames(cell.freq.df) <- c("CellType", "sample_id", "Freq")
857
cell.freq.df$Sex <- "Female"
858
cell.freq.df$Sex[cell.freq.df$sample_id %in% unique(tcell.prot.merge$sample_id[tcell.prot.merge$Sex %in% c("M", "Male")])] <- "Male"
859
860
covid.meta <- read.csv("~/Dropbox/COVID19/Data/Metadata FINAL 10122020.csv",
861
                        header=TRUE, stringsAsFactors=FALSE)
862
863
cell.freq.merge <- merge(cell.freq.df, covid.meta, by=c('sample_id', 'Sex'))
864
cell.freq.merge <- cell.freq.merge[!cell.freq.merge$D0_status_summary %in% c("Non_covid"), ]
865
cell.freq.merge <- cell.freq.merge[!cell.freq.merge$patient_id %in% c("CV0198"), ]
866
# remove doublets, etc
867
cell.freq.merge <- cell.freq.merge[!cell.freq.merge$CellType %in% c("Doublets:Bcell", "Doublets:Platelet", "Doublets", "NK", "ILCs"), ]
868
```
869
870
871
```{r, warning=FALSE, message=FALSE, fig.height=2.95, fig.width=9.15}
872
group.cols <- c("#2ca02c", "#1f77b4", "#9467bd", "#fed976", "#fd8d3c", "#e31a1c", "#800026", "#252525")
873
names(group.cols) <- c("Healthy", "LPS", "Non_covid", "Asymptomatic", "Mild", "Moderate", "Severe", "Critical")
874
875
cell.freq.merge$D0_status_summary <- factor(cell.freq.merge$D0_status_summary,
876
                                            levels=c("Healthy", "Asymptomatic", "Mild", 
877
                                                     "Moderate", "Severe", "Critical", "LPS", "Non_covid"))
878
879
# ggplot(cell.freq.merge[cell.freq.merge$Collection_Day %in% c("D0") &
880
#                            cell.freq.merge$CellType %in% c("Doublets:Bcell", "Doublets:Platelet", "Doublets"), ],
881
#        aes(x=D0_status_summary, y=Freq, fill=D0_status_summary)) +
882
#     geom_boxplot() +
883
#     theme_cowplot() +
884
#     facet_wrap(~CellType, scales="free_y") +
885
#     scale_fill_manual(values=group.cols) +
886
#     expand_limits(y=c(0)) +
887
#     theme(axis.text.x=element_blank(),
888
#           strip.background=element_rect(fill='white', colour='white'),
889
#           strip.text=element_text(size=14)) +
890
#     labs(x="Severity Group", y="Proportion") +
891
#     guides(fill=guide_legend(title="Severity")) +
892
#     ggsave("~/Dropbox/COVID19/plot.dir/Tcell-doublets_proportions-boxplot.pdf",
893
#            height=2.95, width=9.15, useDingbats=FALSE) +
894
#     NULL
895
```
896
897
898
```{r}
899
write.table(cell.freq.merge,
900
            file="~/Dropbox/COVID19/Data/Tcell_proportions.tsv",
901
            sep="\t", quote=FALSE, row.names=FALSE)
902
```
903
904
905
```{r, warning=FALSE, message=FALSE, fig.height=5.95, fig.width=11.15}
906
ggplot(cell.freq.merge[cell.freq.merge$Collection_Day %in% c("D0") &
907
                           !cell.freq.merge$CellType %in% c("NK", "Prolif.NK", "ILCs", "Doublets:Bcell", "Doublets:Platelet", "Doublets"), ],
908
       aes(x=D0_status_summary, y=Freq, fill=D0_status_summary)) +
909
    geom_boxplot() +
910
    theme_cowplot() +
911
    facet_wrap(~CellType, scales="free_y") +
912
    scale_fill_manual(values=group.cols) +
913
    expand_limits(y=c(0)) +
914
    theme(axis.text.x=element_blank(),
915
          strip.background=element_rect(fill='white', colour='white'),
916
          strip.text=element_text(size=14)) +
917
    labs(x="Severity Group", y="Proportion") +
918
    guides(fill=guide_legend(title="Severity")) +
919
    ggsave("~/Dropbox/COVID19/plot.dir/Tcells_proportions-boxplot.pdf",
920
           height=5.95, width=11.15, useDingbats=FALSE) +
921
    NULL
922
```
923
924
For the figures these need to be a filled barchart (?!). Focus in on the Tfh-like cells.
925
926
```{r, warning=FALSE, message=FALSE, fig.height=2.15, fig.width=3.55}
927
ggplot(cell.freq.merge[cell.freq.merge$Collection_Day %in% c("D0") &
928
                           cell.freq.merge$CellType %in% c("CD4.Tfh"), ],
929
       aes(x=D0_status_summary, y=Freq, fill=D0_status_summary)) +
930
    geom_boxplot() +
931
    theme_cowplot() +
932
    facet_wrap(~CellType, scales="free_y") +
933
    scale_fill_manual(values=group.cols) +
934
    expand_limits(y=c(0)) +
935
    theme(axis.text.x=element_blank(),
936
          strip.background=element_rect(fill='white', colour='white'),
937
          strip.text=element_text(size=14)) +
938
    labs(x="Severity Group", y="Proportion") +
939
    guides(fill=guide_legend(title="Severity")) +
940
    ggsave("~/Dropbox/COVID19/plot.dir/Tcells_proportions_Tfh-boxplot.pdf",
941
           height=2.15, width=3.55, useDingbats=FALSE) +
942
    NULL
943
```
944
945
Run a LM comparing categories.
946
947
```{r, warning=FALSE, message=FALSE, fig.height=2.15, fig.width=3.55}
948
ggplot(cell.freq.merge[cell.freq.merge$Collection_Day %in% c("D0") &
949
                           cell.freq.merge$CellType %in% c("CD4.Tfh"), ],
950
       aes(x=D0_status_summary, y=Freq, fill=Sex)) +
951
    geom_boxplot() +
952
    theme_cowplot() +
953
    facet_wrap(~CellType, scales="free_y") +
954
    scale_fill_colorblind() +
955
    expand_limits(y=c(0)) +
956
    theme(axis.text.x=element_blank(),
957
          strip.background=element_rect(fill='white', colour='white'),
958
          strip.text=element_text(size=14)) +
959
    labs(x="Severity Group", y="Proportion") +
960
    guides(fill=guide_legend(title="Severity")) +
961
    ggsave("~/Dropbox/COVID19/plot.dir/Tcells_proportions_Tfh-byGender_boxplot.pdf",
962
           height=2.15, width=3.55, useDingbats=FALSE) +
963
    NULL
964
```
965
966
Compare healthy vs. asymptomatic, and asymptomatic > critical.
967
968
```{r, warning=FALSE, message=FALSE}
969
healthy.tfh <- cell.freq.merge$Freq[cell.freq.merge$CellType %in% c("CD4.Tfh") &
970
                                        cell.freq.merge$D0_status_summary %in% c("Healthy") &
971
                                        cell.freq.merge$Collection_Day %in% c("D0")]
972
asymp.tfh <- cell.freq.merge$Freq[cell.freq.merge$CellType %in% c("CD4.Tfh") &
973
                                        cell.freq.merge$D0_status_summary %in% c("Asymptomatic") &
974
                                        cell.freq.merge$Collection_Day %in% c("D0")]
975
976
tfh.utest <- wilcox.test(x=healthy.tfh, y=asymp.tfh, alternative="two.sided")
977
tfh.utest
978
```
979
980
I should model the counts, normalized by the total numbers of captured cells.
981
982
```{r}
983
tfh.lm.list <- list()
984
covid.tfh.merge <- cell.freq.merge[!cell.freq.merge$D0_status_summary %in% c("Healthy"), ]
985
covid.tfh.merge$OrderedStatus <- ordered(covid.tfh.merge$D0_status_summary,
986
                                         levels=c("Asymptomatic", "Mild", "Moderate", "Severe", "Critical"))
987
covid.tfh.merge$Days_from_onset <- as.numeric(covid.tfh.merge$Days_from_onset)
988
989
c.types <- c("CD4.Tfh")
990
991
for(i in seq_along(c.types)){
992
    i.c <- c.types[i]
993
    
994
    i.lm.df <- data.frame("sample_id"=rownames(cell.freq.tab), "Count"=cell.freq.tab[, i.c])
995
    i.lm.df <- merge(i.lm.df, covid.tfh.merge[covid.tfh.merge$CellType %in% i.c, ], by='sample_id')
996
    
997
    # i.tfh.lm <- glm(Count ~ Sex + Age + Site + OrderedStatus,
998
    #                data=i.lm.df, family=poisson(link="log"))
999
    i.tfh.lm <- glm.nb(Count ~ Sex + Age + Site + OrderedStatus, data=i.lm.df, init.theta=1)
1000
    
1001
    i.lm.res <- summary(i.tfh.lm)
1002
    tfh.lm.list[[i.c]] <- data.frame("CellType"=rep(i.c, nrow(i.lm.res$coefficients)),
1003
                                     "Beta"=i.lm.res$coefficients[, 1],
1004
                                     "SE"=i.lm.res$coefficients[, 2],
1005
                                     "STAT"=i.lm.res$coefficients[, 3],
1006
                                     "Pvalue"=i.lm.res$coefficients[, 4],
1007
                                     "Predictor"=rownames(i.lm.res$coefficients))
1008
}
1009
1010
tfh.lm.df <- do.call(rbind.data.frame, tfh.lm.list)
1011
tfh.lm.df$Predictor <- gsub(tfh.lm.df$Predictor, pattern="OrderedStatus", replacement="Severity")
1012
tfh.lm.df$Predictor <- gsub(tfh.lm.df$Predictor, pattern="Male", replacement="")
1013
tfh.lm.df$Predictor <- gsub(tfh.lm.df$Predictor, pattern="Site", replacement="")
1014
tfh.lm.df <- tfh.lm.df[!tfh.lm.df$Predictor %in% c("(Intercept)"), ]
1015
tfh.lm.df$P.Adjust <- p.adjust(tfh.lm.df$Pvalue)
1016
```
1017
1018
1019
```{r, warning=FALSE, message=FALSE, fig.height=3.95, fig.width=4.95}
1020
ggplot(tfh.lm.df, aes(x=Predictor, y=Beta, fill=Pvalue < 0.05)) +
1021
    geom_hline(yintercept=0, lty=2, col='grey70') +
1022
    geom_errorbar(aes(ymin=Beta-SE, ymax=Beta+SE)) +
1023
    geom_point(shape=22, size=3) +
1024
    theme_cowplot() +
1025
     theme(strip.background=element_rect(fill='white', colour='white'),
1026
          strip.text=element_text(size=14)) +
1027
    coord_flip() +
1028
    scale_fill_manual(values=c("blue", "orange")) +
1029
    facet_wrap(~CellType, scales="free_x")  + 
1030
    ggsave("~/Dropbox/COVID19/plot.dir/Tcell_Tfh_props-LM.pdf",
1031
           height=3.95, width=4.95, useDingbats=FALSE)
1032
```
1033
1034
1035
```{r, warning=FALSE, message=FALSE}
1036
cellfreq.df <- cell.freq.merge %>% group_by(D0_status_summary, CellType) %>% summarise(Mean = mean(Freq))
1037
cellfreq.df$D0_status_summary <- factor(cellfreq.df$D0_status_summary,
1038
                                        levels=c("Healthy", "Asymptomatic", "Mild", 
1039
                                                 "Moderate", "Severe", "Critical", "LPS"))
1040
```
1041
1042
Create a new colour palette.
1043
1044
```{r}
1045
cell.order <- c("CD4.Naive", "CD4.CM", "CD4.EM", "CD4.IL22", "CD4.Prolif", "CD4.Th1", "CD4.Th2", "CD4.Th17", "CD4.Tfh",
1046
                "Treg", "CD8.Naive", "CD8.Activated", "CD8.Prolif", "CD8.CM", "CD8.TE", "CD8.EM", "gdT", "MAIT", "NKT")
1047
1048
all.qual.cols <- brewer.pal.info[brewer.pal.info$category %in% c("qual") & brewer.pal.info$colorblind, ]
1049
col_vector = unlist(mapply(brewer.pal, all.qual.cols$maxcolors, rownames(all.qual.cols)))
1050
1051
# cell.cols <- col_vector[c(1:7, 9:19)]
1052
cell.cols <- col_vector[c(1:17, 19, 20)]
1053
names(cell.cols) <- cell.order
1054
```
1055
1056
1057
```{r}
1058
pie(rep(1, 18), col=cell.cols)
1059
```
1060
1061
1062
```{r, warning=FALSE, message=FALSE, fig.height=4.25, fig.width=6.95}
1063
# n.cells <- length(cell.order)
1064
# cell.cols <- colorRampPalette(pal_futurama()(12))(n.cells)
1065
# names(cell.cols) <- cell.order
1066
1067
cellfreq.df$CellType <- factor(cellfreq.df$CellType,
1068
                               levels=cell.order)
1069
1070
ggplot(cellfreq.df,
1071
       aes(x=D0_status_summary, y = Mean, fill=CellType))+ 
1072
    geom_bar(position="fill", stat="identity", width = 0.9, colour = "black")+
1073
    scale_fill_manual(values = cell.cols) +
1074
    theme(aspect.ratio = 1/1.5, 
1075
          axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1, size = 14, colour = "black"),
1076
          panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
1077
          axis.text.y = element_text(size = 14, colour = "black"),
1078
          legend.key.size=unit(0.4, "cm"),
1079
          axis.title.x=element_blank(), axis.title.y=element_blank(),
1080
          legend.text = element_text(size = 14), legend.title=element_text(size=14),
1081
          panel.background = element_blank(), axis.line = element_line(colour = "black")) +
1082
    guides(fill=guide_legend(title="Cell type", ncol=1)) +
1083
    ggsave("~/Dropbox/COVID19/plot.dir/Tcell_barchart.pdf",
1084
           height=4.25, width=6.95, useDingbats=FALSE)
1085
```
1086
1087
I have a misgiving about this, as it masks the distribution of the proportions - maybe a series of filled boxplots, e.g. a waterfall?
1088
1089
```{r, warning=FALSE, message=FALSE, fig.height=5.25, fig.width=12.95}
1090
cell.freq.merge$CellType <- factor(cell.freq.merge$CellType,
1091
                                   levels=cell.order)
1092
1093
ggplot(cell.freq.merge,
1094
       aes(x=sample_id, y = Freq, fill=CellType))+ 
1095
    geom_bar(position="fill", stat="identity", width = 0.8, colour = "black")+
1096
    scale_fill_manual(values = cell.cols) +
1097
    facet_grid(. ~ D0_status_summary, scales="free_x", space="free") +
1098
    theme(axis.text.x = element_blank(),
1099
          panel.spacing=unit(0, "lines"),
1100
          panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
1101
          axis.text.y = element_text(size = 18),
1102
          axis.title.x=element_blank(), axis.title.y=element_blank(),
1103
          axis.ticks.x=element_blank(),
1104
          strip.background=element_rect(fill='white', colour='white'),
1105
          strip.text=element_text(size=14, angle=90, colour='black'),
1106
          legend.text = element_text(size = 18), legend.title=element_text(size=18),
1107
          panel.background = element_blank(), axis.line = element_line(colour = "black")) +
1108
    # guides(fill=guide_legend(title="Cell type")) +
1109
    ggsave("~/Dropbox/COVID19/plot.dir/Tcell_barchart-full.pdf",
1110
           height=5.25, width=8.95, useDingbats=FALSE) +
1111
    NULL
1112
```
1113
1114
Make dot plots.
1115
1116
```{r, warning=FALSE, message=FALSE}
1117
clust.model.matrix <- model.matrix(~ 0 + Sub.Annotation, data=tcell.prot.merge)
1118
rownames(clust.model.matrix) <- tcell.prot.merge$CellID
1119
1120
gene.goi.by.cluster <- t(clust.model.matrix) %*% as.matrix(tcell.prot.merge[, setdiff(colnames(goi.df), colnames(tcell.merge))])
1121
gene.ave.by.label <- as.data.frame(apply(gene.goi.by.cluster, 2, function(X) X/colSums(clust.model.matrix)))
1122
rownames(gene.ave.by.label) <- gsub(rownames(gene.ave.by.label), pattern="Sub\\.Annotation", replacement="")
1123
1124
gene.goi.by.cluster.bin <- t(clust.model.matrix) %*% as.matrix(tcell.prot.merge[, setdiff(colnames(goi.df), colnames(tcell.merge))] > 0)
1125
gene.bin.by.label <- as.data.frame(apply(gene.goi.by.cluster.bin, 2, function(X) X/sum(X)))
1126
gene.bin.by.label[is.na(gene.bin.by.label)] <- 0
1127
rownames(gene.bin.by.label) <- gsub(rownames(gene.bin.by.label), pattern="Sub\\.Annotation", replacement="")
1128
```
1129
1130
1131
```{r, message=FALSE, warning=FALSE}
1132
gene.ave.label.df <- as.data.frame(apply(gene.ave.by.label, 2, FUN=function(XP) XP/max(XP)))
1133
gene.ave.label.df$Sub.Annotation <- rownames(gene.ave.label.df)
1134
gene.ave.label.melt <- melt(gene.ave.label.df, id.vars='Sub.Annotation')
1135
colnames(gene.ave.label.melt) <- c("Sub.Annotation", "Gene", "AveExprs")
1136
1137
gene.bin.label.df <- gene.bin.by.label
1138
gene.bin.label.df$Sub.Annotation <- rownames(gene.bin.label.df)
1139
gene.bin.label.melt <- melt(gene.bin.label.df, id.vars='Sub.Annotation')
1140
colnames(gene.bin.label.melt) <- c("Sub.Annotation", "Gene", "PropExprs")
1141
```
1142
1143
1144
```{r, message=FALSE, warning=FALSE, fig.height=4.65, fig.width=8.95}
1145
dot.plot.df <- merge(gene.ave.label.melt, gene.bin.label.melt, by=c('Sub.Annotation', 'Gene'))
1146
dot.plot.df$Gene <- as.character(dot.plot.df$Gene)
1147
1148
dot.plot.genes <- c("CD3G", "CD8A", "CD4", "CCR7", "CD27", "CD28", "TOX", "CTLA4", "IFNG", "TBX21",
1149
                    "PDCD1", "GZMB", "RORC", "IKZF2", "GATA3", "TNFRSF9", "IL7R", "KLRG1",
1150
                    "TRAV1.2", "TRDV2", "TRGV9", "CD40LG", "RORC", "NCAM1", "CCR6", 
1151
                    "CXCR5", "HLA.DRB1", "LAG3", "FOXP3", "MKI67", "NCR1", "GARP", "FCGR3A")
1152
gene.cluster <- hclust(dist(t(gene.ave.by.label[, intersect(colnames(gene.ave.by.label), dot.plot.genes)])))$order
1153
1154
# manually set the gene order
1155
gene.order <- c("CD3G", "CCR7", "CD27", "CD4", "CD28", "TNFRSF9", "CCR6", "CD40LG", "MKI67", "TBX21", "GATA3", "RORC", "IKZF2",
1156
                "FOXP3", "CTLA4", "PDCD1", "CXCR5", "CD8A", "GZMB", "HLA.DRB1", "IFNG", "LAG3", "TOX", "IL7R", "KLRG1", "TRDV2", 
1157
                "TRGV9", "TRAV1.2", "NCAM1", "NCR1", "FCGR3A")
1158
1159
dot.plot.df <- dot.plot.df[dot.plot.df$Gene %in% dot.plot.genes, ]
1160
dot.plot.df$Gene <- factor(dot.plot.df$Gene,
1161
                           levels=gene.order)
1162
1163
dot.plot.df <- dot.plot.df[dot.plot.df$Sub.Annotation %in% cell.order, ]
1164
dot.plot.df$Sub.Annotation <- factor(dot.plot.df$Sub.Annotation,
1165
                                     levels=rev(cell.order))
1166
1167
ggplot(dot.plot.df,
1168
       aes(x=Gene, y=Sub.Annotation, colour=AveExprs, size=PropExprs*100)) +
1169
    geom_point() +
1170
    theme_cowplot() +
1171
    scale_colour_gradient(low='grey70', high='blue') +
1172
    theme(axis.text.x=element_text(angle=90, vjust=0.5, hjust=1, colour='black', size=14),
1173
          axis.text.y=element_text(colour='black', size=14),
1174
          legend.text=element_text(size=14),
1175
          legend.title=element_text(size=14),
1176
          panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
1177
          panel.background = element_blank(), axis.line = element_line(colour = "black")) +
1178
    guides(colour=guide_colorbar(title='Mean\nExpression'),
1179
           size=guide_legend(title='% Express')) +
1180
    labs(x="Gene", y="Cell type") +
1181
    ggsave("~/Dropbox/COVID19/plot.dir/Tcell_ext_dotplot-genes.pdf",
1182
           height=4.65, width=8.95, useDingbats=FALSE)
1183
```
1184
1185
Plot the cytokine profiles to figure out if Th1/2/17 dominate (or all of them).
1186
1187
```{r, message=FALSE, warning=FALSE, fig.height=4.15, fig.width=7.0}
1188
cytokine.dot.plot.df <- merge(gene.ave.label.melt, gene.bin.label.melt, by=c('Sub.Annotation', 'Gene'))
1189
cytokine.dot.plot.df$Gene <- as.character(cytokine.dot.plot.df$Gene)
1190
1191
cytokine.genes <- intersect(c("IL2", "IL1A", "IL1B", "IL10", "IL22",  "TNF", "IFNG", "LTA",
1192
                              "IL4", "IL5", "IL13", "IL17A", "IL17F", "IL21",
1193
                              "IL12A", "IL12B", "IL25"), colnames(tcell.prot.merge))
1194
1195
cytokine.dot.plot.df <- cytokine.dot.plot.df[cytokine.dot.plot.df$Gene %in% cytokine.genes, ]
1196
cytokine.dot.plot.df$Gene <- factor(cytokine.dot.plot.df$Gene,
1197
                                    levels=cytokine.genes)
1198
1199
cytokine.dot.plot.df <- cytokine.dot.plot.df[cytokine.dot.plot.df$Sub.Annotation %in% cell.order, ]
1200
cytokine.dot.plot.df$Sub.Annotation <- factor(cytokine.dot.plot.df$Sub.Annotation,
1201
                                              levels=rev(cell.order))
1202
1203
ggplot(cytokine.dot.plot.df,
1204
       aes(x=Gene, y=Sub.Annotation, colour=AveExprs, size=PropExprs*100)) +
1205
    geom_point() +
1206
    theme_cowplot() +
1207
    scale_colour_gradient(low='grey70', high='blue') +
1208
    theme(axis.text.x=element_text(angle=90, vjust=0.5, hjust=1, colour='black', size=14),
1209
          axis.text.y=element_text(colour='black', size=14),
1210
          legend.text=element_text(size=14),
1211
          legend.title=element_text(size=14),
1212
          legend.key.size=unit(0.4, "cm"),
1213
          panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
1214
          panel.background = element_blank(), axis.line = element_line(colour = "black")) +
1215
    guides(colour=guide_colorbar(title='Mean\nExpression'),
1216
           size=guide_legend(title='% Express')) +
1217
    labs(x="Cytokine", y="Cell type") +
1218
    ggsave("~/Dropbox/COVID19/plot.dir/Tcell_ext_dotplot-cytokines.pdf",
1219
           height=4.65, width=7.0, useDingbats=FALSE)
1220
```
1221
1222
I want to see how these cytokines change over the course of severity.
1223
1224
```{r, warning=FALSE, message=FALSE}
1225
cytokine.melt <- melt(tcell.prot.merge[, c("sample_id", "D0_status_summary", "Sub.Annotation", "Collection_Day", "Sex", "Age", "CellID",
1226
                                           cytokine.genes)],
1227
                      variable.name="Cytokine", 
1228
                      id.vars=c("sample_id", "D0_status_summary", "Sub.Annotation", "Collection_Day", "Sex", "Age", "CellID"))
1229
cytokine.means <- cytokine.melt %>% group_by(D0_status_summary, Sub.Annotation, Cytokine) %>% summarise("Mean.Exprs"=mean(value))
1230
cytokine.var <- cytokine.melt %>% group_by(D0_status_summary, Sub.Annotation, Cytokine) %>% summarise("Var.Exprs"=var(value))
1231
cytokine.se <- cytokine.melt %>% group_by(D0_status_summary, Sub.Annotation, Cytokine) %>% summarise("SE.Exprs"=sqrt(var(value))/sqrt(sum(value >= 0)))
1232
1233
cytokine.df <- Reduce(x=list("mean"=cytokine.means, "var"=cytokine.var, "se"=cytokine.se),
1234
                      f=function(x, y) merge(x, y, by=c('D0_status_summary', 'Sub.Annotation', 'Cytokine')))
1235
# cytokine.df <- merge(cytokine.df, covid.meta, by='sample_id')
1236
 
1237
# remove superflous or malignant samples
1238
# cytokine.df <- cytokine.df[!cytokine.df$patient_id %in% c("CV0198"), ]
1239
# cytokine.df <- cytokine.df[!cytokine.df$sample_id %in% c("BGCV01_CV0902"), ]
1240
# cytokine.df <- cytokine.df[!cytokine.df$D0_status_summary %in% c("Non_Covid"), ]
1241
1242
cytokine.df$D0_satus_summary <- factor(cytokine.df$D0_status_summary,
1243
                                       levels=c("Healthy", "Asymptomatic", "Mild",
1244
                                                "Moderate", "Severe", "Critical", "LPS"))
1245
```
1246
1247
1248
Maybe visualise this as a heatmap?
1249
1250
```{r, fig.height=7.15, fig.width=7.95}
1251
cytokine.df <- cytokine.df[!cytokine.df$Sub.Annotation %in% c("NK", "ILCs", "Doublets:Bcell", "Doublets", "Doublets:Platelet") &
1252
                               !is.na(cytokine.df$D0_satus_summary), ]
1253
1254
ggplot(cytokine.df[!cytokine.df$Cytokine %in% c("IL13", "IL22", "IL12A", "IL17A", "IL17F", "IL10", "IL6"), ], 
1255
       aes(x=D0_satus_summary, y=Cytokine, fill=Mean.Exprs)) +
1256
    geom_tile() +
1257
    facet_wrap(~Sub.Annotation) +
1258
    scale_fill_viridis(option="magma") +
1259
    theme_cowplot() +
1260
    guides(fill=guide_colourbar(title="Mean Expression")) +
1261
    labs(y="Cytokine Gene", x="Severity") +
1262
    theme(axis.text.x=element_text(angle=90, vjust=0.5, hjust=1),
1263
          strip.background=element_rect(fill='white', colour='white')) +
1264
    ggsave("~/Dropbox/COVID19/plot.dir/Tcell_cytokines_heatmap.pdf",
1265
           height=7.15, width=7.95, useDingbats=FALSE) +
1266
    NULL
1267
```
1268
1269
1270
What about exhaustion markers?
1271
1272
1273
```{r, warning=FALSE, message=FALSE}
1274
exhaustion.genes <- c("TOX", "TIGIT", "LAG3", "HAVCR2", "PDCD1", "FOXP3", "MKI67")
1275
exhaustion.melt <- melt(tcell.prot.merge[, c("sample_id", "D0_status_summary", "Sub.Annotation", "Collection_Day", "Sex", "Age", "CellID",
1276
                                           exhaustion.genes)],
1277
                      variable.name="Gene", 
1278
                      id.vars=c("sample_id", "D0_status_summary", "Sub.Annotation", "Collection_Day", "Sex", "Age", "CellID"))
1279
exhaustion.means <- exhaustion.melt %>% group_by(D0_status_summary, Sub.Annotation, Gene) %>% summarise("Mean.Exprs"=mean(value))
1280
exhaustion.var <- exhaustion.melt %>% group_by(D0_status_summary, Sub.Annotation, Gene) %>% summarise("Var.Exprs"=var(value))
1281
exhaustion.se <- exhaustion.melt %>% group_by(D0_status_summary, Sub.Annotation, Gene) %>% summarise("SE.Exprs"=sqrt(var(value))/sqrt(sum(value >= 0)))
1282
1283
exhaustion.df <- Reduce(x=list("mean"=exhaustion.means, "var"=exhaustion.var, "se"=exhaustion.se),
1284
                      f=function(x, y) merge(x, y, by=c('D0_status_summary', 'Sub.Annotation', 'Gene')))
1285
# exhaustion.df <- merge(exhaustion.df, covid.meta, by='sample_id')
1286
 
1287
# remove superflous or malignant samples
1288
# exhaustion.df <- exhaustion.df[!exhaustion.df$patient_id %in% c("CV0198"), ]
1289
# exhaustion.df <- exhaustion.df[!exhaustion.df$sample_id %in% c("BGCV01_CV0902"), ]
1290
# exhaustion.df <- exhaustion.df[!exhaustion.df$D0_status_summary %in% c("Non_Covid"), ]
1291
1292
exhaustion.df$D0_satus_summary <- factor(exhaustion.df$D0_status_summary,
1293
                                       levels=c("Healthy", "Asymptomatic", "Mild",
1294
                                                "Moderate", "Severe", "Critical", "LPS"))
1295
```
1296
1297
1298
```{r, fig.height=7.15, fig.width=7.95}
1299
exhaustion.df <- exhaustion.df[!exhaustion.df$Sub.Annotation %in% c("NK", "ILCs", "Doublets:Bcell", "Doublets", "Doublets:Platelet") &
1300
                               !is.na(exhaustion.df$D0_satus_summary), ]
1301
1302
ggplot(exhaustion.df[!exhaustion.df$Gene %in% c("IL13", "IL22", "IL12A", "IL17A", "IL17F"), ], 
1303
       aes(x=D0_satus_summary, y=Gene, fill=Mean.Exprs)) +
1304
    geom_tile() +
1305
    facet_wrap(~Sub.Annotation) +
1306
    scale_fill_viridis(option="magma") +
1307
    theme_cowplot() +
1308
    guides(fill=guide_colourbar(title="Mean Expression")) +
1309
    labs(x="Severity", "Gene") +
1310
    theme(axis.text.x=element_text(angle=90, vjust=0.5, hjust=1),
1311
          strip.background=element_rect(fill='white', colour='white')) +
1312
    ggsave("~/Dropbox/COVID19/plot.dir/Tcell_exhaustion_heatmap.pdf",
1313
           height=7.15, width=7.95, useDingbats=FALSE) +
1314
    NULL
1315
```
1316
1317
Do the same for the proteins.
1318
1319
```{r, warning=FALSE, message=FALSE}
1320
clust.model.matrix <- model.matrix(~ 0 + Sub.Annotation, data=tcell.prot.merge)
1321
rownames(clust.model.matrix) <- tcell.prot.merge$CellID
1322
1323
goi.by.cluster <- t(clust.model.matrix) %*% as.matrix(tcell.prot.merge[, gsub(setdiff(colnames(prot.df), colnames(tcell.merge)),
1324
                                                                              pattern="\\.x", replacement="")])
1325
ave.by.label <- as.data.frame(apply(goi.by.cluster, 2, function(X) X/colSums(clust.model.matrix)))
1326
rownames(ave.by.label) <- gsub(rownames(ave.by.label), pattern="Sub\\.Annotation", replacement="")
1327
colnames(ave.by.label) <- gsub(colnames(ave.by.label), pattern="Prot\\.", replacement="")
1328
1329
goi.by.cluster.bin <- t(clust.model.matrix) %*% as.matrix(tcell.prot.merge[, gsub(setdiff(colnames(prot.df), colnames(tcell.merge)),
1330
                                                                              pattern="\\.x", replacement="")] > 0.01)
1331
bin.by.label <- as.data.frame(apply(goi.by.cluster.bin, 2, function(X) X/sum(X)))
1332
bin.by.label[is.na(bin.by.label)] <- 0
1333
rownames(bin.by.label) <- gsub(rownames(bin.by.label), pattern="Sub\\.Annotation", replacement="")
1334
colnames(bin.by.label) <- gsub(colnames(bin.by.label), pattern="Prot\\.", replacement="")
1335
```
1336
1337
1338
```{r, message=FALSE, warning=FALSE}
1339
ave.label.df <- as.data.frame(apply(ave.by.label, 2, FUN=function(XP) (XP-min(XP))/max(XP-min(XP))))
1340
ave.label.df$Sub.Annotation <- rownames(ave.label.df)
1341
ave.label.melt <- melt(ave.label.df, id.vars='Sub.Annotation')
1342
colnames(ave.label.melt) <- c("Sub.Annotation", "Protein", "AveExprs")
1343
1344
bin.label.df <- bin.by.label
1345
bin.label.df$Sub.Annotation <- rownames(bin.label.df)
1346
bin.label.melt <- melt(bin.label.df, id.vars='Sub.Annotation')
1347
colnames(bin.label.melt) <- c("Sub.Annotation", "Protein", "PropExprs")
1348
```
1349
1350
1351
```{r, message=FALSE, warning=FALSE, fig.height=5.15, fig.width=8.95}
1352
protein.dot.plot.df <- merge(ave.label.melt, bin.label.melt, by=c('Sub.Annotation', 'Protein'))
1353
protein.dot.plot.df$Protein <- as.character(protein.dot.plot.df$Protein)
1354
1355
dot.plot.prots <- c("CD3", "CD8", "CD4", "CCR7", "CD45RA", "CD45RO", "CD28", "CD27", "CTLA4", "CD56", "CD16", "ICOSL",
1356
                    "CD274", "CXCR3", "CD40LG", "CD38", "ICOS", "PD1", "CD25", "LAMP1", "CXCR3", "CCR6", "IL7R", 
1357
                    "CXCR5", "TCR_Va7.2", "TCR_Vg2", "TCR_Vg9", "TCR_Va24-Ja18", "CD62L", "CD44", "TCR_Va24.Ja18")
1358
1359
protein.order <- c("CD3", "CD4", "CCR7", "CD45RA", "CD45RO", "CD27", "CD28", "CD62L", "CD25", "CTLA4",
1360
                   "LAMP1", "CCR6", "IL7R", 
1361
                   "CXCR5", "CD40LG", "ICOS", "CXCR3", "CD8", "PD1", "CD274", "TCR_Vg9", "TCR_Vg2",
1362
                   "TCR_Va7.2", "TCR_Va24.Ja18", "CD56", "CD16")
1363
1364
protein.dot.plot.df <- protein.dot.plot.df[protein.dot.plot.df$Protein %in% dot.plot.prots, ]
1365
protein.dot.plot.df$Protein <- factor(protein.dot.plot.df$Protein,
1366
                                      levels=protein.order)
1367
1368
protein.dot.plot.df <- protein.dot.plot.df[!protein.dot.plot.df$Sub.Annotation %in% c("NK", "ILCs", 
1369
                                                                                     "Doublets:Platelet", "Doublets", "Doublets:Bcell"), ]
1370
protein.dot.plot.df$Sub.Annotation <- factor(protein.dot.plot.df$Sub.Annotation,
1371
                                             levels=rev(cell.order))
1372
1373
ggplot(protein.dot.plot.df, 
1374
       aes(x=Protein, y=Sub.Annotation, colour=AveExprs, size=PropExprs*100)) +
1375
    geom_point() +
1376
    theme_cowplot() +
1377
    scale_colour_gradient(low='grey70', high=darken('red')) +
1378
     theme(axis.text.x=element_text(angle=90, vjust=0.5, hjust=1, colour='black', size=14),
1379
          axis.text.y=element_text(colour='black', size=14),
1380
          legend.text=element_text(size=14),
1381
          legend.title=element_text(size=14),
1382
          panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
1383
          panel.background = element_blank(), axis.line = element_line(colour = "black")) +
1384
    guides(colour=guide_colorbar(title='Mean\nExpression'),
1385
           size=guide_legend(title='% Express')) +
1386
    labs(x="Protein", y="Cell type") +
1387
    ggsave("~/Dropbox/COVID19/plot.dir/Tcell_ext_dotplot-protein.pdf",
1388
           height=5.15, width=8.95, useDingbats=FALSE)
1389
```
1390
1391
I also want to see some of the CD8 sub-phenotype mRNA and protein expression, namely _KLRG1_ and _CD217_ (_IL7R_).
1392
1393
```{r, warning=FALSE, message=FALSE, fig.height=5.95, fig.width=11.95}
1394
repeat.samps <- covid.meta$patient_id[duplicated(covid.meta$patient_id)]
1395
cell.freq.merge$Day <- ordered(cell.freq.merge$Collection_Day,
1396
                               levels=c("D0", "D7", "D9", "D12", "D13", "D28"))
1397
1398
ggplot(cell.freq.merge[cell.freq.merge$patient_id %in% repeat.samps &
1399
                           !cell.freq.merge$CellType %in% c("NK", "Prolif.NK", "ILCs", "Doublets:Bcell", "Doublets:Platelet", "Doublets") &
1400
                           !cell.freq.merge$D0_status_summary %in% c("Non_covid", "LPS"), ],
1401
       aes(x=Day, y=Freq, fill=D0_status_summary, group=patient_id)) +
1402
    #geom_boxplot() +
1403
    geom_line(aes(colour=D0_status_summary)) +
1404
    geom_point(shape=21, size=3) +
1405
    theme_cowplot() +
1406
    facet_wrap(~CellType, scales="free_y") +
1407
    scale_fill_manual(values=group.cols) +
1408
    scale_colour_manual(values=group.cols) +
1409
    expand_limits(y=c(0)) +
1410
    theme(strip.background=element_rect(fill='white', colour='white'),
1411
          strip.text=element_text(size=14)) +
1412
    labs(x="Day", y="Proportion") +
1413
    guides(fill=guide_legend(title="Severity")) +
1414
    # ggsave("~/Dropbox/COVID19/plot.dir/Tcells_proportions-boxplot.pdf",
1415
    #        height=5.95, width=11.15, useDingbats=FALSE) +
1416
     NULL
1417
```
1418
1419
1420
Correlate T cell and B cell proportions.
1421
1422
```{r}
1423
initial.clusts <- read.table("~/Dropbox/COVID19/Data/combined_dec_MetaData.txt",
1424
                             sep="\t", header=TRUE, stringsAsFactors=FALSE)
1425
```
1426
1427
```{r}
1428
bcell.nums <- read.csv("~/Dropbox/COVID19/Data/B_cellnumbers_by_sample_id.csv",
1429
                       header=TRUE, stringsAsFactors=FALSE)
1430
rownames(bcell.nums) <- bcell.nums$sample_id
1431
bcell.nums <- bcell.nums[, -1]
1432
1433
# add an aggreated plasmacell column
1434
bcell.nums$Plasmacell <- rowSums(bcell.nums[, c("Plasma_cell_IgM", "Plasma_cell_IgA", "Plasma_cell_IgG")])
1435
bcell.nums <- bcell.nums[intersect(rownames(bcell.nums), names(n.cell.vecc)), ]
1436
# b.cell.nums <- as.data.frame(t(sapply(rownames(bcell.nums), FUN=function(X) bcell.nums[X, ]/n.cell.vecc[X])))
1437
1438
# bcell.nums <- as.data.frame(t(apply(bcell.nums, 1, function(X) X/sum(X))))
1439
bcell.nums$sample_id <- rownames(bcell.nums)
1440
bcell.num.df <- melt(bcell.nums, id.vars='sample_id')
1441
bcell.num.df$sample_id <- as.character(bcell.num.df$sample_id)
1442
bcell.num.df$variable <- as.character(bcell.num.df$variable)
1443
colnames(bcell.num.df) <- c("sample_id", "Celltype", "Freq")
1444
#bcell.num.df <- bcell.num.df[bcell.num.df$Celltype %in% c("B_cell", "Plasmablast"), ]
1445
1446
bcell.num.cast <- dcast(bcell.num.df, sample_id ~ Celltype, value.var='Freq')
1447
rownames(bcell.num.cast) <- bcell.num.cast$sample_id
1448
```
1449
1450
1451
1452
```{r, warning=FALSE, message=FALSE}
1453
tcell.freq.tab <- table(tcell.prot.merge$sample_id[!tcell.prot.merge$Sub.Annotation %in% c("NK", "ILCs", "Doublets", "Doublets:Bcell", "Doublets:Platelet")],
1454
                        tcell.prot.merge$Sub.Annotation[!tcell.prot.merge$Sub.Annotation %in% c("NK", "ILCs", "Doublets", "Doublets:Bcell", "Doublets:Platelet")])
1455
tcell.freq.tab <- as.data.frame(tcell.freq.tab[!rownames(tcell.freq.tab) %in%
1456
                                                   c("NK", "ILCs", "Doublets", "Doublets:Bcell", "Doublets:Platelet"), ])
1457
tcell.nums <- dcast(tcell.freq.tab, Var1 ~ Var2, value.var='Freq')
1458
1459
colnames(tcell.nums) <- c("sample_id", colnames(tcell.nums)[-1])
1460
1461
tcell.bcell.nums <- merge(tcell.nums, bcell.num.cast, by='sample_id')
1462
rownames(tcell.bcell.nums) <- tcell.bcell.nums$sample_id
1463
tcell.bcell.nums <- as.matrix(tcell.bcell.nums[, -1])
1464
1465
tcell.bcell.props <- sapply(rownames(tcell.bcell.nums), FUN=function(X) as.numeric(tcell.bcell.nums[X, ]/n.cell.vecc[X]),
1466
                                        simplify=TRUE)
1467
rownames(tcell.bcell.props) <- colnames(tcell.bcell.nums)
1468
```
1469
1470
Cross-compare the T and B cell proportions.
1471
1472
```{r, fig.height=5.95, fig.width=6.95}
1473
bcell.types <- colnames(bcell.num.cast)[-1]
1474
tcell.bcell.corr <- cor(t(tcell.bcell.props[c(bcell.types, "CD4.Tfh"), ]))
1475
pheatmap(tcell.bcell.corr, show_rownames=TRUE, show_colnames=TRUE,
1476
         filename="~/Dropbox/COVID19/plot.dir/Tcell_Bcell_corr_heatmap.pdf",
1477
         height=5.95, width=6.95, useDingbats=FALSE)
1478
1479
pheatmap(tcell.bcell.corr, show_rownames=TRUE, show_colnames=TRUE)
1480
```
1481
1482
How different is this for each severity group, or final outcome?
1483
1484
```{r, fig.height=5.95, fig.width=6.95}
1485
control.samps <- covid.meta$sample_id[covid.meta$D0_status_summary %in% c("Healthy")]
1486
asymp.samps <- covid.meta$sample_id[covid.meta$D0_status_summary %in% c("Asymptomatic")]
1487
mild.samps <- covid.meta$sample_id[covid.meta$D0_status_summary %in% c("Mild")]
1488
mod.samps <- covid.meta$sample_id[covid.meta$D0_status_summary %in% c("Moderate")]
1489
severe.samps <- covid.meta$sample_id[covid.meta$D0_status_summary %in% c("Severe")]
1490
critical.samps <- covid.meta$sample_id[covid.meta$D0_status_summary %in% c("Critical")]
1491
```
1492
1493
Differential correlation? Should I do this across samples, ranked by severity?  
1494
1495
```{r, warning=FALSE, message=FALSE}
1496
library(DCARS)
1497
library(kableExtra)
1498
library(knitr)
1499
covid.meta$OrderedSeverity <- factor(covid.meta$D0_status_summary,
1500
                                     levels=c("Healthy", "Asymptomatic", "Mild", "Moderate", "Severe", "Critical"))
1501
samp.order <- covid.meta[order(covid.meta$OrderedSeverity[!is.na(covid.meta$OrderedSeverity)]),]$sample_id
1502
1503
bcell.tcell.dcars <- sapply(c(bcell.types, "CD4.Tfh"), FUN=function(XT) 
1504
    DCARS(dat=tcell.bcell.props[c(bcell.types, "CD4.Tfh"), intersect(colnames(tcell.bcell.props), samp.order)],
1505
          xname=XT, yname="CD4.Tfh", plot=FALSE, niter=10000,
1506
          extractPermutationTestStatistics=FALSE))
1507
bcell.tcell.dcars.adjust <- p.adjust(bcell.tcell.dcars)
1508
bcell.tcell.dcars.table <- data.frame("Pvalue"=bcell.tcell.dcars)
1509
1510
kbl(bcell.tcell.dcars.table) %>% kable_paper(full_width=FALSE) %>%
1511
    save_kable("~/Dropbox/COVID19/plot.dir/DiffCor_Bcell-Tfh_props.pdf",
1512
               self_contained=TRUE, keep_tex=TRUE)
1513
kbl(bcell.tcell.dcars.table) %>% kable_paper(full_width=FALSE) %>%
1514
    save_kable("~/Dropbox/COVID19/plot.dir/DiffCor_Bcell-Tfh_props.html",
1515
               self_contained=TRUE)
1516
1517
kbl(bcell.tcell.dcars.table) %>% kable_paper(full_width=FALSE)
1518
```
1519
1520
These are the differential correlations between CD4.Tfh cells and B cell proportions across severity groups. Plot the correlation difference 
1521
between asymptomatic and severe, or plot the correlation across all severity groups for each cell type?
1522
1523
```{r, warning=FALSE, message=FALSE, fig.height=2.95, width=3.95}
1524
bcell.cors.list <- list("Healthy"=cor(t(tcell.bcell.props[c(bcell.types, "CD4.Tfh"), colnames(tcell.bcell.props) %in% control.samps]))["CD4.Tfh", ],
1525
                        "Asymptomatic"=cor(t(tcell.bcell.props[c(bcell.types, "CD4.Tfh"), colnames(tcell.bcell.props) %in% asymp.samps]))["CD4.Tfh", ],
1526
                        "Mild"=cor(t(tcell.bcell.props[c(bcell.types, "CD4.Tfh"), colnames(tcell.bcell.props) %in% mild.samps]))["CD4.Tfh", ],
1527
                        "Moderate"=cor(t(tcell.bcell.props[c(bcell.types, "CD4.Tfh"), colnames(tcell.bcell.props) %in% mod.samps]))["CD4.Tfh", ],
1528
                        "Severe"=cor(t(tcell.bcell.props[c(bcell.types, "CD4.Tfh"), colnames(tcell.bcell.props) %in% severe.samps]))["CD4.Tfh", ],
1529
                        "Critical"=cor(t(tcell.bcell.props[c(bcell.types, "CD4.Tfh"), colnames(tcell.bcell.props) %in% critical.samps]))["CD4.Tfh", ])
1530
1531
bcell.cors.df <- do.call(cbind.data.frame, bcell.cors.list)
1532
bcell.cors.df$CellType <- rownames(bcell.cors.df)
1533
bcell.cors.melt <- melt(bcell.cors.df, id.vars=c("CellType"))
1534
bcell.cors.melt$CellType <- as.character(bcell.cors.melt$CellType)
1535
colnames(bcell.cors.melt) <- c("CellType", "Severity", "Corr")
1536
bcell.cors.melt$Severity <- factor(bcell.cors.melt$Severity,
1537
                                   levels=c("Healthy", "Asymptomatic", "Mild", "Moderate", "Severe", "Critical"))
1538
1539
# maybe only plot the significant diff correlations.
1540
diff.cors <- names(bcell.tcell.dcars)[bcell.tcell.dcars <= 0.05]
1541
1542
plasma.cols <- c("#49006a", "#ae017e", "#d4b9da", "#fa9fb5", "blue")
1543
names(plasma.cols) <- c("Plasmablast", "Plasma_cell_IgM", "Plasma_cell_IgG", "Plasma_cell_IgA")
1544
1545
ggplot(bcell.cors.melt[bcell.cors.melt$CellType %in% diff.cors, ], 
1546
       aes(x=Severity, y=Corr, fill=CellType, colour=CellType, group=CellType)) +
1547
    geom_hline(yintercept=0, lty=2, colour='grey80', alpha=0.75) +
1548
    geom_line() +
1549
    geom_point(shape=21, size=3) +
1550
    scale_fill_manual(values=plasma.cols) +
1551
    scale_colour_manual(values=plasma.cols) +
1552
    theme_cowplot() +
1553
    theme(axis.text.x=element_text(size=14, angle=90, vjust=0.5, hjust=1)) +
1554
    labs(x="Severity", y="Pearson\nCorrelation") +
1555
    ggsave("~/Dropbox/COVID19/plot.dir/Tfh_diff_corr.pdf",
1556
           height=2.95, width=3.95, useDingbats=FALSE) +
1557
    NULL
1558
    
1559
```
1560
1561
1562
Also have the Ig isotypes from Kelvin to correlate.
1563
1564
```{r, warning=FALSE, message=FALSE}
1565
ig.isotypes <- read.csv("~/Dropbox/COVID19/Data/vdj_isotypes (1).csv",
1566
                        header=TRUE, stringsAsFactors=FALSE)
1567
isotype.counts <- dcast(ig.isotypes, sample_id ~ celltype + isotype, value.var='total')
1568
isotype.counts[is.na(isotype.counts)] <- 0
1569
rownames(isotype.counts) <- isotype.counts$sample_id
1570
isotype.counts <- isotype.counts[, -1]
1571
isotype.counts <- isotype.counts[intersect(rownames(isotype.counts), names(n.cell.vecc)), ]
1572
1573
bcell.counts <- rowSums(bcell.nums[, -ncol(bcell.nums)])
1574
1575
isotype.counts.props <- t(sapply(rownames(isotype.counts), FUN=function(X) as.numeric(isotype.counts[X, ]/n.cell.vecc[X]),
1576
                                 simplify=TRUE))
1577
colnames(isotype.counts.props) <- colnames(isotype.counts)
1578
```
1579
1580
1581
```{r}
1582
tcell.bcell.df <- as.data.frame(tcell.bcell.nums)
1583
tcell.bcell.df$sample_id <- rownames(tcell.bcell.nums)
1584
isotype.counts.props <- as.data.frame(isotype.counts.props)
1585
isotype.counts.props$sample_id <- rownames(isotype.counts.props)
1586
1587
tcell.isotype.df <- merge(isotype.counts.props, tcell.bcell.df[, colnames(tcell.nums)], by='sample_id')
1588
rownames(tcell.isotype.df) <- tcell.isotype.df$sample_id
1589
```
1590
1591
1592
```{r, fig.height=9.95, fig.width=9.95}
1593
isotypes <- colnames(isotype.counts)
1594
tcell.isotype.corr <- cor(tcell.isotype.df[, c(isotypes, "CD4.Tfh")])
1595
pheatmap(tcell.isotype.corr, show_rownames=TRUE, show_colnames=TRUE,
1596
         filename="~/Dropbox/COVID19/plot.dir/Tcell_isotype_corr_heatmap.pdf",
1597
         height=5.95, width=6.95, useDingbats=FALSE)
1598
1599
pheatmap(tcell.isotype.corr, show_rownames=TRUE, show_colnames=TRUE)
1600
```
1601
1602
1603
Differential correlation? Should I do this across samples, ranked by severity?  
1604
1605
```{r, warning=FALSE, message=FALSE}
1606
# need to remove all 0 rows
1607
isotype.dat <- t(tcell.isotype.df[intersect(rownames(tcell.isotype.df), samp.order), c(isotypes, "CD4.Tfh")])
1608
isotype.dat <- isotype.dat[rowSums(isotype.dat)> 1e-2, ]
1609
1610
isotype.tcell.dcars <- sapply(rownames(isotype.dat), FUN=function(XT)
1611
    DCARS(dat=isotype.dat,
1612
          xname=XT, yname="CD4.Tfh", plot=FALSE, niter=10000,
1613
          extractPermutationTestStatistics=FALSE))
1614
1615
isotype.tcell.dcars.table <- data.frame("Pvalue"=isotype.tcell.dcars)
1616
kbl(isotype.tcell.dcars.table) %>% kable_paper(full_width=FALSE) %>%
1617
    save_kable("~/Dropbox/COVID19/plot.dir/DiffCor_Isotype-Tfh_props.pdf",
1618
               self_contained=TRUE, keep_tex=TRUE)
1619
kbl(isotype.tcell.dcars.table) %>% kable_paper(full_width=FALSE) %>%
1620
    save_kable("~/Dropbox/COVID19/plot.dir/DiffCor_Isotype-Tfh_props.html",
1621
               self_contained=TRUE)
1622
1623
kbl(isotype.tcell.dcars.table) %>% kable_paper(full_width=FALSE)
1624
```
1625
1626
These are the differential correlations between CD4.Tfh cells and B cell proportions across severity groups. Plot the correlation difference 
1627
between asymptomatic and severe, or plot the correlation across all severity groups for each cell type?
1628
1629
```{r, warning=FALSE, message=FALSE, fig.height=2.95, width=3.95}
1630
isotype.cors.list <- list("Healthy"=cor(tcell.isotype.df[intersect(rownames(tcell.isotype.df), control.samps), c(isotypes, "CD4.Tfh")])[, c("CD4.Tfh")],
1631
                        "Asymptomatic"=cor(tcell.isotype.df[intersect(rownames(tcell.isotype.df), asymp.samps), c(isotypes, "CD4.Tfh")])[, c("CD4.Tfh")],
1632
                        "Mild"=cor(tcell.isotype.df[intersect(rownames(tcell.isotype.df), mild.samps), c(isotypes, "CD4.Tfh")])[, c("CD4.Tfh")],
1633
                        "Moderate"=cor(tcell.isotype.df[intersect(rownames(tcell.isotype.df), mod.samps), c(isotypes, "CD4.Tfh")])[, c("CD4.Tfh")],
1634
                        "Severe"=cor(tcell.isotype.df[intersect(rownames(tcell.isotype.df), severe.samps), c(isotypes, "CD4.Tfh")])[, c("CD4.Tfh")],
1635
                        "Critical"=cor(tcell.isotype.df[intersect(rownames(tcell.isotype.df), critical.samps), c(isotypes, "CD4.Tfh")])[, c("CD4.Tfh")])
1636
1637
isotype.cors.df <- do.call(cbind.data.frame, isotype.cors.list)
1638
isotype.cors.df$CellType <- rownames(isotype.cors.df)
1639
isotype.cors.melt <- melt(isotype.cors.df, id.vars=c("CellType"))
1640
isotype.cors.melt$CellType <- as.character(isotype.cors.melt$CellType)
1641
colnames(isotype.cors.melt) <- c("CellType", "Severity", "Corr")
1642
isotype.cors.melt$Severity <- gsub(isotype.cors.melt$Severity,
1643
                                   pattern="^([A-Za-z]+)\\.(\\S*)", replacement="\\1")
1644
isotype.cors.melt$Severity <- factor(isotype.cors.melt$Severity,
1645
                                   levels=c("Healthy", "Asymptomatic", "Mild", "Moderate", "Severe", "Critical"))
1646
1647
# maybe only plot the significant diff correlations.
1648
diff.cors <- names(isotype.tcell.dcars)[isotype.tcell.dcars <= 0.05]
1649
1650
ggplot(isotype.cors.melt[isotype.cors.melt$CellType %in% setdiff(diff.cors, "CD4.Tfh"), ],
1651
       aes(x=Severity, y=Corr, fill=CellType, colour=CellType, group=CellType)) +
1652
    geom_hline(yintercept=0, lty=2, colour='grey80', alpha=0.75) +
1653
    geom_line() +
1654
    geom_point(shape=21, size=3) +
1655
    # scale_fill_manual(values=plasma.cols) +
1656
    # scale_colour_manual(values=plasma.cols) +
1657
    scale_fill_aaas() +
1658
    scale_colour_aaas() +
1659
    theme_cowplot() +
1660
    theme(axis.text.x=element_text(size=14, angle=90, vjust=0.5, hjust=1)) +
1661
    labs(x="Severity", y="Pearson\nCorrelation") +
1662
    ggsave("~/Dropbox/COVID19/plot.dir/Tfh_Isotype-diff_corr.pdf",
1663
           height=2.95, width=3.95, useDingbats=FALSE) +
1664
    NULL
1665
```
1666
1667