Switch to unified view

a b/TcellAnalysis/notebooks/COVID_Tcell_annotations-Update.Rmd
1
---
2
title: "COVID19: T cell annotation - updated"
3
output: html_notebook
4
---
5
6
Annotating T cells subtypes.
7
8
```{r, warning=FALSE, message=FALSE}
9
library(ggplot2)
10
library(scran)
11
library(ggsci)
12
library(cowplot)
13
library(scattermore)
14
library(ComplexHeatmap)
15
library(RColorBrewer)
16
library(pheatmap)
17
library(scater)
18
library(reshape2)
19
library(colorspace)
20
library(dplyr)
21
library(ggthemes)
22
library(edgeR)
23
library(scales)
24
library(ggrepel)
25
library(viridis)
26
library(MASS)
27
```
28
29
I've taken all of the full clusters and selected the ones that I think contain T cells.
30
31
```{r, warning=FALSE, message=FALSE}
32
# tcell.umap <- read.table("~/Dropbox/COVID19/Data/Updated/Cambridge_Sanger_Newcastle_Tcell_UMAPs.tsv",
33
#                          sep="\t", header=TRUE, stringsAsFactors=FALSE)
34
# fix the Sanger barcodes
35
# tcell.umap$CellID[tcell.umap$DataSet %in% c("Sanger")] <- gsub(tcell.umap$CellID[tcell.umap$DataSet %in% c("Sanger")],
36
#                                                                pattern="^(CV[0-9]+_[A-Z0-9]+-CV[0-9]+_[A-Z0-9]+)_", replacement="")
37
38
tcell.clusters <- read.table("~/Dropbox/COVID19/Data/Updated/Cambridge_Sanger_Newcastle_Tcell_clusters.tsv",
39
                             sep="\t", header=TRUE, stringsAsFactors=FALSE)
40
41
# tcell.meta <- merge(tcell.umap, tcell.clusters, by='CellID')
42
```
43
44
45
```{r, warning=FALSE, message=FALSE}
46
all.meta <- read.table("~/Dropbox/COVID19/Data/Updated/COVID19_scMeta-data.tsv",
47
                       sep="\t", header=TRUE, stringsAsFactors=FALSE)
48
rownames(all.meta) <- all.meta$CellID
49
50
# remove BGCV01_CV0209 and CV0198
51
all.meta <- all.meta[!all.meta$sample_id %in% c("BGCV01_CV0902"), ]
52
all.meta <- all.meta[!all.meta$patient_id %in% c("CV0198"), ]
53
```
54
55
To normalise the cell type proportions, I need to know how many cells were captured for each patient-sample combo.
56
57
```{r, warning=FALSE, message=FALSE}
58
n.cell.vecc <- table(all.meta$sample_id)
59
```
60
61
62
63
```{r, warning=FALSE, message=FALSE}
64
# tcell.merge <- merge(tcell.clusters, tcell.umap, by='CellID')
65
tcell.merge <- tcell.clusters
66
# tcell.merge <- merge(tcell.clusters, all.meta, by='CellID')
67
# tcell.merge <- merge(tcell.merge, tcell.umap, by='CellID')
68
```
69
70
71
```{r}
72
gene.clust <- read.table("~/Dropbox/COVID19/Data/Updated/Cambridge_Sanger_Newcastle-Tcells-cluster_gene-Ave.tsv",
73
                         sep="\t", header=TRUE, stringsAsFactors=FALSE)
74
75
protein.clust <- read.table("~/Dropbox/COVID19/Data/Updated/Cambridge_Sanger_Newcastle-Tcells-cluster_protein-Ave.tsv",
76
                            sep="\t", header=TRUE, stringsAsFactors=FALSE)
77
```
78
79
80
```{r, warning=FALSE, message=FALSE}
81
plot.proteins <- unique(c("CD19", "CD86", "CD11b", "CD56", "CD8", "CD4", "CD3", "CD16", "CD14", "CD335", "CD158", "SIGLEC7",
82
                          "FCER2", "TCR", "CTLA4", "CXCR5", "ICOSL", "CD40LG",
83
                          "CX3CR1", "CD79b", "CD20", "CD1c", "CD25", "CD45RO", "HLA-DR", "CD21", "CD5", "CD64", "CD127",
84
                          "CD34", "CD40", "CD123", "CD27", "CD1c", "TCR_Va7.2", "TCR_Vg2", "CD235ab", "CD10", "CD2", "CD71",
85
                          "TCR_Vg9", "TCR_Va24-Ja18", "CCR7", "CD45RA", "CD62L", "CD138", "CD38", "CD274", "CD279", "TIGIT"))
86
87
plot.genes <- unique(c("CD3G", "CD3E", "FCGR3A", "CD8A", "CD8B", "CD4", "CD80", "CD86", "CXCL10", "HLA.DRA", "HLA.DRB1", "VEGFA",
88
                       "CCR7", "CCR2", "CCR5", "CXCR3", "CD36", "FOXP3", "MS4A1", "LYZ", "GZMB", "CD68", "GATA3", "PECAM1", "RORC", 
89
                       "FCGR2A", "FCGR2B", "FCGR3B", "TPO", "HBB", "NCR1", "NCR2", "F8", "HIF1A", "CTLA4", "CD28", "CXCR1",
90
                       "PDCD1", "TIM3", "LAG3", "TYMS", "PITX1", "TIGIT", "CXCL13", "BCL6", "TRAV1.2",
91
                       #"IGHD", "IGHM", "IGHA1", "IGHE", "IGHG1", "IGHG2", "IGHG3", "IGHG4",
92
                       "ITGA2B", "ITGAX", "TOX", "FYN", "NCAM1", "LRRC32", "SELL", "CXCR5", "CSF1", "IL7R", "CD40LG", "MYC", "MKI67",
93
                       "IFNG", "IL6", "THRB", "CD19", "CD38", "CD27", "F2", "F5", "THRB", "CD44", "IKZF2", "IKZF4"))
94
```
95
96
97
```{r, warning=FALSE, message=FALSE, fig.height=10.95, fig.width=9.95}
98
gene.plot.hm <- t(apply(t(gene.clust[, intersect(colnames(gene.clust), plot.genes)]), 1,
99
                        FUN=function(XP) XP/max(XP)))
100
gene.plot.hm[is.na(gene.plot.hm)] <- 0
101
colnames(gene.plot.hm) <- gene.clust$GeneID
102
103
pdf("~/Dropbox/COVID19/Updated_plots/Gene_heatmap-Tcells.pdf",
104
    height=10.95, width=9.95, useDingbats=FALSE)
105
Heatmap(gene.plot.hm, name="Z-score")
106
dev.off()
107
108
Heatmap(gene.plot.hm, name="Z-score")
109
```
110
111
I will take these clusters and compare them to the profiles of the original annotations.
112
113
114
115
```{r, warning=FALSE, message=FALSE}
116
table(tcell.merge$Centre, tcell.merge$Sub.Cluster)
117
```
118
119
```{r, warning=FALSE, message=FALSE}
120
table(tcell.merge$initial_clustering, tcell.merge$Sub.Cluster)
121
```
122
123
124
125
```{r, warning=FALSE, message=FALSE, fig.height=10.95, fig.width=9.95}
126
rownames(protein.clust) <- protein.clust$Protein
127
protein.plot.hm <- protein.clust[intersect(protein.clust$Protein, plot.proteins), 
128
                                 setdiff(colnames(protein.clust), "Protein")]
129
#protein.plot.hm[protein.plot.hm < 0] <- 0
130
131
# gene.plot.hm <- t(apply(t(gene.clust[, intersect(colnames(gene.clust), plot.genes)]), 1,
132
#                         FUN=function(XP) XP/max(XP)))
133
134
# protein.plot.hm <- t(apply(t(protein.plot.hm), 1, FUN=function(XP) (XP - mean(XP))/sd(XP)))
135
136
protein.plot.hm[protein.plot.hm <= 0] <- 0
137
protein.plot.hm <- t(apply(protein.plot.hm, 1, FUN=function(XP) (XP - mean(XP))/sd(XP)))
138
protein.plot.hm[is.na(protein.plot.hm)] <- 0
139
140
rownames(protein.plot.hm) <- intersect(protein.clust$Protein, plot.proteins)
141
142
# pdf("~/Dropbox/COVID19/Updated_plots/protein_heatmap-celltypes.pdf",
143
#     height=10.95, width=9.95, useDingbats=FALSE)
144
# Heatmap(protein.plot.hm, name="Z-score")
145
# dev.off()
146
147
Heatmap(protein.plot.hm, name="Z-score")
148
```
149
150
Compare the previous annotations.
151
152
```{r, warning=FALSE, message=FALSE}
153
old.annots <- read.csv("~/Dropbox/COVID19/Data/Annotations/annotation_200112.csv",
154
                       header=TRUE, stringsAsFactors=FALSE)
155
156
old.clusters <- read.table("~/Dropbox/COVID19/Data/Annotations/Cambridge_Sanger_Newcastle_Tcell_clusters.tsv",
157
                           sep="\t", header=TRUE, stringsAsFactors = TRUE)
158
colnames(old.clusters) <- c("CellID", "old.Sub.Cluster", "old.Louvain.Cluster")
159
160
tcell.annot.merge <- merge(tcell.merge, old.annots, by.x='CellID', by.y='X')
161
tcell.annot.merge <- merge(tcell.annot.merge, old.clusters, by='CellID')
162
```
163
164
```{r, warning=FALSE, message=FALSE}
165
table(tcell.annot.merge$full_clustering, tcell.annot.merge$Sub.Cluster)
166
```
167
168
169
```{r, warning=FALSE, message=FALSE}
170
table(tcell.annot.merge$old.Sub.Cluster, tcell.annot.merge$Sub.Cluster)
171
```
172
173
```{r, fig.height=4.95, fig.width=5.15}
174
tcell.clusts <- c("CD4.Prolif", "CD8.Prolif", "NK_prolif", "ILC1_3", "ILC2", "CD4.Naive", "CD8.Naive",
175
                  "CD4.Th17", "CD4.EM", "CD4.IL22", "CD4.Tfh", "CD4.CM", "CD4.Th1", "CD4.Th2", "Treg",
176
                  "gdT", "MAIT", "NKT", "CD8.EM", "CD8.TE", "NK_56hi", "NK_16hi")
177
comp.tab <- table(tcell.annot.merge$Sub.Cluster, tcell.annot.merge$full_clustering)
178
comp.tab <- apply(comp.tab[, tcell.clusts], 1, FUN=function(X) X/sum(X))
179
180
pheatmap(comp.tab, cluster_rows = FALSE)
181
```
182
183
This shows that the clustering only really identifies the major subsets.
184
185
```{r}
186
tcell.merge$Annotation <- "Unknown"
187
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c()] <- "PoorQC"
188
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(15)] <- "NK"
189
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c()] <- "Doublets"
190
191
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c()] <- "ILCs"
192
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(9)] <- "gdT"
193
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(8, 10, 19)] <- "CD8.TE"
194
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(17)] <- "CD8.EM"
195
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(1, 29)] <- "Tfh-like"
196
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(2)] <- "Treg"
197
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(21)] <- "MAIT"
198
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(4, 5, 16, 18, 20, 22, 26)] <- "CD4.Naive"
199
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(6, 24)] <- "CD8.Naive"
200
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(3, 7, 12, 13, 14, 23, 27, 28)] <- "CD4.CM"
201
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(11)] <- "CD4.IL22"
202
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c()] <- "Exhausted"
203
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(25)] <- "Proliferating"
204
205
table(tcell.merge$Annotation, tcell.merge$Sub.Cluster)
206
```
207
208
209
```{r, warning=FALSE, message=FALSE}
210
table(tcell.merge$Annotation, tcell.merge$Site)
211
```
212
213
Plot the markers genes for these groups: CD4, CD8, TCR V$\alpha$7.2, TCR V$\gamma$9, TCR V$\gamma$2, CD45RA, CCR7, CD45RO, CD27, CD28,
214
CTLA4, HLA-DR, FOXP3, CXCR5, GZMB, IFNG, TOX, PDCD1.
215
216
I think these annotations look OK.
217
218
```{r, warning=FALSE, message=FALSE}
219
# read in the gene expression
220
goi.df <- read.table("~/Dropbox/COVID19/Data/Updated/Cambridge_Sanger_Newcastle-Tcells_GeneGOI.tsv",
221
                     sep="\t", header=TRUE, stringsAsFactors=FALSE)
222
223
tcell.goi.merge <- merge(tcell.merge, goi.df, by='CellID')
224
```
225
226
227
```{r, warning=FALSE, message=FALSE}
228
# re-classify CD8.TE with high NCAM1 * FCGR3A expression
229
tcell.goi.merge$Annotation[tcell.goi.merge$NCAM1 > 0 & tcell.goi.merge$Annotation %in% c("CD8.TE", "NK") &
230
                               tcell.goi.merge$TRAV1.2 > 0] <- "NKT"
231
tcell.goi.merge$Annotation[tcell.goi.merge$FCGR3A > 0 & tcell.goi.merge$Annotation %in% c("CD8.TE", "NK") &
232
                               tcell.goi.merge$TRAV1.2 > 0] <- "NKT"
233
tcell.goi.merge$Annotation[tcell.goi.merge$FCGR3A > 0 & tcell.goi.merge$Annotation %in% c("CD8.TE") &
234
                               tcell.goi.merge$NCAM1 > 0] <- "NKT"
235
```
236
237
Remove doublets based on expression of markers for different cell lineages.
238
239
```{r}
240
# re-classify CD8.TE with high NCR1 * FCGR3A expression
241
bcell.markers <- tcell.goi.merge$IGHM > 0 | tcell.goi.merge$IGHG3 > 0 | tcell.goi.merge$IGHD > 0 | tcell.goi.merge$IGHG4 > 0 |
242
    tcell.goi.merge$IGHG2 > 0 | tcell.goi.merge$IGHG1 > 0 | tcell.goi.merge$IGHA1 > 0
243
244
tcell.goi.merge$Annotation[bcell.markers] <- "Doublets"
245
246
table(tcell.goi.merge$Annotation, tcell.goi.merge$Site)
247
```
248
249
250
```{r, fig.height=9.75, fig.width=10.95}
251
n.cells <- length(unique(tcell.goi.merge$Sub.Cluster))
252
cell.cols <- colorRampPalette(pal_futurama()(12))(n.cells)
253
names(cell.cols) <- unique(tcell.goi.merge$Sub.Cluster)
254
255
tcell.goi.merge$Sub.Cluster <- factor(tcell.goi.merge$Sub.Cluster,
256
                                      levels=c(1:29))
257
258
ggplot(tcell.goi.merge,
259
       aes(x=UMAP1, y=UMAP2)) +
260
    geom_scattermore(data=tcell.goi.merge[, c("UMAP1", "UMAP2")],
261
                    colour='grey80', alpha=0.5, size=0.1) +
262
    geom_scattermore(aes(colour=Sub.Cluster)) +
263
    theme_cowplot() +
264
    facet_wrap(~Annotation) +
265
    scale_colour_manual(values=cell.cols) +
266
    guides(colour=guide_legend(override.aes = list(size=3)))
267
```
268
269
270
271
```{r, warning=FALSE, message=FALSE}
272
clust.model.matrix <- model.matrix(~ 0 + Annotation, data=tcell.goi.merge)
273
rownames(clust.model.matrix) <- tcell.goi.merge$CellID
274
275
gene.goi.by.cluster <- t(clust.model.matrix) %*% as.matrix(tcell.goi.merge[, setdiff(colnames(goi.df), colnames(tcell.merge))])
276
gene.ave.by.label <- as.data.frame(apply(gene.goi.by.cluster, 2, function(X) X/colSums(clust.model.matrix)))
277
rownames(gene.ave.by.label) <- gsub(rownames(gene.ave.by.label), pattern="Annotation", replacement="")
278
279
gene.goi.by.cluster.bin <- t(clust.model.matrix) %*% as.matrix(tcell.goi.merge[, setdiff(colnames(goi.df), colnames(tcell.merge))] > 0)
280
gene.bin.by.label <- as.data.frame(apply(gene.goi.by.cluster.bin, 2, function(X) X/sum(X)))
281
gene.bin.by.label[is.na(gene.bin.by.label)] <- 0
282
rownames(gene.bin.by.label) <- gsub(rownames(gene.bin.by.label), pattern="Annotation", replacement="")
283
```
284
285
286
```{r, message=FALSE, warning=FALSE}
287
gene.ave.label.df <- as.data.frame(apply(gene.ave.by.label, 2, FUN=function(XP) XP/max(XP)))
288
gene.ave.label.df$Annotation <- rownames(gene.ave.label.df)
289
gene.ave.label.melt <- melt(gene.ave.label.df, id.vars='Annotation')
290
colnames(gene.ave.label.melt) <- c("Annotation", "Gene", "AveExprs")
291
292
gene.bin.label.df <- gene.bin.by.label
293
gene.bin.label.df$Annotation <- rownames(gene.bin.label.df)
294
gene.bin.label.melt <- melt(gene.bin.label.df, id.vars='Annotation')
295
colnames(gene.bin.label.melt) <- c("Annotation", "Gene", "PropExprs")
296
297
dot.plot.df <- merge(gene.ave.label.melt, gene.bin.label.melt, by=c('Annotation', 'Gene'))
298
dot.plot.df$Gene <- as.character(dot.plot.df$Gene)
299
```
300
301
302
```{r, message=FALSE, warning=FALSE, fig.height=5.15, fig.width=8.25}
303
dot.plot.genes <- c("CD3E", "CD8A", "CD8B", "CD4", "CCR7", "CD27", "CD28", "TOX", "CTLA4", "IFNG", "PDCD1", "GZMB", "IFNG",
304
                    "TRAV1.2", "TRDV2", "TRGV9", "CD40LG", "RORC", "CD38", "NCAM1",
305
                    "CXCR5", "HLA.DRB1", "LAG3", "CD44", "FOXP3", "MKI67", "NCR1", "GARP", "GZMH", "CXCR4", "FCGR3A", "TCF7")
306
307
ggplot(dot.plot.df[dot.plot.df$Gene %in% dot.plot.genes, ], 
308
       aes(x=Gene, y=Annotation, colour=AveExprs, size=PropExprs*100)) +
309
    geom_point() +
310
    theme_cowplot() +
311
    scale_colour_gradient(low='grey70', high='blue') +
312
    theme(axis.text.x=element_text(angle=90, vjust=0.5, hjust=1, colour='black'),
313
          axis.text.y=element_text(colour='black'),
314
          panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
315
          panel.background = element_blank(), axis.line = element_line(colour = "black")) +
316
    guides(colour=guide_colorbar(title='Mean Expression'),
317
           size=guide_legend(title='% Express')) +
318
    # ggsave("~/Dropbox/COVID19/Updated_plots/Tcell_dotplot-genes.pdf",
319
    #        height=5.15, width=8.25, useDingbats=FALSE) +
320
    NULL
321
```
322
323
Do the same for the proteins.
324
325
```{r, warning=FALSE, message=FALSE}
326
# read in the gene expression
327
prot.df <- read.table("~/Dropbox/COVID19/Data/Updated/Cambridge_Sanger_Newcastle-Tcells_ProteinGOI.tsv",
328
                     sep="\t", header=TRUE, stringsAsFactors=FALSE)
329
colnames(prot.df) <- c(paste0("Prot.", colnames(prot.df)[!colnames(prot.df) %in% c("CellID")]), "CellID")
330
331
tcell.prot.merge <- merge(tcell.goi.merge, prot.df, by=c('CellID'))
332
```
333
334
335
```{r, warning=FALSE, message=FALSE}
336
clust.model.matrix <- model.matrix(~ 0 + Annotation, data=tcell.prot.merge)
337
rownames(clust.model.matrix) <- tcell.prot.merge$CellID
338
339
goi.by.cluster <- t(clust.model.matrix) %*% as.matrix(tcell.prot.merge[, gsub(setdiff(colnames(prot.df), colnames(tcell.merge)),
340
                                                                              pattern="\\.x", replacement="")])
341
ave.by.label <- as.data.frame(apply(goi.by.cluster, 2, function(X) X/colSums(clust.model.matrix)))
342
rownames(ave.by.label) <- gsub(rownames(ave.by.label), pattern="Annotation", replacement="")
343
colnames(ave.by.label) <- gsub(colnames(ave.by.label), pattern="Prot\\.", replacement="")
344
345
goi.by.cluster.bin <- t(clust.model.matrix) %*% as.matrix(tcell.prot.merge[, gsub(setdiff(colnames(prot.df), colnames(tcell.merge)),
346
                                                                              pattern="\\.x", replacement="")] > 0)
347
bin.by.label <- as.data.frame(apply(goi.by.cluster.bin, 2, function(X) X/sum(X)))
348
bin.by.label[is.na(bin.by.label)] <- 0
349
rownames(bin.by.label) <- gsub(rownames(bin.by.label), pattern="Annotation", replacement="")
350
colnames(bin.by.label) <- gsub(colnames(bin.by.label), pattern="Prot\\.", replacement="")
351
```
352
353
354
```{r, message=FALSE, warning=FALSE}
355
ave.label.df <- as.data.frame(apply(ave.by.label, 2, FUN=function(XP) (XP-min(XP))/max(XP-min(XP))))
356
ave.label.df$Annotation <- rownames(ave.label.df)
357
ave.label.melt <- melt(ave.label.df, id.vars='Annotation')
358
colnames(ave.label.melt) <- c("Annotation", "Protein", "AveExprs")
359
360
bin.label.df <- bin.by.label
361
bin.label.df$Annotation <- rownames(bin.label.df)
362
bin.label.melt <- melt(bin.label.df, id.vars='Annotation')
363
colnames(bin.label.melt) <- c("Annotation", "Protein", "PropExprs")
364
365
protein.dot.plot.df <- merge(ave.label.melt, bin.label.melt, by=c('Annotation', 'Protein'))
366
protein.dot.plot.df$Protein <- as.character(protein.dot.plot.df$Protein)
367
```
368
369
370
```{r, message=FALSE, warning=FALSE, fig.height=5.15, fig.width=8.25}
371
dot.plot.prots <- c("CD3", "CD8", "CD4", "CCR7", "CD45RA", "CD45RO", "CD28", "CD27", "CTLA4", "CD56", "CD16", "ICOSL",
372
                    "CD274", "CXCR3", "CD40LG", "CD38", "ICOS", "PD1", "HAVCR2",
373
                    "CXCR5", "TCR_Va7.2", "TCR_Vg2", "TCR_Vg9", "TCR_Va24-Ja18", "CD62L", "CD44", "TCR_Va24.Ja18")
374
375
ggplot(protein.dot.plot.df[protein.dot.plot.df$Protein %in% dot.plot.prots, ],
376
       aes(x=Protein, y=Annotation, colour=AveExprs, size=PropExprs*100)) +
377
    geom_point() +
378
    theme_cowplot() +
379
    scale_colour_gradient(low='grey70', high=darken('red')) +
380
    theme(axis.text.x=element_text(angle=90, vjust=0.5, hjust=1, colour='black'),
381
          axis.text.y=element_text(colour='black'),
382
          panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
383
          panel.background = element_blank(), axis.line = element_line(colour = "black")) +
384
    guides(colour=guide_colorbar(title='Mean Expression'),
385
           size=guide_legend(title='% Express')) +
386
    # ggsave("~/Dropbox/COVID19/Updated_plots/Tcell_dotplot-protein.pdf",
387
    #        height=5.15, width=8.25, useDingbats=FALSE) +
388
    NULL
389
```
390
391
392
Write out the exhausted, proliferating and Tfh-like clusters for sub-clustering.
393
394
```{r}
395
write.table(tcell.prot.merge$CellID[tcell.prot.merge$Annotation %in% c("Proliferating")],
396
            file="~/Dropbox/COVID19/Data/Updated/Proliferating_barcodes.tsv",
397
            quote=FALSE, col.names=FALSE, row.names=FALSE, sep="\t")
398
399
write.table(tcell.prot.merge$CellID[tcell.prot.merge$Annotation %in% c("Tfh-like")],
400
            file="~/Dropbox/COVID19/Data/Updated/Tfh_barcodes.tsv",
401
            quote=FALSE, col.names=FALSE, row.names=FALSE, sep="\t")
402
403
write.table(tcell.prot.merge$CellID[tcell.prot.merge$Annotation %in% c("Exhausted")],
404
            file="~/Dropbox/COVID19/Data/Updated/Exhausted_barcodes.tsv",
405
            quote=FALSE, col.names=FALSE, row.names=FALSE, sep="\t")
406
407
write.table(tcell.prot.merge$CellID[tcell.prot.merge$Annotation %in% c("Doublets")],
408
            file="~/Dropbox/COVID19/Data/Updated/Doublet_barcodes.tsv",
409
            quote=FALSE, col.names=FALSE, row.names=FALSE, sep="\t")
410
```
411
412
Fill in the sub-annotations.
413
414
```{r, warning=FALSE, message=FALSE}
415
rownames(tcell.prot.merge) <- tcell.prot.merge$CellID
416
tcell.prot.merge$Sub.Annotation <- tcell.prot.merge$Annotation
417
418
# first, enforce the original annotations on all previously annotated cells
419
old.annots <- read.csv("~/Dropbox/COVID19/Data/Annotations/annotation_200112.csv",
420
                       header=TRUE, stringsAsFactors=FALSE)
421
rownames(old.annots) <- old.annots$X
422
annot.merge <- merge(tcell.prot.merge, old.annots, by.x='CellID', by.y='X', all.x=TRUE)
423
annot.merge$Sub.Annotation[!is.na(annot.merge$full_clustering)] <- annot.merge$full_clustering[!is.na(annot.merge$full_clustering)]
424
425
annot.merge <- annot.merge[!annot.merge$full_clustering %in% c("CD14_mono", "B_naive", "DC_prolif", "DC3", "HSC", "Mono_prolif",
426
                                                               "NK_16hi", "NK_56hi", "NK_prolif", "pDC", "Plasmablast",
427
                                                               "Platelets", "RBC"), ]
428
429
# re-classify CD4s within CD8 clusters
430
annot.merge$Sub.Annotation[(annot.merge$CD8A > 0 | annot.merge$CD8B > 0) &
431
                                    annot.merge$CD4 <= 0 &
432
                                    annot.merge$CXCR5 <= 0 &
433
                                    annot.merge$Sub.Annotation %in% c("Tfh-like")] <- "CD8.TE"
434
435
# Tfh classification
436
annot.merge$Sub.Annotation[(annot.merge$CD8A <= 0 & annot.merge$CD8B <= 0) &
437
                                    annot.merge$CD4 > 0 &
438
                                    annot.merge$CXCR5 > 0 &
439
                                    annot.merge$Sub.Annotation %in% c("CD8.TE")] <- "CD4.Tfh"
440
441
# fix the Treg annotations
442
annot.merge$Sub.Annotation[annot.merge$Sub.Annotation %in% c("CD4.CM") &
443
                                    (annot.merge$CXCR5 > 0) & (annot.merge$CD40LG > 0)] <- "CD4.Tfh"
444
445
# final doublet classification
446
annot.merge$Sub.Annotation[annot.merge$IGHA1 > 0 | annot.merge$IGHD > 0 | annot.merge$IGHE > 0 | annot.merge$IGHG1 > 0 |
447
                                    annot.merge$IGHG2 > 0 | annot.merge$IGHG3 > 0 | annot.merge$IGHG4 > 0] <- "Doublets"
448
449
# final NKT classification
450
annot.merge$Sub.Annotation[annot.merge$FCGR3A > 0 & annot.merge$NCAM1 > 0 &  
451
                                    annot.merge$Sub.Annotation %in% c("CD8.EM", "CD8.TE", "CD8.Naive")] <- "NKT"
452
annot.merge$Sub.Annotation[annot.merge$FCGR3A > 0 & annot.merge$NCR1 > 0 & 
453
                                    annot.merge$Sub.Annotation %in% c("CD8.EM", "CD8.TE", "CD8.Naive")] <- "NKT"
454
455
# fix the Treg annotations
456
annot.merge$Sub.Annotation[annot.merge$Sub.Annotation %in% c("CD4.CM", "CD4.Naive", "CD4.Tfh", "Treg") &
457
                                    (annot.merge$IL22 > 0)] <- "CD4.IL22"
458
459
annot.merge$Sub.Annotation[annot.merge$Sub.Annotation %in% c("Treg") &
460
                                    !(annot.merge$FOXP3 > 0 & annot.merge$IKZF2 > 0)] <- "CD4.IL22"
461
462
# Add in a Th17 phenotype
463
annot.merge$Sub.Annotation[annot.merge$Sub.Annotation %in% c("Treg", "CD4.CM", "CD4.Tfh", "CD4.EM", "CD4.Naive", "CD4.IL22") &
464
                                   (annot.merge$CD8A <= 0 & annot.merge$CD8B <= 0) &
465
                                    annot.merge$RORC > 0 &
466
                                    (annot.merge$IL17A > 0 | 
467
                                         annot.merge$IL17F > 0 | annot.merge$IL21 > 0)] <- "CD4.Th17"
468
469
# Add in a Th1 phenotype
470
annot.merge$Sub.Annotation[annot.merge$Sub.Annotation %in% c("Treg", "CD4.CM", "CD4.EM", "CD4.Naive", "CD4.IL22") &
471
                                    (annot.merge$CD8A <= 0 & annot.merge$CD8B <= 0) &
472
                                    (annot.merge$IL2 > 0 | annot.merge$TNF > 0 | 
473
                                         annot.merge$LTA) &
474
                                    annot.merge$TBX21 > 0] <- "CD4.Th1"
475
476
# Add in a Th2 phenotype
477
annot.merge$Sub.Annotation[annot.merge$Sub.Annotation %in% c("Treg", "CD4.CM", "CD4.EM", "CD4.Naive", "CD4.IL22") &
478
                                    (annot.merge$CD8A <= 0 & annot.merge$CD8B <= 0) &
479
                                    annot.merge$GATA3 > 0 &
480
                                    (annot.merge$IL4 > 0 | annot.merge$IL5 > 0)] <- "CD4.Th2"
481
482
# merge the Tfh annotation
483
annot.merge$Sub.Annotation[annot.merge$Sub.Annotation %in% c("PD1-.Tfh-like", "PD1+.Tfh-like", "Tfh-like")] <- "CD4.Tfh"
484
485
# Add in the proliferating/activated/exhausted annotation
486
active.tcells <- read.csv("~/Dropbox/COVID19/Data/prolif_CD4CD8.csv",
487
                          header=FALSE, stringsAsFactors=FALSE)
488
annot.merge$Sub.Annotation[annot.merge$CellID %in% active.tcells$V1[active.tcells$V2 %in% c("prolif_CD4")]] <- "CD4.Prolif"
489
annot.merge$Sub.Annotation[annot.merge$CellID %in% active.tcells$V1[active.tcells$V2 %in% c("prolif_CD8")]] <- "CD8.Prolif"
490
491
# CD4 prolif
492
annot.merge$Sub.Annotation[annot.merge$Sub.Annotation %in% c("Proliferating") &
493
                                    (annot.merge$CD8A <= 0 & annot.merge$CD8B <= 0)] <- "CD4.Prolif"
494
495
annot.merge$Sub.Annotation[annot.merge$Sub.Annotation %in% c("CD4.Prolif") &
496
                                    !(annot.merge$TYMS > 0 | annot.merge$MKI67 > 0)] <- "CD4.CM"
497
498
499
# CD8A
500
annot.merge$Sub.Annotation[annot.merge$Sub.Annotation %in% c("Proliferating") &
501
                                    (annot.merge$CD8A > 0 | annot.merge$CD8B > 0)] <- "CD8.Prolif"
502
503
annot.merge$Sub.Annotation[annot.merge$Sub.Annotation %in% c("CD8.Prolif") &
504
                                    !(annot.merge$TYMS > 0 | annot.merge$MKI67 > 0)] <- "CD8.TE"
505
506
# table(annot.merge$Sub.Annotation, annot.merge$full_clustering)
507
table(annot.merge$Sub.Annotation, annot.merge$Site)
508
```
509
510
Compare to MNN-label transfers.
511
512
```{r, warning=FALSE, message=FALSE}
513
# only compare the MNN and new labels.
514
mnn.labels <- read.table("~/Dropbox/COVID19/Data/Updated/Tcell_MNN_labels.tsv",
515
                         sep="\t", header=TRUE, stringsAsFactors=FALSE)
516
mnn.compare <- merge(annot.merge, mnn.labels, by='CellID')
517
518
table(mnn.compare$Sub.Annotation.x, mnn.compare$Sub.Annotation.y)
519
```
520
521
522
523
```{r}
524
# write out the annotations, without the doublets, NK cells and ILCs
525
write.table(annot.merge[!annot.merge$Sub.Annotation %in% c("NK", "Prolif.NK", "ILCs", "B_naive", "CD14_mono",
526
                                                                   "DC_prolif", "DC3", "HSC", "ILC1_3", "ILC2", "Mono_prolif",
527
                                                                   "NK_16hi", "NK_56hi", "NK_prolif", "pDC", "Plasmablast",
528
                                                                   "Platelets", "RBC",
529
                                                                   "Doublets:Bcell", "Doublets:Platelet", "Doublets"),
530
                            c("CellID", "Sub.Annotation")],
531
            file="~/Dropbox/COVID19/Data/Updated/Tcell_annotations_ext.tsv",
532
            sep="\t", quote=FALSE, row.names=FALSE)
533
534
write.table(annot.merge[!annot.merge$Sub.Annotation %in% c("NK", "Prolif.NK", "ILCs", "B_naive", "CD14_mono",
535
                                                                   "DC_prolif", "DC3", "HSC", "ILC1_3", "ILC2", "Mono_prolif",
536
                                                                   "NK_16hi", "NK_56hi", "NK_prolif", "pDC", "Plasmablast",
537
                                                                   "Platelets", "RBC",
538
                                                                   "Doublets:Bcell", "Doublets:Platelet", "Doublets"),
539
                             c("CellID")],
540
            file="~/Dropbox/COVID19/Data/Updated/Tcell_barcodes.tsv",
541
            sep="\t", quote=FALSE, row.names=FALSE, col.names=FALSE)
542
543
```
544
545
Plot the proportions of T cells, normalised properly.
546
547
```{r}
548
cell.freq.tab <- table(annot.merge$sample_id,
549
                       annot.merge$Sub.Annotation)
550
551
cell.freq.df <- as.data.frame(sapply(rownames(cell.freq.tab), FUN=function(X) cell.freq.tab[X, ]/n.cell.vecc[X]))
552
cell.freq.df$CellType <- rownames(cell.freq.df)
553
cell.freq.df <- melt(cell.freq.df, id.vars=c("CellType"))
554
cell.freq.df$CellType <- as.character(cell.freq.df$CellType)
555
colnames(cell.freq.df) <- c("CellType", "sample_id", "Freq")
556
# cell.freq.df$Sex <- "Female"
557
# cell.freq.df$Sex[cell.freq.df$sample_id %in% unique(annot.merge$sample_id[annot.merge$Sex %in% c("M", "Male")])] <- "Male"
558
cell.freq.df$sample_id <- as.character(cell.freq.df$sample_id)
559
560
covid.meta <- read.csv("~/Dropbox/COVID19/Data/basic_COVID19_PBMC_metadata_160212.csv",
561
                        header=TRUE, stringsAsFactors=FALSE)
562
old.meta <- read.csv("~/Dropbox/COVID19/Data/Metadata FINAL 10122020.csv",
563
                     header=TRUE, stringsAsFactors=FALSE)
564
old.meta$sample_id[old.meta$sample_id %in% c("BGCV13_CV0201")] <- "BGCV06_CV0201"
565
old.meta$sample_id[old.meta$sample_id %in% c("BGCV06_CV0326")] <- "BGCV13_CV0326"
566
567
covid.meta$AgeRange <- covid.meta$Age
568
covid.meta$Status_on_day_collection_summary[covid.meta$Status_on_day_collection_summary %in% c("LPS_10hours")] <- "LPS"
569
covid.meta$Status_on_day_collection_summary[covid.meta$Status_on_day_collection_summary %in% c("LPS_90mins")] <- "LPS"
570
571
covid.meta <- merge(covid.meta[, !colnames(covid.meta) %in% c("Age")], old.meta[, c("sample_id", "patient_id", "Age")],
572
                    by='sample_id')
573
574
cell.freq.merge <- merge(cell.freq.df, covid.meta, by=c('sample_id'))
575
cell.freq.merge <- cell.freq.merge[!cell.freq.merge$Status_on_day_collection_summary %in% c("Non_covid"), ]
576
cell.freq.merge <- cell.freq.merge[!cell.freq.merge$patient_id %in% c("CV0198"), ]
577
# remove doublets, etc
578
cell.freq.merge <- cell.freq.merge[!cell.freq.merge$CellType %in% c("Doublets:Bcell", "Doublets:Platelet", "Doublets", "NK", "ILCs"), ]
579
```
580
581
582
```{r, warning=FALSE, message=FALSE, fig.height=2.95, fig.width=9.15}
583
group.cols <- c("#2ca02c", "#1f77b4", "#9467bd", "#fed976", "#fd8d3c", "#e31a1c", "#800026", "#252525")
584
names(group.cols) <- c("Healthy", "LPS", "Non_covid", "Asymptomatic", "Mild", "Moderate", "Severe", "Critical")
585
586
cell.freq.merge$Status_on_day_collection_summary <- factor(cell.freq.merge$Status_on_day_collection_summary,
587
                                            levels=c("Healthy", "Asymptomatic", "Mild", 
588
                                                     "Moderate", "Severe", "Critical", "LPS", "Non_covid"))
589
```
590
591
592
```{r}
593
write.table(cell.freq.merge,
594
            file="~/Dropbox/COVID19/Data/Updated/Tcell_proportions.tsv",
595
            sep="\t", quote=FALSE, row.names=FALSE)
596
```
597
598
How do these compare to the previous frequencies?
599
600
```{r, fig.height=9.95, fig.width=10.95}
601
old.freqs <- read.table("~/Dropbox/COVID19/Data/Tcell_proportions.tsv",
602
                        sep="\t", header=TRUE, stringsAsFactors = FALSE)
603
old.freqs$sample_id[old.freqs$sample_id %in% c("BGCV13_CV0201")] <- "BGCV06_CV0201"
604
old.freqs$sample_id[old.freqs$sample_id %in% c("BGCV06_CV0326")] <- "BGCV13_CV0326"
605
606
old.freqs <- old.freqs[, c("sample_id", "CellType", "Freq")]
607
colnames(old.freqs) <- c("sample_id", "CellType", "old.Freq")
608
609
comp.freqs <- merge(cell.freq.merge, old.freqs, by=c('sample_id', 'CellType'))
610
comp.freqs$Switch <- comp.freqs$sample_id %in% c("BGCV06_CV0201", "BGCV13_CV0326")
611
612
ggplot(comp.freqs, aes(x=Freq, y=old.Freq, colour=Switch)) +
613
    geom_point() +
614
    theme_cowplot() +
615
    scale_colour_manual(values=c("black", "red")) +
616
    facet_wrap(~CellType, scales="free") +
617
    NULL
618
```
619
620
621
```{r, warning=FALSE, message=FALSE, fig.height=5.95, fig.width=10.15}
622
ggplot(cell.freq.merge[cell.freq.merge$Collection_Day %in% c("D0") &
623
                           !cell.freq.merge$CellType %in% c("NK", "Prolif.NK", "ILCs", "B_naive", "CD14_mono",
624
                                                            "DC_prolif", "DC3", "HSC", "ILC1_3", "ILC2", "Mono_prolif",
625
                                                            "NK_16hi", "NK_56hi", "NK_prolif", "pDC", "Plasmablast",
626
                                                            "Platelets", "RBC",
627
                                                            "Doublets:Bcell", "Doublets:Platelet", "Doublets"), ],
628
       aes(x=Status_on_day_collection_summary, y=Freq, fill=Status_on_day_collection_summary)) +
629
    geom_boxplot() +
630
    theme_cowplot() +
631
    facet_wrap(~CellType, scales="free_y", ncol=4) +
632
    scale_fill_manual(values=group.cols) +
633
    expand_limits(y=c(0)) +
634
    theme(axis.text.x=element_blank(),
635
          strip.background=element_rect(fill='white', colour='white'),
636
          strip.text=element_text(size=14)) +
637
    labs(x="Severity Group", y="Proportion") +
638
    guides(fill=guide_legend(title="Severity")) +
639
    ggsave("~/Dropbox/COVID19/Updated_plots/Tcells_proportions-boxplot.pdf",
640
           height=5.95, width=10.15, useDingbats=FALSE) +
641
    NULL
642
```
643
644
For the figures these need to be a filled barchart (?!). Focus in on the Tfh-like cells.
645
646
```{r, warning=FALSE, message=FALSE, fig.height=2.15, fig.width=3.55}
647
ggplot(cell.freq.merge[cell.freq.merge$Collection_Day %in% c("D0") &
648
                           cell.freq.merge$CellType %in% c("CD4.Tfh"), ],
649
       aes(x=Status_on_day_collection_summary, y=Freq, fill=Status_on_day_collection_summary)) +
650
    geom_boxplot() +
651
    theme_cowplot() +
652
    facet_wrap(~CellType, scales="free_y") +
653
    scale_fill_manual(values=group.cols) +
654
    expand_limits(y=c(0)) +
655
    theme(axis.text.x=element_blank(),
656
          strip.background=element_rect(fill='white', colour='white'),
657
          strip.text=element_text(size=14)) +
658
    labs(x="Severity Group", y="Proportion") +
659
    guides(fill=guide_legend(title="Severity")) +
660
    ggsave("~/Dropbox/COVID19/Updated_plots/Tcells_proportions_Tfh-boxplot.pdf",
661
           height=2.15, width=3.55, useDingbats=FALSE) +
662
    NULL
663
```
664
665
Run a LM comparing categories.
666
667
```{r, warning=FALSE, message=FALSE, fig.height=2.15, fig.width=3.55}
668
ggplot(cell.freq.merge[cell.freq.merge$Collection_Day %in% c("D0") &
669
                           cell.freq.merge$CellType %in% c("CD4.Tfh"), ],
670
       aes(x=Status_on_day_collection_summary, y=Freq, fill=Sex)) +
671
    geom_boxplot() +
672
    theme_cowplot() +
673
    facet_wrap(~CellType, scales="free_y") +
674
    scale_fill_colorblind() +
675
    expand_limits(y=c(0)) +
676
    theme(axis.text.x=element_blank(),
677
          strip.background=element_rect(fill='white', colour='white'),
678
          strip.text=element_text(size=14)) +
679
    labs(x="Severity Group", y="Proportion") +
680
    guides(fill=guide_legend(title="Severity")) +
681
    ggsave("~/Dropbox/COVID19/Updated_plots/Tcells_proportions_Tfh-byGender_boxplot.pdf",
682
           height=2.15, width=3.55, useDingbats=FALSE) +
683
    NULL
684
```
685
686
Compare healthy vs. asymptomatic, and asymptomatic > critical.
687
688
```{r, warning=FALSE, message=FALSE}
689
healthy.tfh <- cell.freq.merge$Freq[cell.freq.merge$CellType %in% c("CD4.Tfh") &
690
                                        cell.freq.merge$Status_on_day_collection_summary %in% c("Healthy") &
691
                                        cell.freq.merge$Collection_Day %in% c("D0")]
692
asymp.tfh <- cell.freq.merge$Freq[cell.freq.merge$CellType %in% c("CD4.Tfh") &
693
                                        cell.freq.merge$Status_on_day_collection_summary %in% c("Asymptomatic") &
694
                                        cell.freq.merge$Collection_Day %in% c("D0")]
695
696
tfh.utest <- wilcox.test(x=healthy.tfh, y=asymp.tfh, alternative="two.sided")
697
tfh.utest
698
```
699
700
I should model the counts, normalized by the total numbers of captured cells.
701
702
```{r}
703
tfh.lm.list <- list()
704
covid.tfh.merge <- cell.freq.merge[!cell.freq.merge$Status_on_day_collection_summary %in% c("Healthy"), ]
705
covid.tfh.merge$OrderedStatus <- ordered(covid.tfh.merge$Status_on_day_collection_summary,
706
                                         levels=c("Asymptomatic", "Mild", "Moderate", "Severe", "Critical"))
707
covid.tfh.merge$Days_from_onset <- as.numeric(covid.tfh.merge$Days_from_onset)
708
709
c.types <- c("CD4.Tfh")
710
711
for(i in seq_along(c.types)){
712
    i.c <- c.types[i]
713
    
714
    i.lm.df <- data.frame("sample_id"=rownames(cell.freq.tab), "Count"=cell.freq.tab[, i.c])
715
    i.lm.df <- merge(i.lm.df, covid.tfh.merge[covid.tfh.merge$CellType %in% i.c, ], by='sample_id')
716
    
717
    # i.tfh.lm <- glm(Count ~ Sex + Age + Site + OrderedStatus,
718
    #                data=i.lm.df, family=poisson(link="log"))
719
    i.tfh.lm <- glm.nb(Count ~ Sex + Age + Site + OrderedStatus, data=i.lm.df, init.theta=1)
720
    
721
    i.lm.res <- summary(i.tfh.lm)
722
    tfh.lm.list[[i.c]] <- data.frame("CellType"=rep(i.c, nrow(i.lm.res$coefficients)),
723
                                     "Beta"=i.lm.res$coefficients[, 1],
724
                                     "SE"=i.lm.res$coefficients[, 2],
725
                                     "STAT"=i.lm.res$coefficients[, 3],
726
                                     "Pvalue"=i.lm.res$coefficients[, 4],
727
                                     "Predictor"=rownames(i.lm.res$coefficients))
728
}
729
730
tfh.lm.df <- do.call(rbind.data.frame, tfh.lm.list)
731
tfh.lm.df$Predictor <- gsub(tfh.lm.df$Predictor, pattern="OrderedStatus", replacement="Severity")
732
tfh.lm.df$Predictor <- gsub(tfh.lm.df$Predictor, pattern="Male", replacement="")
733
tfh.lm.df$Predictor <- gsub(tfh.lm.df$Predictor, pattern="Site", replacement="")
734
tfh.lm.df <- tfh.lm.df[!tfh.lm.df$Predictor %in% c("(Intercept)"), ]
735
tfh.lm.df$P.Adjust <- p.adjust(tfh.lm.df$Pvalue)
736
```
737
738
739
```{r, warning=FALSE, message=FALSE, fig.height=3.95, fig.width=4.95}
740
ggplot(tfh.lm.df, aes(x=Predictor, y=Beta, fill=Pvalue < 0.05)) +
741
    geom_hline(yintercept=0, lty=2, col='grey70') +
742
    geom_errorbar(aes(ymin=Beta-SE, ymax=Beta+SE)) +
743
    geom_point(shape=22, size=3) +
744
    theme_cowplot() +
745
     theme(strip.background=element_rect(fill='white', colour='white'),
746
          strip.text=element_text(size=14)) +
747
    coord_flip() +
748
    scale_fill_manual(values=c("blue", "orange")) +
749
    facet_wrap(~CellType, scales="free_x")  + 
750
    ggsave("~/Dropbox/COVID19/Updated_plots/Tcell_Tfh_props-LM.pdf",
751
           height=3.95, width=4.95, useDingbats=FALSE)
752
```
753
754
755
```{r, warning=FALSE, message=FALSE}
756
cellfreq.df <- cell.freq.merge %>% group_by(Status_on_day_collection_summary, CellType) %>% summarise(Mean = mean(Freq))
757
cellfreq.df$Status_on_day_collection_summary <- factor(cellfreq.df$Status_on_day_collection_summary,
758
                                        levels=c("Healthy", "Asymptomatic", "Mild", 
759
                                                 "Moderate", "Severe", "Critical", "LPS"))
760
```
761
762
Create a new colour palette.
763
764
```{r}
765
cell.order <- c("CD4.Naive", "CD4.CM", "CD4.EM", "CD4.IL22", "CD4.Prolif", "CD4.Th1", "CD4.Th2", "CD4.Th17", "CD4.Tfh",
766
                "Treg", "CD8.Naive", "CD8.Activated", "CD8.Prolif", "CD8.CM", "CD8.TE", "CD8.EM", "gdT", "MAIT", "NKT")
767
768
all.qual.cols <- brewer.pal.info[brewer.pal.info$category %in% c("qual") & brewer.pal.info$colorblind, ]
769
col_vector = unlist(mapply(brewer.pal, all.qual.cols$maxcolors, rownames(all.qual.cols)))
770
771
# cell.cols <- col_vector[c(1:7, 9:19)]
772
cell.cols <- col_vector[c(1:17, 19, 20)]
773
names(cell.cols) <- cell.order
774
```
775
776
777
```{r}
778
pie(rep(1, 18), col=cell.cols)
779
```
780
781
782
```{r, warning=FALSE, message=FALSE, fig.height=4.25, fig.width=6.95}
783
# n.cells <- length(cell.order)
784
# cell.cols <- colorRampPalette(pal_futurama()(12))(n.cells)
785
# names(cell.cols) <- cell.order
786
787
cellfreq.df$CellType <- factor(cellfreq.df$CellType,
788
                               levels=cell.order)
789
790
ggplot(cellfreq.df,
791
       aes(x=Status_on_day_collection_summary, y = Mean, fill=CellType))+ 
792
    geom_bar(position="fill", stat="identity", width = 0.9, colour = "black")+
793
    scale_fill_manual(values = cell.cols) +
794
    theme(aspect.ratio = 1/1.5, 
795
          axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1, size = 14, colour = "black"),
796
          panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
797
          axis.text.y = element_text(size = 14, colour = "black"),
798
          legend.key.size=unit(0.4, "cm"),
799
          axis.title.x=element_blank(), axis.title.y=element_blank(),
800
          legend.text = element_text(size = 14), legend.title=element_text(size=14),
801
          panel.background = element_blank(), axis.line = element_line(colour = "black")) +
802
    guides(fill=guide_legend(title="Cell type", ncol=1)) +
803
    ggsave("~/Dropbox/COVID19/Updated_plots/Tcell_barchart.pdf",
804
           height=4.25, width=6.95, useDingbats=FALSE)
805
```
806
807
I have a misgiving about this, as it masks the distribution of the proportions - maybe a series of filled boxplots, e.g. a waterfall?
808
809
```{r, warning=FALSE, message=FALSE, fig.height=5.25, fig.width=12.95}
810
cell.freq.merge$CellType <- factor(cell.freq.merge$CellType,
811
                                   levels=cell.order)
812
813
ggplot(cell.freq.merge,
814
       aes(x=sample_id, y = Freq, fill=CellType))+ 
815
    geom_bar(position="fill", stat="identity", width = 0.8, colour = "black")+
816
    scale_fill_manual(values = cell.cols) +
817
    facet_grid(. ~ Status_on_day_collection_summary, scales="free_x", space="free") +
818
    theme(axis.text.x = element_blank(),
819
          panel.spacing=unit(0, "lines"),
820
          panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
821
          axis.text.y = element_text(size = 18),
822
          axis.title.x=element_blank(), axis.title.y=element_blank(),
823
          axis.ticks.x=element_blank(),
824
          strip.background=element_rect(fill='white', colour='white'),
825
          strip.text=element_text(size=14, angle=90, colour='black'),
826
          legend.text = element_text(size = 18), legend.title=element_text(size=18),
827
          panel.background = element_blank(), axis.line = element_line(colour = "black")) +
828
    # guides(fill=guide_legend(title="Cell type")) +
829
    ggsave("~/Dropbox/COVID19/Updated_plots/Tcell_barchart-full.pdf",
830
           height=5.25, width=8.95, useDingbats=FALSE) +
831
    NULL
832
```
833
834
Make dot plots.
835
836
```{r, warning=FALSE, message=FALSE}
837
clust.model.matrix <- model.matrix(~ 0 + Sub.Annotation, data=annot.merge)
838
rownames(clust.model.matrix) <- annot.merge$CellID
839
840
gene.goi.by.cluster <- t(clust.model.matrix) %*% as.matrix(annot.merge[, setdiff(colnames(goi.df), colnames(tcell.merge))])
841
gene.ave.by.label <- as.data.frame(apply(gene.goi.by.cluster, 2, function(X) X/colSums(clust.model.matrix)))
842
rownames(gene.ave.by.label) <- gsub(rownames(gene.ave.by.label), pattern="Sub\\.Annotation", replacement="")
843
844
gene.goi.by.cluster.bin <- t(clust.model.matrix) %*% as.matrix(annot.merge[, setdiff(colnames(goi.df), colnames(tcell.merge))] > 0)
845
gene.bin.by.label <- as.data.frame(apply(gene.goi.by.cluster.bin, 2, function(X) X/sum(X)))
846
gene.bin.by.label[is.na(gene.bin.by.label)] <- 0
847
rownames(gene.bin.by.label) <- gsub(rownames(gene.bin.by.label), pattern="Sub\\.Annotation", replacement="")
848
```
849
850
851
```{r, message=FALSE, warning=FALSE}
852
gene.ave.label.df <- as.data.frame(apply(gene.ave.by.label, 2, FUN=function(XP) XP/max(XP)))
853
gene.ave.label.df$Sub.Annotation <- rownames(gene.ave.label.df)
854
gene.ave.label.melt <- melt(gene.ave.label.df, id.vars='Sub.Annotation')
855
colnames(gene.ave.label.melt) <- c("Sub.Annotation", "Gene", "AveExprs")
856
857
gene.bin.label.df <- gene.bin.by.label
858
gene.bin.label.df$Sub.Annotation <- rownames(gene.bin.label.df)
859
gene.bin.label.melt <- melt(gene.bin.label.df, id.vars='Sub.Annotation')
860
colnames(gene.bin.label.melt) <- c("Sub.Annotation", "Gene", "PropExprs")
861
```
862
863
864
```{r, message=FALSE, warning=FALSE, fig.height=4.65, fig.width=8.95}
865
dot.plot.df <- merge(gene.ave.label.melt, gene.bin.label.melt, by=c('Sub.Annotation', 'Gene'))
866
dot.plot.df$Gene <- as.character(dot.plot.df$Gene)
867
868
dot.plot.genes <- c("CD3G", "CD8A", "CD4", "CCR7", "CD27", "CD28", "TOX", "CTLA4", "IFNG", "TBX21",
869
                    "PDCD1", "GZMB", "RORC", "IKZF2", "GATA3", "TNFRSF9", "IL7R", "KLRG1",
870
                    "TRAV1.2", "TRDV2", "TRGV9", "CD40LG", "RORC", "NCAM1", "CCR6", 
871
                    "CXCR5", "HLA.DRB1", "LAG3", "FOXP3", "MKI67", "NCR1", "GARP", "FCGR3A")
872
gene.cluster <- hclust(dist(t(gene.ave.by.label[, intersect(colnames(gene.ave.by.label), dot.plot.genes)])))$order
873
874
# manually set the gene order
875
gene.order <- c("CD3G", "CCR7", "CD27", "CD4", "CD28", "TNFRSF9", "CCR6", "CD40LG", "MKI67", "TBX21", "GATA3", "RORC", "IKZF2",
876
                "FOXP3", "CTLA4", "PDCD1", "CXCR5", "CD8A", "GZMB", "HLA.DRB1", "IFNG", "LAG3", "TOX", "IL7R", "KLRG1", "TRDV2", 
877
                "TRGV9", "TRAV1.2", "NCAM1", "NCR1", "FCGR3A")
878
879
dot.plot.df <- dot.plot.df[dot.plot.df$Gene %in% dot.plot.genes, ]
880
dot.plot.df$Gene <- factor(dot.plot.df$Gene,
881
                           levels=gene.order)
882
883
dot.plot.df <- dot.plot.df[dot.plot.df$Sub.Annotation %in% cell.order, ]
884
dot.plot.df$Sub.Annotation <- factor(dot.plot.df$Sub.Annotation,
885
                                     levels=rev(cell.order))
886
887
ggplot(dot.plot.df,
888
       aes(x=Gene, y=Sub.Annotation, colour=AveExprs, size=PropExprs*100)) +
889
    geom_point() +
890
    theme_cowplot() +
891
    scale_colour_gradient(low='grey70', high='blue') +
892
    theme(axis.text.x=element_text(angle=90, vjust=0.5, hjust=1, colour='black', size=14),
893
          axis.text.y=element_text(colour='black', size=14),
894
          legend.text=element_text(size=14),
895
          legend.title=element_text(size=14),
896
          panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
897
          panel.background = element_blank(), axis.line = element_line(colour = "black")) +
898
    guides(colour=guide_colorbar(title='Mean\nExpression'),
899
           size=guide_legend(title='% Express')) +
900
    labs(x="Gene", y="Cell type") +
901
    ggsave("~/Dropbox/COVID19/Updated_plots/Tcell_ext_dotplot-genes.pdf",
902
           height=4.65, width=8.95, useDingbats=FALSE)
903
```
904
905
Plot the cytokine profiles to figure out if Th1/2/17 dominate (or all of them).
906
907
```{r, message=FALSE, warning=FALSE, fig.height=4.15, fig.width=7.0}
908
cytokine.dot.plot.df <- merge(gene.ave.label.melt, gene.bin.label.melt, by=c('Sub.Annotation', 'Gene'))
909
cytokine.dot.plot.df$Gene <- as.character(cytokine.dot.plot.df$Gene)
910
911
cytokine.genes <- intersect(c("IL2", "IL1A", "IL1B", "IL10", "IL22",  "TNF", "IFNG", "LTA",
912
                              "IL4", "IL5", "IL13", "IL17A", "IL17F", "IL21",
913
                              "IL12A", "IL12B", "IL25"), colnames(annot.merge))
914
915
cytokine.dot.plot.df <- cytokine.dot.plot.df[cytokine.dot.plot.df$Gene %in% cytokine.genes, ]
916
cytokine.dot.plot.df$Gene <- factor(cytokine.dot.plot.df$Gene,
917
                                    levels=cytokine.genes)
918
919
cytokine.dot.plot.df <- cytokine.dot.plot.df[cytokine.dot.plot.df$Sub.Annotation %in% cell.order, ]
920
cytokine.dot.plot.df$Sub.Annotation <- factor(cytokine.dot.plot.df$Sub.Annotation,
921
                                              levels=rev(cell.order))
922
923
ggplot(cytokine.dot.plot.df,
924
       aes(x=Gene, y=Sub.Annotation, colour=AveExprs, size=PropExprs*100)) +
925
    geom_point() +
926
    theme_cowplot() +
927
    scale_colour_gradient(low='grey70', high='blue') +
928
    theme(axis.text.x=element_text(angle=90, vjust=0.5, hjust=1, colour='black', size=14),
929
          axis.text.y=element_text(colour='black', size=14),
930
          legend.text=element_text(size=14),
931
          legend.title=element_text(size=14),
932
          legend.key.size=unit(0.4, "cm"),
933
          panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
934
          panel.background = element_blank(), axis.line = element_line(colour = "black")) +
935
    guides(colour=guide_colorbar(title='Mean\nExpression'),
936
           size=guide_legend(title='% Express')) +
937
    labs(x="Cytokine", y="Cell type") +
938
    ggsave("~/Dropbox/COVID19/Updated_plots/Tcell_ext_dotplot-cytokines.pdf",
939
           height=4.65, width=7.0, useDingbats=FALSE) +
940
    NULL
941
```
942
943
I want to see how these cytokines change over the course of severity.
944
945
```{r, warning=FALSE, message=FALSE}
946
cytokine.melt <- melt(annot.merge[, c("sample_id", "Status_on_day_collection_summary", "Sub.Annotation", "Collection_Day", "Sex", "Age", "CellID",
947
                                           cytokine.genes)],
948
                      variable.name="Cytokine", 
949
                      id.vars=c("sample_id", "Status_on_day_collection_summary", "Sub.Annotation", "Collection_Day", "Sex", "Age", "CellID"))
950
cytokine.means <- cytokine.melt %>% group_by(Status_on_day_collection_summary, Sub.Annotation, Cytokine) %>% summarise("Mean.Exprs"=mean(value))
951
cytokine.var <- cytokine.melt %>% group_by(Status_on_day_collection_summary, Sub.Annotation, Cytokine) %>% summarise("Var.Exprs"=var(value))
952
cytokine.se <- cytokine.melt %>% group_by(Status_on_day_collection_summary, Sub.Annotation, Cytokine) %>% summarise("SE.Exprs"=sqrt(var(value))/sqrt(sum(value >= 0)))
953
954
cytokine.df <- Reduce(x=list("mean"=cytokine.means, "var"=cytokine.var, "se"=cytokine.se),
955
                      f=function(x, y) merge(x, y, by=c('Status_on_day_collection_summary', 'Sub.Annotation', 'Cytokine')))
956
# cytokine.df <- merge(cytokine.df, covid.meta, by='sample_id')
957
 
958
# remove superflous or malignant samples
959
# cytokine.df <- cytokine.df[!cytokine.df$patient_id %in% c("CV0198"), ]
960
# cytokine.df <- cytokine.df[!cytokine.df$sample_id %in% c("BGCV01_CV0902"), ]
961
# cytokine.df <- cytokine.df[!cytokine.df$Status_on_day_collection_summary %in% c("Non_Covid"), ]
962
963
cytokine.df$D0_satus_summary <- factor(cytokine.df$Status_on_day_collection_summary,
964
                                       levels=c("Healthy", "Asymptomatic", "Mild",
965
                                                "Moderate", "Severe", "Critical", "LPS"))
966
```
967
968
969
Maybe visualise this as a heatmap?
970
971
```{r, fig.height=7.15, fig.width=7.95}
972
cytokine.df <- cytokine.df[!cytokine.df$Sub.Annotation %in% c("NK", "ILCs", "Doublets:Bcell", "Doublets", "Doublets:Platelet") &
973
                               !is.na(cytokine.df$D0_satus_summary), ]
974
975
ggplot(cytokine.df[!cytokine.df$Cytokine %in% c("IL13", "IL22", "IL12A", "IL17A", "IL17F", "IL10", "IL6"), ], 
976
       aes(x=D0_satus_summary, y=Cytokine, fill=Mean.Exprs)) +
977
    geom_tile() +
978
    facet_wrap(~Sub.Annotation) +
979
    scale_fill_viridis(option="magma") +
980
    theme_cowplot() +
981
    guides(fill=guide_colourbar(title="Mean Expression")) +
982
    labs(y="Cytokine Gene", x="Severity") +
983
    theme(axis.text.x=element_text(angle=90, vjust=0.5, hjust=1),
984
          strip.background=element_rect(fill='white', colour='white')) +
985
    ggsave("~/Dropbox/COVID19/Updated_plots/Tcell_cytokines_heatmap.pdf",
986
           height=7.15, width=7.95, useDingbats=FALSE) +
987
    NULL
988
```
989
990
991
What about exhaustion markers?
992
993
994
```{r, warning=FALSE, message=FALSE}
995
exhaustion.genes <- c("TOX", "TIGIT", "LAG3", "HAVCR2", "PDCD1", "FOXP3", "MKI67")
996
exhaustion.melt <- melt(annot.merge[, c("sample_id", "Status_on_day_collection_summary", "Sub.Annotation", "Collection_Day", "Sex", "Age", "CellID",
997
                                           exhaustion.genes)],
998
                      variable.name="Gene", 
999
                      id.vars=c("sample_id", "Status_on_day_collection_summary", "Sub.Annotation", "Collection_Day", "Sex", "Age", "CellID"))
1000
exhaustion.means <- exhaustion.melt %>% group_by(Status_on_day_collection_summary, Sub.Annotation, Gene) %>% summarise("Mean.Exprs"=mean(value))
1001
exhaustion.var <- exhaustion.melt %>% group_by(Status_on_day_collection_summary, Sub.Annotation, Gene) %>% summarise("Var.Exprs"=var(value))
1002
exhaustion.se <- exhaustion.melt %>% group_by(Status_on_day_collection_summary, Sub.Annotation, Gene) %>% summarise("SE.Exprs"=sqrt(var(value))/sqrt(sum(value >= 0)))
1003
1004
exhaustion.df <- Reduce(x=list("mean"=exhaustion.means, "var"=exhaustion.var, "se"=exhaustion.se),
1005
                      f=function(x, y) merge(x, y, by=c('Status_on_day_collection_summary', 'Sub.Annotation', 'Gene')))
1006
# exhaustion.df <- merge(exhaustion.df, covid.meta, by='sample_id')
1007
 
1008
# remove superflous or malignant samples
1009
# exhaustion.df <- exhaustion.df[!exhaustion.df$patient_id %in% c("CV0198"), ]
1010
# exhaustion.df <- exhaustion.df[!exhaustion.df$sample_id %in% c("BGCV01_CV0902"), ]
1011
# exhaustion.df <- exhaustion.df[!exhaustion.df$Status_on_day_collection_summary %in% c("Non_Covid"), ]
1012
1013
exhaustion.df$D0_satus_summary <- factor(exhaustion.df$Status_on_day_collection_summary,
1014
                                       levels=c("Healthy", "Asymptomatic", "Mild",
1015
                                                "Moderate", "Severe", "Critical", "LPS"))
1016
```
1017
1018
1019
```{r, fig.height=5.45, fig.width=9.95}
1020
exhaustion.df <- exhaustion.df[!exhaustion.df$Sub.Annotation %in% c("NK", "Prolif.NK", "ILCs", "B_naive", "CD14_mono",
1021
                                                            "DC_prolif", "DC3", "HSC", "ILC1_3", "ILC2", "Mono_prolif",
1022
                                                            "NK_16hi", "NK_56hi", "NK_prolif", "pDC", "Plasmablast",
1023
                                                            "Platelets", "RBC",
1024
                                                            "Doublets:Bcell", "Doublets:Platelet", "Doublets") &
1025
                               !is.na(exhaustion.df$D0_satus_summary), ]
1026
1027
ggplot(exhaustion.df[!exhaustion.df$Gene %in% c("IL13", "IL22", "IL12A", "IL17A", "IL17F"), ], 
1028
       aes(x=D0_satus_summary, y=Gene, fill=Mean.Exprs)) +
1029
    geom_tile() +
1030
    facet_wrap(~Sub.Annotation, nrow=3) +
1031
    scale_fill_viridis(option="magma") +
1032
    theme_cowplot() +
1033
    guides(fill=guide_colourbar(title="Mean Expression")) +
1034
    labs(x="Severity", "Gene") +
1035
    theme(axis.text.x=element_text(angle=90, vjust=0.5, hjust=1),
1036
          strip.background=element_rect(fill='white', colour='white')) +
1037
    ggsave("~/Dropbox/COVID19/Updated_plots/Tcell_exhaustion_heatmap.pdf",
1038
           height=5.45, width=9.95, useDingbats=FALSE) +
1039
    NULL
1040
```
1041
1042
Do the same for the proteins.
1043
1044
```{r, warning=FALSE, message=FALSE}
1045
clust.model.matrix <- model.matrix(~ 0 + Sub.Annotation, data=annot.merge)
1046
rownames(clust.model.matrix) <- annot.merge$CellID
1047
1048
goi.by.cluster <- t(clust.model.matrix) %*% as.matrix(annot.merge[, gsub(setdiff(colnames(prot.df), colnames(tcell.merge)),
1049
                                                                              pattern="\\.x", replacement="")])
1050
ave.by.label <- as.data.frame(apply(goi.by.cluster, 2, function(X) X/colSums(clust.model.matrix)))
1051
1052
rownames(ave.by.label) <- gsub(rownames(ave.by.label), pattern="Sub\\.Annotation", replacement="")
1053
colnames(ave.by.label) <- gsub(colnames(ave.by.label), pattern="Prot\\.", replacement="")
1054
1055
goi.by.cluster.bin <- t(clust.model.matrix) %*% as.matrix(annot.merge[, gsub(setdiff(colnames(prot.df), colnames(tcell.merge)),
1056
                                                                              pattern="\\.x", replacement="")] > 0.01)
1057
bin.by.label <- as.data.frame(apply(goi.by.cluster.bin, 2, function(X) X/sum(X)))
1058
bin.by.label[is.na(bin.by.label)] <- 0
1059
rownames(bin.by.label) <- gsub(rownames(bin.by.label), pattern="Sub\\.Annotation", replacement="")
1060
colnames(bin.by.label) <- gsub(colnames(bin.by.label), pattern="Prot\\.", replacement="")
1061
```
1062
1063
1064
```{r, message=FALSE, warning=FALSE}
1065
ave.label.df <- as.data.frame(apply(ave.by.label, 2, FUN=function(XP) (XP-min(XP))/max(XP-min(XP))))
1066
ave.label.df$Sub.Annotation <- rownames(ave.label.df)
1067
ave.label.melt <- melt(ave.label.df, id.vars='Sub.Annotation')
1068
colnames(ave.label.melt) <- c("Sub.Annotation", "Protein", "AveExprs")
1069
1070
bin.label.df <- bin.by.label
1071
bin.label.df$Sub.Annotation <- rownames(bin.label.df)
1072
bin.label.melt <- melt(bin.label.df, id.vars='Sub.Annotation')
1073
colnames(bin.label.melt) <- c("Sub.Annotation", "Protein", "PropExprs")
1074
```
1075
1076
1077
```{r, message=FALSE, warning=FALSE, fig.height=5.15, fig.width=8.95}
1078
protein.dot.plot.df <- merge(ave.label.melt, bin.label.melt, by=c('Sub.Annotation', 'Protein'))
1079
protein.dot.plot.df$Protein <- as.character(protein.dot.plot.df$Protein)
1080
1081
dot.plot.prots <- c("CD3", "CD8", "CD4", "CCR7", "CD45RA", "CD45RO", "CD28", "CD27", "CTLA4", "CD56", "CD16",
1082
                    "CD274", "CXCR3", "CD40LG", "ICOS", "PD1", "CD25", "LAMP1", "CXCR3", "CCR6", "IL7R", 
1083
                    "CXCR5", "TCR_Va7.2", "TCR_Vg2", "TCR_Vg9", "TCR_Va24-Ja18", "CD62L", "TCR_Va24.Ja18")
1084
1085
protein.order <- c("CD3", "CD4", "CCR7", "CD45RA", "CD45RO", "CD27", "CD28", "CD62L", "CD25", "CTLA4",
1086
                   "LAMP1", "CCR6", "IL7R",
1087
                   "CXCR5", "CD40LG", "ICOS", "CXCR3", "CD8", "PD1", "CD274", "TCR_Vg9", "TCR_Vg2",
1088
                   "TCR_Va7.2", "TCR_Va24.Ja18", "CD56", "CD16")
1089
1090
protein.dot.plot.df <- protein.dot.plot.df[protein.dot.plot.df$Protein %in% dot.plot.prots, ]
1091
protein.dot.plot.df$Protein <- factor(protein.dot.plot.df$Protein,
1092
                                      levels=protein.order)
1093
1094
protein.dot.plot.df <- protein.dot.plot.df[protein.dot.plot.df$Sub.Annotation %in% cell.order, ]
1095
protein.dot.plot.df$Sub.Annotation <- factor(protein.dot.plot.df$Sub.Annotation,
1096
                                             levels=rev(cell.order))
1097
1098
ggplot(protein.dot.plot.df, 
1099
       aes(x=Protein, y=Sub.Annotation, colour=AveExprs, size=PropExprs*100)) +
1100
    geom_point() +
1101
    theme_cowplot() +
1102
    scale_colour_gradient(low='grey70', high=darken('red')) +
1103
     theme(axis.text.x=element_text(angle=90, vjust=0.5, hjust=1, colour='black', size=14),
1104
          axis.text.y=element_text(colour='black', size=14),
1105
          legend.text=element_text(size=14),
1106
          legend.title=element_text(size=14),
1107
          panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
1108
          panel.background = element_blank(), axis.line = element_line(colour = "black")) +
1109
    guides(colour=guide_colorbar(title='Mean\nExpression'),
1110
           size=guide_legend(title='% Express')) +
1111
    labs(x="Protein", y="Cell type") +
1112
    ggsave("~/Dropbox/COVID19/Updated_plots/Tcell_ext_dotplot-protein.pdf",
1113
           height=5.15, width=8.95, useDingbats=FALSE)
1114
```
1115
1116
I also want to see some of the CD8 sub-phenotype mRNA and protein expression, namely _KLRG1_ and _CD217_ (_IL7R_).
1117
1118
```{r, warning=FALSE, message=FALSE, fig.height=5.95, fig.width=11.95}
1119
repeat.samps <- covid.meta$patient_id[duplicated(covid.meta$patient_id)]
1120
cell.freq.merge$Day <- ordered(cell.freq.merge$Collection_Day,
1121
                               levels=c("D0", "D7", "D9", "D12", "D13", "D28"))
1122
1123
ggplot(cell.freq.merge[cell.freq.merge$patient_id %in% repeat.samps &
1124
                           !cell.freq.merge$CellType %in% c("NK", "Prolif.NK", "ILCs", "Doublets:Bcell", "Doublets:Platelet", "Doublets") &
1125
                           !cell.freq.merge$Status_on_day_collection_summary %in% c("Non_covid", "LPS"), ],
1126
       aes(x=Day, y=Freq, fill=Status_on_day_collection_summary, group=patient_id)) +
1127
    #geom_boxplot() +
1128
    geom_line(aes(colour=Status_on_day_collection_summary)) +
1129
    geom_point(shape=21, size=3) +
1130
    theme_cowplot() +
1131
    facet_wrap(~CellType, scales="free_y") +
1132
    scale_fill_manual(values=group.cols) +
1133
    scale_colour_manual(values=group.cols) +
1134
    expand_limits(y=c(0)) +
1135
    theme(strip.background=element_rect(fill='white', colour='white'),
1136
          strip.text=element_text(size=14)) +
1137
    labs(x="Day", y="Proportion") +
1138
    guides(fill=guide_legend(title="Severity"),
1139
           colour=FALSE) +
1140
    # ggsave("~/Dropbox/COVID19/Updated_plots/Tcells_proportions-boxplot.pdf",
1141
    #        height=5.95, width=11.15, useDingbats=FALSE) +
1142
     NULL
1143
```
1144