a b/TcellAnalysis/notebooks/COVID19_Tcell_DA-Updated.Rmd
1
---
2
title: "COVID19: T cell differential abundance - Updated"
3
output: html_notebook
4
---
5
6
Performing differential abundance testing using a negative binomial GLM. Cell counts per donor sample are used as input, and the total number of cells 
7
captured (after QC) are used to normalize the model counts.
8
9
10
```{r, warning=FALSE, message=FALSE}
11
library(ggplot2)
12
library(ggsci)
13
library(cowplot)
14
library(RColorBrewer)
15
library(reshape2)
16
library(colorspace)
17
library(ggthemes)
18
library(edgeR)
19
library(scales)
20
library(ggrepel)
21
library(dplyr)
22
```
23
24
25
```{r}
26
covid.meta <- read.csv("~/Dropbox/COVID19/Data/basic_COVID19_PBMC_metadata_160212.csv",
27
                        header=TRUE, stringsAsFactors=FALSE)
28
old.meta <- read.csv("~/Dropbox/COVID19/Data/Metadata FINAL 10122020.csv",
29
                     header=TRUE, stringsAsFactors=FALSE)
30
old.meta$sample_id[old.meta$sample_id %in% c("BGCV13_CV0201")] <- "BGCV06_CV0201"
31
old.meta$sample_id[old.meta$sample_id %in% c("BGCV06_CV0326")] <- "BGCV13_CV0326"
32
33
covid.meta$AgeRange <- covid.meta$Age
34
covid.meta$Status_on_day_collection_summary[covid.meta$Status_on_day_collection_summary %in% c("LPS_10hours")] <- "LPS"
35
covid.meta$Status_on_day_collection_summary[covid.meta$Status_on_day_collection_summary %in% c("LPS_90mins")] <- "LPS"
36
37
covid.meta <- merge(covid.meta[, !colnames(covid.meta) %in% c("Age")], old.meta[, c("sample_id", "patient_id", "Age")],
38
                    by='sample_id')
39
40
covid.meta$Days_from_onset[covid.meta$Days_from_onset %in% c("Not_known", "Healthy")] <- NA
41
covid.meta$Days_from_onset <- as.numeric(covid.meta$Days_from_onset)
42
43
cell.freq.merge <- read.table("~/Dropbox/COVID19/Data/Updated/Tcell_proportions.tsv",
44
                              sep="\t", header=TRUE, stringsAsFactors=FALSE)
45
46
cell.freq.merge <- cell.freq.merge[!cell.freq.merge$Status_on_day_collection_summary %in% c("Non_covid"), ]
47
cell.freq.merge <- cell.freq.merge[!cell.freq.merge$patient_id %in% c("CV0198"), ]
48
cell.freq.merge <- cell.freq.merge[!cell.freq.merge$sample_id %in% c("BGCV01_CV0902"), ]
49
# remove doublets, etc
50
cell.freq.merge <- cell.freq.merge[!cell.freq.merge$CellType %in% c("Doublets:Bcell", "ILC1_3", "ILC2", "Doublets:Platelet", "Doublets", "NK", "ILCs"), ]
51
```
52
53
54
```{r, warning=FALSE, message=FALSE}
55
all.meta <- read.table("~/Dropbox/COVID19/Data/Updated/COVID19_scMeta-data.tsv",
56
                       sep="\t", header=TRUE, stringsAsFactors=FALSE)
57
rownames(all.meta) <- all.meta$CellID
58
59
# remove BGCV01_CV0209 and CV0198
60
all.meta <- all.meta[!all.meta$sample_id %in% c("BGCV01_CV0902"), ]
61
all.meta <- all.meta[!all.meta$patient_id %in% c("CV0198"), ]
62
63
all.meta$Status_on_day_collection_summary[all.meta$Status_on_day_collection_summary %in% c("LPS_90mins", "LPS_10hours")] <- "LPS"
64
65
all.meta$Days_from_onset[all.meta$Days_from_onset %in% c("Not_known", "Healthy")] <- NA
66
all.meta$Days_from_onset <- as.numeric(all.meta$Days_from_onset)
67
68
n.cell.vecc <- table(all.meta$sample_id)
69
```
70
71
72
```{r}
73
tcell.annots <- read.table("~/Dropbox/COVID19/Data/Updated//Tcell_annotations_ext.tsv",
74
                           sep="\t", header=TRUE, stringsAsFactors=FALSE)
75
tcell.df.merge <- merge(tcell.annots, all.meta, by='CellID')
76
```
77
78
## Cluster-baed DA analysis: Infected vs. controls
79
80
```{r}
81
# set up testing model
82
rownames(covid.meta) <- covid.meta$sample_id
83
tcell.meta <- covid.meta[!covid.meta$Status_on_day_collection_summary %in% c("Non_covid", "LPS"), ]
84
tcell.meta$OrderedSeverity <- ordered(tcell.meta$Status_on_day_collection_summary,
85
                                      levels=c("Healthy", "Asymptomatic", "Mild", "Moderate", "Severe", "Critical"))
86
tcell.meta$Infected <- ordered(tcell.meta$Status,
87
                               levels=c("Healthy", "Covid"))
88
89
tcell.model <- model.matrix(~ Sex + Age + Infected, data=tcell.meta[tcell.meta$Collection_Day %in% c("D0"), ])
90
91
# count cells
92
cell.freq.tab <- t(table(tcell.df.merge$sample_id[tcell.df.merge$Collection_Day %in% c("D0") &
93
                                                        !tcell.df.merge$Status_on_day_collection_summary %in% c("LPS", "Non_covid")],
94
                         tcell.df.merge$Sub.Annotation[tcell.df.merge$Collection_Day %in% c("D0") &
95
                                                        !tcell.df.merge$Status_on_day_collection_summary %in% c("LPS", "Non_covid")]))
96
cell.freq.tab <- cell.freq.tab[!rownames(cell.freq.tab) %in% c("Doublets:Bcell", "ILC1_3", "ILC2", "Doublets:Platelet", "Doublets", "NK", "ILCs"), ]
97
test.samps <- intersect(colnames(cell.freq.tab), names(n.cell.vecc))
98
cell.freq.tab <- cell.freq.tab[, test.samps]
99
tcell.model <- tcell.model[colnames(cell.freq.tab), ]
100
101
tcell.dge <- DGEList(cell.freq.tab, lib.size=log(n.cell.vecc[test.samps]))
102
103
#estimate dispersions
104
tcell.dge <- estimateDisp(tcell.dge, design=tcell.model)
105
tcell.linear.fit <- glmQLFit(tcell.dge, tcell.model, robust=TRUE)
106
tcell.res <- as.data.frame(topTags(glmQLFTest(tcell.linear.fit, coef=4), sort.by='none', n=Inf))
107
tcell.res$CellType <- rownames(tcell.res)
108
109
tcell.res$Sig <- as.numeric(tcell.res$FDR < 0.1)
110
tcell.res$Diff <- sign(tcell.res$logFC)
111
tcell.res$Diff[tcell.res$FDR > 0.1] <- 0
112
table(tcell.res$Diff)
113
```
114
115
116
117
```{r, warning=FALSE, message=FALSE, fig.height=2.95, fig.width=4.15}
118
mx.lfc <- max(abs(tcell.res$logFC))
119
eps <- mx.lfc * 0.05
120
121
infect.resplot.labels <- tcell.res$CellType
122
infect.resplot.labels[tcell.res$Sig == 0] <- ""
123
124
ggplot(tcell.res, aes(x=logFC, y=-log10(FDR), fill=as.character(Sig))) +
125
    geom_point(shape=21, size=4) +
126
    theme_cowplot() +
127
    scale_fill_manual(values=c("black", "red")) +
128
    scale_x_continuous(limits=c(-mx.lfc - eps, mx.lfc + eps), oob=squish) +
129
    guides(fill=guide_legend(title="FDR < 0.1")) +
130
    geom_text_repel(aes(label=infect.resplot.labels),
131
                     fill='white') +
132
    labs(x="log fold change", y=expression(paste("-log"[10], " FDR"))) +
133
    ggsave("~/Dropbox/COVID19/Updated_plots/Tcell_edgeR_volcano_caseVscontrol-linear.pdf",
134
           height=2.95, width=4.15, useDingbats=FALSE) +
135
    NULL
136
```
137
138
139
## Cluster-baed DA analysis: COVID19 severity
140
141
```{r}
142
# set up testing model
143
rownames(covid.meta) <- covid.meta$sample_id
144
tcell.meta <- covid.meta[!covid.meta$Status_on_day_collection_summary %in% c("Non_covid", "LPS", "Healthy"), ]
145
tcell.meta$OrderedSeverity <- ordered(tcell.meta$Status_on_day_collection_summary,
146
                                      levels=c("Asymptomatic", "Mild", "Moderate", "Severe", "Critical"))
147
148
tcell.model <- model.matrix(~ Sex + Age + OrderedSeverity, data=tcell.meta[tcell.meta$Collection_Day %in% c("D0"), ])
149
150
# count cells
151
cell.freq.tab <- t(table(tcell.df.merge$sample_id[tcell.df.merge$Collection_Day %in% c("D0") &
152
                                                        !tcell.df.merge$Status_on_day_collection_summary %in% c("LPS", "Non_covid", "Healthy")],
153
                         tcell.df.merge$Sub.Annotation[tcell.df.merge$Collection_Day %in% c("D0") &
154
                                                        !tcell.df.merge$Status_on_day_collection_summary %in% c("LPS", "Non_covid", "Healthy")]))
155
cell.freq.tab <- cell.freq.tab[!rownames(cell.freq.tab) %in% c("Doublets:Bcell", "ILC1_3", "ILC2", "Doublets:Platelet", "Doublets", "NK", "ILCs"), ]
156
test.samps <- intersect(colnames(cell.freq.tab), names(n.cell.vecc))
157
cell.freq.tab <- cell.freq.tab[, test.samps]
158
tcell.model <- tcell.model[colnames(cell.freq.tab), ]
159
160
tcell.dge <- DGEList(cell.freq.tab, lib.size=log(n.cell.vecc[test.samps]))
161
162
#estimate dispersions
163
tcell.dge <- estimateDisp(tcell.dge, design=tcell.model)
164
tcell.linear.fit <- glmQLFit(tcell.dge, tcell.model, robust=TRUE)
165
tcell.res <- as.data.frame(topTags(glmQLFTest(tcell.linear.fit, coef=4), sort.by='none', n=Inf))
166
tcell.res$CellType <- rownames(tcell.res)
167
168
tcell.res$Sig <- as.numeric(tcell.res$FDR < 0.1)
169
tcell.res$Diff <- sign(tcell.res$logFC)
170
tcell.res$Diff[tcell.res$FDR > 0.1] <- 0
171
table(tcell.res$Diff)
172
```
173
174
175
176
```{r, warning=FALSE, message=FALSE, fig.height=2.95, fig.width=4.15}
177
mx.lfc <- max(abs(tcell.res$logFC))
178
eps <- mx.lfc * 0.05
179
180
tcell.resplot.labels <- tcell.res$CellType
181
tcell.resplot.labels[tcell.res$Sig == 0] <- ""
182
183
ggplot(tcell.res, aes(x=logFC, y=-log10(FDR), fill=as.character(Sig))) +
184
    geom_point(shape=21, size=4) +
185
    theme_cowplot() +
186
    scale_fill_manual(values=c("black", "red")) +
187
    scale_x_continuous(limits=c(-mx.lfc - eps, mx.lfc + eps), oob=squish) +
188
    guides(fill=guide_legend(title="FDR < 0.1")) +
189
    geom_text_repel(aes(label=tcell.resplot.labels),
190
                     fill='white') +
191
    labs(x="log fold change", y=expression(paste("-log"[10], " FDR"))) +
192
    ggsave("~/Dropbox/COVID19/Updated_plots/Tcell_edgeR_volcano_caseOnly-linear.pdf",
193
           height=2.95, width=4.15, useDingbats=FALSE) +
194
    NULL
195
```
196
197
198
Quadratic changes.
199
200
201
```{r}
202
tcell.quad.res <- as.data.frame(topTags(glmQLFTest(tcell.linear.fit, coef=5), sort.by='none', n=Inf))
203
tcell.quad.res$CellType <- rownames(tcell.quad.res)
204
205
tcell.quad.res$Sig <- as.numeric(tcell.quad.res$FDR < 0.1)
206
tcell.quad.res$Diff <- sign(tcell.quad.res$logFC)
207
tcell.quad.res$Diff[tcell.quad.res$FDR > 0.1] <- 0
208
table(tcell.quad.res$Diff)
209
```
210
211
212
```{r, warning=FALSE, message=FALSE, fig.height=2.95, fig.width=4.15}
213
mx.lfc <- max(abs(tcell.quad.res$logFC))
214
eps <- mx.lfc * 0.05
215
216
tcell.quad.resplot.labels <- tcell.quad.res$CellType
217
tcell.quad.resplot.labels[tcell.quad.res$Sig == 0] <- ""
218
219
ggplot(tcell.quad.res, aes(x=logFC, y=-log10(FDR), fill=as.character(Sig))) +
220
    geom_point(shape=21, size=4) +
221
    theme_cowplot() +
222
    scale_fill_manual(values=c("black", "red")) +
223
    scale_x_continuous(limits=c(-mx.lfc - eps, mx.lfc + eps), oob=squish) +
224
    guides(fill=guide_legend(title="FDR < 0.1")) +
225
    geom_text_repel(aes(label=tcell.quad.resplot.labels),
226
                    force=10,
227
                    fill='white') +
228
    labs(x="log fold change", y=expression(paste("-log"[10], " FDR"))) +
229
    ggsave("~/Dropbox/COVID19/Updated_plots/Tcell_edgeR_volcano_caseOnly-quadratic.pdf",
230
           height=2.95, width=4.15, useDingbats=FALSE) +
231
    NULL
232
```
233
234
Show a plot of just the DA clusters.
235
236
```{r, warning=FALSE, message=FALSE, fig.height=4.15, fig.width=9.95}
237
da.clusters <- setdiff(unique(c(tcell.resplot.labels, tcell.quad.resplot.labels)), "CD4.Th17")
238
239
group.cols <- c("#2ca02c", "#1f77b4", "#9467bd", "#fed976", "#fd8d3c", "#e31a1c", "#800026", "#252525")
240
names(group.cols) <- c("Healthy", "LPS", "Non_covid", "Asymptomatic", "Mild", "Moderate", "Severe", "Critical")
241
242
# censor outliers to the 95th quantile for each cell type.
243
plotting.df <- cell.freq.merge[cell.freq.merge$Collection_Day %in% c("D0") &
244
                           !cell.freq.merge$Status_on_day_collection_summary %in% c("LPS") &
245
                           cell.freq.merge$CellType %in% da.clusters, ]
246
247
q95.ct <- sapply(unique(plotting.df$CellType),
248
                 FUN=function(x) quantile(plotting.df[plotting.df$CellType %in% x, ]$Freq, c(0.05, 0.95), na.rm=TRUE)[2])
249
q95.df <- data.frame("CellType"=gsub(names(q95.ct), pattern="\\.95%", replacement=""), "Q95"=q95.ct)
250
251
plotting.df <- merge(plotting.df, q95.df, by='CellType')
252
plotting.df[plotting.df$Freq >= plotting.df$Q95, ]$Freq <- plotting.df[plotting.df$Freq >= plotting.df$Q95, ]$Q95
253
254
plotting.df$Status_on_day_collection_summary <- ordered(plotting.df$Status_on_day_collection_summary,
255
                                         levels=c("Healthy", "Asymptomatic", "Mild", "Moderate", "Severe", "Critical"))
256
257
ggplot(plotting.df[!plotting.df$Status_on_day_collection_summary %in% c("Healthy"), ],
258
       aes(x=Status_on_day_collection_summary, y=Freq, fill=Status_on_day_collection_summary)) +
259
    geom_boxplot(outlier.size=0.5, coef=1.5) +
260
    theme_cowplot() +
261
    facet_wrap(~CellType, scales="free_y", nrow=3) +
262
    scale_fill_manual(values=group.cols) +
263
    expand_limits(y=c(0)) +
264
    theme(aspect=1/1.3,
265
          axis.text.x=element_blank(),
266
          axis.ticks.x=element_blank(),
267
          legend.key.size=unit(0.4, "cm"),
268
          strip.background=element_rect(fill='white', colour='white'),
269
          strip.text=element_text(size=12)) +
270
    labs(x="Severity", y="Proportion") +
271
    guides(fill=guide_legend(title="Severity")) +
272
    ggsave("~/Dropbox/COVID19/Updated_plots/Tcells_proportions-DA_CasesOnly-boxplot.pdf",
273
           height=4.15, width=9.95, useDingbats=FALSE) +
274
    NULL
275
```
276
277
Also show the equivalent plot of healthy vs. infected.
278
279
280
```{r, warning=FALSE, message=FALSE, fig.height=4.15, fig.width=9.95}
281
infect.da.clusters <- setdiff(unique(c(infect.resplot.labels)), "CD4.Th17")
282
283
group.cols <- c("#2ca02c", "#252525")
284
names(group.cols) <- c("Healthy", "COVID-19")
285
286
# censor outliers to the 95th quantile for each cell type.
287
plotting.df <- cell.freq.merge[cell.freq.merge$Collection_Day %in% c("D0") &
288
                           !cell.freq.merge$Status_on_day_collection_summary %in% c("LPS") &
289
                           cell.freq.merge$CellType %in% infect.da.clusters, ]
290
291
q95.ct <- sapply(unique(plotting.df$CellType),
292
                 FUN=function(x) quantile(plotting.df[plotting.df$CellType %in% x, ]$Freq, c(0.05, 0.95), na.rm=TRUE)[2])
293
q95.df <- data.frame("CellType"=gsub(names(q95.ct), pattern="\\.95%", replacement=""), "Q95"=q95.ct)
294
295
plotting.df <- merge(plotting.df, q95.df, by='CellType')
296
plotting.df[plotting.df$Freq >= plotting.df$Q95, ]$Freq <- plotting.df[plotting.df$Freq >= plotting.df$Q95, ]$Q95
297
298
plotting.df$Status <- factor(plotting.df$Status,
299
                             levels=c("Healthy", "Covid"),
300
                             labels=c("Healthy", "COVID-19"))
301
302
ggplot(plotting.df,
303
       aes(x=Status, y=Freq, fill=Status)) +
304
    geom_boxplot(outlier.size=0.5, coef=1.5) +
305
    theme_cowplot() +
306
    facet_wrap(~CellType, scales="free_y", nrow=3) +
307
    scale_fill_manual(values=group.cols) +
308
    expand_limits(y=c(0)) +
309
    theme(aspect=1/1.3,
310
          axis.text.x=element_blank(),
311
          axis.ticks.x=element_blank(),
312
          legend.key.size=unit(0.4, "cm"),
313
          strip.background=element_rect(fill='white', colour='white'),
314
          strip.text=element_text(size=12)) +
315
    labs(x="", y="Proportion") +
316
    guides(fill=guide_legend(title="Status")) +
317
    ggsave("~/Dropbox/COVID19/Updated_plots/Tcells_proportions-DA_CaseVsControls-boxplot.pdf",
318
           height=4.15, width=9.95, useDingbats=FALSE) +
319
    NULL
320
```
321
322
323
## Cluster-based DA analysis: days since symptom onset
324
325
```{r}
326
# set up testing model
327
rownames(covid.meta) <- covid.meta$sample_id
328
tcell.meta <- covid.meta[!covid.meta$Status_on_day_collection_summary %in% c("Non_covid", "LPS", "Healthy"), ]
329
tcell.meta$OrderedSeverity <- ordered(tcell.meta$Status_on_day_collection_summary,
330
                                      levels=c("Asymptomatic", "Mild", "Moderate", "Severe", "Critical"))
331
tcell.meta <- tcell.meta[tcell.meta$Status_on_day_collection_summary %in% c("Mild", "Moderate", "Severe", "Critical"), ]
332
333
tcell.meta$Days_from_onset[tcell.meta$Days_from_onset %in% c("Not_known")] <- NA
334
tcell.meta$Days_from_onset <- as.numeric(tcell.meta$Days_from_onset)
335
tcell.meta <- tcell.meta[!is.na(tcell.meta$Days_from_onset), ]
336
337
338
tcell.model <- model.matrix(~ Sex + Age + Days_from_onset, 
339
                            data=tcell.meta[tcell.meta$Collection_Day %in% c("D0"), ])
340
341
# count cells
342
cell.freq.tab <- t(table(tcell.df.merge$sample_id[tcell.df.merge$Collection_Day %in% c("D0") &
343
                                                      !is.na(tcell.df.merge$Days_from_onset) &
344
                                                        !tcell.df.merge$Status_on_day_collection_summary %in% c("LPS", "Non_covid", "Healthy", "Asymptomatic")],
345
                         tcell.df.merge$Sub.Annotation[tcell.df.merge$Collection_Day %in% c("D0") &
346
                                                           !is.na(tcell.df.merge$Days_from_onset) &
347
                                                        !tcell.df.merge$Status_on_day_collection_summary %in% c("LPS", "Non_covid", "Healthy", "Asymptomatic")]))
348
cell.freq.tab <- cell.freq.tab[!rownames(cell.freq.tab) %in% c("Doublets:Bcell", "ILC1_3", "ILC2", "Doublets:Platelet", "Doublets", "NK", "ILCs"), ]
349
test.samps <- intersect(colnames(cell.freq.tab), names(n.cell.vecc))
350
cell.freq.tab <- cell.freq.tab[, test.samps]
351
tcell.model <- tcell.model[colnames(cell.freq.tab), ]
352
353
tcell.dge <- DGEList(cell.freq.tab, lib.size=log(n.cell.vecc[test.samps]))
354
355
#estimate dispersions
356
tcell.dge <- estimateDisp(tcell.dge, design=tcell.model)
357
tcell.linear.fit <- glmQLFit(tcell.dge, tcell.model, robust=TRUE)
358
tcell.res <- as.data.frame(topTags(glmQLFTest(tcell.linear.fit, coef=4), sort.by='none', n=Inf))
359
tcell.res$CellType <- rownames(tcell.res)
360
361
tcell.res$Sig <- as.numeric(tcell.res$FDR < 0.1)
362
tcell.res$Diff <- sign(tcell.res$logFC)
363
tcell.res$Diff[tcell.res$FDR > 0.1] <- 0
364
table(tcell.res$Diff)
365
```
366
367
368
369
```{r, warning=FALSE, message=FALSE, fig.height=2.95, fig.width=4.15}
370
mx.lfc <- max(abs(tcell.res$logFC))
371
eps <- mx.lfc * 0.05
372
373
tcell.resplot.labels <- tcell.res$CellType
374
tcell.resplot.labels[tcell.res$Sig == 0] <- ""
375
376
ggplot(tcell.res, aes(x=logFC, y=-log10(FDR), fill=as.character(Sig))) +
377
    geom_point(shape=21, size=4) +
378
    theme_cowplot() +
379
    scale_fill_manual(values=c("black", "red")) +
380
    scale_x_continuous(limits=c(-mx.lfc - eps, mx.lfc + eps), oob=squish) +
381
    guides(fill=guide_legend(title="FDR < 0.1")) +
382
    geom_text_repel(aes(label=tcell.resplot.labels),
383
                     fill='white') +
384
    labs(x="log fold change", y=expression(paste("-log"[10], " FDR"))) +
385
    ggsave("~/Dropbox/COVID19/Updated_plots/Tcell_edgeR_volcano_daysOnset-linear.pdf",
386
           height=2.95, width=4.15, useDingbats=FALSE) +
387
    NULL
388
```
389
390
391
Show a plot of just the DA clusters.
392
393
```{r, warning=FALSE, message=FALSE, fig.height=4.15, fig.width=9.95}
394
da.clusters <- setdiff(unique(c(tcell.resplot.labels, tcell.quad.resplot.labels)), "CD4.Th17")
395
396
group.cols <- c("#2ca02c", "#1f77b4", "#9467bd", "#fed976", "#fd8d3c", "#e31a1c", "#800026", "#252525")
397
names(group.cols) <- c("Healthy", "LPS", "Non_covid", "Asymptomatic", "Mild", "Moderate", "Severe", "Critical")
398
399
# censor outliers to the 95th quantile for each cell type.
400
plotting.df <- cell.freq.merge[cell.freq.merge$Collection_Day %in% c("D0") &
401
                                   !cell.freq.merge$Status_on_day_collection_summary %in% c("LPS") &
402
                                   !is.na(cell.freq.merge$Days_from_onset) &
403
                                   cell.freq.merge$CellType %in% da.clusters, ]
404
405
q95.ct <- sapply(unique(plotting.df$CellType),
406
                 FUN=function(x) quantile(plotting.df[plotting.df$CellType %in% x, ]$Freq, c(0.05, 0.95), na.rm=TRUE)[2])
407
q95.df <- data.frame("CellType"=gsub(names(q95.ct), pattern="\\.95%", replacement=""), "Q95"=q95.ct)
408
409
plotting.df <- merge(plotting.df, q95.df, by='CellType')
410
plotting.df[plotting.df$Freq >= plotting.df$Q95, ]$Freq <- plotting.df[plotting.df$Freq >= plotting.df$Q95, ]$Q95
411
412
plotting.df$Status_on_day_collection_summary <- ordered(plotting.df$Status_on_day_collection_summary,
413
                                         levels=c("Healthy", "Asymptomatic", "Mild", "Moderate", "Severe", "Critical"))
414
415
ggplot(plotting.df[!plotting.df$Status_on_day_collection_summary %in% c("Healthy", "Asymptomatic"), ],
416
       aes(x=Days_from_onset, y=Freq, colour=Status_on_day_collection_summary)) +
417
    #geom_boxplot(outlier.size=0.5, coef=1.5) +
418
    geom_point(size=1) +
419
    #stat_smooth(method="lm") +
420
    theme_cowplot() +
421
    facet_wrap(~CellType, scales="free_y", nrow=3) +
422
    scale_colour_manual(values=group.cols) +
423
    expand_limits(y=c(0)) +
424
    theme(aspect=1/1.3,
425
          axis.text.x=element_blank(),
426
          axis.ticks.x=element_blank(),
427
          legend.key.size=unit(0.4, "cm"),
428
          strip.background=element_rect(fill='white', colour='white'),
429
          strip.text=element_text(size=12)) +
430
    labs(x="Symptom duration", y="Proportion") +
431
    guides(fill=guide_legend(title="Symptom duration")) +
432
    ggsave("~/Dropbox/COVID19/Updated_plots/Tcells_proportions-DA_daysOnset-points.pdf",
433
           height=4.15, width=9.95, useDingbats=FALSE) +
434
    NULL
435
```
436
437
438