|
a |
|
b/TcellAnalysis/notebooks/COVID_Tcell_annotations-Update.Rmd |
|
|
1 |
--- |
|
|
2 |
title: "COVID19: T cell annotation - updated" |
|
|
3 |
output: html_notebook |
|
|
4 |
--- |
|
|
5 |
|
|
|
6 |
Annotating T cells subtypes. |
|
|
7 |
|
|
|
8 |
```{r, warning=FALSE, message=FALSE} |
|
|
9 |
library(ggplot2) |
|
|
10 |
library(scran) |
|
|
11 |
library(ggsci) |
|
|
12 |
library(cowplot) |
|
|
13 |
library(scattermore) |
|
|
14 |
library(ComplexHeatmap) |
|
|
15 |
library(RColorBrewer) |
|
|
16 |
library(pheatmap) |
|
|
17 |
library(scater) |
|
|
18 |
library(reshape2) |
|
|
19 |
library(colorspace) |
|
|
20 |
library(dplyr) |
|
|
21 |
library(ggthemes) |
|
|
22 |
library(edgeR) |
|
|
23 |
library(scales) |
|
|
24 |
library(ggrepel) |
|
|
25 |
library(viridis) |
|
|
26 |
library(MASS) |
|
|
27 |
``` |
|
|
28 |
|
|
|
29 |
I've taken all of the full clusters and selected the ones that I think contain T cells. |
|
|
30 |
|
|
|
31 |
```{r, warning=FALSE, message=FALSE} |
|
|
32 |
# tcell.umap <- read.table("~/Dropbox/COVID19/Data/Updated/Cambridge_Sanger_Newcastle_Tcell_UMAPs.tsv", |
|
|
33 |
# sep="\t", header=TRUE, stringsAsFactors=FALSE) |
|
|
34 |
# fix the Sanger barcodes |
|
|
35 |
# tcell.umap$CellID[tcell.umap$DataSet %in% c("Sanger")] <- gsub(tcell.umap$CellID[tcell.umap$DataSet %in% c("Sanger")], |
|
|
36 |
# pattern="^(CV[0-9]+_[A-Z0-9]+-CV[0-9]+_[A-Z0-9]+)_", replacement="") |
|
|
37 |
|
|
|
38 |
tcell.clusters <- read.table("~/Dropbox/COVID19/Data/Updated/Cambridge_Sanger_Newcastle_Tcell_clusters.tsv", |
|
|
39 |
sep="\t", header=TRUE, stringsAsFactors=FALSE) |
|
|
40 |
|
|
|
41 |
# tcell.meta <- merge(tcell.umap, tcell.clusters, by='CellID') |
|
|
42 |
``` |
|
|
43 |
|
|
|
44 |
|
|
|
45 |
```{r, warning=FALSE, message=FALSE} |
|
|
46 |
all.meta <- read.table("~/Dropbox/COVID19/Data/Updated/COVID19_scMeta-data.tsv", |
|
|
47 |
sep="\t", header=TRUE, stringsAsFactors=FALSE) |
|
|
48 |
rownames(all.meta) <- all.meta$CellID |
|
|
49 |
|
|
|
50 |
# remove BGCV01_CV0209 and CV0198 |
|
|
51 |
all.meta <- all.meta[!all.meta$sample_id %in% c("BGCV01_CV0902"), ] |
|
|
52 |
all.meta <- all.meta[!all.meta$patient_id %in% c("CV0198"), ] |
|
|
53 |
``` |
|
|
54 |
|
|
|
55 |
To normalise the cell type proportions, I need to know how many cells were captured for each patient-sample combo. |
|
|
56 |
|
|
|
57 |
```{r, warning=FALSE, message=FALSE} |
|
|
58 |
n.cell.vecc <- table(all.meta$sample_id) |
|
|
59 |
``` |
|
|
60 |
|
|
|
61 |
|
|
|
62 |
|
|
|
63 |
```{r, warning=FALSE, message=FALSE} |
|
|
64 |
# tcell.merge <- merge(tcell.clusters, tcell.umap, by='CellID') |
|
|
65 |
tcell.merge <- tcell.clusters |
|
|
66 |
# tcell.merge <- merge(tcell.clusters, all.meta, by='CellID') |
|
|
67 |
# tcell.merge <- merge(tcell.merge, tcell.umap, by='CellID') |
|
|
68 |
``` |
|
|
69 |
|
|
|
70 |
|
|
|
71 |
```{r} |
|
|
72 |
gene.clust <- read.table("~/Dropbox/COVID19/Data/Updated/Cambridge_Sanger_Newcastle-Tcells-cluster_gene-Ave.tsv", |
|
|
73 |
sep="\t", header=TRUE, stringsAsFactors=FALSE) |
|
|
74 |
|
|
|
75 |
protein.clust <- read.table("~/Dropbox/COVID19/Data/Updated/Cambridge_Sanger_Newcastle-Tcells-cluster_protein-Ave.tsv", |
|
|
76 |
sep="\t", header=TRUE, stringsAsFactors=FALSE) |
|
|
77 |
``` |
|
|
78 |
|
|
|
79 |
|
|
|
80 |
```{r, warning=FALSE, message=FALSE} |
|
|
81 |
plot.proteins <- unique(c("CD19", "CD86", "CD11b", "CD56", "CD8", "CD4", "CD3", "CD16", "CD14", "CD335", "CD158", "SIGLEC7", |
|
|
82 |
"FCER2", "TCR", "CTLA4", "CXCR5", "ICOSL", "CD40LG", |
|
|
83 |
"CX3CR1", "CD79b", "CD20", "CD1c", "CD25", "CD45RO", "HLA-DR", "CD21", "CD5", "CD64", "CD127", |
|
|
84 |
"CD34", "CD40", "CD123", "CD27", "CD1c", "TCR_Va7.2", "TCR_Vg2", "CD235ab", "CD10", "CD2", "CD71", |
|
|
85 |
"TCR_Vg9", "TCR_Va24-Ja18", "CCR7", "CD45RA", "CD62L", "CD138", "CD38", "CD274", "CD279", "TIGIT")) |
|
|
86 |
|
|
|
87 |
plot.genes <- unique(c("CD3G", "CD3E", "FCGR3A", "CD8A", "CD8B", "CD4", "CD80", "CD86", "CXCL10", "HLA.DRA", "HLA.DRB1", "VEGFA", |
|
|
88 |
"CCR7", "CCR2", "CCR5", "CXCR3", "CD36", "FOXP3", "MS4A1", "LYZ", "GZMB", "CD68", "GATA3", "PECAM1", "RORC", |
|
|
89 |
"FCGR2A", "FCGR2B", "FCGR3B", "TPO", "HBB", "NCR1", "NCR2", "F8", "HIF1A", "CTLA4", "CD28", "CXCR1", |
|
|
90 |
"PDCD1", "TIM3", "LAG3", "TYMS", "PITX1", "TIGIT", "CXCL13", "BCL6", "TRAV1.2", |
|
|
91 |
#"IGHD", "IGHM", "IGHA1", "IGHE", "IGHG1", "IGHG2", "IGHG3", "IGHG4", |
|
|
92 |
"ITGA2B", "ITGAX", "TOX", "FYN", "NCAM1", "LRRC32", "SELL", "CXCR5", "CSF1", "IL7R", "CD40LG", "MYC", "MKI67", |
|
|
93 |
"IFNG", "IL6", "THRB", "CD19", "CD38", "CD27", "F2", "F5", "THRB", "CD44", "IKZF2", "IKZF4")) |
|
|
94 |
``` |
|
|
95 |
|
|
|
96 |
|
|
|
97 |
```{r, warning=FALSE, message=FALSE, fig.height=10.95, fig.width=9.95} |
|
|
98 |
gene.plot.hm <- t(apply(t(gene.clust[, intersect(colnames(gene.clust), plot.genes)]), 1, |
|
|
99 |
FUN=function(XP) XP/max(XP))) |
|
|
100 |
gene.plot.hm[is.na(gene.plot.hm)] <- 0 |
|
|
101 |
colnames(gene.plot.hm) <- gene.clust$GeneID |
|
|
102 |
|
|
|
103 |
pdf("~/Dropbox/COVID19/Updated_plots/Gene_heatmap-Tcells.pdf", |
|
|
104 |
height=10.95, width=9.95, useDingbats=FALSE) |
|
|
105 |
Heatmap(gene.plot.hm, name="Z-score") |
|
|
106 |
dev.off() |
|
|
107 |
|
|
|
108 |
Heatmap(gene.plot.hm, name="Z-score") |
|
|
109 |
``` |
|
|
110 |
|
|
|
111 |
I will take these clusters and compare them to the profiles of the original annotations. |
|
|
112 |
|
|
|
113 |
|
|
|
114 |
|
|
|
115 |
```{r, warning=FALSE, message=FALSE} |
|
|
116 |
table(tcell.merge$Centre, tcell.merge$Sub.Cluster) |
|
|
117 |
``` |
|
|
118 |
|
|
|
119 |
```{r, warning=FALSE, message=FALSE} |
|
|
120 |
table(tcell.merge$initial_clustering, tcell.merge$Sub.Cluster) |
|
|
121 |
``` |
|
|
122 |
|
|
|
123 |
|
|
|
124 |
|
|
|
125 |
```{r, warning=FALSE, message=FALSE, fig.height=10.95, fig.width=9.95} |
|
|
126 |
rownames(protein.clust) <- protein.clust$Protein |
|
|
127 |
protein.plot.hm <- protein.clust[intersect(protein.clust$Protein, plot.proteins), |
|
|
128 |
setdiff(colnames(protein.clust), "Protein")] |
|
|
129 |
#protein.plot.hm[protein.plot.hm < 0] <- 0 |
|
|
130 |
|
|
|
131 |
# gene.plot.hm <- t(apply(t(gene.clust[, intersect(colnames(gene.clust), plot.genes)]), 1, |
|
|
132 |
# FUN=function(XP) XP/max(XP))) |
|
|
133 |
|
|
|
134 |
# protein.plot.hm <- t(apply(t(protein.plot.hm), 1, FUN=function(XP) (XP - mean(XP))/sd(XP))) |
|
|
135 |
|
|
|
136 |
protein.plot.hm[protein.plot.hm <= 0] <- 0 |
|
|
137 |
protein.plot.hm <- t(apply(protein.plot.hm, 1, FUN=function(XP) (XP - mean(XP))/sd(XP))) |
|
|
138 |
protein.plot.hm[is.na(protein.plot.hm)] <- 0 |
|
|
139 |
|
|
|
140 |
rownames(protein.plot.hm) <- intersect(protein.clust$Protein, plot.proteins) |
|
|
141 |
|
|
|
142 |
# pdf("~/Dropbox/COVID19/Updated_plots/protein_heatmap-celltypes.pdf", |
|
|
143 |
# height=10.95, width=9.95, useDingbats=FALSE) |
|
|
144 |
# Heatmap(protein.plot.hm, name="Z-score") |
|
|
145 |
# dev.off() |
|
|
146 |
|
|
|
147 |
Heatmap(protein.plot.hm, name="Z-score") |
|
|
148 |
``` |
|
|
149 |
|
|
|
150 |
Compare the previous annotations. |
|
|
151 |
|
|
|
152 |
```{r, warning=FALSE, message=FALSE} |
|
|
153 |
old.annots <- read.csv("~/Dropbox/COVID19/Data/Annotations/annotation_200112.csv", |
|
|
154 |
header=TRUE, stringsAsFactors=FALSE) |
|
|
155 |
|
|
|
156 |
old.clusters <- read.table("~/Dropbox/COVID19/Data/Annotations/Cambridge_Sanger_Newcastle_Tcell_clusters.tsv", |
|
|
157 |
sep="\t", header=TRUE, stringsAsFactors = TRUE) |
|
|
158 |
colnames(old.clusters) <- c("CellID", "old.Sub.Cluster", "old.Louvain.Cluster") |
|
|
159 |
|
|
|
160 |
tcell.annot.merge <- merge(tcell.merge, old.annots, by.x='CellID', by.y='X') |
|
|
161 |
tcell.annot.merge <- merge(tcell.annot.merge, old.clusters, by='CellID') |
|
|
162 |
``` |
|
|
163 |
|
|
|
164 |
```{r, warning=FALSE, message=FALSE} |
|
|
165 |
table(tcell.annot.merge$full_clustering, tcell.annot.merge$Sub.Cluster) |
|
|
166 |
``` |
|
|
167 |
|
|
|
168 |
|
|
|
169 |
```{r, warning=FALSE, message=FALSE} |
|
|
170 |
table(tcell.annot.merge$old.Sub.Cluster, tcell.annot.merge$Sub.Cluster) |
|
|
171 |
``` |
|
|
172 |
|
|
|
173 |
```{r, fig.height=4.95, fig.width=5.15} |
|
|
174 |
tcell.clusts <- c("CD4.Prolif", "CD8.Prolif", "NK_prolif", "ILC1_3", "ILC2", "CD4.Naive", "CD8.Naive", |
|
|
175 |
"CD4.Th17", "CD4.EM", "CD4.IL22", "CD4.Tfh", "CD4.CM", "CD4.Th1", "CD4.Th2", "Treg", |
|
|
176 |
"gdT", "MAIT", "NKT", "CD8.EM", "CD8.TE", "NK_56hi", "NK_16hi") |
|
|
177 |
comp.tab <- table(tcell.annot.merge$Sub.Cluster, tcell.annot.merge$full_clustering) |
|
|
178 |
comp.tab <- apply(comp.tab[, tcell.clusts], 1, FUN=function(X) X/sum(X)) |
|
|
179 |
|
|
|
180 |
pheatmap(comp.tab, cluster_rows = FALSE) |
|
|
181 |
``` |
|
|
182 |
|
|
|
183 |
This shows that the clustering only really identifies the major subsets. |
|
|
184 |
|
|
|
185 |
```{r} |
|
|
186 |
tcell.merge$Annotation <- "Unknown" |
|
|
187 |
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c()] <- "PoorQC" |
|
|
188 |
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(15)] <- "NK" |
|
|
189 |
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c()] <- "Doublets" |
|
|
190 |
|
|
|
191 |
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c()] <- "ILCs" |
|
|
192 |
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(9)] <- "gdT" |
|
|
193 |
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(8, 10, 19)] <- "CD8.TE" |
|
|
194 |
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(17)] <- "CD8.EM" |
|
|
195 |
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(1, 29)] <- "Tfh-like" |
|
|
196 |
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(2)] <- "Treg" |
|
|
197 |
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(21)] <- "MAIT" |
|
|
198 |
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(4, 5, 16, 18, 20, 22, 26)] <- "CD4.Naive" |
|
|
199 |
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(6, 24)] <- "CD8.Naive" |
|
|
200 |
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(3, 7, 12, 13, 14, 23, 27, 28)] <- "CD4.CM" |
|
|
201 |
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(11)] <- "CD4.IL22" |
|
|
202 |
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c()] <- "Exhausted" |
|
|
203 |
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(25)] <- "Proliferating" |
|
|
204 |
|
|
|
205 |
table(tcell.merge$Annotation, tcell.merge$Sub.Cluster) |
|
|
206 |
``` |
|
|
207 |
|
|
|
208 |
|
|
|
209 |
```{r, warning=FALSE, message=FALSE} |
|
|
210 |
table(tcell.merge$Annotation, tcell.merge$Site) |
|
|
211 |
``` |
|
|
212 |
|
|
|
213 |
Plot the markers genes for these groups: CD4, CD8, TCR V$\alpha$7.2, TCR V$\gamma$9, TCR V$\gamma$2, CD45RA, CCR7, CD45RO, CD27, CD28, |
|
|
214 |
CTLA4, HLA-DR, FOXP3, CXCR5, GZMB, IFNG, TOX, PDCD1. |
|
|
215 |
|
|
|
216 |
I think these annotations look OK. |
|
|
217 |
|
|
|
218 |
```{r, warning=FALSE, message=FALSE} |
|
|
219 |
# read in the gene expression |
|
|
220 |
goi.df <- read.table("~/Dropbox/COVID19/Data/Updated/Cambridge_Sanger_Newcastle-Tcells_GeneGOI.tsv", |
|
|
221 |
sep="\t", header=TRUE, stringsAsFactors=FALSE) |
|
|
222 |
|
|
|
223 |
tcell.goi.merge <- merge(tcell.merge, goi.df, by='CellID') |
|
|
224 |
``` |
|
|
225 |
|
|
|
226 |
|
|
|
227 |
```{r, warning=FALSE, message=FALSE} |
|
|
228 |
# re-classify CD8.TE with high NCAM1 * FCGR3A expression |
|
|
229 |
tcell.goi.merge$Annotation[tcell.goi.merge$NCAM1 > 0 & tcell.goi.merge$Annotation %in% c("CD8.TE", "NK") & |
|
|
230 |
tcell.goi.merge$TRAV1.2 > 0] <- "NKT" |
|
|
231 |
tcell.goi.merge$Annotation[tcell.goi.merge$FCGR3A > 0 & tcell.goi.merge$Annotation %in% c("CD8.TE", "NK") & |
|
|
232 |
tcell.goi.merge$TRAV1.2 > 0] <- "NKT" |
|
|
233 |
tcell.goi.merge$Annotation[tcell.goi.merge$FCGR3A > 0 & tcell.goi.merge$Annotation %in% c("CD8.TE") & |
|
|
234 |
tcell.goi.merge$NCAM1 > 0] <- "NKT" |
|
|
235 |
``` |
|
|
236 |
|
|
|
237 |
Remove doublets based on expression of markers for different cell lineages. |
|
|
238 |
|
|
|
239 |
```{r} |
|
|
240 |
# re-classify CD8.TE with high NCR1 * FCGR3A expression |
|
|
241 |
bcell.markers <- tcell.goi.merge$IGHM > 0 | tcell.goi.merge$IGHG3 > 0 | tcell.goi.merge$IGHD > 0 | tcell.goi.merge$IGHG4 > 0 | |
|
|
242 |
tcell.goi.merge$IGHG2 > 0 | tcell.goi.merge$IGHG1 > 0 | tcell.goi.merge$IGHA1 > 0 |
|
|
243 |
|
|
|
244 |
tcell.goi.merge$Annotation[bcell.markers] <- "Doublets" |
|
|
245 |
|
|
|
246 |
table(tcell.goi.merge$Annotation, tcell.goi.merge$Site) |
|
|
247 |
``` |
|
|
248 |
|
|
|
249 |
|
|
|
250 |
```{r, fig.height=9.75, fig.width=10.95} |
|
|
251 |
n.cells <- length(unique(tcell.goi.merge$Sub.Cluster)) |
|
|
252 |
cell.cols <- colorRampPalette(pal_futurama()(12))(n.cells) |
|
|
253 |
names(cell.cols) <- unique(tcell.goi.merge$Sub.Cluster) |
|
|
254 |
|
|
|
255 |
tcell.goi.merge$Sub.Cluster <- factor(tcell.goi.merge$Sub.Cluster, |
|
|
256 |
levels=c(1:29)) |
|
|
257 |
|
|
|
258 |
ggplot(tcell.goi.merge, |
|
|
259 |
aes(x=UMAP1, y=UMAP2)) + |
|
|
260 |
geom_scattermore(data=tcell.goi.merge[, c("UMAP1", "UMAP2")], |
|
|
261 |
colour='grey80', alpha=0.5, size=0.1) + |
|
|
262 |
geom_scattermore(aes(colour=Sub.Cluster)) + |
|
|
263 |
theme_cowplot() + |
|
|
264 |
facet_wrap(~Annotation) + |
|
|
265 |
scale_colour_manual(values=cell.cols) + |
|
|
266 |
guides(colour=guide_legend(override.aes = list(size=3))) |
|
|
267 |
``` |
|
|
268 |
|
|
|
269 |
|
|
|
270 |
|
|
|
271 |
```{r, warning=FALSE, message=FALSE} |
|
|
272 |
clust.model.matrix <- model.matrix(~ 0 + Annotation, data=tcell.goi.merge) |
|
|
273 |
rownames(clust.model.matrix) <- tcell.goi.merge$CellID |
|
|
274 |
|
|
|
275 |
gene.goi.by.cluster <- t(clust.model.matrix) %*% as.matrix(tcell.goi.merge[, setdiff(colnames(goi.df), colnames(tcell.merge))]) |
|
|
276 |
gene.ave.by.label <- as.data.frame(apply(gene.goi.by.cluster, 2, function(X) X/colSums(clust.model.matrix))) |
|
|
277 |
rownames(gene.ave.by.label) <- gsub(rownames(gene.ave.by.label), pattern="Annotation", replacement="") |
|
|
278 |
|
|
|
279 |
gene.goi.by.cluster.bin <- t(clust.model.matrix) %*% as.matrix(tcell.goi.merge[, setdiff(colnames(goi.df), colnames(tcell.merge))] > 0) |
|
|
280 |
gene.bin.by.label <- as.data.frame(apply(gene.goi.by.cluster.bin, 2, function(X) X/sum(X))) |
|
|
281 |
gene.bin.by.label[is.na(gene.bin.by.label)] <- 0 |
|
|
282 |
rownames(gene.bin.by.label) <- gsub(rownames(gene.bin.by.label), pattern="Annotation", replacement="") |
|
|
283 |
``` |
|
|
284 |
|
|
|
285 |
|
|
|
286 |
```{r, message=FALSE, warning=FALSE} |
|
|
287 |
gene.ave.label.df <- as.data.frame(apply(gene.ave.by.label, 2, FUN=function(XP) XP/max(XP))) |
|
|
288 |
gene.ave.label.df$Annotation <- rownames(gene.ave.label.df) |
|
|
289 |
gene.ave.label.melt <- melt(gene.ave.label.df, id.vars='Annotation') |
|
|
290 |
colnames(gene.ave.label.melt) <- c("Annotation", "Gene", "AveExprs") |
|
|
291 |
|
|
|
292 |
gene.bin.label.df <- gene.bin.by.label |
|
|
293 |
gene.bin.label.df$Annotation <- rownames(gene.bin.label.df) |
|
|
294 |
gene.bin.label.melt <- melt(gene.bin.label.df, id.vars='Annotation') |
|
|
295 |
colnames(gene.bin.label.melt) <- c("Annotation", "Gene", "PropExprs") |
|
|
296 |
|
|
|
297 |
dot.plot.df <- merge(gene.ave.label.melt, gene.bin.label.melt, by=c('Annotation', 'Gene')) |
|
|
298 |
dot.plot.df$Gene <- as.character(dot.plot.df$Gene) |
|
|
299 |
``` |
|
|
300 |
|
|
|
301 |
|
|
|
302 |
```{r, message=FALSE, warning=FALSE, fig.height=5.15, fig.width=8.25} |
|
|
303 |
dot.plot.genes <- c("CD3E", "CD8A", "CD8B", "CD4", "CCR7", "CD27", "CD28", "TOX", "CTLA4", "IFNG", "PDCD1", "GZMB", "IFNG", |
|
|
304 |
"TRAV1.2", "TRDV2", "TRGV9", "CD40LG", "RORC", "CD38", "NCAM1", |
|
|
305 |
"CXCR5", "HLA.DRB1", "LAG3", "CD44", "FOXP3", "MKI67", "NCR1", "GARP", "GZMH", "CXCR4", "FCGR3A", "TCF7") |
|
|
306 |
|
|
|
307 |
ggplot(dot.plot.df[dot.plot.df$Gene %in% dot.plot.genes, ], |
|
|
308 |
aes(x=Gene, y=Annotation, colour=AveExprs, size=PropExprs*100)) + |
|
|
309 |
geom_point() + |
|
|
310 |
theme_cowplot() + |
|
|
311 |
scale_colour_gradient(low='grey70', high='blue') + |
|
|
312 |
theme(axis.text.x=element_text(angle=90, vjust=0.5, hjust=1, colour='black'), |
|
|
313 |
axis.text.y=element_text(colour='black'), |
|
|
314 |
panel.grid.major = element_blank(), panel.grid.minor = element_blank(), |
|
|
315 |
panel.background = element_blank(), axis.line = element_line(colour = "black")) + |
|
|
316 |
guides(colour=guide_colorbar(title='Mean Expression'), |
|
|
317 |
size=guide_legend(title='% Express')) + |
|
|
318 |
# ggsave("~/Dropbox/COVID19/Updated_plots/Tcell_dotplot-genes.pdf", |
|
|
319 |
# height=5.15, width=8.25, useDingbats=FALSE) + |
|
|
320 |
NULL |
|
|
321 |
``` |
|
|
322 |
|
|
|
323 |
Do the same for the proteins. |
|
|
324 |
|
|
|
325 |
```{r, warning=FALSE, message=FALSE} |
|
|
326 |
# read in the gene expression |
|
|
327 |
prot.df <- read.table("~/Dropbox/COVID19/Data/Updated/Cambridge_Sanger_Newcastle-Tcells_ProteinGOI.tsv", |
|
|
328 |
sep="\t", header=TRUE, stringsAsFactors=FALSE) |
|
|
329 |
colnames(prot.df) <- c(paste0("Prot.", colnames(prot.df)[!colnames(prot.df) %in% c("CellID")]), "CellID") |
|
|
330 |
|
|
|
331 |
tcell.prot.merge <- merge(tcell.goi.merge, prot.df, by=c('CellID')) |
|
|
332 |
``` |
|
|
333 |
|
|
|
334 |
|
|
|
335 |
```{r, warning=FALSE, message=FALSE} |
|
|
336 |
clust.model.matrix <- model.matrix(~ 0 + Annotation, data=tcell.prot.merge) |
|
|
337 |
rownames(clust.model.matrix) <- tcell.prot.merge$CellID |
|
|
338 |
|
|
|
339 |
goi.by.cluster <- t(clust.model.matrix) %*% as.matrix(tcell.prot.merge[, gsub(setdiff(colnames(prot.df), colnames(tcell.merge)), |
|
|
340 |
pattern="\\.x", replacement="")]) |
|
|
341 |
ave.by.label <- as.data.frame(apply(goi.by.cluster, 2, function(X) X/colSums(clust.model.matrix))) |
|
|
342 |
rownames(ave.by.label) <- gsub(rownames(ave.by.label), pattern="Annotation", replacement="") |
|
|
343 |
colnames(ave.by.label) <- gsub(colnames(ave.by.label), pattern="Prot\\.", replacement="") |
|
|
344 |
|
|
|
345 |
goi.by.cluster.bin <- t(clust.model.matrix) %*% as.matrix(tcell.prot.merge[, gsub(setdiff(colnames(prot.df), colnames(tcell.merge)), |
|
|
346 |
pattern="\\.x", replacement="")] > 0) |
|
|
347 |
bin.by.label <- as.data.frame(apply(goi.by.cluster.bin, 2, function(X) X/sum(X))) |
|
|
348 |
bin.by.label[is.na(bin.by.label)] <- 0 |
|
|
349 |
rownames(bin.by.label) <- gsub(rownames(bin.by.label), pattern="Annotation", replacement="") |
|
|
350 |
colnames(bin.by.label) <- gsub(colnames(bin.by.label), pattern="Prot\\.", replacement="") |
|
|
351 |
``` |
|
|
352 |
|
|
|
353 |
|
|
|
354 |
```{r, message=FALSE, warning=FALSE} |
|
|
355 |
ave.label.df <- as.data.frame(apply(ave.by.label, 2, FUN=function(XP) (XP-min(XP))/max(XP-min(XP)))) |
|
|
356 |
ave.label.df$Annotation <- rownames(ave.label.df) |
|
|
357 |
ave.label.melt <- melt(ave.label.df, id.vars='Annotation') |
|
|
358 |
colnames(ave.label.melt) <- c("Annotation", "Protein", "AveExprs") |
|
|
359 |
|
|
|
360 |
bin.label.df <- bin.by.label |
|
|
361 |
bin.label.df$Annotation <- rownames(bin.label.df) |
|
|
362 |
bin.label.melt <- melt(bin.label.df, id.vars='Annotation') |
|
|
363 |
colnames(bin.label.melt) <- c("Annotation", "Protein", "PropExprs") |
|
|
364 |
|
|
|
365 |
protein.dot.plot.df <- merge(ave.label.melt, bin.label.melt, by=c('Annotation', 'Protein')) |
|
|
366 |
protein.dot.plot.df$Protein <- as.character(protein.dot.plot.df$Protein) |
|
|
367 |
``` |
|
|
368 |
|
|
|
369 |
|
|
|
370 |
```{r, message=FALSE, warning=FALSE, fig.height=5.15, fig.width=8.25} |
|
|
371 |
dot.plot.prots <- c("CD3", "CD8", "CD4", "CCR7", "CD45RA", "CD45RO", "CD28", "CD27", "CTLA4", "CD56", "CD16", "ICOSL", |
|
|
372 |
"CD274", "CXCR3", "CD40LG", "CD38", "ICOS", "PD1", "HAVCR2", |
|
|
373 |
"CXCR5", "TCR_Va7.2", "TCR_Vg2", "TCR_Vg9", "TCR_Va24-Ja18", "CD62L", "CD44", "TCR_Va24.Ja18") |
|
|
374 |
|
|
|
375 |
ggplot(protein.dot.plot.df[protein.dot.plot.df$Protein %in% dot.plot.prots, ], |
|
|
376 |
aes(x=Protein, y=Annotation, colour=AveExprs, size=PropExprs*100)) + |
|
|
377 |
geom_point() + |
|
|
378 |
theme_cowplot() + |
|
|
379 |
scale_colour_gradient(low='grey70', high=darken('red')) + |
|
|
380 |
theme(axis.text.x=element_text(angle=90, vjust=0.5, hjust=1, colour='black'), |
|
|
381 |
axis.text.y=element_text(colour='black'), |
|
|
382 |
panel.grid.major = element_blank(), panel.grid.minor = element_blank(), |
|
|
383 |
panel.background = element_blank(), axis.line = element_line(colour = "black")) + |
|
|
384 |
guides(colour=guide_colorbar(title='Mean Expression'), |
|
|
385 |
size=guide_legend(title='% Express')) + |
|
|
386 |
# ggsave("~/Dropbox/COVID19/Updated_plots/Tcell_dotplot-protein.pdf", |
|
|
387 |
# height=5.15, width=8.25, useDingbats=FALSE) + |
|
|
388 |
NULL |
|
|
389 |
``` |
|
|
390 |
|
|
|
391 |
|
|
|
392 |
Write out the exhausted, proliferating and Tfh-like clusters for sub-clustering. |
|
|
393 |
|
|
|
394 |
```{r} |
|
|
395 |
write.table(tcell.prot.merge$CellID[tcell.prot.merge$Annotation %in% c("Proliferating")], |
|
|
396 |
file="~/Dropbox/COVID19/Data/Updated/Proliferating_barcodes.tsv", |
|
|
397 |
quote=FALSE, col.names=FALSE, row.names=FALSE, sep="\t") |
|
|
398 |
|
|
|
399 |
write.table(tcell.prot.merge$CellID[tcell.prot.merge$Annotation %in% c("Tfh-like")], |
|
|
400 |
file="~/Dropbox/COVID19/Data/Updated/Tfh_barcodes.tsv", |
|
|
401 |
quote=FALSE, col.names=FALSE, row.names=FALSE, sep="\t") |
|
|
402 |
|
|
|
403 |
write.table(tcell.prot.merge$CellID[tcell.prot.merge$Annotation %in% c("Exhausted")], |
|
|
404 |
file="~/Dropbox/COVID19/Data/Updated/Exhausted_barcodes.tsv", |
|
|
405 |
quote=FALSE, col.names=FALSE, row.names=FALSE, sep="\t") |
|
|
406 |
|
|
|
407 |
write.table(tcell.prot.merge$CellID[tcell.prot.merge$Annotation %in% c("Doublets")], |
|
|
408 |
file="~/Dropbox/COVID19/Data/Updated/Doublet_barcodes.tsv", |
|
|
409 |
quote=FALSE, col.names=FALSE, row.names=FALSE, sep="\t") |
|
|
410 |
``` |
|
|
411 |
|
|
|
412 |
Fill in the sub-annotations. |
|
|
413 |
|
|
|
414 |
```{r, warning=FALSE, message=FALSE} |
|
|
415 |
rownames(tcell.prot.merge) <- tcell.prot.merge$CellID |
|
|
416 |
tcell.prot.merge$Sub.Annotation <- tcell.prot.merge$Annotation |
|
|
417 |
|
|
|
418 |
# first, enforce the original annotations on all previously annotated cells |
|
|
419 |
old.annots <- read.csv("~/Dropbox/COVID19/Data/Annotations/annotation_200112.csv", |
|
|
420 |
header=TRUE, stringsAsFactors=FALSE) |
|
|
421 |
rownames(old.annots) <- old.annots$X |
|
|
422 |
annot.merge <- merge(tcell.prot.merge, old.annots, by.x='CellID', by.y='X', all.x=TRUE) |
|
|
423 |
annot.merge$Sub.Annotation[!is.na(annot.merge$full_clustering)] <- annot.merge$full_clustering[!is.na(annot.merge$full_clustering)] |
|
|
424 |
|
|
|
425 |
annot.merge <- annot.merge[!annot.merge$full_clustering %in% c("CD14_mono", "B_naive", "DC_prolif", "DC3", "HSC", "Mono_prolif", |
|
|
426 |
"NK_16hi", "NK_56hi", "NK_prolif", "pDC", "Plasmablast", |
|
|
427 |
"Platelets", "RBC"), ] |
|
|
428 |
|
|
|
429 |
# re-classify CD4s within CD8 clusters |
|
|
430 |
annot.merge$Sub.Annotation[(annot.merge$CD8A > 0 | annot.merge$CD8B > 0) & |
|
|
431 |
annot.merge$CD4 <= 0 & |
|
|
432 |
annot.merge$CXCR5 <= 0 & |
|
|
433 |
annot.merge$Sub.Annotation %in% c("Tfh-like")] <- "CD8.TE" |
|
|
434 |
|
|
|
435 |
# Tfh classification |
|
|
436 |
annot.merge$Sub.Annotation[(annot.merge$CD8A <= 0 & annot.merge$CD8B <= 0) & |
|
|
437 |
annot.merge$CD4 > 0 & |
|
|
438 |
annot.merge$CXCR5 > 0 & |
|
|
439 |
annot.merge$Sub.Annotation %in% c("CD8.TE")] <- "CD4.Tfh" |
|
|
440 |
|
|
|
441 |
# fix the Treg annotations |
|
|
442 |
annot.merge$Sub.Annotation[annot.merge$Sub.Annotation %in% c("CD4.CM") & |
|
|
443 |
(annot.merge$CXCR5 > 0) & (annot.merge$CD40LG > 0)] <- "CD4.Tfh" |
|
|
444 |
|
|
|
445 |
# final doublet classification |
|
|
446 |
annot.merge$Sub.Annotation[annot.merge$IGHA1 > 0 | annot.merge$IGHD > 0 | annot.merge$IGHE > 0 | annot.merge$IGHG1 > 0 | |
|
|
447 |
annot.merge$IGHG2 > 0 | annot.merge$IGHG3 > 0 | annot.merge$IGHG4 > 0] <- "Doublets" |
|
|
448 |
|
|
|
449 |
# final NKT classification |
|
|
450 |
annot.merge$Sub.Annotation[annot.merge$FCGR3A > 0 & annot.merge$NCAM1 > 0 & |
|
|
451 |
annot.merge$Sub.Annotation %in% c("CD8.EM", "CD8.TE", "CD8.Naive")] <- "NKT" |
|
|
452 |
annot.merge$Sub.Annotation[annot.merge$FCGR3A > 0 & annot.merge$NCR1 > 0 & |
|
|
453 |
annot.merge$Sub.Annotation %in% c("CD8.EM", "CD8.TE", "CD8.Naive")] <- "NKT" |
|
|
454 |
|
|
|
455 |
# fix the Treg annotations |
|
|
456 |
annot.merge$Sub.Annotation[annot.merge$Sub.Annotation %in% c("CD4.CM", "CD4.Naive", "CD4.Tfh", "Treg") & |
|
|
457 |
(annot.merge$IL22 > 0)] <- "CD4.IL22" |
|
|
458 |
|
|
|
459 |
annot.merge$Sub.Annotation[annot.merge$Sub.Annotation %in% c("Treg") & |
|
|
460 |
!(annot.merge$FOXP3 > 0 & annot.merge$IKZF2 > 0)] <- "CD4.IL22" |
|
|
461 |
|
|
|
462 |
# Add in a Th17 phenotype |
|
|
463 |
annot.merge$Sub.Annotation[annot.merge$Sub.Annotation %in% c("Treg", "CD4.CM", "CD4.Tfh", "CD4.EM", "CD4.Naive", "CD4.IL22") & |
|
|
464 |
(annot.merge$CD8A <= 0 & annot.merge$CD8B <= 0) & |
|
|
465 |
annot.merge$RORC > 0 & |
|
|
466 |
(annot.merge$IL17A > 0 | |
|
|
467 |
annot.merge$IL17F > 0 | annot.merge$IL21 > 0)] <- "CD4.Th17" |
|
|
468 |
|
|
|
469 |
# Add in a Th1 phenotype |
|
|
470 |
annot.merge$Sub.Annotation[annot.merge$Sub.Annotation %in% c("Treg", "CD4.CM", "CD4.EM", "CD4.Naive", "CD4.IL22") & |
|
|
471 |
(annot.merge$CD8A <= 0 & annot.merge$CD8B <= 0) & |
|
|
472 |
(annot.merge$IL2 > 0 | annot.merge$TNF > 0 | |
|
|
473 |
annot.merge$LTA) & |
|
|
474 |
annot.merge$TBX21 > 0] <- "CD4.Th1" |
|
|
475 |
|
|
|
476 |
# Add in a Th2 phenotype |
|
|
477 |
annot.merge$Sub.Annotation[annot.merge$Sub.Annotation %in% c("Treg", "CD4.CM", "CD4.EM", "CD4.Naive", "CD4.IL22") & |
|
|
478 |
(annot.merge$CD8A <= 0 & annot.merge$CD8B <= 0) & |
|
|
479 |
annot.merge$GATA3 > 0 & |
|
|
480 |
(annot.merge$IL4 > 0 | annot.merge$IL5 > 0)] <- "CD4.Th2" |
|
|
481 |
|
|
|
482 |
# merge the Tfh annotation |
|
|
483 |
annot.merge$Sub.Annotation[annot.merge$Sub.Annotation %in% c("PD1-.Tfh-like", "PD1+.Tfh-like", "Tfh-like")] <- "CD4.Tfh" |
|
|
484 |
|
|
|
485 |
# Add in the proliferating/activated/exhausted annotation |
|
|
486 |
active.tcells <- read.csv("~/Dropbox/COVID19/Data/prolif_CD4CD8.csv", |
|
|
487 |
header=FALSE, stringsAsFactors=FALSE) |
|
|
488 |
annot.merge$Sub.Annotation[annot.merge$CellID %in% active.tcells$V1[active.tcells$V2 %in% c("prolif_CD4")]] <- "CD4.Prolif" |
|
|
489 |
annot.merge$Sub.Annotation[annot.merge$CellID %in% active.tcells$V1[active.tcells$V2 %in% c("prolif_CD8")]] <- "CD8.Prolif" |
|
|
490 |
|
|
|
491 |
# CD4 prolif |
|
|
492 |
annot.merge$Sub.Annotation[annot.merge$Sub.Annotation %in% c("Proliferating") & |
|
|
493 |
(annot.merge$CD8A <= 0 & annot.merge$CD8B <= 0)] <- "CD4.Prolif" |
|
|
494 |
|
|
|
495 |
annot.merge$Sub.Annotation[annot.merge$Sub.Annotation %in% c("CD4.Prolif") & |
|
|
496 |
!(annot.merge$TYMS > 0 | annot.merge$MKI67 > 0)] <- "CD4.CM" |
|
|
497 |
|
|
|
498 |
|
|
|
499 |
# CD8A |
|
|
500 |
annot.merge$Sub.Annotation[annot.merge$Sub.Annotation %in% c("Proliferating") & |
|
|
501 |
(annot.merge$CD8A > 0 | annot.merge$CD8B > 0)] <- "CD8.Prolif" |
|
|
502 |
|
|
|
503 |
annot.merge$Sub.Annotation[annot.merge$Sub.Annotation %in% c("CD8.Prolif") & |
|
|
504 |
!(annot.merge$TYMS > 0 | annot.merge$MKI67 > 0)] <- "CD8.TE" |
|
|
505 |
|
|
|
506 |
# table(annot.merge$Sub.Annotation, annot.merge$full_clustering) |
|
|
507 |
table(annot.merge$Sub.Annotation, annot.merge$Site) |
|
|
508 |
``` |
|
|
509 |
|
|
|
510 |
Compare to MNN-label transfers. |
|
|
511 |
|
|
|
512 |
```{r, warning=FALSE, message=FALSE} |
|
|
513 |
# only compare the MNN and new labels. |
|
|
514 |
mnn.labels <- read.table("~/Dropbox/COVID19/Data/Updated/Tcell_MNN_labels.tsv", |
|
|
515 |
sep="\t", header=TRUE, stringsAsFactors=FALSE) |
|
|
516 |
mnn.compare <- merge(annot.merge, mnn.labels, by='CellID') |
|
|
517 |
|
|
|
518 |
table(mnn.compare$Sub.Annotation.x, mnn.compare$Sub.Annotation.y) |
|
|
519 |
``` |
|
|
520 |
|
|
|
521 |
|
|
|
522 |
|
|
|
523 |
```{r} |
|
|
524 |
# write out the annotations, without the doublets, NK cells and ILCs |
|
|
525 |
write.table(annot.merge[!annot.merge$Sub.Annotation %in% c("NK", "Prolif.NK", "ILCs", "B_naive", "CD14_mono", |
|
|
526 |
"DC_prolif", "DC3", "HSC", "ILC1_3", "ILC2", "Mono_prolif", |
|
|
527 |
"NK_16hi", "NK_56hi", "NK_prolif", "pDC", "Plasmablast", |
|
|
528 |
"Platelets", "RBC", |
|
|
529 |
"Doublets:Bcell", "Doublets:Platelet", "Doublets"), |
|
|
530 |
c("CellID", "Sub.Annotation")], |
|
|
531 |
file="~/Dropbox/COVID19/Data/Updated/Tcell_annotations_ext.tsv", |
|
|
532 |
sep="\t", quote=FALSE, row.names=FALSE) |
|
|
533 |
|
|
|
534 |
write.table(annot.merge[!annot.merge$Sub.Annotation %in% c("NK", "Prolif.NK", "ILCs", "B_naive", "CD14_mono", |
|
|
535 |
"DC_prolif", "DC3", "HSC", "ILC1_3", "ILC2", "Mono_prolif", |
|
|
536 |
"NK_16hi", "NK_56hi", "NK_prolif", "pDC", "Plasmablast", |
|
|
537 |
"Platelets", "RBC", |
|
|
538 |
"Doublets:Bcell", "Doublets:Platelet", "Doublets"), |
|
|
539 |
c("CellID")], |
|
|
540 |
file="~/Dropbox/COVID19/Data/Updated/Tcell_barcodes.tsv", |
|
|
541 |
sep="\t", quote=FALSE, row.names=FALSE, col.names=FALSE) |
|
|
542 |
|
|
|
543 |
``` |
|
|
544 |
|
|
|
545 |
Plot the proportions of T cells, normalised properly. |
|
|
546 |
|
|
|
547 |
```{r} |
|
|
548 |
cell.freq.tab <- table(annot.merge$sample_id, |
|
|
549 |
annot.merge$Sub.Annotation) |
|
|
550 |
|
|
|
551 |
cell.freq.df <- as.data.frame(sapply(rownames(cell.freq.tab), FUN=function(X) cell.freq.tab[X, ]/n.cell.vecc[X])) |
|
|
552 |
cell.freq.df$CellType <- rownames(cell.freq.df) |
|
|
553 |
cell.freq.df <- melt(cell.freq.df, id.vars=c("CellType")) |
|
|
554 |
cell.freq.df$CellType <- as.character(cell.freq.df$CellType) |
|
|
555 |
colnames(cell.freq.df) <- c("CellType", "sample_id", "Freq") |
|
|
556 |
# cell.freq.df$Sex <- "Female" |
|
|
557 |
# cell.freq.df$Sex[cell.freq.df$sample_id %in% unique(annot.merge$sample_id[annot.merge$Sex %in% c("M", "Male")])] <- "Male" |
|
|
558 |
cell.freq.df$sample_id <- as.character(cell.freq.df$sample_id) |
|
|
559 |
|
|
|
560 |
covid.meta <- read.csv("~/Dropbox/COVID19/Data/basic_COVID19_PBMC_metadata_160212.csv", |
|
|
561 |
header=TRUE, stringsAsFactors=FALSE) |
|
|
562 |
old.meta <- read.csv("~/Dropbox/COVID19/Data/Metadata FINAL 10122020.csv", |
|
|
563 |
header=TRUE, stringsAsFactors=FALSE) |
|
|
564 |
old.meta$sample_id[old.meta$sample_id %in% c("BGCV13_CV0201")] <- "BGCV06_CV0201" |
|
|
565 |
old.meta$sample_id[old.meta$sample_id %in% c("BGCV06_CV0326")] <- "BGCV13_CV0326" |
|
|
566 |
|
|
|
567 |
covid.meta$AgeRange <- covid.meta$Age |
|
|
568 |
covid.meta$Status_on_day_collection_summary[covid.meta$Status_on_day_collection_summary %in% c("LPS_10hours")] <- "LPS" |
|
|
569 |
covid.meta$Status_on_day_collection_summary[covid.meta$Status_on_day_collection_summary %in% c("LPS_90mins")] <- "LPS" |
|
|
570 |
|
|
|
571 |
covid.meta <- merge(covid.meta[, !colnames(covid.meta) %in% c("Age")], old.meta[, c("sample_id", "patient_id", "Age")], |
|
|
572 |
by='sample_id') |
|
|
573 |
|
|
|
574 |
cell.freq.merge <- merge(cell.freq.df, covid.meta, by=c('sample_id')) |
|
|
575 |
cell.freq.merge <- cell.freq.merge[!cell.freq.merge$Status_on_day_collection_summary %in% c("Non_covid"), ] |
|
|
576 |
cell.freq.merge <- cell.freq.merge[!cell.freq.merge$patient_id %in% c("CV0198"), ] |
|
|
577 |
# remove doublets, etc |
|
|
578 |
cell.freq.merge <- cell.freq.merge[!cell.freq.merge$CellType %in% c("Doublets:Bcell", "Doublets:Platelet", "Doublets", "NK", "ILCs"), ] |
|
|
579 |
``` |
|
|
580 |
|
|
|
581 |
|
|
|
582 |
```{r, warning=FALSE, message=FALSE, fig.height=2.95, fig.width=9.15} |
|
|
583 |
group.cols <- c("#2ca02c", "#1f77b4", "#9467bd", "#fed976", "#fd8d3c", "#e31a1c", "#800026", "#252525") |
|
|
584 |
names(group.cols) <- c("Healthy", "LPS", "Non_covid", "Asymptomatic", "Mild", "Moderate", "Severe", "Critical") |
|
|
585 |
|
|
|
586 |
cell.freq.merge$Status_on_day_collection_summary <- factor(cell.freq.merge$Status_on_day_collection_summary, |
|
|
587 |
levels=c("Healthy", "Asymptomatic", "Mild", |
|
|
588 |
"Moderate", "Severe", "Critical", "LPS", "Non_covid")) |
|
|
589 |
``` |
|
|
590 |
|
|
|
591 |
|
|
|
592 |
```{r} |
|
|
593 |
write.table(cell.freq.merge, |
|
|
594 |
file="~/Dropbox/COVID19/Data/Updated/Tcell_proportions.tsv", |
|
|
595 |
sep="\t", quote=FALSE, row.names=FALSE) |
|
|
596 |
``` |
|
|
597 |
|
|
|
598 |
How do these compare to the previous frequencies? |
|
|
599 |
|
|
|
600 |
```{r, fig.height=9.95, fig.width=10.95} |
|
|
601 |
old.freqs <- read.table("~/Dropbox/COVID19/Data/Tcell_proportions.tsv", |
|
|
602 |
sep="\t", header=TRUE, stringsAsFactors = FALSE) |
|
|
603 |
old.freqs$sample_id[old.freqs$sample_id %in% c("BGCV13_CV0201")] <- "BGCV06_CV0201" |
|
|
604 |
old.freqs$sample_id[old.freqs$sample_id %in% c("BGCV06_CV0326")] <- "BGCV13_CV0326" |
|
|
605 |
|
|
|
606 |
old.freqs <- old.freqs[, c("sample_id", "CellType", "Freq")] |
|
|
607 |
colnames(old.freqs) <- c("sample_id", "CellType", "old.Freq") |
|
|
608 |
|
|
|
609 |
comp.freqs <- merge(cell.freq.merge, old.freqs, by=c('sample_id', 'CellType')) |
|
|
610 |
comp.freqs$Switch <- comp.freqs$sample_id %in% c("BGCV06_CV0201", "BGCV13_CV0326") |
|
|
611 |
|
|
|
612 |
ggplot(comp.freqs, aes(x=Freq, y=old.Freq, colour=Switch)) + |
|
|
613 |
geom_point() + |
|
|
614 |
theme_cowplot() + |
|
|
615 |
scale_colour_manual(values=c("black", "red")) + |
|
|
616 |
facet_wrap(~CellType, scales="free") + |
|
|
617 |
NULL |
|
|
618 |
``` |
|
|
619 |
|
|
|
620 |
|
|
|
621 |
```{r, warning=FALSE, message=FALSE, fig.height=5.95, fig.width=10.15} |
|
|
622 |
ggplot(cell.freq.merge[cell.freq.merge$Collection_Day %in% c("D0") & |
|
|
623 |
!cell.freq.merge$CellType %in% c("NK", "Prolif.NK", "ILCs", "B_naive", "CD14_mono", |
|
|
624 |
"DC_prolif", "DC3", "HSC", "ILC1_3", "ILC2", "Mono_prolif", |
|
|
625 |
"NK_16hi", "NK_56hi", "NK_prolif", "pDC", "Plasmablast", |
|
|
626 |
"Platelets", "RBC", |
|
|
627 |
"Doublets:Bcell", "Doublets:Platelet", "Doublets"), ], |
|
|
628 |
aes(x=Status_on_day_collection_summary, y=Freq, fill=Status_on_day_collection_summary)) + |
|
|
629 |
geom_boxplot() + |
|
|
630 |
theme_cowplot() + |
|
|
631 |
facet_wrap(~CellType, scales="free_y", ncol=4) + |
|
|
632 |
scale_fill_manual(values=group.cols) + |
|
|
633 |
expand_limits(y=c(0)) + |
|
|
634 |
theme(axis.text.x=element_blank(), |
|
|
635 |
strip.background=element_rect(fill='white', colour='white'), |
|
|
636 |
strip.text=element_text(size=14)) + |
|
|
637 |
labs(x="Severity Group", y="Proportion") + |
|
|
638 |
guides(fill=guide_legend(title="Severity")) + |
|
|
639 |
ggsave("~/Dropbox/COVID19/Updated_plots/Tcells_proportions-boxplot.pdf", |
|
|
640 |
height=5.95, width=10.15, useDingbats=FALSE) + |
|
|
641 |
NULL |
|
|
642 |
``` |
|
|
643 |
|
|
|
644 |
For the figures these need to be a filled barchart (?!). Focus in on the Tfh-like cells. |
|
|
645 |
|
|
|
646 |
```{r, warning=FALSE, message=FALSE, fig.height=2.15, fig.width=3.55} |
|
|
647 |
ggplot(cell.freq.merge[cell.freq.merge$Collection_Day %in% c("D0") & |
|
|
648 |
cell.freq.merge$CellType %in% c("CD4.Tfh"), ], |
|
|
649 |
aes(x=Status_on_day_collection_summary, y=Freq, fill=Status_on_day_collection_summary)) + |
|
|
650 |
geom_boxplot() + |
|
|
651 |
theme_cowplot() + |
|
|
652 |
facet_wrap(~CellType, scales="free_y") + |
|
|
653 |
scale_fill_manual(values=group.cols) + |
|
|
654 |
expand_limits(y=c(0)) + |
|
|
655 |
theme(axis.text.x=element_blank(), |
|
|
656 |
strip.background=element_rect(fill='white', colour='white'), |
|
|
657 |
strip.text=element_text(size=14)) + |
|
|
658 |
labs(x="Severity Group", y="Proportion") + |
|
|
659 |
guides(fill=guide_legend(title="Severity")) + |
|
|
660 |
ggsave("~/Dropbox/COVID19/Updated_plots/Tcells_proportions_Tfh-boxplot.pdf", |
|
|
661 |
height=2.15, width=3.55, useDingbats=FALSE) + |
|
|
662 |
NULL |
|
|
663 |
``` |
|
|
664 |
|
|
|
665 |
Run a LM comparing categories. |
|
|
666 |
|
|
|
667 |
```{r, warning=FALSE, message=FALSE, fig.height=2.15, fig.width=3.55} |
|
|
668 |
ggplot(cell.freq.merge[cell.freq.merge$Collection_Day %in% c("D0") & |
|
|
669 |
cell.freq.merge$CellType %in% c("CD4.Tfh"), ], |
|
|
670 |
aes(x=Status_on_day_collection_summary, y=Freq, fill=Sex)) + |
|
|
671 |
geom_boxplot() + |
|
|
672 |
theme_cowplot() + |
|
|
673 |
facet_wrap(~CellType, scales="free_y") + |
|
|
674 |
scale_fill_colorblind() + |
|
|
675 |
expand_limits(y=c(0)) + |
|
|
676 |
theme(axis.text.x=element_blank(), |
|
|
677 |
strip.background=element_rect(fill='white', colour='white'), |
|
|
678 |
strip.text=element_text(size=14)) + |
|
|
679 |
labs(x="Severity Group", y="Proportion") + |
|
|
680 |
guides(fill=guide_legend(title="Severity")) + |
|
|
681 |
ggsave("~/Dropbox/COVID19/Updated_plots/Tcells_proportions_Tfh-byGender_boxplot.pdf", |
|
|
682 |
height=2.15, width=3.55, useDingbats=FALSE) + |
|
|
683 |
NULL |
|
|
684 |
``` |
|
|
685 |
|
|
|
686 |
Compare healthy vs. asymptomatic, and asymptomatic > critical. |
|
|
687 |
|
|
|
688 |
```{r, warning=FALSE, message=FALSE} |
|
|
689 |
healthy.tfh <- cell.freq.merge$Freq[cell.freq.merge$CellType %in% c("CD4.Tfh") & |
|
|
690 |
cell.freq.merge$Status_on_day_collection_summary %in% c("Healthy") & |
|
|
691 |
cell.freq.merge$Collection_Day %in% c("D0")] |
|
|
692 |
asymp.tfh <- cell.freq.merge$Freq[cell.freq.merge$CellType %in% c("CD4.Tfh") & |
|
|
693 |
cell.freq.merge$Status_on_day_collection_summary %in% c("Asymptomatic") & |
|
|
694 |
cell.freq.merge$Collection_Day %in% c("D0")] |
|
|
695 |
|
|
|
696 |
tfh.utest <- wilcox.test(x=healthy.tfh, y=asymp.tfh, alternative="two.sided") |
|
|
697 |
tfh.utest |
|
|
698 |
``` |
|
|
699 |
|
|
|
700 |
I should model the counts, normalized by the total numbers of captured cells. |
|
|
701 |
|
|
|
702 |
```{r} |
|
|
703 |
tfh.lm.list <- list() |
|
|
704 |
covid.tfh.merge <- cell.freq.merge[!cell.freq.merge$Status_on_day_collection_summary %in% c("Healthy"), ] |
|
|
705 |
covid.tfh.merge$OrderedStatus <- ordered(covid.tfh.merge$Status_on_day_collection_summary, |
|
|
706 |
levels=c("Asymptomatic", "Mild", "Moderate", "Severe", "Critical")) |
|
|
707 |
covid.tfh.merge$Days_from_onset <- as.numeric(covid.tfh.merge$Days_from_onset) |
|
|
708 |
|
|
|
709 |
c.types <- c("CD4.Tfh") |
|
|
710 |
|
|
|
711 |
for(i in seq_along(c.types)){ |
|
|
712 |
i.c <- c.types[i] |
|
|
713 |
|
|
|
714 |
i.lm.df <- data.frame("sample_id"=rownames(cell.freq.tab), "Count"=cell.freq.tab[, i.c]) |
|
|
715 |
i.lm.df <- merge(i.lm.df, covid.tfh.merge[covid.tfh.merge$CellType %in% i.c, ], by='sample_id') |
|
|
716 |
|
|
|
717 |
# i.tfh.lm <- glm(Count ~ Sex + Age + Site + OrderedStatus, |
|
|
718 |
# data=i.lm.df, family=poisson(link="log")) |
|
|
719 |
i.tfh.lm <- glm.nb(Count ~ Sex + Age + Site + OrderedStatus, data=i.lm.df, init.theta=1) |
|
|
720 |
|
|
|
721 |
i.lm.res <- summary(i.tfh.lm) |
|
|
722 |
tfh.lm.list[[i.c]] <- data.frame("CellType"=rep(i.c, nrow(i.lm.res$coefficients)), |
|
|
723 |
"Beta"=i.lm.res$coefficients[, 1], |
|
|
724 |
"SE"=i.lm.res$coefficients[, 2], |
|
|
725 |
"STAT"=i.lm.res$coefficients[, 3], |
|
|
726 |
"Pvalue"=i.lm.res$coefficients[, 4], |
|
|
727 |
"Predictor"=rownames(i.lm.res$coefficients)) |
|
|
728 |
} |
|
|
729 |
|
|
|
730 |
tfh.lm.df <- do.call(rbind.data.frame, tfh.lm.list) |
|
|
731 |
tfh.lm.df$Predictor <- gsub(tfh.lm.df$Predictor, pattern="OrderedStatus", replacement="Severity") |
|
|
732 |
tfh.lm.df$Predictor <- gsub(tfh.lm.df$Predictor, pattern="Male", replacement="") |
|
|
733 |
tfh.lm.df$Predictor <- gsub(tfh.lm.df$Predictor, pattern="Site", replacement="") |
|
|
734 |
tfh.lm.df <- tfh.lm.df[!tfh.lm.df$Predictor %in% c("(Intercept)"), ] |
|
|
735 |
tfh.lm.df$P.Adjust <- p.adjust(tfh.lm.df$Pvalue) |
|
|
736 |
``` |
|
|
737 |
|
|
|
738 |
|
|
|
739 |
```{r, warning=FALSE, message=FALSE, fig.height=3.95, fig.width=4.95} |
|
|
740 |
ggplot(tfh.lm.df, aes(x=Predictor, y=Beta, fill=Pvalue < 0.05)) + |
|
|
741 |
geom_hline(yintercept=0, lty=2, col='grey70') + |
|
|
742 |
geom_errorbar(aes(ymin=Beta-SE, ymax=Beta+SE)) + |
|
|
743 |
geom_point(shape=22, size=3) + |
|
|
744 |
theme_cowplot() + |
|
|
745 |
theme(strip.background=element_rect(fill='white', colour='white'), |
|
|
746 |
strip.text=element_text(size=14)) + |
|
|
747 |
coord_flip() + |
|
|
748 |
scale_fill_manual(values=c("blue", "orange")) + |
|
|
749 |
facet_wrap(~CellType, scales="free_x") + |
|
|
750 |
ggsave("~/Dropbox/COVID19/Updated_plots/Tcell_Tfh_props-LM.pdf", |
|
|
751 |
height=3.95, width=4.95, useDingbats=FALSE) |
|
|
752 |
``` |
|
|
753 |
|
|
|
754 |
|
|
|
755 |
```{r, warning=FALSE, message=FALSE} |
|
|
756 |
cellfreq.df <- cell.freq.merge %>% group_by(Status_on_day_collection_summary, CellType) %>% summarise(Mean = mean(Freq)) |
|
|
757 |
cellfreq.df$Status_on_day_collection_summary <- factor(cellfreq.df$Status_on_day_collection_summary, |
|
|
758 |
levels=c("Healthy", "Asymptomatic", "Mild", |
|
|
759 |
"Moderate", "Severe", "Critical", "LPS")) |
|
|
760 |
``` |
|
|
761 |
|
|
|
762 |
Create a new colour palette. |
|
|
763 |
|
|
|
764 |
```{r} |
|
|
765 |
cell.order <- c("CD4.Naive", "CD4.CM", "CD4.EM", "CD4.IL22", "CD4.Prolif", "CD4.Th1", "CD4.Th2", "CD4.Th17", "CD4.Tfh", |
|
|
766 |
"Treg", "CD8.Naive", "CD8.Activated", "CD8.Prolif", "CD8.CM", "CD8.TE", "CD8.EM", "gdT", "MAIT", "NKT") |
|
|
767 |
|
|
|
768 |
all.qual.cols <- brewer.pal.info[brewer.pal.info$category %in% c("qual") & brewer.pal.info$colorblind, ] |
|
|
769 |
col_vector = unlist(mapply(brewer.pal, all.qual.cols$maxcolors, rownames(all.qual.cols))) |
|
|
770 |
|
|
|
771 |
# cell.cols <- col_vector[c(1:7, 9:19)] |
|
|
772 |
cell.cols <- col_vector[c(1:17, 19, 20)] |
|
|
773 |
names(cell.cols) <- cell.order |
|
|
774 |
``` |
|
|
775 |
|
|
|
776 |
|
|
|
777 |
```{r} |
|
|
778 |
pie(rep(1, 18), col=cell.cols) |
|
|
779 |
``` |
|
|
780 |
|
|
|
781 |
|
|
|
782 |
```{r, warning=FALSE, message=FALSE, fig.height=4.25, fig.width=6.95} |
|
|
783 |
# n.cells <- length(cell.order) |
|
|
784 |
# cell.cols <- colorRampPalette(pal_futurama()(12))(n.cells) |
|
|
785 |
# names(cell.cols) <- cell.order |
|
|
786 |
|
|
|
787 |
cellfreq.df$CellType <- factor(cellfreq.df$CellType, |
|
|
788 |
levels=cell.order) |
|
|
789 |
|
|
|
790 |
ggplot(cellfreq.df, |
|
|
791 |
aes(x=Status_on_day_collection_summary, y = Mean, fill=CellType))+ |
|
|
792 |
geom_bar(position="fill", stat="identity", width = 0.9, colour = "black")+ |
|
|
793 |
scale_fill_manual(values = cell.cols) + |
|
|
794 |
theme(aspect.ratio = 1/1.5, |
|
|
795 |
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1, size = 14, colour = "black"), |
|
|
796 |
panel.grid.major = element_blank(), panel.grid.minor = element_blank(), |
|
|
797 |
axis.text.y = element_text(size = 14, colour = "black"), |
|
|
798 |
legend.key.size=unit(0.4, "cm"), |
|
|
799 |
axis.title.x=element_blank(), axis.title.y=element_blank(), |
|
|
800 |
legend.text = element_text(size = 14), legend.title=element_text(size=14), |
|
|
801 |
panel.background = element_blank(), axis.line = element_line(colour = "black")) + |
|
|
802 |
guides(fill=guide_legend(title="Cell type", ncol=1)) + |
|
|
803 |
ggsave("~/Dropbox/COVID19/Updated_plots/Tcell_barchart.pdf", |
|
|
804 |
height=4.25, width=6.95, useDingbats=FALSE) |
|
|
805 |
``` |
|
|
806 |
|
|
|
807 |
I have a misgiving about this, as it masks the distribution of the proportions - maybe a series of filled boxplots, e.g. a waterfall? |
|
|
808 |
|
|
|
809 |
```{r, warning=FALSE, message=FALSE, fig.height=5.25, fig.width=12.95} |
|
|
810 |
cell.freq.merge$CellType <- factor(cell.freq.merge$CellType, |
|
|
811 |
levels=cell.order) |
|
|
812 |
|
|
|
813 |
ggplot(cell.freq.merge, |
|
|
814 |
aes(x=sample_id, y = Freq, fill=CellType))+ |
|
|
815 |
geom_bar(position="fill", stat="identity", width = 0.8, colour = "black")+ |
|
|
816 |
scale_fill_manual(values = cell.cols) + |
|
|
817 |
facet_grid(. ~ Status_on_day_collection_summary, scales="free_x", space="free") + |
|
|
818 |
theme(axis.text.x = element_blank(), |
|
|
819 |
panel.spacing=unit(0, "lines"), |
|
|
820 |
panel.grid.major = element_blank(), panel.grid.minor = element_blank(), |
|
|
821 |
axis.text.y = element_text(size = 18), |
|
|
822 |
axis.title.x=element_blank(), axis.title.y=element_blank(), |
|
|
823 |
axis.ticks.x=element_blank(), |
|
|
824 |
strip.background=element_rect(fill='white', colour='white'), |
|
|
825 |
strip.text=element_text(size=14, angle=90, colour='black'), |
|
|
826 |
legend.text = element_text(size = 18), legend.title=element_text(size=18), |
|
|
827 |
panel.background = element_blank(), axis.line = element_line(colour = "black")) + |
|
|
828 |
# guides(fill=guide_legend(title="Cell type")) + |
|
|
829 |
ggsave("~/Dropbox/COVID19/Updated_plots/Tcell_barchart-full.pdf", |
|
|
830 |
height=5.25, width=8.95, useDingbats=FALSE) + |
|
|
831 |
NULL |
|
|
832 |
``` |
|
|
833 |
|
|
|
834 |
Make dot plots. |
|
|
835 |
|
|
|
836 |
```{r, warning=FALSE, message=FALSE} |
|
|
837 |
clust.model.matrix <- model.matrix(~ 0 + Sub.Annotation, data=annot.merge) |
|
|
838 |
rownames(clust.model.matrix) <- annot.merge$CellID |
|
|
839 |
|
|
|
840 |
gene.goi.by.cluster <- t(clust.model.matrix) %*% as.matrix(annot.merge[, setdiff(colnames(goi.df), colnames(tcell.merge))]) |
|
|
841 |
gene.ave.by.label <- as.data.frame(apply(gene.goi.by.cluster, 2, function(X) X/colSums(clust.model.matrix))) |
|
|
842 |
rownames(gene.ave.by.label) <- gsub(rownames(gene.ave.by.label), pattern="Sub\\.Annotation", replacement="") |
|
|
843 |
|
|
|
844 |
gene.goi.by.cluster.bin <- t(clust.model.matrix) %*% as.matrix(annot.merge[, setdiff(colnames(goi.df), colnames(tcell.merge))] > 0) |
|
|
845 |
gene.bin.by.label <- as.data.frame(apply(gene.goi.by.cluster.bin, 2, function(X) X/sum(X))) |
|
|
846 |
gene.bin.by.label[is.na(gene.bin.by.label)] <- 0 |
|
|
847 |
rownames(gene.bin.by.label) <- gsub(rownames(gene.bin.by.label), pattern="Sub\\.Annotation", replacement="") |
|
|
848 |
``` |
|
|
849 |
|
|
|
850 |
|
|
|
851 |
```{r, message=FALSE, warning=FALSE} |
|
|
852 |
gene.ave.label.df <- as.data.frame(apply(gene.ave.by.label, 2, FUN=function(XP) XP/max(XP))) |
|
|
853 |
gene.ave.label.df$Sub.Annotation <- rownames(gene.ave.label.df) |
|
|
854 |
gene.ave.label.melt <- melt(gene.ave.label.df, id.vars='Sub.Annotation') |
|
|
855 |
colnames(gene.ave.label.melt) <- c("Sub.Annotation", "Gene", "AveExprs") |
|
|
856 |
|
|
|
857 |
gene.bin.label.df <- gene.bin.by.label |
|
|
858 |
gene.bin.label.df$Sub.Annotation <- rownames(gene.bin.label.df) |
|
|
859 |
gene.bin.label.melt <- melt(gene.bin.label.df, id.vars='Sub.Annotation') |
|
|
860 |
colnames(gene.bin.label.melt) <- c("Sub.Annotation", "Gene", "PropExprs") |
|
|
861 |
``` |
|
|
862 |
|
|
|
863 |
|
|
|
864 |
```{r, message=FALSE, warning=FALSE, fig.height=4.65, fig.width=8.95} |
|
|
865 |
dot.plot.df <- merge(gene.ave.label.melt, gene.bin.label.melt, by=c('Sub.Annotation', 'Gene')) |
|
|
866 |
dot.plot.df$Gene <- as.character(dot.plot.df$Gene) |
|
|
867 |
|
|
|
868 |
dot.plot.genes <- c("CD3G", "CD8A", "CD4", "CCR7", "CD27", "CD28", "TOX", "CTLA4", "IFNG", "TBX21", |
|
|
869 |
"PDCD1", "GZMB", "RORC", "IKZF2", "GATA3", "TNFRSF9", "IL7R", "KLRG1", |
|
|
870 |
"TRAV1.2", "TRDV2", "TRGV9", "CD40LG", "RORC", "NCAM1", "CCR6", |
|
|
871 |
"CXCR5", "HLA.DRB1", "LAG3", "FOXP3", "MKI67", "NCR1", "GARP", "FCGR3A") |
|
|
872 |
gene.cluster <- hclust(dist(t(gene.ave.by.label[, intersect(colnames(gene.ave.by.label), dot.plot.genes)])))$order |
|
|
873 |
|
|
|
874 |
# manually set the gene order |
|
|
875 |
gene.order <- c("CD3G", "CCR7", "CD27", "CD4", "CD28", "TNFRSF9", "CCR6", "CD40LG", "MKI67", "TBX21", "GATA3", "RORC", "IKZF2", |
|
|
876 |
"FOXP3", "CTLA4", "PDCD1", "CXCR5", "CD8A", "GZMB", "HLA.DRB1", "IFNG", "LAG3", "TOX", "IL7R", "KLRG1", "TRDV2", |
|
|
877 |
"TRGV9", "TRAV1.2", "NCAM1", "NCR1", "FCGR3A") |
|
|
878 |
|
|
|
879 |
dot.plot.df <- dot.plot.df[dot.plot.df$Gene %in% dot.plot.genes, ] |
|
|
880 |
dot.plot.df$Gene <- factor(dot.plot.df$Gene, |
|
|
881 |
levels=gene.order) |
|
|
882 |
|
|
|
883 |
dot.plot.df <- dot.plot.df[dot.plot.df$Sub.Annotation %in% cell.order, ] |
|
|
884 |
dot.plot.df$Sub.Annotation <- factor(dot.plot.df$Sub.Annotation, |
|
|
885 |
levels=rev(cell.order)) |
|
|
886 |
|
|
|
887 |
ggplot(dot.plot.df, |
|
|
888 |
aes(x=Gene, y=Sub.Annotation, colour=AveExprs, size=PropExprs*100)) + |
|
|
889 |
geom_point() + |
|
|
890 |
theme_cowplot() + |
|
|
891 |
scale_colour_gradient(low='grey70', high='blue') + |
|
|
892 |
theme(axis.text.x=element_text(angle=90, vjust=0.5, hjust=1, colour='black', size=14), |
|
|
893 |
axis.text.y=element_text(colour='black', size=14), |
|
|
894 |
legend.text=element_text(size=14), |
|
|
895 |
legend.title=element_text(size=14), |
|
|
896 |
panel.grid.major = element_blank(), panel.grid.minor = element_blank(), |
|
|
897 |
panel.background = element_blank(), axis.line = element_line(colour = "black")) + |
|
|
898 |
guides(colour=guide_colorbar(title='Mean\nExpression'), |
|
|
899 |
size=guide_legend(title='% Express')) + |
|
|
900 |
labs(x="Gene", y="Cell type") + |
|
|
901 |
ggsave("~/Dropbox/COVID19/Updated_plots/Tcell_ext_dotplot-genes.pdf", |
|
|
902 |
height=4.65, width=8.95, useDingbats=FALSE) |
|
|
903 |
``` |
|
|
904 |
|
|
|
905 |
Plot the cytokine profiles to figure out if Th1/2/17 dominate (or all of them). |
|
|
906 |
|
|
|
907 |
```{r, message=FALSE, warning=FALSE, fig.height=4.15, fig.width=7.0} |
|
|
908 |
cytokine.dot.plot.df <- merge(gene.ave.label.melt, gene.bin.label.melt, by=c('Sub.Annotation', 'Gene')) |
|
|
909 |
cytokine.dot.plot.df$Gene <- as.character(cytokine.dot.plot.df$Gene) |
|
|
910 |
|
|
|
911 |
cytokine.genes <- intersect(c("IL2", "IL1A", "IL1B", "IL10", "IL22", "TNF", "IFNG", "LTA", |
|
|
912 |
"IL4", "IL5", "IL13", "IL17A", "IL17F", "IL21", |
|
|
913 |
"IL12A", "IL12B", "IL25"), colnames(annot.merge)) |
|
|
914 |
|
|
|
915 |
cytokine.dot.plot.df <- cytokine.dot.plot.df[cytokine.dot.plot.df$Gene %in% cytokine.genes, ] |
|
|
916 |
cytokine.dot.plot.df$Gene <- factor(cytokine.dot.plot.df$Gene, |
|
|
917 |
levels=cytokine.genes) |
|
|
918 |
|
|
|
919 |
cytokine.dot.plot.df <- cytokine.dot.plot.df[cytokine.dot.plot.df$Sub.Annotation %in% cell.order, ] |
|
|
920 |
cytokine.dot.plot.df$Sub.Annotation <- factor(cytokine.dot.plot.df$Sub.Annotation, |
|
|
921 |
levels=rev(cell.order)) |
|
|
922 |
|
|
|
923 |
ggplot(cytokine.dot.plot.df, |
|
|
924 |
aes(x=Gene, y=Sub.Annotation, colour=AveExprs, size=PropExprs*100)) + |
|
|
925 |
geom_point() + |
|
|
926 |
theme_cowplot() + |
|
|
927 |
scale_colour_gradient(low='grey70', high='blue') + |
|
|
928 |
theme(axis.text.x=element_text(angle=90, vjust=0.5, hjust=1, colour='black', size=14), |
|
|
929 |
axis.text.y=element_text(colour='black', size=14), |
|
|
930 |
legend.text=element_text(size=14), |
|
|
931 |
legend.title=element_text(size=14), |
|
|
932 |
legend.key.size=unit(0.4, "cm"), |
|
|
933 |
panel.grid.major = element_blank(), panel.grid.minor = element_blank(), |
|
|
934 |
panel.background = element_blank(), axis.line = element_line(colour = "black")) + |
|
|
935 |
guides(colour=guide_colorbar(title='Mean\nExpression'), |
|
|
936 |
size=guide_legend(title='% Express')) + |
|
|
937 |
labs(x="Cytokine", y="Cell type") + |
|
|
938 |
ggsave("~/Dropbox/COVID19/Updated_plots/Tcell_ext_dotplot-cytokines.pdf", |
|
|
939 |
height=4.65, width=7.0, useDingbats=FALSE) + |
|
|
940 |
NULL |
|
|
941 |
``` |
|
|
942 |
|
|
|
943 |
I want to see how these cytokines change over the course of severity. |
|
|
944 |
|
|
|
945 |
```{r, warning=FALSE, message=FALSE} |
|
|
946 |
cytokine.melt <- melt(annot.merge[, c("sample_id", "Status_on_day_collection_summary", "Sub.Annotation", "Collection_Day", "Sex", "Age", "CellID", |
|
|
947 |
cytokine.genes)], |
|
|
948 |
variable.name="Cytokine", |
|
|
949 |
id.vars=c("sample_id", "Status_on_day_collection_summary", "Sub.Annotation", "Collection_Day", "Sex", "Age", "CellID")) |
|
|
950 |
cytokine.means <- cytokine.melt %>% group_by(Status_on_day_collection_summary, Sub.Annotation, Cytokine) %>% summarise("Mean.Exprs"=mean(value)) |
|
|
951 |
cytokine.var <- cytokine.melt %>% group_by(Status_on_day_collection_summary, Sub.Annotation, Cytokine) %>% summarise("Var.Exprs"=var(value)) |
|
|
952 |
cytokine.se <- cytokine.melt %>% group_by(Status_on_day_collection_summary, Sub.Annotation, Cytokine) %>% summarise("SE.Exprs"=sqrt(var(value))/sqrt(sum(value >= 0))) |
|
|
953 |
|
|
|
954 |
cytokine.df <- Reduce(x=list("mean"=cytokine.means, "var"=cytokine.var, "se"=cytokine.se), |
|
|
955 |
f=function(x, y) merge(x, y, by=c('Status_on_day_collection_summary', 'Sub.Annotation', 'Cytokine'))) |
|
|
956 |
# cytokine.df <- merge(cytokine.df, covid.meta, by='sample_id') |
|
|
957 |
|
|
|
958 |
# remove superflous or malignant samples |
|
|
959 |
# cytokine.df <- cytokine.df[!cytokine.df$patient_id %in% c("CV0198"), ] |
|
|
960 |
# cytokine.df <- cytokine.df[!cytokine.df$sample_id %in% c("BGCV01_CV0902"), ] |
|
|
961 |
# cytokine.df <- cytokine.df[!cytokine.df$Status_on_day_collection_summary %in% c("Non_Covid"), ] |
|
|
962 |
|
|
|
963 |
cytokine.df$D0_satus_summary <- factor(cytokine.df$Status_on_day_collection_summary, |
|
|
964 |
levels=c("Healthy", "Asymptomatic", "Mild", |
|
|
965 |
"Moderate", "Severe", "Critical", "LPS")) |
|
|
966 |
``` |
|
|
967 |
|
|
|
968 |
|
|
|
969 |
Maybe visualise this as a heatmap? |
|
|
970 |
|
|
|
971 |
```{r, fig.height=7.15, fig.width=7.95} |
|
|
972 |
cytokine.df <- cytokine.df[!cytokine.df$Sub.Annotation %in% c("NK", "ILCs", "Doublets:Bcell", "Doublets", "Doublets:Platelet") & |
|
|
973 |
!is.na(cytokine.df$D0_satus_summary), ] |
|
|
974 |
|
|
|
975 |
ggplot(cytokine.df[!cytokine.df$Cytokine %in% c("IL13", "IL22", "IL12A", "IL17A", "IL17F", "IL10", "IL6"), ], |
|
|
976 |
aes(x=D0_satus_summary, y=Cytokine, fill=Mean.Exprs)) + |
|
|
977 |
geom_tile() + |
|
|
978 |
facet_wrap(~Sub.Annotation) + |
|
|
979 |
scale_fill_viridis(option="magma") + |
|
|
980 |
theme_cowplot() + |
|
|
981 |
guides(fill=guide_colourbar(title="Mean Expression")) + |
|
|
982 |
labs(y="Cytokine Gene", x="Severity") + |
|
|
983 |
theme(axis.text.x=element_text(angle=90, vjust=0.5, hjust=1), |
|
|
984 |
strip.background=element_rect(fill='white', colour='white')) + |
|
|
985 |
ggsave("~/Dropbox/COVID19/Updated_plots/Tcell_cytokines_heatmap.pdf", |
|
|
986 |
height=7.15, width=7.95, useDingbats=FALSE) + |
|
|
987 |
NULL |
|
|
988 |
``` |
|
|
989 |
|
|
|
990 |
|
|
|
991 |
What about exhaustion markers? |
|
|
992 |
|
|
|
993 |
|
|
|
994 |
```{r, warning=FALSE, message=FALSE} |
|
|
995 |
exhaustion.genes <- c("TOX", "TIGIT", "LAG3", "HAVCR2", "PDCD1", "FOXP3", "MKI67") |
|
|
996 |
exhaustion.melt <- melt(annot.merge[, c("sample_id", "Status_on_day_collection_summary", "Sub.Annotation", "Collection_Day", "Sex", "Age", "CellID", |
|
|
997 |
exhaustion.genes)], |
|
|
998 |
variable.name="Gene", |
|
|
999 |
id.vars=c("sample_id", "Status_on_day_collection_summary", "Sub.Annotation", "Collection_Day", "Sex", "Age", "CellID")) |
|
|
1000 |
exhaustion.means <- exhaustion.melt %>% group_by(Status_on_day_collection_summary, Sub.Annotation, Gene) %>% summarise("Mean.Exprs"=mean(value)) |
|
|
1001 |
exhaustion.var <- exhaustion.melt %>% group_by(Status_on_day_collection_summary, Sub.Annotation, Gene) %>% summarise("Var.Exprs"=var(value)) |
|
|
1002 |
exhaustion.se <- exhaustion.melt %>% group_by(Status_on_day_collection_summary, Sub.Annotation, Gene) %>% summarise("SE.Exprs"=sqrt(var(value))/sqrt(sum(value >= 0))) |
|
|
1003 |
|
|
|
1004 |
exhaustion.df <- Reduce(x=list("mean"=exhaustion.means, "var"=exhaustion.var, "se"=exhaustion.se), |
|
|
1005 |
f=function(x, y) merge(x, y, by=c('Status_on_day_collection_summary', 'Sub.Annotation', 'Gene'))) |
|
|
1006 |
# exhaustion.df <- merge(exhaustion.df, covid.meta, by='sample_id') |
|
|
1007 |
|
|
|
1008 |
# remove superflous or malignant samples |
|
|
1009 |
# exhaustion.df <- exhaustion.df[!exhaustion.df$patient_id %in% c("CV0198"), ] |
|
|
1010 |
# exhaustion.df <- exhaustion.df[!exhaustion.df$sample_id %in% c("BGCV01_CV0902"), ] |
|
|
1011 |
# exhaustion.df <- exhaustion.df[!exhaustion.df$Status_on_day_collection_summary %in% c("Non_Covid"), ] |
|
|
1012 |
|
|
|
1013 |
exhaustion.df$D0_satus_summary <- factor(exhaustion.df$Status_on_day_collection_summary, |
|
|
1014 |
levels=c("Healthy", "Asymptomatic", "Mild", |
|
|
1015 |
"Moderate", "Severe", "Critical", "LPS")) |
|
|
1016 |
``` |
|
|
1017 |
|
|
|
1018 |
|
|
|
1019 |
```{r, fig.height=5.45, fig.width=9.95} |
|
|
1020 |
exhaustion.df <- exhaustion.df[!exhaustion.df$Sub.Annotation %in% c("NK", "Prolif.NK", "ILCs", "B_naive", "CD14_mono", |
|
|
1021 |
"DC_prolif", "DC3", "HSC", "ILC1_3", "ILC2", "Mono_prolif", |
|
|
1022 |
"NK_16hi", "NK_56hi", "NK_prolif", "pDC", "Plasmablast", |
|
|
1023 |
"Platelets", "RBC", |
|
|
1024 |
"Doublets:Bcell", "Doublets:Platelet", "Doublets") & |
|
|
1025 |
!is.na(exhaustion.df$D0_satus_summary), ] |
|
|
1026 |
|
|
|
1027 |
ggplot(exhaustion.df[!exhaustion.df$Gene %in% c("IL13", "IL22", "IL12A", "IL17A", "IL17F"), ], |
|
|
1028 |
aes(x=D0_satus_summary, y=Gene, fill=Mean.Exprs)) + |
|
|
1029 |
geom_tile() + |
|
|
1030 |
facet_wrap(~Sub.Annotation, nrow=3) + |
|
|
1031 |
scale_fill_viridis(option="magma") + |
|
|
1032 |
theme_cowplot() + |
|
|
1033 |
guides(fill=guide_colourbar(title="Mean Expression")) + |
|
|
1034 |
labs(x="Severity", "Gene") + |
|
|
1035 |
theme(axis.text.x=element_text(angle=90, vjust=0.5, hjust=1), |
|
|
1036 |
strip.background=element_rect(fill='white', colour='white')) + |
|
|
1037 |
ggsave("~/Dropbox/COVID19/Updated_plots/Tcell_exhaustion_heatmap.pdf", |
|
|
1038 |
height=5.45, width=9.95, useDingbats=FALSE) + |
|
|
1039 |
NULL |
|
|
1040 |
``` |
|
|
1041 |
|
|
|
1042 |
Do the same for the proteins. |
|
|
1043 |
|
|
|
1044 |
```{r, warning=FALSE, message=FALSE} |
|
|
1045 |
clust.model.matrix <- model.matrix(~ 0 + Sub.Annotation, data=annot.merge) |
|
|
1046 |
rownames(clust.model.matrix) <- annot.merge$CellID |
|
|
1047 |
|
|
|
1048 |
goi.by.cluster <- t(clust.model.matrix) %*% as.matrix(annot.merge[, gsub(setdiff(colnames(prot.df), colnames(tcell.merge)), |
|
|
1049 |
pattern="\\.x", replacement="")]) |
|
|
1050 |
ave.by.label <- as.data.frame(apply(goi.by.cluster, 2, function(X) X/colSums(clust.model.matrix))) |
|
|
1051 |
|
|
|
1052 |
rownames(ave.by.label) <- gsub(rownames(ave.by.label), pattern="Sub\\.Annotation", replacement="") |
|
|
1053 |
colnames(ave.by.label) <- gsub(colnames(ave.by.label), pattern="Prot\\.", replacement="") |
|
|
1054 |
|
|
|
1055 |
goi.by.cluster.bin <- t(clust.model.matrix) %*% as.matrix(annot.merge[, gsub(setdiff(colnames(prot.df), colnames(tcell.merge)), |
|
|
1056 |
pattern="\\.x", replacement="")] > 0.01) |
|
|
1057 |
bin.by.label <- as.data.frame(apply(goi.by.cluster.bin, 2, function(X) X/sum(X))) |
|
|
1058 |
bin.by.label[is.na(bin.by.label)] <- 0 |
|
|
1059 |
rownames(bin.by.label) <- gsub(rownames(bin.by.label), pattern="Sub\\.Annotation", replacement="") |
|
|
1060 |
colnames(bin.by.label) <- gsub(colnames(bin.by.label), pattern="Prot\\.", replacement="") |
|
|
1061 |
``` |
|
|
1062 |
|
|
|
1063 |
|
|
|
1064 |
```{r, message=FALSE, warning=FALSE} |
|
|
1065 |
ave.label.df <- as.data.frame(apply(ave.by.label, 2, FUN=function(XP) (XP-min(XP))/max(XP-min(XP)))) |
|
|
1066 |
ave.label.df$Sub.Annotation <- rownames(ave.label.df) |
|
|
1067 |
ave.label.melt <- melt(ave.label.df, id.vars='Sub.Annotation') |
|
|
1068 |
colnames(ave.label.melt) <- c("Sub.Annotation", "Protein", "AveExprs") |
|
|
1069 |
|
|
|
1070 |
bin.label.df <- bin.by.label |
|
|
1071 |
bin.label.df$Sub.Annotation <- rownames(bin.label.df) |
|
|
1072 |
bin.label.melt <- melt(bin.label.df, id.vars='Sub.Annotation') |
|
|
1073 |
colnames(bin.label.melt) <- c("Sub.Annotation", "Protein", "PropExprs") |
|
|
1074 |
``` |
|
|
1075 |
|
|
|
1076 |
|
|
|
1077 |
```{r, message=FALSE, warning=FALSE, fig.height=5.15, fig.width=8.95} |
|
|
1078 |
protein.dot.plot.df <- merge(ave.label.melt, bin.label.melt, by=c('Sub.Annotation', 'Protein')) |
|
|
1079 |
protein.dot.plot.df$Protein <- as.character(protein.dot.plot.df$Protein) |
|
|
1080 |
|
|
|
1081 |
dot.plot.prots <- c("CD3", "CD8", "CD4", "CCR7", "CD45RA", "CD45RO", "CD28", "CD27", "CTLA4", "CD56", "CD16", |
|
|
1082 |
"CD274", "CXCR3", "CD40LG", "ICOS", "PD1", "CD25", "LAMP1", "CXCR3", "CCR6", "IL7R", |
|
|
1083 |
"CXCR5", "TCR_Va7.2", "TCR_Vg2", "TCR_Vg9", "TCR_Va24-Ja18", "CD62L", "TCR_Va24.Ja18") |
|
|
1084 |
|
|
|
1085 |
protein.order <- c("CD3", "CD4", "CCR7", "CD45RA", "CD45RO", "CD27", "CD28", "CD62L", "CD25", "CTLA4", |
|
|
1086 |
"LAMP1", "CCR6", "IL7R", |
|
|
1087 |
"CXCR5", "CD40LG", "ICOS", "CXCR3", "CD8", "PD1", "CD274", "TCR_Vg9", "TCR_Vg2", |
|
|
1088 |
"TCR_Va7.2", "TCR_Va24.Ja18", "CD56", "CD16") |
|
|
1089 |
|
|
|
1090 |
protein.dot.plot.df <- protein.dot.plot.df[protein.dot.plot.df$Protein %in% dot.plot.prots, ] |
|
|
1091 |
protein.dot.plot.df$Protein <- factor(protein.dot.plot.df$Protein, |
|
|
1092 |
levels=protein.order) |
|
|
1093 |
|
|
|
1094 |
protein.dot.plot.df <- protein.dot.plot.df[protein.dot.plot.df$Sub.Annotation %in% cell.order, ] |
|
|
1095 |
protein.dot.plot.df$Sub.Annotation <- factor(protein.dot.plot.df$Sub.Annotation, |
|
|
1096 |
levels=rev(cell.order)) |
|
|
1097 |
|
|
|
1098 |
ggplot(protein.dot.plot.df, |
|
|
1099 |
aes(x=Protein, y=Sub.Annotation, colour=AveExprs, size=PropExprs*100)) + |
|
|
1100 |
geom_point() + |
|
|
1101 |
theme_cowplot() + |
|
|
1102 |
scale_colour_gradient(low='grey70', high=darken('red')) + |
|
|
1103 |
theme(axis.text.x=element_text(angle=90, vjust=0.5, hjust=1, colour='black', size=14), |
|
|
1104 |
axis.text.y=element_text(colour='black', size=14), |
|
|
1105 |
legend.text=element_text(size=14), |
|
|
1106 |
legend.title=element_text(size=14), |
|
|
1107 |
panel.grid.major = element_blank(), panel.grid.minor = element_blank(), |
|
|
1108 |
panel.background = element_blank(), axis.line = element_line(colour = "black")) + |
|
|
1109 |
guides(colour=guide_colorbar(title='Mean\nExpression'), |
|
|
1110 |
size=guide_legend(title='% Express')) + |
|
|
1111 |
labs(x="Protein", y="Cell type") + |
|
|
1112 |
ggsave("~/Dropbox/COVID19/Updated_plots/Tcell_ext_dotplot-protein.pdf", |
|
|
1113 |
height=5.15, width=8.95, useDingbats=FALSE) |
|
|
1114 |
``` |
|
|
1115 |
|
|
|
1116 |
I also want to see some of the CD8 sub-phenotype mRNA and protein expression, namely _KLRG1_ and _CD217_ (_IL7R_). |
|
|
1117 |
|
|
|
1118 |
```{r, warning=FALSE, message=FALSE, fig.height=5.95, fig.width=11.95} |
|
|
1119 |
repeat.samps <- covid.meta$patient_id[duplicated(covid.meta$patient_id)] |
|
|
1120 |
cell.freq.merge$Day <- ordered(cell.freq.merge$Collection_Day, |
|
|
1121 |
levels=c("D0", "D7", "D9", "D12", "D13", "D28")) |
|
|
1122 |
|
|
|
1123 |
ggplot(cell.freq.merge[cell.freq.merge$patient_id %in% repeat.samps & |
|
|
1124 |
!cell.freq.merge$CellType %in% c("NK", "Prolif.NK", "ILCs", "Doublets:Bcell", "Doublets:Platelet", "Doublets") & |
|
|
1125 |
!cell.freq.merge$Status_on_day_collection_summary %in% c("Non_covid", "LPS"), ], |
|
|
1126 |
aes(x=Day, y=Freq, fill=Status_on_day_collection_summary, group=patient_id)) + |
|
|
1127 |
#geom_boxplot() + |
|
|
1128 |
geom_line(aes(colour=Status_on_day_collection_summary)) + |
|
|
1129 |
geom_point(shape=21, size=3) + |
|
|
1130 |
theme_cowplot() + |
|
|
1131 |
facet_wrap(~CellType, scales="free_y") + |
|
|
1132 |
scale_fill_manual(values=group.cols) + |
|
|
1133 |
scale_colour_manual(values=group.cols) + |
|
|
1134 |
expand_limits(y=c(0)) + |
|
|
1135 |
theme(strip.background=element_rect(fill='white', colour='white'), |
|
|
1136 |
strip.text=element_text(size=14)) + |
|
|
1137 |
labs(x="Day", y="Proportion") + |
|
|
1138 |
guides(fill=guide_legend(title="Severity"), |
|
|
1139 |
colour=FALSE) + |
|
|
1140 |
# ggsave("~/Dropbox/COVID19/Updated_plots/Tcells_proportions-boxplot.pdf", |
|
|
1141 |
# height=5.95, width=11.15, useDingbats=FALSE) + |
|
|
1142 |
NULL |
|
|
1143 |
``` |
|
|
1144 |
|