|
a |
|
b/TcellAnalysis/notebooks/COVID_Tcell_annotations.Rmd |
|
|
1 |
--- |
|
|
2 |
title: "COVID19: T cell annotation" |
|
|
3 |
output: html_notebook |
|
|
4 |
--- |
|
|
5 |
|
|
|
6 |
Annotating T cells subtypes. |
|
|
7 |
|
|
|
8 |
```{r, warning=FALSE, message=FALSE} |
|
|
9 |
library(ggplot2) |
|
|
10 |
library(scran) |
|
|
11 |
library(ggsci) |
|
|
12 |
library(cowplot) |
|
|
13 |
library(scattermore) |
|
|
14 |
library(ComplexHeatmap) |
|
|
15 |
library(RColorBrewer) |
|
|
16 |
library(pheatmap) |
|
|
17 |
library(scater) |
|
|
18 |
library(reshape2) |
|
|
19 |
library(colorspace) |
|
|
20 |
library(dplyr) |
|
|
21 |
library(ggthemes) |
|
|
22 |
library(edgeR) |
|
|
23 |
library(scales) |
|
|
24 |
library(ggrepel) |
|
|
25 |
library(viridis) |
|
|
26 |
library(MASS) |
|
|
27 |
``` |
|
|
28 |
|
|
|
29 |
I've taken all of the full clusters and selected the ones that I think contain T cells: 1, 6, 7, 8, 10 & 13. |
|
|
30 |
|
|
|
31 |
```{r, warning=FALSE, message=FALSE} |
|
|
32 |
tcell.umap <- read.table("~/Dropbox/COVID19/Data/Cambridge_Sanger_Newcastle_UMAPs.tsv", |
|
|
33 |
sep="\t", header=TRUE, stringsAsFactors=FALSE) |
|
|
34 |
# fix the Sanger barcodes |
|
|
35 |
tcell.umap$CellID[tcell.umap$DataSet %in% c("Sanger")] <- gsub(tcell.umap$CellID[tcell.umap$DataSet %in% c("Sanger")], |
|
|
36 |
pattern="^(CV[0-9]+_[A-Z0-9]+-CV[0-9]+_[A-Z0-9]+)_", replacement="") |
|
|
37 |
|
|
|
38 |
tcell.clusters <- read.table("~/Dropbox/COVID19/Data/Cambridge_Sanger_Newcastle_Tcell_clusters.tsv", |
|
|
39 |
sep="\t", header=TRUE, stringsAsFactors=FALSE) |
|
|
40 |
|
|
|
41 |
# tcell.meta <- merge(tcell.umap, tcell.clusters, by='CellID') |
|
|
42 |
``` |
|
|
43 |
|
|
|
44 |
|
|
|
45 |
```{r, warning=FALSE, message=FALSE} |
|
|
46 |
all.meta <- read.table("~/Dropbox/COVID19/Data/COVID19_scMeta-data.tsv", |
|
|
47 |
sep="\t", header=TRUE, stringsAsFactors=FALSE) |
|
|
48 |
rownames(all.meta) <- all.meta$CellID |
|
|
49 |
|
|
|
50 |
# remove BGCV01_CV0209 and CV0198 |
|
|
51 |
all.meta <- all.meta[!all.meta$sample_id %in% c("BGCV01_CV0902"), ] |
|
|
52 |
all.meta <- all.meta[!all.meta$patient_id %in% c("CV0198"), ] |
|
|
53 |
``` |
|
|
54 |
|
|
|
55 |
To normalise the cell type proportions, I need to know how many cells were captured for each patient-sample combo. |
|
|
56 |
|
|
|
57 |
```{r, warning=FALSE, message=FALSE} |
|
|
58 |
n.cell.vecc <- table(all.meta$sample_id) |
|
|
59 |
``` |
|
|
60 |
|
|
|
61 |
|
|
|
62 |
|
|
|
63 |
```{r, warning=FALSE, message=FALSE} |
|
|
64 |
tcell.merge <- merge(tcell.clusters, all.meta, by='CellID') |
|
|
65 |
tcell.merge <- merge(tcell.merge, tcell.umap, by='CellID') |
|
|
66 |
``` |
|
|
67 |
|
|
|
68 |
|
|
|
69 |
```{r} |
|
|
70 |
gene.clust <- read.table("~/Dropbox/COVID19/Data/Cambridge_Sanger_Newcastle-Tcells-cluster_gene-Ave.tsv", |
|
|
71 |
sep="\t", header=TRUE, stringsAsFactors=FALSE) |
|
|
72 |
|
|
|
73 |
protein.clust <- read.table("~/Dropbox/COVID19/Data/Cambridge_Sanger_Newcastle-Tcells-cluster_protein-Ave.tsv", |
|
|
74 |
sep="\t", header=TRUE, stringsAsFactors=FALSE) |
|
|
75 |
``` |
|
|
76 |
|
|
|
77 |
|
|
|
78 |
```{r, warning=FALSE, message=FALSE} |
|
|
79 |
plot.proteins <- unique(c("CD19", "CD86", "CD11b", "CD56", "CD8", "CD4", "CD3", "CD16", "CD14", "CD335", "CD158", "SIGLEC7", |
|
|
80 |
"FCER2", "TCR", "CTLA4", "CXCR5", |
|
|
81 |
"CX3CR1", "CD79b", "CD20", "CD1c", "CD25", "CD45RO", "HLA-DR", "CD21", "CD5", "CD64", "CD127", |
|
|
82 |
"CD34", "CD40", "CD123", "CD27", "CD1c", "TCR_Va7.2", "TCR_Vg2", "CD235ab", "CD10", "CD2", "CD71", |
|
|
83 |
"TCR_Vg9", "TCR_Va24-Ja18", "CCR7", "CD45RA", "CD62L", "CD138", "CD38", "CD274", "CD279", "TIGIT")) |
|
|
84 |
|
|
|
85 |
plot.genes <- unique(c("CD3G", "CD3E", "FCGR3A", "CD8A", "CD8B", "CD4", "CD80", "CD86", "CXCL10", "HLA.DRA", "HLA.DRB1", "VEGFA", |
|
|
86 |
"CCR7", "CCR2", "CCR5", "CXCR3", "CD36", "FOXP3", "MS4A1", "LYZ", "GZMB", "CD68", "GATA3", "PECAM1", "RORC", |
|
|
87 |
"FCGR2A", "FCGR2B", "FCGR3B", "TPO", "HBB", "NCR1", "NCR2", "F8", "HIF1A", "CTLA4", "CD28", "CXCR1", |
|
|
88 |
"PDCD1", "TIM3", "LAG3", "TYMS", "PITX1", "TIGIT", "CXCL13", "BCL6", |
|
|
89 |
"ITGA2B", "ITGAX", "TOX", "FYN", "NCAM1", "LRRC32", "SELL", "CXCR5", "CSF1", "IL7R", "CD40LG", "MYC", "MKI67", |
|
|
90 |
"IFNG", "IL6", "THRB", "CD19", "CD38", "CD27", "F2", "F5", "THRB", "CD44", "IKZF2", "IKZF4")) |
|
|
91 |
``` |
|
|
92 |
|
|
|
93 |
|
|
|
94 |
```{r, warning=FALSE, message=FALSE, fig.height=10.95, fig.width=9.95} |
|
|
95 |
gene.plot.hm <- t(apply(t(gene.clust[, intersect(colnames(gene.clust), plot.genes)]), 1, |
|
|
96 |
FUN=function(XP) XP/max(XP))) |
|
|
97 |
gene.plot.hm[is.na(gene.plot.hm)] <- 0 |
|
|
98 |
colnames(gene.plot.hm) <- gene.clust$GeneID |
|
|
99 |
|
|
|
100 |
pdf("~/Dropbox/COVID19/plot.dir/Gene_heatmap-Tcells.pdf", |
|
|
101 |
height=10.95, width=9.95, useDingbats=FALSE) |
|
|
102 |
Heatmap(gene.plot.hm, name="Z-score") |
|
|
103 |
dev.off() |
|
|
104 |
|
|
|
105 |
Heatmap(gene.plot.hm, name="Z-score") |
|
|
106 |
``` |
|
|
107 |
|
|
|
108 |
I'll ignore the clusters that are clearly not T-cells, like any residual monocytes, B cell and NK cells, etc. |
|
|
109 |
|
|
|
110 |
```{r, warning=FALSE, message=FALSE} |
|
|
111 |
table(tcell.merge$Site, tcell.merge$Sub.Cluster) |
|
|
112 |
``` |
|
|
113 |
|
|
|
114 |
|
|
|
115 |
```{r, warning=FALSE, message=FALSE, fig.height=10.95, fig.width=9.95} |
|
|
116 |
rownames(protein.clust) <- protein.clust$Protein |
|
|
117 |
protein.plot.hm <- protein.clust[intersect(protein.clust$Protein, plot.proteins), |
|
|
118 |
setdiff(colnames(protein.clust), "Protein")] |
|
|
119 |
#protein.plot.hm[protein.plot.hm < 0] <- 0 |
|
|
120 |
protein.plot.hm <- t(apply(protein.plot.hm, 1, FUN=function(XP) (XP - mean(XP))/sd(XP))) |
|
|
121 |
|
|
|
122 |
protein.plot.hm[is.na(protein.plot.hm)] <- 0 |
|
|
123 |
|
|
|
124 |
rownames(protein.plot.hm) <- intersect(protein.clust$Protein, plot.proteins) |
|
|
125 |
|
|
|
126 |
# pdf("~/Dropbox/COVID19/plot.dir/protein_heatmap-celltypes.pdf", |
|
|
127 |
# height=10.95, width=9.95, useDingbats=FALSE) |
|
|
128 |
# Heatmap(protein.plot.hm, name="Z-score") |
|
|
129 |
# dev.off() |
|
|
130 |
|
|
|
131 |
Heatmap(protein.plot.hm, name="Z-score") |
|
|
132 |
``` |
|
|
133 |
|
|
|
134 |
|
|
|
135 |
|
|
|
136 |
```{r} |
|
|
137 |
tcell.merge$Annotation <- "Unknown" |
|
|
138 |
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(11)] <- "PoorQC" |
|
|
139 |
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(14, 19)] <- "NK" |
|
|
140 |
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(8)] <- "Doublets:Bcell" |
|
|
141 |
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(16)] <- "Doublets:Platelet" |
|
|
142 |
|
|
|
143 |
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(17)] <- "ILCs" |
|
|
144 |
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(12)] <- "gdT" |
|
|
145 |
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(23, 24)] <- "CD8.TE" |
|
|
146 |
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(7)] <- "CD8.EM" |
|
|
147 |
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(22)] <- "Tfh-like" |
|
|
148 |
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(3)] <- "Treg" |
|
|
149 |
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(15)] <- "MAIT" |
|
|
150 |
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(5, 6, 9)] <- "CD4.Naive" |
|
|
151 |
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(1, 20)] <- "CD8.Naive" |
|
|
152 |
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(2, 4, 10, 11, 21)] <- "CD4.CM" |
|
|
153 |
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(13, 25)] <- "Exhausted" |
|
|
154 |
tcell.merge$Annotation[tcell.merge$Sub.Cluster %in% c(18)] <- "Proliferating" |
|
|
155 |
|
|
|
156 |
table(tcell.merge$Annotation, tcell.merge$Sub.Cluster) |
|
|
157 |
``` |
|
|
158 |
|
|
|
159 |
```{r, warning=FALSE, message=FALSE} |
|
|
160 |
table(tcell.merge$Annotation, tcell.merge$Site) |
|
|
161 |
``` |
|
|
162 |
|
|
|
163 |
|
|
|
164 |
```{r, fig.height=9.75, fig.width=10.95} |
|
|
165 |
n.cells <- length(unique(tcell.merge$Sub.Cluster)) |
|
|
166 |
cell.cols <- colorRampPalette(pal_futurama()(12))(n.cells) |
|
|
167 |
names(cell.cols) <- unique(tcell.merge$Sub.Cluster) |
|
|
168 |
|
|
|
169 |
tcell.merge$Sub.Cluster <- factor(tcell.merge$Sub.Cluster, |
|
|
170 |
levels=c(1:25)) |
|
|
171 |
|
|
|
172 |
ggplot(tcell.merge, |
|
|
173 |
aes(x=MNN.UMAP1, y=MNN.UMAP2, colour=Sub.Cluster)) + |
|
|
174 |
geom_scattermore() + |
|
|
175 |
theme_cowplot() + |
|
|
176 |
facet_wrap(~Annotation) + |
|
|
177 |
scale_colour_manual(values=cell.cols) + |
|
|
178 |
guides(colour=guide_legend(override.aes = list(size=3))) |
|
|
179 |
``` |
|
|
180 |
|
|
|
181 |
Plot the markers genes for these groups: CD4, CD8, TCR V$\alpha$7.2, TCR V$\gamma$9, TCR V$\gamma$2, CD45RA, CCR7, CD45RO, CD27, CD28, |
|
|
182 |
CTLA4, HLA-DR, FOXP3, CXCR5, GZMB, IFNG, TOX, PDCD1. |
|
|
183 |
|
|
|
184 |
I think these annotations look OK. |
|
|
185 |
|
|
|
186 |
```{r, warning=FALSE, message=FALSE} |
|
|
187 |
# read in the gene expression |
|
|
188 |
goi.df <- read.table("~/Dropbox/COVID19/Data/Cambridge_Sanger_Newcastle-Tcells_GeneGOI.tsv", |
|
|
189 |
sep="\t", header=TRUE, stringsAsFactors=FALSE) |
|
|
190 |
|
|
|
191 |
tcell.goi.merge <- merge(tcell.merge, goi.df, by='CellID') |
|
|
192 |
``` |
|
|
193 |
|
|
|
194 |
|
|
|
195 |
```{r, warning=FALSE, message=FALSE} |
|
|
196 |
# re-classify CD8.TE with high NCAM1 * FCGR3A expression |
|
|
197 |
tcell.goi.merge$Annotation[tcell.goi.merge$NCAM1 > 0 & tcell.goi.merge$Annotation %in% c("CD8.TE") & |
|
|
198 |
tcell.goi.merge$TRAV1.2 > 0] <- "NKT" |
|
|
199 |
tcell.goi.merge$Annotation[tcell.goi.merge$FCGR3A > 0 & tcell.goi.merge$Annotation %in% c("CD8.TE") & |
|
|
200 |
tcell.goi.merge$TRAV1.2 > 0] <- "NKT" |
|
|
201 |
``` |
|
|
202 |
|
|
|
203 |
Remove doublets based on expression of markers for different cell lineages. |
|
|
204 |
|
|
|
205 |
```{r} |
|
|
206 |
# re-classify CD8.TE with high NCR1 * FCGR3A expression |
|
|
207 |
bcell.markers <- tcell.goi.merge$IGHM > 0 | tcell.goi.merge$IGHG3 > 0 | tcell.goi.merge$IGHD > 0 | tcell.goi.merge$IGHG4 > 0 | |
|
|
208 |
tcell.goi.merge$IGHG2 > 0 | tcell.goi.merge$IGHG1 > 0 | tcell.goi.merge$IGHA1 > 0 |
|
|
209 |
|
|
|
210 |
tcell.goi.merge$Annotation[bcell.markers] <- "Doublets:Bcell" |
|
|
211 |
|
|
|
212 |
table(tcell.goi.merge$Annotation, tcell.goi.merge$Site) |
|
|
213 |
``` |
|
|
214 |
|
|
|
215 |
|
|
|
216 |
|
|
|
217 |
```{r, warning=FALSE, message=FALSE} |
|
|
218 |
clust.model.matrix <- model.matrix(~ 0 + Annotation, data=tcell.goi.merge) |
|
|
219 |
rownames(clust.model.matrix) <- tcell.goi.merge$CellID |
|
|
220 |
|
|
|
221 |
gene.goi.by.cluster <- t(clust.model.matrix) %*% as.matrix(tcell.goi.merge[, setdiff(colnames(goi.df), colnames(tcell.merge))]) |
|
|
222 |
gene.ave.by.label <- as.data.frame(apply(gene.goi.by.cluster, 2, function(X) X/colSums(clust.model.matrix))) |
|
|
223 |
rownames(gene.ave.by.label) <- gsub(rownames(gene.ave.by.label), pattern="Annotation", replacement="") |
|
|
224 |
|
|
|
225 |
gene.goi.by.cluster.bin <- t(clust.model.matrix) %*% as.matrix(tcell.goi.merge[, setdiff(colnames(goi.df), colnames(tcell.merge))] > 0) |
|
|
226 |
gene.bin.by.label <- as.data.frame(apply(gene.goi.by.cluster.bin, 2, function(X) X/sum(X))) |
|
|
227 |
gene.bin.by.label[is.na(gene.bin.by.label)] <- 0 |
|
|
228 |
rownames(gene.bin.by.label) <- gsub(rownames(gene.bin.by.label), pattern="Annotation", replacement="") |
|
|
229 |
``` |
|
|
230 |
|
|
|
231 |
|
|
|
232 |
```{r, message=FALSE, warning=FALSE} |
|
|
233 |
gene.ave.label.df <- as.data.frame(apply(gene.ave.by.label, 2, FUN=function(XP) XP/max(XP))) |
|
|
234 |
gene.ave.label.df$Annotation <- rownames(gene.ave.label.df) |
|
|
235 |
gene.ave.label.melt <- melt(gene.ave.label.df, id.vars='Annotation') |
|
|
236 |
colnames(gene.ave.label.melt) <- c("Annotation", "Gene", "AveExprs") |
|
|
237 |
|
|
|
238 |
gene.bin.label.df <- gene.bin.by.label |
|
|
239 |
gene.bin.label.df$Annotation <- rownames(gene.bin.label.df) |
|
|
240 |
gene.bin.label.melt <- melt(gene.bin.label.df, id.vars='Annotation') |
|
|
241 |
colnames(gene.bin.label.melt) <- c("Annotation", "Gene", "PropExprs") |
|
|
242 |
|
|
|
243 |
dot.plot.df <- merge(gene.ave.label.melt, gene.bin.label.melt, by=c('Annotation', 'Gene')) |
|
|
244 |
dot.plot.df$Gene <- as.character(dot.plot.df$Gene) |
|
|
245 |
``` |
|
|
246 |
|
|
|
247 |
|
|
|
248 |
```{r, message=FALSE, warning=FALSE, fig.height=5.15, fig.width=8.25} |
|
|
249 |
dot.plot.genes <- c("CD3E", "CD8A", "CD8B", "CD4", "CCR7", "CD27", "CD28", "TOX", "CTLA4", "IFNG", "PDCD1", "GZMB", "IFNG", |
|
|
250 |
"TRAV1.2", "TRDV2", "TRGV9", "CD40LG", "RORC", "CD38", "NCAM1", |
|
|
251 |
"CXCR5", "HLA.DRB1", "LAG3", "CD44", "FOXP3", "MKI67", "NCR1", "GARP", "GZMH", "CXCR4", "FCGR3A", "TCF7") |
|
|
252 |
|
|
|
253 |
ggplot(dot.plot.df[dot.plot.df$Gene %in% dot.plot.genes, ], |
|
|
254 |
aes(x=Gene, y=Annotation, colour=AveExprs, size=PropExprs*100)) + |
|
|
255 |
geom_point() + |
|
|
256 |
theme_cowplot() + |
|
|
257 |
scale_colour_gradient(low='grey70', high='blue') + |
|
|
258 |
theme(axis.text.x=element_text(angle=90, vjust=0.5, hjust=1, colour='black'), |
|
|
259 |
axis.text.y=element_text(colour='black'), |
|
|
260 |
panel.grid.major = element_blank(), panel.grid.minor = element_blank(), |
|
|
261 |
panel.background = element_blank(), axis.line = element_line(colour = "black")) + |
|
|
262 |
guides(colour=guide_colorbar(title='Mean Expression'), |
|
|
263 |
size=guide_legend(title='% Express')) + |
|
|
264 |
ggsave("~/Dropbox/COVID19/plot.dir/Tcell_dotplot-genes.pdf", |
|
|
265 |
height=5.15, width=8.25, useDingbats=FALSE) |
|
|
266 |
``` |
|
|
267 |
|
|
|
268 |
Do the same for the proteins. |
|
|
269 |
|
|
|
270 |
```{r, warning=FALSE, message=FALSE} |
|
|
271 |
# read in the gene expression |
|
|
272 |
prot.df <- read.table("~/Dropbox/COVID19/Data/Cambridge_Sanger_Newcastle-Tcells_ProteinGOI.tsv", |
|
|
273 |
sep="\t", header=TRUE, stringsAsFactors=FALSE) |
|
|
274 |
colnames(prot.df) <- c(paste0("Prot.", colnames(prot.df)[!colnames(prot.df) %in% c("CellID")]), "CellID") |
|
|
275 |
|
|
|
276 |
prot.df$Site <- "Ncl" |
|
|
277 |
prot.df$Site[grepl(prot.df$CellID, pattern="BGCV")] <- "Cambridge" |
|
|
278 |
prot.df$Site[grepl(prot.df$CellID, pattern="^S[0-9]+")] <- "Sanger" |
|
|
279 |
tcell.prot.merge <- merge(tcell.goi.merge, prot.df, by=c('CellID', 'Site')) |
|
|
280 |
``` |
|
|
281 |
|
|
|
282 |
|
|
|
283 |
```{r, warning=FALSE, message=FALSE} |
|
|
284 |
clust.model.matrix <- model.matrix(~ 0 + Annotation, data=tcell.prot.merge) |
|
|
285 |
rownames(clust.model.matrix) <- tcell.prot.merge$CellID |
|
|
286 |
|
|
|
287 |
goi.by.cluster <- t(clust.model.matrix) %*% as.matrix(tcell.prot.merge[, gsub(setdiff(colnames(prot.df), colnames(tcell.merge)), |
|
|
288 |
pattern="\\.x", replacement="")]) |
|
|
289 |
ave.by.label <- as.data.frame(apply(goi.by.cluster, 2, function(X) X/colSums(clust.model.matrix))) |
|
|
290 |
rownames(ave.by.label) <- gsub(rownames(ave.by.label), pattern="Annotation", replacement="") |
|
|
291 |
colnames(ave.by.label) <- gsub(colnames(ave.by.label), pattern="Prot\\.", replacement="") |
|
|
292 |
|
|
|
293 |
goi.by.cluster.bin <- t(clust.model.matrix) %*% as.matrix(tcell.prot.merge[, gsub(setdiff(colnames(prot.df), colnames(tcell.merge)), |
|
|
294 |
pattern="\\.x", replacement="")] > 0) |
|
|
295 |
bin.by.label <- as.data.frame(apply(goi.by.cluster.bin, 2, function(X) X/sum(X))) |
|
|
296 |
bin.by.label[is.na(bin.by.label)] <- 0 |
|
|
297 |
rownames(bin.by.label) <- gsub(rownames(bin.by.label), pattern="Annotation", replacement="") |
|
|
298 |
colnames(bin.by.label) <- gsub(colnames(bin.by.label), pattern="Prot\\.", replacement="") |
|
|
299 |
``` |
|
|
300 |
|
|
|
301 |
|
|
|
302 |
```{r, message=FALSE, warning=FALSE} |
|
|
303 |
ave.label.df <- as.data.frame(apply(ave.by.label, 2, FUN=function(XP) (XP-min(XP))/max(XP-min(XP)))) |
|
|
304 |
ave.label.df$Annotation <- rownames(ave.label.df) |
|
|
305 |
ave.label.melt <- melt(ave.label.df, id.vars='Annotation') |
|
|
306 |
colnames(ave.label.melt) <- c("Annotation", "Protein", "AveExprs") |
|
|
307 |
|
|
|
308 |
bin.label.df <- bin.by.label |
|
|
309 |
bin.label.df$Annotation <- rownames(bin.label.df) |
|
|
310 |
bin.label.melt <- melt(bin.label.df, id.vars='Annotation') |
|
|
311 |
colnames(bin.label.melt) <- c("Annotation", "Protein", "PropExprs") |
|
|
312 |
|
|
|
313 |
protein.dot.plot.df <- merge(ave.label.melt, bin.label.melt, by=c('Annotation', 'Protein')) |
|
|
314 |
protein.dot.plot.df$Protein <- as.character(protein.dot.plot.df$Protein) |
|
|
315 |
``` |
|
|
316 |
|
|
|
317 |
|
|
|
318 |
```{r, message=FALSE, warning=FALSE, fig.height=5.15, fig.width=8.25} |
|
|
319 |
dot.plot.prots <- c("CD3", "CD8", "CD4", "CCR7", "CD45RA", "CD45RO", "CD28", "CD27", "CTLA4", "CD56", "CD16", "ICOSL", |
|
|
320 |
"CD274", "CXCR3", "CD40LG", "CD38", "ICOS", "PD1", "HAVCR2", |
|
|
321 |
"CXCR5", "TCR_Va7.2", "TCR_Vg2", "TCR_Vg9", "TCR_Va24-Ja18", "CD62L", "CD44", "TCR_Va24.Ja18") |
|
|
322 |
|
|
|
323 |
ggplot(protein.dot.plot.df[protein.dot.plot.df$Protein %in% dot.plot.prots, ], |
|
|
324 |
aes(x=Protein, y=Annotation, colour=AveExprs, size=PropExprs*100)) + |
|
|
325 |
geom_point() + |
|
|
326 |
theme_cowplot() + |
|
|
327 |
scale_colour_gradient(low='grey70', high=darken('red')) + |
|
|
328 |
theme(axis.text.x=element_text(angle=90, vjust=0.5, hjust=1, colour='black'), |
|
|
329 |
axis.text.y=element_text(colour='black'), |
|
|
330 |
panel.grid.major = element_blank(), panel.grid.minor = element_blank(), |
|
|
331 |
panel.background = element_blank(), axis.line = element_line(colour = "black")) + |
|
|
332 |
guides(colour=guide_colorbar(title='Mean Expression'), |
|
|
333 |
size=guide_legend(title='% Express')) + |
|
|
334 |
ggsave("~/Dropbox/COVID19/plot.dir/Tcell_dotplot-protein.pdf", |
|
|
335 |
height=5.15, width=8.25, useDingbats=FALSE) |
|
|
336 |
``` |
|
|
337 |
|
|
|
338 |
|
|
|
339 |
Write out the exhausted, proliferating and Tfh-like clusters for sub-clustering. |
|
|
340 |
|
|
|
341 |
```{r} |
|
|
342 |
write.table(tcell.prot.merge$CellID[tcell.prot.merge$Annotation %in% c("Proliferating")], |
|
|
343 |
file="~/Dropbox/COVID19/Data/Proliferating_barcodes.tsv", |
|
|
344 |
quote=FALSE, col.names=FALSE, row.names=FALSE, sep="\t") |
|
|
345 |
|
|
|
346 |
write.table(tcell.prot.merge$CellID[tcell.prot.merge$Annotation %in% c("Tfh-like")], |
|
|
347 |
file="~/Dropbox/COVID19/Data/Tfh_barcodes.tsv", |
|
|
348 |
quote=FALSE, col.names=FALSE, row.names=FALSE, sep="\t") |
|
|
349 |
|
|
|
350 |
write.table(tcell.prot.merge$CellID[tcell.prot.merge$Annotation %in% c("Exhausted")], |
|
|
351 |
file="~/Dropbox/COVID19/Data/Exhausted_barcodes.tsv", |
|
|
352 |
quote=FALSE, col.names=FALSE, row.names=FALSE, sep="\t") |
|
|
353 |
``` |
|
|
354 |
|
|
|
355 |
### Tfh sub-anntotating |
|
|
356 |
|
|
|
357 |
Read in sub-clusters and annotate. |
|
|
358 |
|
|
|
359 |
```{r, warning=FALSE, message=FALSE} |
|
|
360 |
tfh.subcluster <- read.table("~/Dropbox/COVID19/Data/Cambridge_Sanger_Newcastle_Tfh_clusters.tsv", |
|
|
361 |
sep="\t", header=TRUE, stringsAsFactors=FALSE) |
|
|
362 |
tfh.subcluster.merge <- merge(tfh.subcluster, tcell.prot.merge[, !colnames(tcell.prot.merge) %in% c("Sub.Cluster")], by='CellID') |
|
|
363 |
table(tfh.subcluster.merge$Sub.Cluster) |
|
|
364 |
``` |
|
|
365 |
|
|
|
366 |
Plot average expression. |
|
|
367 |
|
|
|
368 |
```{r, warning=FALSE, message=FALSE} |
|
|
369 |
tfh.subcluster.merge$Sub.Cluster <- as.character(tfh.subcluster.merge$Sub.Cluster) |
|
|
370 |
clust.model.matrix <- model.matrix(~ 0 + Sub.Cluster, data=tfh.subcluster.merge) |
|
|
371 |
rownames(clust.model.matrix) <- tfh.subcluster.merge$CellID |
|
|
372 |
|
|
|
373 |
goi.by.cluster <- t(clust.model.matrix) %*% as.matrix(tfh.subcluster.merge[, gsub(setdiff(colnames(goi.df), colnames(tcell.merge)), |
|
|
374 |
pattern="\\.x", replacement="")]) |
|
|
375 |
ave.by.label <- as.data.frame(apply(goi.by.cluster, 2, function(X) X/colSums(clust.model.matrix))) |
|
|
376 |
rownames(ave.by.label) <- gsub(rownames(ave.by.label), pattern="Sub\\.Cluster", replacement="") |
|
|
377 |
colnames(ave.by.label) <- gsub(colnames(ave.by.label), pattern="Prot\\.", replacement="") |
|
|
378 |
|
|
|
379 |
goi.by.cluster.bin <- t(clust.model.matrix) %*% as.matrix(tfh.subcluster.merge[, gsub(setdiff(colnames(goi.df), colnames(tcell.merge)), |
|
|
380 |
pattern="\\.x", replacement="")] > 0) |
|
|
381 |
bin.by.label <- as.data.frame(apply(goi.by.cluster.bin, 2, function(X) X/sum(X))) |
|
|
382 |
bin.by.label[is.na(bin.by.label)] <- 0 |
|
|
383 |
rownames(bin.by.label) <- gsub(rownames(bin.by.label), pattern="Sub\\.Cluster", replacement="") |
|
|
384 |
colnames(bin.by.label) <- gsub(colnames(bin.by.label), pattern="Prot\\.", replacement="") |
|
|
385 |
``` |
|
|
386 |
|
|
|
387 |
|
|
|
388 |
```{r, warning=FALSE, message=FALSE, fig.height=10.95, fig.width=9.95} |
|
|
389 |
dot.plot.genes <- c("CD3E", "CD8A", "CD8B", "CD4", "CCR7", "CD27", "CD28", "TOX", "CTLA4", "IFNG", "PDCD1", "GZMB", "IFNG", |
|
|
390 |
"TRAV1.2", "TRDV2", "TRGV9", "CD40LG", "RORC", "CD38", "IGHA1", "IGHM", "IGHD", "IGHG1", "ICOS", "IKZF4", |
|
|
391 |
"IKZF2", "CXCR3", "CD19", "MS4A1", "TYMS", "SELL", "IGHE", "CD19", "MS4A1", "HBB", "PECAM1", "TPO", "THRB", |
|
|
392 |
"CXCR5", "HLA.DRB1", "LAG3", "CD44", "FOXP3", "MKI67", "NCR1", "GARP", "GZMH", "CXCR4", "FCGR3A", "TCF7") |
|
|
393 |
gene.plot.hm <- t(apply(t(ave.by.label[, intersect(colnames(ave.by.label), dot.plot.genes)]), 1, |
|
|
394 |
FUN=function(XP) XP/max(XP))) |
|
|
395 |
gene.plot.hm[is.na(gene.plot.hm)] <- 0 |
|
|
396 |
#colnames(gene.plot.hm) <- gene.clust$GeneID |
|
|
397 |
|
|
|
398 |
Heatmap(gene.plot.hm, name="Z-score") |
|
|
399 |
``` |
|
|
400 |
|
|
|
401 |
|
|
|
402 |
|
|
|
403 |
```{r, warning=FALSE, message=FALSE} |
|
|
404 |
clust.model.matrix <- model.matrix(~ 0 + Sub.Cluster, data=tfh.subcluster.merge) |
|
|
405 |
rownames(clust.model.matrix) <- tfh.subcluster.merge$CellID |
|
|
406 |
|
|
|
407 |
goi.by.cluster <- t(clust.model.matrix) %*% as.matrix(tfh.subcluster.merge[, gsub(setdiff(colnames(prot.df), colnames(tcell.merge)), |
|
|
408 |
pattern="\\.x", replacement="")]) |
|
|
409 |
ave.by.label <- as.data.frame(apply(goi.by.cluster, 2, function(X) X/colSums(clust.model.matrix))) |
|
|
410 |
rownames(ave.by.label) <- gsub(rownames(ave.by.label), pattern="Sub\\.Cluster", replacement="") |
|
|
411 |
colnames(ave.by.label) <- gsub(colnames(ave.by.label), pattern="Prot\\.", replacement="") |
|
|
412 |
|
|
|
413 |
goi.by.cluster.bin <- t(clust.model.matrix) %*% as.matrix(tfh.subcluster.merge[, gsub(setdiff(colnames(prot.df), colnames(tcell.merge)), |
|
|
414 |
pattern="\\.x", replacement="")] > 0) |
|
|
415 |
bin.by.label <- as.data.frame(apply(goi.by.cluster.bin, 2, function(X) X/sum(X))) |
|
|
416 |
bin.by.label[is.na(bin.by.label)] <- 0 |
|
|
417 |
rownames(bin.by.label) <- gsub(rownames(bin.by.label), pattern="Sub\\.Cluster", replacement="") |
|
|
418 |
colnames(bin.by.label) <- gsub(colnames(bin.by.label), pattern="Prot\\.", replacement="") |
|
|
419 |
``` |
|
|
420 |
|
|
|
421 |
|
|
|
422 |
```{r, warning=FALSE, message=FALSE, fig.height=10.95, fig.width=9.95} |
|
|
423 |
dot.plot.prots <- c("CD3", "CD8", "CD4", "CCR7", "CD45RA", "CD45RO", "CD28", "CD27", "CTLA4", "CD56", "CD16", "ICOSL", |
|
|
424 |
"CD274", "CXCR3", "CD40LG", "CD38", "ICOS", "PD1", "CD19", "CD20", |
|
|
425 |
"CXCR5", "TCR_Va7.2", "TCR_Vg2", "TCR_Vg9", "TCR_Va24-Ja18", "CD62L", "CD44", "TCR_Va24.Ja18") |
|
|
426 |
prot.plot.hm <- t(apply(ave.by.label[, intersect(colnames(ave.by.label), dot.plot.prots)], 2, |
|
|
427 |
FUN=function(XP) (XP - min(XP))/max((XP - min(XP))))) |
|
|
428 |
prot.plot.hm[is.na(prot.plot.hm)] <- 0 |
|
|
429 |
#colnames(gene.plot.hm) <- gene.clust$GeneID |
|
|
430 |
|
|
|
431 |
Heatmap(prot.plot.hm, name="Z-score") |
|
|
432 |
``` |
|
|
433 |
|
|
|
434 |
|
|
|
435 |
```{r} |
|
|
436 |
# not Tfh |
|
|
437 |
tfh.subcluster.merge$Sub.Annotation <- "Tfh-like" |
|
|
438 |
tfh.subcluster.merge$Sub.Annotation[tfh.subcluster.merge$Sub.Cluster %in% c(9)] <- "Treg" |
|
|
439 |
tfh.subcluster.merge$Sub.Annotation[tfh.subcluster.merge$Sub.Cluster %in% c(14)] <- "NKT" |
|
|
440 |
tfh.subcluster.merge$Sub.Annotation[tfh.subcluster.merge$Sub.Cluster %in% c(1)] <- "gdT" |
|
|
441 |
tfh.subcluster.merge$Sub.Annotation[tfh.subcluster.merge$Sub.Cluster %in% c(2, 3, 16)] <- "PD1-.Tfh-like" |
|
|
442 |
tfh.subcluster.merge$Sub.Annotation[tfh.subcluster.merge$Sub.Cluster %in% c(11, 15)] <- "PD1+.Tfh-like" |
|
|
443 |
tfh.subcluster.merge$Sub.Annotation[tfh.subcluster.merge$Sub.Cluster %in% c(6)] <- "CD8.EM" |
|
|
444 |
tfh.subcluster.merge$Sub.Annotation[tfh.subcluster.merge$Sub.Cluster %in% c(7)] <- "CD4.EM" |
|
|
445 |
tfh.subcluster.merge$Sub.Annotation[tfh.subcluster.merge$Sub.Cluster %in% c(8)] <- "CD4.CM" |
|
|
446 |
tfh.subcluster.merge$Sub.Annotation[tfh.subcluster.merge$Sub.Cluster %in% c(13)] <- "NK" |
|
|
447 |
|
|
|
448 |
|
|
|
449 |
|
|
|
450 |
tfh.subcluster.merge$Sub.Annotation[tfh.subcluster.merge$Sub.Cluster %in% c(4, 5, 10, 12)] <- "Doublets" |
|
|
451 |
rownames(tfh.subcluster.merge) <- tfh.subcluster.merge$CellID |
|
|
452 |
table(tfh.subcluster.merge$Sub.Annotation, tfh.subcluster.merge$Sub.Cluster) |
|
|
453 |
``` |
|
|
454 |
|
|
|
455 |
|
|
|
456 |
### Proliferating sub-anntotating |
|
|
457 |
|
|
|
458 |
Read in sub-clusters and annotate. |
|
|
459 |
|
|
|
460 |
```{r, warning=FALSE, message=FALSE} |
|
|
461 |
prolif.subcluster <- read.table("~/Dropbox/COVID19/Data/Cambridge_Sanger_Newcastle_ProlifTcell_clusters.tsv", |
|
|
462 |
sep="\t", header=TRUE, stringsAsFactors=FALSE) |
|
|
463 |
prolif.subcluster.merge <- merge(prolif.subcluster, tcell.prot.merge[, !colnames(tcell.prot.merge) %in% c("Sub.Cluster")], by='CellID') |
|
|
464 |
table(prolif.subcluster.merge$Sub.Cluster) |
|
|
465 |
``` |
|
|
466 |
|
|
|
467 |
Plot average expression. |
|
|
468 |
|
|
|
469 |
```{r, warning=FALSE, message=FALSE} |
|
|
470 |
prolif.subcluster.merge$Sub.Cluster <- as.character(prolif.subcluster.merge$Sub.Cluster) |
|
|
471 |
clust.model.matrix <- model.matrix(~ 0 + Sub.Cluster, data=prolif.subcluster.merge) |
|
|
472 |
rownames(clust.model.matrix) <- prolif.subcluster.merge$CellID |
|
|
473 |
|
|
|
474 |
goi.by.cluster <- t(clust.model.matrix) %*% as.matrix(prolif.subcluster.merge[, gsub(setdiff(colnames(goi.df), colnames(tcell.merge)), |
|
|
475 |
pattern="\\.x", replacement="")]) |
|
|
476 |
ave.by.label <- as.data.frame(apply(goi.by.cluster, 2, function(X) X/colSums(clust.model.matrix))) |
|
|
477 |
rownames(ave.by.label) <- gsub(rownames(ave.by.label), pattern="Sub\\.Cluster", replacement="") |
|
|
478 |
colnames(ave.by.label) <- gsub(colnames(ave.by.label), pattern="Prot\\.", replacement="") |
|
|
479 |
|
|
|
480 |
goi.by.cluster.bin <- t(clust.model.matrix) %*% as.matrix(prolif.subcluster.merge[, gsub(setdiff(colnames(goi.df), colnames(tcell.merge)), |
|
|
481 |
pattern="\\.x", replacement="")] > 0) |
|
|
482 |
bin.by.label <- as.data.frame(apply(goi.by.cluster.bin, 2, function(X) X/sum(X))) |
|
|
483 |
bin.by.label[is.na(bin.by.label)] <- 0 |
|
|
484 |
rownames(bin.by.label) <- gsub(rownames(bin.by.label), pattern="Sub\\.Cluster", replacement="") |
|
|
485 |
colnames(bin.by.label) <- gsub(colnames(bin.by.label), pattern="Prot\\.", replacement="") |
|
|
486 |
``` |
|
|
487 |
|
|
|
488 |
|
|
|
489 |
```{r, warning=FALSE, message=FALSE, fig.height=10.95, fig.width=9.95} |
|
|
490 |
dot.plot.genes <- c("CD3E", "CD8A", "CD8B", "CD4", "CCR7", "CD27", "CD28", "TOX", "CTLA4", "IFNG", "PDCD1", "GZMB", "IFNG", |
|
|
491 |
"TRAV1.2", "TRDV2", "TRGV9", "CD40LG", "RORC", "CD38", "IGHA1", "IGHM", "IGHD", "IGHG1", "ICOS", "IKZF4", |
|
|
492 |
"IKZF2", "CXCR3", "CD19", "MS4A1", "TYMS", "SELL", "IGHE", "CD19", "MS4A1", "HBB", "PECAM1", "TPO", "THRB", |
|
|
493 |
"CXCR5", "HLA.DRB1", "LAG3", "CD44", "FOXP3", "MKI67", "NCR1", "GARP", "GZMH", "CXCR4", "FCGR3A", "TCF7") |
|
|
494 |
gene.plot.hm <- t(apply(t(ave.by.label[, intersect(colnames(ave.by.label), dot.plot.genes)]), 1, |
|
|
495 |
FUN=function(XP) XP/max(XP))) |
|
|
496 |
gene.plot.hm[is.na(gene.plot.hm)] <- 0 |
|
|
497 |
#colnames(gene.plot.hm) <- gene.clust$GeneID |
|
|
498 |
|
|
|
499 |
Heatmap(gene.plot.hm, name="Z-score") |
|
|
500 |
``` |
|
|
501 |
|
|
|
502 |
|
|
|
503 |
|
|
|
504 |
```{r, warning=FALSE, message=FALSE} |
|
|
505 |
clust.model.matrix <- model.matrix(~ 0 + Sub.Cluster, data=prolif.subcluster.merge) |
|
|
506 |
rownames(clust.model.matrix) <- prolif.subcluster.merge$CellID |
|
|
507 |
|
|
|
508 |
goi.by.cluster <- t(clust.model.matrix) %*% as.matrix(prolif.subcluster.merge[, gsub(setdiff(colnames(prot.df), colnames(tcell.merge)), |
|
|
509 |
pattern="\\.x", replacement="")]) |
|
|
510 |
ave.by.label <- as.data.frame(apply(goi.by.cluster, 2, function(X) X/colSums(clust.model.matrix))) |
|
|
511 |
rownames(ave.by.label) <- gsub(rownames(ave.by.label), pattern="Sub\\.Cluster", replacement="") |
|
|
512 |
colnames(ave.by.label) <- gsub(colnames(ave.by.label), pattern="Prot\\.", replacement="") |
|
|
513 |
|
|
|
514 |
goi.by.cluster.bin <- t(clust.model.matrix) %*% as.matrix(prolif.subcluster.merge[, gsub(setdiff(colnames(prot.df), colnames(tcell.merge)), |
|
|
515 |
pattern="\\.x", replacement="")] > 0) |
|
|
516 |
bin.by.label <- as.data.frame(apply(goi.by.cluster.bin, 2, function(X) X/sum(X))) |
|
|
517 |
bin.by.label[is.na(bin.by.label)] <- 0 |
|
|
518 |
rownames(bin.by.label) <- gsub(rownames(bin.by.label), pattern="Sub\\.Cluster", replacement="") |
|
|
519 |
colnames(bin.by.label) <- gsub(colnames(bin.by.label), pattern="Prot\\.", replacement="") |
|
|
520 |
``` |
|
|
521 |
|
|
|
522 |
|
|
|
523 |
```{r, warning=FALSE, message=FALSE, fig.height=10.95, fig.width=9.95} |
|
|
524 |
dot.plot.prots <- c("CD3", "CD8", "CD4", "CCR7", "CD45RA", "CD45RO", "CD28", "CD27", "CTLA4", "CD56", "CD16", "ICOSL", |
|
|
525 |
"CD274", "CXCR3", "CD40LG", "CD38", "ICOS", "PD1", "CD19", "CD20", |
|
|
526 |
"CXCR5", "TCR_Va7.2", "TCR_Vg2", "TCR_Vg9", "TCR_Va24-Ja18", "CD62L", "CD44", "TCR_Va24.Ja18") |
|
|
527 |
prot.plot.hm <- t(apply(ave.by.label[, intersect(colnames(ave.by.label), dot.plot.prots)], 2, |
|
|
528 |
FUN=function(XP) (XP - min(XP))/max((XP - min(XP))))) |
|
|
529 |
prot.plot.hm[is.na(prot.plot.hm)] <- 0 |
|
|
530 |
#colnames(gene.plot.hm) <- gene.clust$GeneID |
|
|
531 |
|
|
|
532 |
Heatmap(prot.plot.hm, name="Z-score") |
|
|
533 |
``` |
|
|
534 |
|
|
|
535 |
|
|
|
536 |
```{r, warning=FALSE, message=FALSE} |
|
|
537 |
# not Tfh |
|
|
538 |
prolif.subcluster.merge$Sub.Annotation <- "Proliferating" |
|
|
539 |
prolif.subcluster.merge$Sub.Annotation[prolif.subcluster.merge$Sub.Cluster %in% c(2, 12)] <- "Doublets:Bcell" |
|
|
540 |
prolif.subcluster.merge$Sub.Annotation[prolif.subcluster.merge$Sub.Cluster %in% c(1)] <- "PD1-.Tfh-like" |
|
|
541 |
prolif.subcluster.merge$Sub.Annotation[prolif.subcluster.merge$Sub.Cluster %in% c(4, 10)] <- "CD4.Naive" |
|
|
542 |
prolif.subcluster.merge$Sub.Annotation[prolif.subcluster.merge$Sub.Cluster %in% c(5, 9)] <- "NK" |
|
|
543 |
prolif.subcluster.merge$Sub.Annotation[prolif.subcluster.merge$Sub.Cluster %in% c(6)] <- "gdT" |
|
|
544 |
prolif.subcluster.merge$Sub.Annotation[prolif.subcluster.merge$Sub.Cluster %in% c(7)] <- "MAIT" |
|
|
545 |
prolif.subcluster.merge$Sub.Annotation[prolif.subcluster.merge$Sub.Cluster %in% c(11)] <- "CD4.CM" |
|
|
546 |
prolif.subcluster.merge$Sub.Annotation[prolif.subcluster.merge$Sub.Cluster %in% c(3, 13)] <- "CD4.EM" |
|
|
547 |
prolif.subcluster.merge$Sub.Annotation[prolif.subcluster.merge$Sub.Cluster %in% c(8)] <- "CD8.EM" |
|
|
548 |
|
|
|
549 |
|
|
|
550 |
rownames(prolif.subcluster.merge) <- prolif.subcluster.merge$CellID |
|
|
551 |
table(prolif.subcluster.merge$Sub.Annotation, prolif.subcluster.merge$Sub.Cluster) |
|
|
552 |
``` |
|
|
553 |
|
|
|
554 |
### Exhausted sub-anntotating |
|
|
555 |
|
|
|
556 |
Read in sub-clusters and annotate. |
|
|
557 |
|
|
|
558 |
```{r, warning=FALSE, message=FALSE} |
|
|
559 |
exhaust.subcluster <- read.table("~/Dropbox/COVID19/Data/Cambridge_Sanger_Newcastle_Exhausted_clusters.tsv", |
|
|
560 |
sep="\t", header=TRUE, stringsAsFactors=FALSE) |
|
|
561 |
exhaust.subcluster.merge <- merge(exhaust.subcluster, tcell.prot.merge[, !colnames(tcell.prot.merge) %in% c("Sub.Cluster")], by='CellID') |
|
|
562 |
table(exhaust.subcluster.merge$Sub.Cluster) |
|
|
563 |
``` |
|
|
564 |
|
|
|
565 |
Plot average expression. |
|
|
566 |
|
|
|
567 |
```{r, warning=FALSE, message=FALSE} |
|
|
568 |
exhaust.subcluster.merge$Sub.Cluster <- as.character(exhaust.subcluster.merge$Sub.Cluster) |
|
|
569 |
clust.model.matrix <- model.matrix(~ 0 + Sub.Cluster, data=exhaust.subcluster.merge) |
|
|
570 |
rownames(clust.model.matrix) <- exhaust.subcluster.merge$CellID |
|
|
571 |
|
|
|
572 |
goi.by.cluster <- t(clust.model.matrix) %*% as.matrix(exhaust.subcluster.merge[, gsub(setdiff(colnames(goi.df), colnames(tcell.merge)), |
|
|
573 |
pattern="\\.x", replacement="")]) |
|
|
574 |
ave.by.label <- as.data.frame(apply(goi.by.cluster, 2, function(X) X/colSums(clust.model.matrix))) |
|
|
575 |
rownames(ave.by.label) <- gsub(rownames(ave.by.label), pattern="Sub\\.Cluster", replacement="") |
|
|
576 |
colnames(ave.by.label) <- gsub(colnames(ave.by.label), pattern="Prot\\.", replacement="") |
|
|
577 |
|
|
|
578 |
goi.by.cluster.bin <- t(clust.model.matrix) %*% as.matrix(exhaust.subcluster.merge[, gsub(setdiff(colnames(goi.df), colnames(tcell.merge)), |
|
|
579 |
pattern="\\.x", replacement="")] > 0) |
|
|
580 |
bin.by.label <- as.data.frame(apply(goi.by.cluster.bin, 2, function(X) X/sum(X))) |
|
|
581 |
bin.by.label[is.na(bin.by.label)] <- 0 |
|
|
582 |
rownames(bin.by.label) <- gsub(rownames(bin.by.label), pattern="Sub\\.Cluster", replacement="") |
|
|
583 |
colnames(bin.by.label) <- gsub(colnames(bin.by.label), pattern="Prot\\.", replacement="") |
|
|
584 |
``` |
|
|
585 |
|
|
|
586 |
|
|
|
587 |
```{r, warning=FALSE, message=FALSE, fig.height=10.95, fig.width=9.95} |
|
|
588 |
dot.plot.genes <- c("CD3E", "CD3G", "CD8A", "CD8B", "CD4", "CCR7", "CD27", "CD28", "TOX", "CTLA4", "IFNG", "PDCD1", "GZMB", "IFNG", "TIGIT", |
|
|
589 |
"TRAV1.2", "TRDV2", "TRGV9", "CD40LG", "RORC", "CD38", "IGHA1", "IGHM", "IGHD", "IGHG1", "ICOS", "IKZF4", "GARP", |
|
|
590 |
"IKZF2", "CXCR3", "CD19", "MS4A1", "TYMS", "SELL", "CD11b", "ITGAX", "ITGAM", "FAS", "PECAM1", "HBB", "IGHE", |
|
|
591 |
"CXCR5", "HLA.DRB1", "LAG3", "CD44", "FOXP3", "MKI67", "NCR1", "GARP", "GZMH", "CXCR4", "FCGR3A", "TCF7", "IL17A") |
|
|
592 |
gene.plot.hm <- t(apply(t(ave.by.label[, intersect(colnames(ave.by.label), dot.plot.genes)]), 1, |
|
|
593 |
FUN=function(XP) XP/max(XP))) |
|
|
594 |
gene.plot.hm[is.na(gene.plot.hm)] <- 0 |
|
|
595 |
#colnames(gene.plot.hm) <- gene.clust$GeneID |
|
|
596 |
|
|
|
597 |
Heatmap(gene.plot.hm, name="Z-score") |
|
|
598 |
``` |
|
|
599 |
|
|
|
600 |
|
|
|
601 |
|
|
|
602 |
```{r, warning=FALSE, message=FALSE} |
|
|
603 |
clust.model.matrix <- model.matrix(~ 0 + Sub.Cluster, data=exhaust.subcluster.merge) |
|
|
604 |
rownames(clust.model.matrix) <- exhaust.subcluster.merge$CellID |
|
|
605 |
|
|
|
606 |
goi.by.cluster <- t(clust.model.matrix) %*% as.matrix(exhaust.subcluster.merge[, gsub(setdiff(colnames(prot.df), colnames(tcell.merge)), |
|
|
607 |
pattern="\\.x", replacement="")]) |
|
|
608 |
ave.by.label <- as.data.frame(apply(goi.by.cluster, 2, function(X) X/colSums(clust.model.matrix))) |
|
|
609 |
rownames(ave.by.label) <- gsub(rownames(ave.by.label), pattern="Sub\\.Cluster", replacement="") |
|
|
610 |
colnames(ave.by.label) <- gsub(colnames(ave.by.label), pattern="Prot\\.", replacement="") |
|
|
611 |
|
|
|
612 |
goi.by.cluster.bin <- t(clust.model.matrix) %*% as.matrix(exhaust.subcluster.merge[, gsub(setdiff(colnames(prot.df), colnames(tcell.merge)), |
|
|
613 |
pattern="\\.x", replacement="")] > 0) |
|
|
614 |
bin.by.label <- as.data.frame(apply(goi.by.cluster.bin, 2, function(X) X/sum(X))) |
|
|
615 |
bin.by.label[is.na(bin.by.label)] <- 0 |
|
|
616 |
rownames(bin.by.label) <- gsub(rownames(bin.by.label), pattern="Sub\\.Cluster", replacement="") |
|
|
617 |
colnames(bin.by.label) <- gsub(colnames(bin.by.label), pattern="Prot\\.", replacement="") |
|
|
618 |
``` |
|
|
619 |
|
|
|
620 |
|
|
|
621 |
```{r, warning=FALSE, message=FALSE, fig.height=10.95, fig.width=9.95} |
|
|
622 |
dot.plot.prots <- c("CD3", "CD8", "CD4", "CCR7", "CD45RA", "CD45RO", "CD28", "CD27", "CTLA4", "CD56", "CD16", "ICOSL", |
|
|
623 |
"CD274", "CXCR3", "CD40LG", "CD38", "ICOS", "PD1", "CD19", "CD20", "CD11b", "CD14", "FAS", |
|
|
624 |
"CXCR5", "TCR_Va7.2", "TCR_Vg2", "TCR_Vg9", "TCR_Va24-Ja18", "CD62L", "CD44", "TCR_Va24.Ja18") |
|
|
625 |
prot.plot.hm <- t(apply(ave.by.label[, intersect(colnames(ave.by.label), dot.plot.prots)], 2, |
|
|
626 |
FUN=function(XP) (XP - min(XP))/max((XP - min(XP))))) |
|
|
627 |
prot.plot.hm[is.na(prot.plot.hm)] <- 0 |
|
|
628 |
#colnames(gene.plot.hm) <- gene.clust$GeneID |
|
|
629 |
|
|
|
630 |
Heatmap(prot.plot.hm, name="Z-score") |
|
|
631 |
``` |
|
|
632 |
|
|
|
633 |
|
|
|
634 |
```{r, warning=FALSE, message=FALSE} |
|
|
635 |
# not Tfh |
|
|
636 |
exhaust.subcluster.merge$Sub.Annotation <- "Exhausted" |
|
|
637 |
exhaust.subcluster.merge$Sub.Annotation[exhaust.subcluster.merge$Sub.Cluster %in% c(2, 4, 9, 10)] <- "Doublets:Bcell" |
|
|
638 |
exhaust.subcluster.merge$Sub.Annotation[exhaust.subcluster.merge$Sub.Cluster %in% c(14)] <- "Doublets" |
|
|
639 |
exhaust.subcluster.merge$Sub.Annotation[exhaust.subcluster.merge$Sub.Cluster %in% c(15, 17, 20)] <- "NKT" |
|
|
640 |
exhaust.subcluster.merge$Sub.Annotation[exhaust.subcluster.merge$Sub.Cluster %in% c(3, 4, 7, 11, 13, 27)] <- "CD4.Naive" |
|
|
641 |
exhaust.subcluster.merge$Sub.Annotation[exhaust.subcluster.merge$Sub.Cluster %in% c(5, 16, 18, 23)] <- "PD1-.Tfh-like" |
|
|
642 |
exhaust.subcluster.merge$Sub.Annotation[exhaust.subcluster.merge$Sub.Cluster %in% c(1)] <- "PD1+.Tfh-like" |
|
|
643 |
exhaust.subcluster.merge$Sub.Annotation[exhaust.subcluster.merge$Sub.Cluster %in% c(6)] <- "CD4.CM" |
|
|
644 |
exhaust.subcluster.merge$Sub.Annotation[exhaust.subcluster.merge$Sub.Cluster %in% c(25)] <- "CD4.EM" |
|
|
645 |
exhaust.subcluster.merge$Sub.Annotation[exhaust.subcluster.merge$Sub.Cluster %in% c(8)] <- "Treg" |
|
|
646 |
exhaust.subcluster.merge$Sub.Annotation[exhaust.subcluster.merge$Sub.Cluster %in% c(12, 19, 21, 22, 24)] <- "gdT" |
|
|
647 |
exhaust.subcluster.merge$Sub.Annotation[exhaust.subcluster.merge$Sub.Cluster %in% c(26)] <- "NK" |
|
|
648 |
|
|
|
649 |
|
|
|
650 |
rownames(exhaust.subcluster.merge) <- exhaust.subcluster.merge$CellID |
|
|
651 |
table(exhaust.subcluster.merge$Sub.Annotation, exhaust.subcluster.merge$Sub.Cluster) |
|
|
652 |
``` |
|
|
653 |
|
|
|
654 |
Fill in the sub-annotations. |
|
|
655 |
|
|
|
656 |
```{r, warning=FALSE, message=FALSE} |
|
|
657 |
rownames(tcell.prot.merge) <- tcell.prot.merge$CellID |
|
|
658 |
tcell.prot.merge$Sub.Annotation <- tcell.prot.merge$Annotation |
|
|
659 |
tcell.prot.merge[rownames(tfh.subcluster.merge), ]$Sub.Annotation <- tfh.subcluster.merge$Sub.Annotation |
|
|
660 |
tcell.prot.merge[rownames(prolif.subcluster.merge), ]$Sub.Annotation <- prolif.subcluster.merge$Sub.Annotation |
|
|
661 |
tcell.prot.merge[rownames(exhaust.subcluster.merge), ]$Sub.Annotation <- exhaust.subcluster.merge$Sub.Annotation |
|
|
662 |
|
|
|
663 |
# If we use the mRNA, there are A LOT of DP & DN T cells. |
|
|
664 |
|
|
|
665 |
# re-classify CD4s within CD8 clusters |
|
|
666 |
tcell.prot.merge$Sub.Annotation[(tcell.prot.merge$CD8A > 0 | tcell.prot.merge$CD8B > 0) & |
|
|
667 |
tcell.prot.merge$CD4 <= 0 & |
|
|
668 |
tcell.prot.merge$CXCR5 <= 0 & |
|
|
669 |
tcell.prot.merge$Sub.Annotation %in% c("PD1-.Tfh-like", "PD1+.Tfh-like")] <- "CD8.TE" |
|
|
670 |
|
|
|
671 |
# final doublet classification |
|
|
672 |
tcell.prot.merge$Sub.Annotation[tcell.prot.merge$IGHA1 > 0 | tcell.prot.merge$IGHD > 0 | tcell.prot.merge$IGHE > 0 | tcell.prot.merge$IGHG1 > 0 | |
|
|
673 |
tcell.prot.merge$IGHG2 > 0 | tcell.prot.merge$IGHG3 > 0 | tcell.prot.merge$IGHG4 > 0] <- "Doublets:Bcell" |
|
|
674 |
|
|
|
675 |
# final NKT classification |
|
|
676 |
tcell.prot.merge$Sub.Annotation[tcell.prot.merge$FCGR3A > 0 & tcell.prot.merge$NCAM1 > 0 & |
|
|
677 |
tcell.prot.merge$Sub.Annotation %in% c("CD8.EM", "CD8.TE", "CD8.Naive")] <- "NKT" |
|
|
678 |
tcell.prot.merge$Sub.Annotation[tcell.prot.merge$FCGR3A > 0 & tcell.prot.merge$NCR1 > 0 & |
|
|
679 |
tcell.prot.merge$Sub.Annotation %in% c("CD8.EM", "CD8.TE", "CD8.Naive")] <- "NKT" |
|
|
680 |
|
|
|
681 |
# fix the Treg annotations |
|
|
682 |
tcell.prot.merge$Sub.Annotation[tcell.prot.merge$Sub.Annotation %in% c("Treg") & |
|
|
683 |
!(tcell.prot.merge$FOXP3 > 0 & tcell.prot.merge$IKZF2 > 0)] <- "CD4.IL22" |
|
|
684 |
|
|
|
685 |
# Add in a Th17 phenotype |
|
|
686 |
tcell.prot.merge$Sub.Annotation[tcell.prot.merge$Sub.Annotation %in% c("Treg", "CD4.CM", "CD4.EM", "CD4.Naive", "CD4.IL22") & |
|
|
687 |
(tcell.prot.merge$CD8A <= 0 & tcell.prot.merge$CD8B <= 0) & |
|
|
688 |
tcell.prot.merge$RORC > 0 & |
|
|
689 |
(tcell.prot.merge$IL17A > 0 | |
|
|
690 |
tcell.prot.merge$IL17F > 0 | tcell.prot.merge$IL21 > 0)] <- "CD4.Th17" |
|
|
691 |
|
|
|
692 |
# Add in a Th1 phenotype |
|
|
693 |
tcell.prot.merge$Sub.Annotation[tcell.prot.merge$Sub.Annotation %in% c("Treg", "CD4.CM", "CD4.EM", "CD4.Naive", "CD4.IL22") & |
|
|
694 |
(tcell.prot.merge$CD8A <= 0 & tcell.prot.merge$CD8B <= 0) & |
|
|
695 |
(tcell.prot.merge$IL2 > 0 | tcell.prot.merge$TNF > 0 | |
|
|
696 |
tcell.prot.merge$LTA) & |
|
|
697 |
tcell.prot.merge$TBX21 > 0] <- "CD4.Th1" |
|
|
698 |
|
|
|
699 |
# Add in a Th2 phenotype |
|
|
700 |
tcell.prot.merge$Sub.Annotation[tcell.prot.merge$Sub.Annotation %in% c("Treg", "CD4.CM", "CD4.EM", "CD4.Naive", "CD4.IL22") & |
|
|
701 |
(tcell.prot.merge$CD8A <= 0 & tcell.prot.merge$CD8B <= 0) & |
|
|
702 |
tcell.prot.merge$GATA3 > 0 & |
|
|
703 |
(tcell.prot.merge$IL4 > 0 | tcell.prot.merge$IL5 > 0)] <- "CD4.Th2" |
|
|
704 |
|
|
|
705 |
# merge the Tfh annotation |
|
|
706 |
tcell.prot.merge$Sub.Annotation[tcell.prot.merge$Sub.Annotation %in% c("PD1-.Tfh-like", "PD1+.Tfh-like")] <- "CD4.Tfh" |
|
|
707 |
|
|
|
708 |
# Add in the proliferating/activated/exhausted annotation |
|
|
709 |
active.tcells <- read.csv("~/Dropbox/COVID19/Data/prolif_CD4CD8.csv", |
|
|
710 |
header=FALSE, stringsAsFactors=FALSE) |
|
|
711 |
tcell.prot.merge$Sub.Annotation[tcell.prot.merge$CellID %in% active.tcells$V1[active.tcells$V2 %in% c("prolif_CD4")]] <- "CD4.Prolif" |
|
|
712 |
tcell.prot.merge$Sub.Annotation[tcell.prot.merge$CellID %in% active.tcells$V1[active.tcells$V2 %in% c("prolif_CD8")]] <- "CD8.Prolif" |
|
|
713 |
|
|
|
714 |
table(tcell.prot.merge$Sub.Annotation, tcell.prot.merge$Site) |
|
|
715 |
``` |
|
|
716 |
|
|
|
717 |
|
|
|
718 |
|
|
|
719 |
```{r, warning=FALSE, message=FALSE} |
|
|
720 |
library(BiocNeighbors) |
|
|
721 |
tcell.pcs <- read.table("~/Dropbox/COVID19/Data/Tcells_MNN.txt", sep="\t", header=TRUE, stringsAsFactors=FALSE) |
|
|
722 |
tcell.pcs <- tcell.pcs[tcell.pcs$CellID %in% tcell.prot.merge$CellID, ] |
|
|
723 |
rownames(tcell.pcs) <- tcell.pcs$CellID |
|
|
724 |
``` |
|
|
725 |
|
|
|
726 |
|
|
|
727 |
```{r, message=FALSE, warning=FALSE} |
|
|
728 |
tcell.harmony <- read.csv("~/Dropbox/COVID19/Data/COVID_Harmony_PCA_OnlyT_embeddings.csv", |
|
|
729 |
header=TRUE, stringsAsFactors=FALSE, row.names=1) |
|
|
730 |
tcell.harmony <- tcell.harmony[rownames(tcell.harmony) %in% tcell.prot.merge$CellID, ] |
|
|
731 |
tcell.harmony$CellID <- rownames(tcell.harmony) |
|
|
732 |
|
|
|
733 |
write.table(tcell.harmony, |
|
|
734 |
file="~/Dropbox/COVID19/Data/COVID_Tcells_harmony.tsv", |
|
|
735 |
sep="\t", row.names=FALSE, quote=FALSE) |
|
|
736 |
``` |
|
|
737 |
|
|
|
738 |
|
|
|
739 |
```{r, warning=FALSE, message=FALSE} |
|
|
740 |
# # keep.tcells <- unique(tcell.prot.merge$CellID) |
|
|
741 |
# tcell.kNN <- findKNN(as.matrix(tcell.pcs[, 1:30]), k=7, get.distance=FALSE)$index |
|
|
742 |
# rownames(tcell.kNN) <- tcell.pcs$CellID |
|
|
743 |
``` |
|
|
744 |
|
|
|
745 |
|
|
|
746 |
```{r, warning=FALSE, message=FALSE} |
|
|
747 |
# tcell.harm.kNN <- findKNN(as.matrix(tcell.harmony[, 1:30]), k=7, get.distance=FALSE)$index |
|
|
748 |
# rownames(tcell.harm.kNN) <- rownames(tcell.harmony) |
|
|
749 |
``` |
|
|
750 |
|
|
|
751 |
|
|
|
752 |
```{r, warning=FALSE, message=FALSE} |
|
|
753 |
# keep.tcells <- intersect(rownames(tcell.harmony), |
|
|
754 |
# unique(tcell.prot.merge$CellID[tcell.prot.merge$Sub.Annotation %in% c("CD4.Tfh", "CD4.Th1", "CD4.Th2", "CD4.Th17")])) |
|
|
755 |
# |
|
|
756 |
# refined.harmony.annot.list <- list() |
|
|
757 |
# for(i in seq_along(keep.tcells)){ |
|
|
758 |
# i.cell <- keep.tcells[i] |
|
|
759 |
# i.type <- tcell.prot.merge$Sub.Annotation[tcell.prot.merge$CellID == i.cell] |
|
|
760 |
# |
|
|
761 |
# i.nn <- tcell.pcs$CellID[tcell.harm.kNN[i.cell, ]] |
|
|
762 |
# i.labels <- tcell.prot.merge[tcell.prot.merge$CellID %in% i.nn, ]$Sub.Annotation |
|
|
763 |
# i.tab <- table(i.labels) |
|
|
764 |
# i.max <- max(i.tab) |
|
|
765 |
# i.max.type <- names(i.tab)[which(i.tab == i.max)] |
|
|
766 |
# |
|
|
767 |
# if((i.type == i.max.type) & (length(i.max.type) == 1)){ |
|
|
768 |
# refined.harmony.annot.list[[i.cell]] <- i.type |
|
|
769 |
# } else if((i.type != i.max.type) & (length(i.max.type) == 1)){ |
|
|
770 |
# refined.harmony.annot.list[[i.cell]] <- i.max.type |
|
|
771 |
# } else{ |
|
|
772 |
# # keep the same annotation if it's ambiguous |
|
|
773 |
# refined.harmony.annot.list[[i.cell]] <- i.type |
|
|
774 |
# } |
|
|
775 |
# |
|
|
776 |
# } |
|
|
777 |
# |
|
|
778 |
# refined.harmony.df <- data.frame("CellID"=names(refined.harmony.annot.list), "Refined.Harmony.Annotation"=unlist(refined.harmony.annot.list)) |
|
|
779 |
# refined.harmony.merge <- merge(refined.harmony.df, tcell.prot.merge, by='CellID') |
|
|
780 |
# |
|
|
781 |
# table(refined.harmony.merge$Refined.Harmony.Annotation, refined.harmony.merge$Sub.Annotation) |
|
|
782 |
``` |
|
|
783 |
|
|
|
784 |
|
|
|
785 |
```{r, warning=FALSE, message=FALSE} |
|
|
786 |
# refined.annot.list <- list() |
|
|
787 |
# for(i in seq_along(keep.tcells)){ |
|
|
788 |
# i.cell <- keep.tcells[i] |
|
|
789 |
# i.type <- tcell.prot.merge[tcell.prot.merge$CellID %in% i.cell, ]$Sub.Annotation |
|
|
790 |
# |
|
|
791 |
# i.nn <- tcell.pcs$CellID[tcell.kNN[i.cell, ]] |
|
|
792 |
# i.labels <- tcell.prot.merge[tcell.prot.merge$CellID %in% i.nn, ]$Sub.Annotation |
|
|
793 |
# i.tab <- table(i.labels) |
|
|
794 |
# i.max <- max(i.tab) |
|
|
795 |
# i.max.type <- names(i.tab)[which(i.tab == i.max)] |
|
|
796 |
# |
|
|
797 |
# if((i.type == i.max.type) & (length(i.max.type) == 1)){ |
|
|
798 |
# refined.annot.list[[i.cell]] <- i.type |
|
|
799 |
# } else if((i.type != i.max.type) & (length(i.max.type) == 1)){ |
|
|
800 |
# refined.annot.list[[i.cell]] <- i.max.type |
|
|
801 |
# } else{ |
|
|
802 |
# # keep the same annotation if it's ambiguous |
|
|
803 |
# refined.annot.list[[i.cell]] <- i.type |
|
|
804 |
# } |
|
|
805 |
# |
|
|
806 |
# } |
|
|
807 |
# |
|
|
808 |
# refined.df <- data.frame("CellID"=names(refined.annot.list), "Refined.Annotation"=unlist(refined.annot.list)) |
|
|
809 |
# refined.merge <- merge(refined.df, tcell.prot.merge, by='CellID') |
|
|
810 |
# |
|
|
811 |
# table(refined.merge$Refined.Annotation, refined.merge$Sub.Annotation) |
|
|
812 |
``` |
|
|
813 |
|
|
|
814 |
|
|
|
815 |
```{r} |
|
|
816 |
# write out the annotations, without the doublets, NK cells and ILCs |
|
|
817 |
write.table(tcell.prot.merge[!tcell.prot.merge$Sub.Annotation %in% c("NK", "Prolif.NK", "ILCs", |
|
|
818 |
"Doublets:Platelet", "Doublets:Bcell", "Doublets"), |
|
|
819 |
c("CellID", "Sub.Annotation")], |
|
|
820 |
file="~/Dropbox/COVID19/Data/Tcell_annotations_ext.tsv", |
|
|
821 |
sep="\t", quote=FALSE, row.names=FALSE) |
|
|
822 |
|
|
|
823 |
write.table(tcell.prot.merge[tcell.prot.merge$Sub.Annotation %in% c("Doublets:Platelet", "Doublets:Bcell", "Doublets"), |
|
|
824 |
c("CellID")], |
|
|
825 |
file="~/Dropbox/COVID19/Data/Tcell_doublets_ext.tsv", |
|
|
826 |
sep="\t", quote=FALSE, row.names=FALSE, col.names=FALSE) |
|
|
827 |
|
|
|
828 |
write.table(tcell.prot.merge[!tcell.prot.merge$Sub.Annotation %in% c("NK", "Prolif.NK", "ILCs", |
|
|
829 |
"Doublets:Platelet", "Doublets:Bcell", "Doublets"), |
|
|
830 |
c("CellID")], |
|
|
831 |
file="~/Dropbox/COVID19/Data/Tcell_barcodes.tsv", |
|
|
832 |
sep="\t", quote=FALSE, row.names=FALSE, col.names=FALSE) |
|
|
833 |
|
|
|
834 |
write.table(tcell.prot.merge[tcell.prot.merge$Sub.Annotation %in% c("CD4.Naive", "CD4.CM", "CD4.EM", "CD4.IL22", "CD4.Th1", "CD4.Th2", "CD4.Th17", |
|
|
835 |
"Treg", "CD4.Prolif", "CD4.Tfh"), |
|
|
836 |
c("CellID")], |
|
|
837 |
file="~/Dropbox/COVID19/Data/Tcell-CD4_barcodes.tsv", |
|
|
838 |
sep="\t", quote=FALSE, row.names=FALSE, col.names=FALSE) |
|
|
839 |
|
|
|
840 |
write.table(tcell.prot.merge[tcell.prot.merge$Sub.Annotation %in% c("CD8.Naive", "CD8.CM", "CD8.EM", "CD8.Prolif", "CD8.TE", "NKT", "MAIT", "gdT"), |
|
|
841 |
c("CellID")], |
|
|
842 |
file="~/Dropbox/COVID19/Data/Tcell-CD8_barcodes.tsv", |
|
|
843 |
sep="\t", quote=FALSE, row.names=FALSE, col.names=FALSE) |
|
|
844 |
``` |
|
|
845 |
|
|
|
846 |
Plot the proportions of T cells, normalised properly. |
|
|
847 |
|
|
|
848 |
```{r} |
|
|
849 |
cell.freq.tab <- table(tcell.prot.merge$sample_id, |
|
|
850 |
tcell.prot.merge$Sub.Annotation) |
|
|
851 |
|
|
|
852 |
cell.freq.df <- as.data.frame(sapply(rownames(cell.freq.tab), FUN=function(X) cell.freq.tab[X, ]/n.cell.vecc[X])) |
|
|
853 |
cell.freq.df$CellType <- rownames(cell.freq.df) |
|
|
854 |
cell.freq.df <- melt(cell.freq.df, id.vars=c("CellType")) |
|
|
855 |
cell.freq.df$CellType <- as.character(cell.freq.df$CellType) |
|
|
856 |
colnames(cell.freq.df) <- c("CellType", "sample_id", "Freq") |
|
|
857 |
cell.freq.df$Sex <- "Female" |
|
|
858 |
cell.freq.df$Sex[cell.freq.df$sample_id %in% unique(tcell.prot.merge$sample_id[tcell.prot.merge$Sex %in% c("M", "Male")])] <- "Male" |
|
|
859 |
|
|
|
860 |
covid.meta <- read.csv("~/Dropbox/COVID19/Data/Metadata FINAL 10122020.csv", |
|
|
861 |
header=TRUE, stringsAsFactors=FALSE) |
|
|
862 |
|
|
|
863 |
cell.freq.merge <- merge(cell.freq.df, covid.meta, by=c('sample_id', 'Sex')) |
|
|
864 |
cell.freq.merge <- cell.freq.merge[!cell.freq.merge$D0_status_summary %in% c("Non_covid"), ] |
|
|
865 |
cell.freq.merge <- cell.freq.merge[!cell.freq.merge$patient_id %in% c("CV0198"), ] |
|
|
866 |
# remove doublets, etc |
|
|
867 |
cell.freq.merge <- cell.freq.merge[!cell.freq.merge$CellType %in% c("Doublets:Bcell", "Doublets:Platelet", "Doublets", "NK", "ILCs"), ] |
|
|
868 |
``` |
|
|
869 |
|
|
|
870 |
|
|
|
871 |
```{r, warning=FALSE, message=FALSE, fig.height=2.95, fig.width=9.15} |
|
|
872 |
group.cols <- c("#2ca02c", "#1f77b4", "#9467bd", "#fed976", "#fd8d3c", "#e31a1c", "#800026", "#252525") |
|
|
873 |
names(group.cols) <- c("Healthy", "LPS", "Non_covid", "Asymptomatic", "Mild", "Moderate", "Severe", "Critical") |
|
|
874 |
|
|
|
875 |
cell.freq.merge$D0_status_summary <- factor(cell.freq.merge$D0_status_summary, |
|
|
876 |
levels=c("Healthy", "Asymptomatic", "Mild", |
|
|
877 |
"Moderate", "Severe", "Critical", "LPS", "Non_covid")) |
|
|
878 |
|
|
|
879 |
# ggplot(cell.freq.merge[cell.freq.merge$Collection_Day %in% c("D0") & |
|
|
880 |
# cell.freq.merge$CellType %in% c("Doublets:Bcell", "Doublets:Platelet", "Doublets"), ], |
|
|
881 |
# aes(x=D0_status_summary, y=Freq, fill=D0_status_summary)) + |
|
|
882 |
# geom_boxplot() + |
|
|
883 |
# theme_cowplot() + |
|
|
884 |
# facet_wrap(~CellType, scales="free_y") + |
|
|
885 |
# scale_fill_manual(values=group.cols) + |
|
|
886 |
# expand_limits(y=c(0)) + |
|
|
887 |
# theme(axis.text.x=element_blank(), |
|
|
888 |
# strip.background=element_rect(fill='white', colour='white'), |
|
|
889 |
# strip.text=element_text(size=14)) + |
|
|
890 |
# labs(x="Severity Group", y="Proportion") + |
|
|
891 |
# guides(fill=guide_legend(title="Severity")) + |
|
|
892 |
# ggsave("~/Dropbox/COVID19/plot.dir/Tcell-doublets_proportions-boxplot.pdf", |
|
|
893 |
# height=2.95, width=9.15, useDingbats=FALSE) + |
|
|
894 |
# NULL |
|
|
895 |
``` |
|
|
896 |
|
|
|
897 |
|
|
|
898 |
```{r} |
|
|
899 |
write.table(cell.freq.merge, |
|
|
900 |
file="~/Dropbox/COVID19/Data/Tcell_proportions.tsv", |
|
|
901 |
sep="\t", quote=FALSE, row.names=FALSE) |
|
|
902 |
``` |
|
|
903 |
|
|
|
904 |
|
|
|
905 |
```{r, warning=FALSE, message=FALSE, fig.height=5.95, fig.width=11.15} |
|
|
906 |
ggplot(cell.freq.merge[cell.freq.merge$Collection_Day %in% c("D0") & |
|
|
907 |
!cell.freq.merge$CellType %in% c("NK", "Prolif.NK", "ILCs", "Doublets:Bcell", "Doublets:Platelet", "Doublets"), ], |
|
|
908 |
aes(x=D0_status_summary, y=Freq, fill=D0_status_summary)) + |
|
|
909 |
geom_boxplot() + |
|
|
910 |
theme_cowplot() + |
|
|
911 |
facet_wrap(~CellType, scales="free_y") + |
|
|
912 |
scale_fill_manual(values=group.cols) + |
|
|
913 |
expand_limits(y=c(0)) + |
|
|
914 |
theme(axis.text.x=element_blank(), |
|
|
915 |
strip.background=element_rect(fill='white', colour='white'), |
|
|
916 |
strip.text=element_text(size=14)) + |
|
|
917 |
labs(x="Severity Group", y="Proportion") + |
|
|
918 |
guides(fill=guide_legend(title="Severity")) + |
|
|
919 |
ggsave("~/Dropbox/COVID19/plot.dir/Tcells_proportions-boxplot.pdf", |
|
|
920 |
height=5.95, width=11.15, useDingbats=FALSE) + |
|
|
921 |
NULL |
|
|
922 |
``` |
|
|
923 |
|
|
|
924 |
For the figures these need to be a filled barchart (?!). Focus in on the Tfh-like cells. |
|
|
925 |
|
|
|
926 |
```{r, warning=FALSE, message=FALSE, fig.height=2.15, fig.width=3.55} |
|
|
927 |
ggplot(cell.freq.merge[cell.freq.merge$Collection_Day %in% c("D0") & |
|
|
928 |
cell.freq.merge$CellType %in% c("CD4.Tfh"), ], |
|
|
929 |
aes(x=D0_status_summary, y=Freq, fill=D0_status_summary)) + |
|
|
930 |
geom_boxplot() + |
|
|
931 |
theme_cowplot() + |
|
|
932 |
facet_wrap(~CellType, scales="free_y") + |
|
|
933 |
scale_fill_manual(values=group.cols) + |
|
|
934 |
expand_limits(y=c(0)) + |
|
|
935 |
theme(axis.text.x=element_blank(), |
|
|
936 |
strip.background=element_rect(fill='white', colour='white'), |
|
|
937 |
strip.text=element_text(size=14)) + |
|
|
938 |
labs(x="Severity Group", y="Proportion") + |
|
|
939 |
guides(fill=guide_legend(title="Severity")) + |
|
|
940 |
ggsave("~/Dropbox/COVID19/plot.dir/Tcells_proportions_Tfh-boxplot.pdf", |
|
|
941 |
height=2.15, width=3.55, useDingbats=FALSE) + |
|
|
942 |
NULL |
|
|
943 |
``` |
|
|
944 |
|
|
|
945 |
Run a LM comparing categories. |
|
|
946 |
|
|
|
947 |
```{r, warning=FALSE, message=FALSE, fig.height=2.15, fig.width=3.55} |
|
|
948 |
ggplot(cell.freq.merge[cell.freq.merge$Collection_Day %in% c("D0") & |
|
|
949 |
cell.freq.merge$CellType %in% c("CD4.Tfh"), ], |
|
|
950 |
aes(x=D0_status_summary, y=Freq, fill=Sex)) + |
|
|
951 |
geom_boxplot() + |
|
|
952 |
theme_cowplot() + |
|
|
953 |
facet_wrap(~CellType, scales="free_y") + |
|
|
954 |
scale_fill_colorblind() + |
|
|
955 |
expand_limits(y=c(0)) + |
|
|
956 |
theme(axis.text.x=element_blank(), |
|
|
957 |
strip.background=element_rect(fill='white', colour='white'), |
|
|
958 |
strip.text=element_text(size=14)) + |
|
|
959 |
labs(x="Severity Group", y="Proportion") + |
|
|
960 |
guides(fill=guide_legend(title="Severity")) + |
|
|
961 |
ggsave("~/Dropbox/COVID19/plot.dir/Tcells_proportions_Tfh-byGender_boxplot.pdf", |
|
|
962 |
height=2.15, width=3.55, useDingbats=FALSE) + |
|
|
963 |
NULL |
|
|
964 |
``` |
|
|
965 |
|
|
|
966 |
Compare healthy vs. asymptomatic, and asymptomatic > critical. |
|
|
967 |
|
|
|
968 |
```{r, warning=FALSE, message=FALSE} |
|
|
969 |
healthy.tfh <- cell.freq.merge$Freq[cell.freq.merge$CellType %in% c("CD4.Tfh") & |
|
|
970 |
cell.freq.merge$D0_status_summary %in% c("Healthy") & |
|
|
971 |
cell.freq.merge$Collection_Day %in% c("D0")] |
|
|
972 |
asymp.tfh <- cell.freq.merge$Freq[cell.freq.merge$CellType %in% c("CD4.Tfh") & |
|
|
973 |
cell.freq.merge$D0_status_summary %in% c("Asymptomatic") & |
|
|
974 |
cell.freq.merge$Collection_Day %in% c("D0")] |
|
|
975 |
|
|
|
976 |
tfh.utest <- wilcox.test(x=healthy.tfh, y=asymp.tfh, alternative="two.sided") |
|
|
977 |
tfh.utest |
|
|
978 |
``` |
|
|
979 |
|
|
|
980 |
I should model the counts, normalized by the total numbers of captured cells. |
|
|
981 |
|
|
|
982 |
```{r} |
|
|
983 |
tfh.lm.list <- list() |
|
|
984 |
covid.tfh.merge <- cell.freq.merge[!cell.freq.merge$D0_status_summary %in% c("Healthy"), ] |
|
|
985 |
covid.tfh.merge$OrderedStatus <- ordered(covid.tfh.merge$D0_status_summary, |
|
|
986 |
levels=c("Asymptomatic", "Mild", "Moderate", "Severe", "Critical")) |
|
|
987 |
covid.tfh.merge$Days_from_onset <- as.numeric(covid.tfh.merge$Days_from_onset) |
|
|
988 |
|
|
|
989 |
c.types <- c("CD4.Tfh") |
|
|
990 |
|
|
|
991 |
for(i in seq_along(c.types)){ |
|
|
992 |
i.c <- c.types[i] |
|
|
993 |
|
|
|
994 |
i.lm.df <- data.frame("sample_id"=rownames(cell.freq.tab), "Count"=cell.freq.tab[, i.c]) |
|
|
995 |
i.lm.df <- merge(i.lm.df, covid.tfh.merge[covid.tfh.merge$CellType %in% i.c, ], by='sample_id') |
|
|
996 |
|
|
|
997 |
# i.tfh.lm <- glm(Count ~ Sex + Age + Site + OrderedStatus, |
|
|
998 |
# data=i.lm.df, family=poisson(link="log")) |
|
|
999 |
i.tfh.lm <- glm.nb(Count ~ Sex + Age + Site + OrderedStatus, data=i.lm.df, init.theta=1) |
|
|
1000 |
|
|
|
1001 |
i.lm.res <- summary(i.tfh.lm) |
|
|
1002 |
tfh.lm.list[[i.c]] <- data.frame("CellType"=rep(i.c, nrow(i.lm.res$coefficients)), |
|
|
1003 |
"Beta"=i.lm.res$coefficients[, 1], |
|
|
1004 |
"SE"=i.lm.res$coefficients[, 2], |
|
|
1005 |
"STAT"=i.lm.res$coefficients[, 3], |
|
|
1006 |
"Pvalue"=i.lm.res$coefficients[, 4], |
|
|
1007 |
"Predictor"=rownames(i.lm.res$coefficients)) |
|
|
1008 |
} |
|
|
1009 |
|
|
|
1010 |
tfh.lm.df <- do.call(rbind.data.frame, tfh.lm.list) |
|
|
1011 |
tfh.lm.df$Predictor <- gsub(tfh.lm.df$Predictor, pattern="OrderedStatus", replacement="Severity") |
|
|
1012 |
tfh.lm.df$Predictor <- gsub(tfh.lm.df$Predictor, pattern="Male", replacement="") |
|
|
1013 |
tfh.lm.df$Predictor <- gsub(tfh.lm.df$Predictor, pattern="Site", replacement="") |
|
|
1014 |
tfh.lm.df <- tfh.lm.df[!tfh.lm.df$Predictor %in% c("(Intercept)"), ] |
|
|
1015 |
tfh.lm.df$P.Adjust <- p.adjust(tfh.lm.df$Pvalue) |
|
|
1016 |
``` |
|
|
1017 |
|
|
|
1018 |
|
|
|
1019 |
```{r, warning=FALSE, message=FALSE, fig.height=3.95, fig.width=4.95} |
|
|
1020 |
ggplot(tfh.lm.df, aes(x=Predictor, y=Beta, fill=Pvalue < 0.05)) + |
|
|
1021 |
geom_hline(yintercept=0, lty=2, col='grey70') + |
|
|
1022 |
geom_errorbar(aes(ymin=Beta-SE, ymax=Beta+SE)) + |
|
|
1023 |
geom_point(shape=22, size=3) + |
|
|
1024 |
theme_cowplot() + |
|
|
1025 |
theme(strip.background=element_rect(fill='white', colour='white'), |
|
|
1026 |
strip.text=element_text(size=14)) + |
|
|
1027 |
coord_flip() + |
|
|
1028 |
scale_fill_manual(values=c("blue", "orange")) + |
|
|
1029 |
facet_wrap(~CellType, scales="free_x") + |
|
|
1030 |
ggsave("~/Dropbox/COVID19/plot.dir/Tcell_Tfh_props-LM.pdf", |
|
|
1031 |
height=3.95, width=4.95, useDingbats=FALSE) |
|
|
1032 |
``` |
|
|
1033 |
|
|
|
1034 |
|
|
|
1035 |
```{r, warning=FALSE, message=FALSE} |
|
|
1036 |
cellfreq.df <- cell.freq.merge %>% group_by(D0_status_summary, CellType) %>% summarise(Mean = mean(Freq)) |
|
|
1037 |
cellfreq.df$D0_status_summary <- factor(cellfreq.df$D0_status_summary, |
|
|
1038 |
levels=c("Healthy", "Asymptomatic", "Mild", |
|
|
1039 |
"Moderate", "Severe", "Critical", "LPS")) |
|
|
1040 |
``` |
|
|
1041 |
|
|
|
1042 |
Create a new colour palette. |
|
|
1043 |
|
|
|
1044 |
```{r} |
|
|
1045 |
cell.order <- c("CD4.Naive", "CD4.CM", "CD4.EM", "CD4.IL22", "CD4.Prolif", "CD4.Th1", "CD4.Th2", "CD4.Th17", "CD4.Tfh", |
|
|
1046 |
"Treg", "CD8.Naive", "CD8.Activated", "CD8.Prolif", "CD8.CM", "CD8.TE", "CD8.EM", "gdT", "MAIT", "NKT") |
|
|
1047 |
|
|
|
1048 |
all.qual.cols <- brewer.pal.info[brewer.pal.info$category %in% c("qual") & brewer.pal.info$colorblind, ] |
|
|
1049 |
col_vector = unlist(mapply(brewer.pal, all.qual.cols$maxcolors, rownames(all.qual.cols))) |
|
|
1050 |
|
|
|
1051 |
# cell.cols <- col_vector[c(1:7, 9:19)] |
|
|
1052 |
cell.cols <- col_vector[c(1:17, 19, 20)] |
|
|
1053 |
names(cell.cols) <- cell.order |
|
|
1054 |
``` |
|
|
1055 |
|
|
|
1056 |
|
|
|
1057 |
```{r} |
|
|
1058 |
pie(rep(1, 18), col=cell.cols) |
|
|
1059 |
``` |
|
|
1060 |
|
|
|
1061 |
|
|
|
1062 |
```{r, warning=FALSE, message=FALSE, fig.height=4.25, fig.width=6.95} |
|
|
1063 |
# n.cells <- length(cell.order) |
|
|
1064 |
# cell.cols <- colorRampPalette(pal_futurama()(12))(n.cells) |
|
|
1065 |
# names(cell.cols) <- cell.order |
|
|
1066 |
|
|
|
1067 |
cellfreq.df$CellType <- factor(cellfreq.df$CellType, |
|
|
1068 |
levels=cell.order) |
|
|
1069 |
|
|
|
1070 |
ggplot(cellfreq.df, |
|
|
1071 |
aes(x=D0_status_summary, y = Mean, fill=CellType))+ |
|
|
1072 |
geom_bar(position="fill", stat="identity", width = 0.9, colour = "black")+ |
|
|
1073 |
scale_fill_manual(values = cell.cols) + |
|
|
1074 |
theme(aspect.ratio = 1/1.5, |
|
|
1075 |
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1, size = 14, colour = "black"), |
|
|
1076 |
panel.grid.major = element_blank(), panel.grid.minor = element_blank(), |
|
|
1077 |
axis.text.y = element_text(size = 14, colour = "black"), |
|
|
1078 |
legend.key.size=unit(0.4, "cm"), |
|
|
1079 |
axis.title.x=element_blank(), axis.title.y=element_blank(), |
|
|
1080 |
legend.text = element_text(size = 14), legend.title=element_text(size=14), |
|
|
1081 |
panel.background = element_blank(), axis.line = element_line(colour = "black")) + |
|
|
1082 |
guides(fill=guide_legend(title="Cell type", ncol=1)) + |
|
|
1083 |
ggsave("~/Dropbox/COVID19/plot.dir/Tcell_barchart.pdf", |
|
|
1084 |
height=4.25, width=6.95, useDingbats=FALSE) |
|
|
1085 |
``` |
|
|
1086 |
|
|
|
1087 |
I have a misgiving about this, as it masks the distribution of the proportions - maybe a series of filled boxplots, e.g. a waterfall? |
|
|
1088 |
|
|
|
1089 |
```{r, warning=FALSE, message=FALSE, fig.height=5.25, fig.width=12.95} |
|
|
1090 |
cell.freq.merge$CellType <- factor(cell.freq.merge$CellType, |
|
|
1091 |
levels=cell.order) |
|
|
1092 |
|
|
|
1093 |
ggplot(cell.freq.merge, |
|
|
1094 |
aes(x=sample_id, y = Freq, fill=CellType))+ |
|
|
1095 |
geom_bar(position="fill", stat="identity", width = 0.8, colour = "black")+ |
|
|
1096 |
scale_fill_manual(values = cell.cols) + |
|
|
1097 |
facet_grid(. ~ D0_status_summary, scales="free_x", space="free") + |
|
|
1098 |
theme(axis.text.x = element_blank(), |
|
|
1099 |
panel.spacing=unit(0, "lines"), |
|
|
1100 |
panel.grid.major = element_blank(), panel.grid.minor = element_blank(), |
|
|
1101 |
axis.text.y = element_text(size = 18), |
|
|
1102 |
axis.title.x=element_blank(), axis.title.y=element_blank(), |
|
|
1103 |
axis.ticks.x=element_blank(), |
|
|
1104 |
strip.background=element_rect(fill='white', colour='white'), |
|
|
1105 |
strip.text=element_text(size=14, angle=90, colour='black'), |
|
|
1106 |
legend.text = element_text(size = 18), legend.title=element_text(size=18), |
|
|
1107 |
panel.background = element_blank(), axis.line = element_line(colour = "black")) + |
|
|
1108 |
# guides(fill=guide_legend(title="Cell type")) + |
|
|
1109 |
ggsave("~/Dropbox/COVID19/plot.dir/Tcell_barchart-full.pdf", |
|
|
1110 |
height=5.25, width=8.95, useDingbats=FALSE) + |
|
|
1111 |
NULL |
|
|
1112 |
``` |
|
|
1113 |
|
|
|
1114 |
Make dot plots. |
|
|
1115 |
|
|
|
1116 |
```{r, warning=FALSE, message=FALSE} |
|
|
1117 |
clust.model.matrix <- model.matrix(~ 0 + Sub.Annotation, data=tcell.prot.merge) |
|
|
1118 |
rownames(clust.model.matrix) <- tcell.prot.merge$CellID |
|
|
1119 |
|
|
|
1120 |
gene.goi.by.cluster <- t(clust.model.matrix) %*% as.matrix(tcell.prot.merge[, setdiff(colnames(goi.df), colnames(tcell.merge))]) |
|
|
1121 |
gene.ave.by.label <- as.data.frame(apply(gene.goi.by.cluster, 2, function(X) X/colSums(clust.model.matrix))) |
|
|
1122 |
rownames(gene.ave.by.label) <- gsub(rownames(gene.ave.by.label), pattern="Sub\\.Annotation", replacement="") |
|
|
1123 |
|
|
|
1124 |
gene.goi.by.cluster.bin <- t(clust.model.matrix) %*% as.matrix(tcell.prot.merge[, setdiff(colnames(goi.df), colnames(tcell.merge))] > 0) |
|
|
1125 |
gene.bin.by.label <- as.data.frame(apply(gene.goi.by.cluster.bin, 2, function(X) X/sum(X))) |
|
|
1126 |
gene.bin.by.label[is.na(gene.bin.by.label)] <- 0 |
|
|
1127 |
rownames(gene.bin.by.label) <- gsub(rownames(gene.bin.by.label), pattern="Sub\\.Annotation", replacement="") |
|
|
1128 |
``` |
|
|
1129 |
|
|
|
1130 |
|
|
|
1131 |
```{r, message=FALSE, warning=FALSE} |
|
|
1132 |
gene.ave.label.df <- as.data.frame(apply(gene.ave.by.label, 2, FUN=function(XP) XP/max(XP))) |
|
|
1133 |
gene.ave.label.df$Sub.Annotation <- rownames(gene.ave.label.df) |
|
|
1134 |
gene.ave.label.melt <- melt(gene.ave.label.df, id.vars='Sub.Annotation') |
|
|
1135 |
colnames(gene.ave.label.melt) <- c("Sub.Annotation", "Gene", "AveExprs") |
|
|
1136 |
|
|
|
1137 |
gene.bin.label.df <- gene.bin.by.label |
|
|
1138 |
gene.bin.label.df$Sub.Annotation <- rownames(gene.bin.label.df) |
|
|
1139 |
gene.bin.label.melt <- melt(gene.bin.label.df, id.vars='Sub.Annotation') |
|
|
1140 |
colnames(gene.bin.label.melt) <- c("Sub.Annotation", "Gene", "PropExprs") |
|
|
1141 |
``` |
|
|
1142 |
|
|
|
1143 |
|
|
|
1144 |
```{r, message=FALSE, warning=FALSE, fig.height=4.65, fig.width=8.95} |
|
|
1145 |
dot.plot.df <- merge(gene.ave.label.melt, gene.bin.label.melt, by=c('Sub.Annotation', 'Gene')) |
|
|
1146 |
dot.plot.df$Gene <- as.character(dot.plot.df$Gene) |
|
|
1147 |
|
|
|
1148 |
dot.plot.genes <- c("CD3G", "CD8A", "CD4", "CCR7", "CD27", "CD28", "TOX", "CTLA4", "IFNG", "TBX21", |
|
|
1149 |
"PDCD1", "GZMB", "RORC", "IKZF2", "GATA3", "TNFRSF9", "IL7R", "KLRG1", |
|
|
1150 |
"TRAV1.2", "TRDV2", "TRGV9", "CD40LG", "RORC", "NCAM1", "CCR6", |
|
|
1151 |
"CXCR5", "HLA.DRB1", "LAG3", "FOXP3", "MKI67", "NCR1", "GARP", "FCGR3A") |
|
|
1152 |
gene.cluster <- hclust(dist(t(gene.ave.by.label[, intersect(colnames(gene.ave.by.label), dot.plot.genes)])))$order |
|
|
1153 |
|
|
|
1154 |
# manually set the gene order |
|
|
1155 |
gene.order <- c("CD3G", "CCR7", "CD27", "CD4", "CD28", "TNFRSF9", "CCR6", "CD40LG", "MKI67", "TBX21", "GATA3", "RORC", "IKZF2", |
|
|
1156 |
"FOXP3", "CTLA4", "PDCD1", "CXCR5", "CD8A", "GZMB", "HLA.DRB1", "IFNG", "LAG3", "TOX", "IL7R", "KLRG1", "TRDV2", |
|
|
1157 |
"TRGV9", "TRAV1.2", "NCAM1", "NCR1", "FCGR3A") |
|
|
1158 |
|
|
|
1159 |
dot.plot.df <- dot.plot.df[dot.plot.df$Gene %in% dot.plot.genes, ] |
|
|
1160 |
dot.plot.df$Gene <- factor(dot.plot.df$Gene, |
|
|
1161 |
levels=gene.order) |
|
|
1162 |
|
|
|
1163 |
dot.plot.df <- dot.plot.df[dot.plot.df$Sub.Annotation %in% cell.order, ] |
|
|
1164 |
dot.plot.df$Sub.Annotation <- factor(dot.plot.df$Sub.Annotation, |
|
|
1165 |
levels=rev(cell.order)) |
|
|
1166 |
|
|
|
1167 |
ggplot(dot.plot.df, |
|
|
1168 |
aes(x=Gene, y=Sub.Annotation, colour=AveExprs, size=PropExprs*100)) + |
|
|
1169 |
geom_point() + |
|
|
1170 |
theme_cowplot() + |
|
|
1171 |
scale_colour_gradient(low='grey70', high='blue') + |
|
|
1172 |
theme(axis.text.x=element_text(angle=90, vjust=0.5, hjust=1, colour='black', size=14), |
|
|
1173 |
axis.text.y=element_text(colour='black', size=14), |
|
|
1174 |
legend.text=element_text(size=14), |
|
|
1175 |
legend.title=element_text(size=14), |
|
|
1176 |
panel.grid.major = element_blank(), panel.grid.minor = element_blank(), |
|
|
1177 |
panel.background = element_blank(), axis.line = element_line(colour = "black")) + |
|
|
1178 |
guides(colour=guide_colorbar(title='Mean\nExpression'), |
|
|
1179 |
size=guide_legend(title='% Express')) + |
|
|
1180 |
labs(x="Gene", y="Cell type") + |
|
|
1181 |
ggsave("~/Dropbox/COVID19/plot.dir/Tcell_ext_dotplot-genes.pdf", |
|
|
1182 |
height=4.65, width=8.95, useDingbats=FALSE) |
|
|
1183 |
``` |
|
|
1184 |
|
|
|
1185 |
Plot the cytokine profiles to figure out if Th1/2/17 dominate (or all of them). |
|
|
1186 |
|
|
|
1187 |
```{r, message=FALSE, warning=FALSE, fig.height=4.15, fig.width=7.0} |
|
|
1188 |
cytokine.dot.plot.df <- merge(gene.ave.label.melt, gene.bin.label.melt, by=c('Sub.Annotation', 'Gene')) |
|
|
1189 |
cytokine.dot.plot.df$Gene <- as.character(cytokine.dot.plot.df$Gene) |
|
|
1190 |
|
|
|
1191 |
cytokine.genes <- intersect(c("IL2", "IL1A", "IL1B", "IL10", "IL22", "TNF", "IFNG", "LTA", |
|
|
1192 |
"IL4", "IL5", "IL13", "IL17A", "IL17F", "IL21", |
|
|
1193 |
"IL12A", "IL12B", "IL25"), colnames(tcell.prot.merge)) |
|
|
1194 |
|
|
|
1195 |
cytokine.dot.plot.df <- cytokine.dot.plot.df[cytokine.dot.plot.df$Gene %in% cytokine.genes, ] |
|
|
1196 |
cytokine.dot.plot.df$Gene <- factor(cytokine.dot.plot.df$Gene, |
|
|
1197 |
levels=cytokine.genes) |
|
|
1198 |
|
|
|
1199 |
cytokine.dot.plot.df <- cytokine.dot.plot.df[cytokine.dot.plot.df$Sub.Annotation %in% cell.order, ] |
|
|
1200 |
cytokine.dot.plot.df$Sub.Annotation <- factor(cytokine.dot.plot.df$Sub.Annotation, |
|
|
1201 |
levels=rev(cell.order)) |
|
|
1202 |
|
|
|
1203 |
ggplot(cytokine.dot.plot.df, |
|
|
1204 |
aes(x=Gene, y=Sub.Annotation, colour=AveExprs, size=PropExprs*100)) + |
|
|
1205 |
geom_point() + |
|
|
1206 |
theme_cowplot() + |
|
|
1207 |
scale_colour_gradient(low='grey70', high='blue') + |
|
|
1208 |
theme(axis.text.x=element_text(angle=90, vjust=0.5, hjust=1, colour='black', size=14), |
|
|
1209 |
axis.text.y=element_text(colour='black', size=14), |
|
|
1210 |
legend.text=element_text(size=14), |
|
|
1211 |
legend.title=element_text(size=14), |
|
|
1212 |
legend.key.size=unit(0.4, "cm"), |
|
|
1213 |
panel.grid.major = element_blank(), panel.grid.minor = element_blank(), |
|
|
1214 |
panel.background = element_blank(), axis.line = element_line(colour = "black")) + |
|
|
1215 |
guides(colour=guide_colorbar(title='Mean\nExpression'), |
|
|
1216 |
size=guide_legend(title='% Express')) + |
|
|
1217 |
labs(x="Cytokine", y="Cell type") + |
|
|
1218 |
ggsave("~/Dropbox/COVID19/plot.dir/Tcell_ext_dotplot-cytokines.pdf", |
|
|
1219 |
height=4.65, width=7.0, useDingbats=FALSE) |
|
|
1220 |
``` |
|
|
1221 |
|
|
|
1222 |
I want to see how these cytokines change over the course of severity. |
|
|
1223 |
|
|
|
1224 |
```{r, warning=FALSE, message=FALSE} |
|
|
1225 |
cytokine.melt <- melt(tcell.prot.merge[, c("sample_id", "D0_status_summary", "Sub.Annotation", "Collection_Day", "Sex", "Age", "CellID", |
|
|
1226 |
cytokine.genes)], |
|
|
1227 |
variable.name="Cytokine", |
|
|
1228 |
id.vars=c("sample_id", "D0_status_summary", "Sub.Annotation", "Collection_Day", "Sex", "Age", "CellID")) |
|
|
1229 |
cytokine.means <- cytokine.melt %>% group_by(D0_status_summary, Sub.Annotation, Cytokine) %>% summarise("Mean.Exprs"=mean(value)) |
|
|
1230 |
cytokine.var <- cytokine.melt %>% group_by(D0_status_summary, Sub.Annotation, Cytokine) %>% summarise("Var.Exprs"=var(value)) |
|
|
1231 |
cytokine.se <- cytokine.melt %>% group_by(D0_status_summary, Sub.Annotation, Cytokine) %>% summarise("SE.Exprs"=sqrt(var(value))/sqrt(sum(value >= 0))) |
|
|
1232 |
|
|
|
1233 |
cytokine.df <- Reduce(x=list("mean"=cytokine.means, "var"=cytokine.var, "se"=cytokine.se), |
|
|
1234 |
f=function(x, y) merge(x, y, by=c('D0_status_summary', 'Sub.Annotation', 'Cytokine'))) |
|
|
1235 |
# cytokine.df <- merge(cytokine.df, covid.meta, by='sample_id') |
|
|
1236 |
|
|
|
1237 |
# remove superflous or malignant samples |
|
|
1238 |
# cytokine.df <- cytokine.df[!cytokine.df$patient_id %in% c("CV0198"), ] |
|
|
1239 |
# cytokine.df <- cytokine.df[!cytokine.df$sample_id %in% c("BGCV01_CV0902"), ] |
|
|
1240 |
# cytokine.df <- cytokine.df[!cytokine.df$D0_status_summary %in% c("Non_Covid"), ] |
|
|
1241 |
|
|
|
1242 |
cytokine.df$D0_satus_summary <- factor(cytokine.df$D0_status_summary, |
|
|
1243 |
levels=c("Healthy", "Asymptomatic", "Mild", |
|
|
1244 |
"Moderate", "Severe", "Critical", "LPS")) |
|
|
1245 |
``` |
|
|
1246 |
|
|
|
1247 |
|
|
|
1248 |
Maybe visualise this as a heatmap? |
|
|
1249 |
|
|
|
1250 |
```{r, fig.height=7.15, fig.width=7.95} |
|
|
1251 |
cytokine.df <- cytokine.df[!cytokine.df$Sub.Annotation %in% c("NK", "ILCs", "Doublets:Bcell", "Doublets", "Doublets:Platelet") & |
|
|
1252 |
!is.na(cytokine.df$D0_satus_summary), ] |
|
|
1253 |
|
|
|
1254 |
ggplot(cytokine.df[!cytokine.df$Cytokine %in% c("IL13", "IL22", "IL12A", "IL17A", "IL17F", "IL10", "IL6"), ], |
|
|
1255 |
aes(x=D0_satus_summary, y=Cytokine, fill=Mean.Exprs)) + |
|
|
1256 |
geom_tile() + |
|
|
1257 |
facet_wrap(~Sub.Annotation) + |
|
|
1258 |
scale_fill_viridis(option="magma") + |
|
|
1259 |
theme_cowplot() + |
|
|
1260 |
guides(fill=guide_colourbar(title="Mean Expression")) + |
|
|
1261 |
labs(y="Cytokine Gene", x="Severity") + |
|
|
1262 |
theme(axis.text.x=element_text(angle=90, vjust=0.5, hjust=1), |
|
|
1263 |
strip.background=element_rect(fill='white', colour='white')) + |
|
|
1264 |
ggsave("~/Dropbox/COVID19/plot.dir/Tcell_cytokines_heatmap.pdf", |
|
|
1265 |
height=7.15, width=7.95, useDingbats=FALSE) + |
|
|
1266 |
NULL |
|
|
1267 |
``` |
|
|
1268 |
|
|
|
1269 |
|
|
|
1270 |
What about exhaustion markers? |
|
|
1271 |
|
|
|
1272 |
|
|
|
1273 |
```{r, warning=FALSE, message=FALSE} |
|
|
1274 |
exhaustion.genes <- c("TOX", "TIGIT", "LAG3", "HAVCR2", "PDCD1", "FOXP3", "MKI67") |
|
|
1275 |
exhaustion.melt <- melt(tcell.prot.merge[, c("sample_id", "D0_status_summary", "Sub.Annotation", "Collection_Day", "Sex", "Age", "CellID", |
|
|
1276 |
exhaustion.genes)], |
|
|
1277 |
variable.name="Gene", |
|
|
1278 |
id.vars=c("sample_id", "D0_status_summary", "Sub.Annotation", "Collection_Day", "Sex", "Age", "CellID")) |
|
|
1279 |
exhaustion.means <- exhaustion.melt %>% group_by(D0_status_summary, Sub.Annotation, Gene) %>% summarise("Mean.Exprs"=mean(value)) |
|
|
1280 |
exhaustion.var <- exhaustion.melt %>% group_by(D0_status_summary, Sub.Annotation, Gene) %>% summarise("Var.Exprs"=var(value)) |
|
|
1281 |
exhaustion.se <- exhaustion.melt %>% group_by(D0_status_summary, Sub.Annotation, Gene) %>% summarise("SE.Exprs"=sqrt(var(value))/sqrt(sum(value >= 0))) |
|
|
1282 |
|
|
|
1283 |
exhaustion.df <- Reduce(x=list("mean"=exhaustion.means, "var"=exhaustion.var, "se"=exhaustion.se), |
|
|
1284 |
f=function(x, y) merge(x, y, by=c('D0_status_summary', 'Sub.Annotation', 'Gene'))) |
|
|
1285 |
# exhaustion.df <- merge(exhaustion.df, covid.meta, by='sample_id') |
|
|
1286 |
|
|
|
1287 |
# remove superflous or malignant samples |
|
|
1288 |
# exhaustion.df <- exhaustion.df[!exhaustion.df$patient_id %in% c("CV0198"), ] |
|
|
1289 |
# exhaustion.df <- exhaustion.df[!exhaustion.df$sample_id %in% c("BGCV01_CV0902"), ] |
|
|
1290 |
# exhaustion.df <- exhaustion.df[!exhaustion.df$D0_status_summary %in% c("Non_Covid"), ] |
|
|
1291 |
|
|
|
1292 |
exhaustion.df$D0_satus_summary <- factor(exhaustion.df$D0_status_summary, |
|
|
1293 |
levels=c("Healthy", "Asymptomatic", "Mild", |
|
|
1294 |
"Moderate", "Severe", "Critical", "LPS")) |
|
|
1295 |
``` |
|
|
1296 |
|
|
|
1297 |
|
|
|
1298 |
```{r, fig.height=7.15, fig.width=7.95} |
|
|
1299 |
exhaustion.df <- exhaustion.df[!exhaustion.df$Sub.Annotation %in% c("NK", "ILCs", "Doublets:Bcell", "Doublets", "Doublets:Platelet") & |
|
|
1300 |
!is.na(exhaustion.df$D0_satus_summary), ] |
|
|
1301 |
|
|
|
1302 |
ggplot(exhaustion.df[!exhaustion.df$Gene %in% c("IL13", "IL22", "IL12A", "IL17A", "IL17F"), ], |
|
|
1303 |
aes(x=D0_satus_summary, y=Gene, fill=Mean.Exprs)) + |
|
|
1304 |
geom_tile() + |
|
|
1305 |
facet_wrap(~Sub.Annotation) + |
|
|
1306 |
scale_fill_viridis(option="magma") + |
|
|
1307 |
theme_cowplot() + |
|
|
1308 |
guides(fill=guide_colourbar(title="Mean Expression")) + |
|
|
1309 |
labs(x="Severity", "Gene") + |
|
|
1310 |
theme(axis.text.x=element_text(angle=90, vjust=0.5, hjust=1), |
|
|
1311 |
strip.background=element_rect(fill='white', colour='white')) + |
|
|
1312 |
ggsave("~/Dropbox/COVID19/plot.dir/Tcell_exhaustion_heatmap.pdf", |
|
|
1313 |
height=7.15, width=7.95, useDingbats=FALSE) + |
|
|
1314 |
NULL |
|
|
1315 |
``` |
|
|
1316 |
|
|
|
1317 |
Do the same for the proteins. |
|
|
1318 |
|
|
|
1319 |
```{r, warning=FALSE, message=FALSE} |
|
|
1320 |
clust.model.matrix <- model.matrix(~ 0 + Sub.Annotation, data=tcell.prot.merge) |
|
|
1321 |
rownames(clust.model.matrix) <- tcell.prot.merge$CellID |
|
|
1322 |
|
|
|
1323 |
goi.by.cluster <- t(clust.model.matrix) %*% as.matrix(tcell.prot.merge[, gsub(setdiff(colnames(prot.df), colnames(tcell.merge)), |
|
|
1324 |
pattern="\\.x", replacement="")]) |
|
|
1325 |
ave.by.label <- as.data.frame(apply(goi.by.cluster, 2, function(X) X/colSums(clust.model.matrix))) |
|
|
1326 |
rownames(ave.by.label) <- gsub(rownames(ave.by.label), pattern="Sub\\.Annotation", replacement="") |
|
|
1327 |
colnames(ave.by.label) <- gsub(colnames(ave.by.label), pattern="Prot\\.", replacement="") |
|
|
1328 |
|
|
|
1329 |
goi.by.cluster.bin <- t(clust.model.matrix) %*% as.matrix(tcell.prot.merge[, gsub(setdiff(colnames(prot.df), colnames(tcell.merge)), |
|
|
1330 |
pattern="\\.x", replacement="")] > 0.01) |
|
|
1331 |
bin.by.label <- as.data.frame(apply(goi.by.cluster.bin, 2, function(X) X/sum(X))) |
|
|
1332 |
bin.by.label[is.na(bin.by.label)] <- 0 |
|
|
1333 |
rownames(bin.by.label) <- gsub(rownames(bin.by.label), pattern="Sub\\.Annotation", replacement="") |
|
|
1334 |
colnames(bin.by.label) <- gsub(colnames(bin.by.label), pattern="Prot\\.", replacement="") |
|
|
1335 |
``` |
|
|
1336 |
|
|
|
1337 |
|
|
|
1338 |
```{r, message=FALSE, warning=FALSE} |
|
|
1339 |
ave.label.df <- as.data.frame(apply(ave.by.label, 2, FUN=function(XP) (XP-min(XP))/max(XP-min(XP)))) |
|
|
1340 |
ave.label.df$Sub.Annotation <- rownames(ave.label.df) |
|
|
1341 |
ave.label.melt <- melt(ave.label.df, id.vars='Sub.Annotation') |
|
|
1342 |
colnames(ave.label.melt) <- c("Sub.Annotation", "Protein", "AveExprs") |
|
|
1343 |
|
|
|
1344 |
bin.label.df <- bin.by.label |
|
|
1345 |
bin.label.df$Sub.Annotation <- rownames(bin.label.df) |
|
|
1346 |
bin.label.melt <- melt(bin.label.df, id.vars='Sub.Annotation') |
|
|
1347 |
colnames(bin.label.melt) <- c("Sub.Annotation", "Protein", "PropExprs") |
|
|
1348 |
``` |
|
|
1349 |
|
|
|
1350 |
|
|
|
1351 |
```{r, message=FALSE, warning=FALSE, fig.height=5.15, fig.width=8.95} |
|
|
1352 |
protein.dot.plot.df <- merge(ave.label.melt, bin.label.melt, by=c('Sub.Annotation', 'Protein')) |
|
|
1353 |
protein.dot.plot.df$Protein <- as.character(protein.dot.plot.df$Protein) |
|
|
1354 |
|
|
|
1355 |
dot.plot.prots <- c("CD3", "CD8", "CD4", "CCR7", "CD45RA", "CD45RO", "CD28", "CD27", "CTLA4", "CD56", "CD16", "ICOSL", |
|
|
1356 |
"CD274", "CXCR3", "CD40LG", "CD38", "ICOS", "PD1", "CD25", "LAMP1", "CXCR3", "CCR6", "IL7R", |
|
|
1357 |
"CXCR5", "TCR_Va7.2", "TCR_Vg2", "TCR_Vg9", "TCR_Va24-Ja18", "CD62L", "CD44", "TCR_Va24.Ja18") |
|
|
1358 |
|
|
|
1359 |
protein.order <- c("CD3", "CD4", "CCR7", "CD45RA", "CD45RO", "CD27", "CD28", "CD62L", "CD25", "CTLA4", |
|
|
1360 |
"LAMP1", "CCR6", "IL7R", |
|
|
1361 |
"CXCR5", "CD40LG", "ICOS", "CXCR3", "CD8", "PD1", "CD274", "TCR_Vg9", "TCR_Vg2", |
|
|
1362 |
"TCR_Va7.2", "TCR_Va24.Ja18", "CD56", "CD16") |
|
|
1363 |
|
|
|
1364 |
protein.dot.plot.df <- protein.dot.plot.df[protein.dot.plot.df$Protein %in% dot.plot.prots, ] |
|
|
1365 |
protein.dot.plot.df$Protein <- factor(protein.dot.plot.df$Protein, |
|
|
1366 |
levels=protein.order) |
|
|
1367 |
|
|
|
1368 |
protein.dot.plot.df <- protein.dot.plot.df[!protein.dot.plot.df$Sub.Annotation %in% c("NK", "ILCs", |
|
|
1369 |
"Doublets:Platelet", "Doublets", "Doublets:Bcell"), ] |
|
|
1370 |
protein.dot.plot.df$Sub.Annotation <- factor(protein.dot.plot.df$Sub.Annotation, |
|
|
1371 |
levels=rev(cell.order)) |
|
|
1372 |
|
|
|
1373 |
ggplot(protein.dot.plot.df, |
|
|
1374 |
aes(x=Protein, y=Sub.Annotation, colour=AveExprs, size=PropExprs*100)) + |
|
|
1375 |
geom_point() + |
|
|
1376 |
theme_cowplot() + |
|
|
1377 |
scale_colour_gradient(low='grey70', high=darken('red')) + |
|
|
1378 |
theme(axis.text.x=element_text(angle=90, vjust=0.5, hjust=1, colour='black', size=14), |
|
|
1379 |
axis.text.y=element_text(colour='black', size=14), |
|
|
1380 |
legend.text=element_text(size=14), |
|
|
1381 |
legend.title=element_text(size=14), |
|
|
1382 |
panel.grid.major = element_blank(), panel.grid.minor = element_blank(), |
|
|
1383 |
panel.background = element_blank(), axis.line = element_line(colour = "black")) + |
|
|
1384 |
guides(colour=guide_colorbar(title='Mean\nExpression'), |
|
|
1385 |
size=guide_legend(title='% Express')) + |
|
|
1386 |
labs(x="Protein", y="Cell type") + |
|
|
1387 |
ggsave("~/Dropbox/COVID19/plot.dir/Tcell_ext_dotplot-protein.pdf", |
|
|
1388 |
height=5.15, width=8.95, useDingbats=FALSE) |
|
|
1389 |
``` |
|
|
1390 |
|
|
|
1391 |
I also want to see some of the CD8 sub-phenotype mRNA and protein expression, namely _KLRG1_ and _CD217_ (_IL7R_). |
|
|
1392 |
|
|
|
1393 |
```{r, warning=FALSE, message=FALSE, fig.height=5.95, fig.width=11.95} |
|
|
1394 |
repeat.samps <- covid.meta$patient_id[duplicated(covid.meta$patient_id)] |
|
|
1395 |
cell.freq.merge$Day <- ordered(cell.freq.merge$Collection_Day, |
|
|
1396 |
levels=c("D0", "D7", "D9", "D12", "D13", "D28")) |
|
|
1397 |
|
|
|
1398 |
ggplot(cell.freq.merge[cell.freq.merge$patient_id %in% repeat.samps & |
|
|
1399 |
!cell.freq.merge$CellType %in% c("NK", "Prolif.NK", "ILCs", "Doublets:Bcell", "Doublets:Platelet", "Doublets") & |
|
|
1400 |
!cell.freq.merge$D0_status_summary %in% c("Non_covid", "LPS"), ], |
|
|
1401 |
aes(x=Day, y=Freq, fill=D0_status_summary, group=patient_id)) + |
|
|
1402 |
#geom_boxplot() + |
|
|
1403 |
geom_line(aes(colour=D0_status_summary)) + |
|
|
1404 |
geom_point(shape=21, size=3) + |
|
|
1405 |
theme_cowplot() + |
|
|
1406 |
facet_wrap(~CellType, scales="free_y") + |
|
|
1407 |
scale_fill_manual(values=group.cols) + |
|
|
1408 |
scale_colour_manual(values=group.cols) + |
|
|
1409 |
expand_limits(y=c(0)) + |
|
|
1410 |
theme(strip.background=element_rect(fill='white', colour='white'), |
|
|
1411 |
strip.text=element_text(size=14)) + |
|
|
1412 |
labs(x="Day", y="Proportion") + |
|
|
1413 |
guides(fill=guide_legend(title="Severity")) + |
|
|
1414 |
# ggsave("~/Dropbox/COVID19/plot.dir/Tcells_proportions-boxplot.pdf", |
|
|
1415 |
# height=5.95, width=11.15, useDingbats=FALSE) + |
|
|
1416 |
NULL |
|
|
1417 |
``` |
|
|
1418 |
|
|
|
1419 |
|
|
|
1420 |
Correlate T cell and B cell proportions. |
|
|
1421 |
|
|
|
1422 |
```{r} |
|
|
1423 |
initial.clusts <- read.table("~/Dropbox/COVID19/Data/combined_dec_MetaData.txt", |
|
|
1424 |
sep="\t", header=TRUE, stringsAsFactors=FALSE) |
|
|
1425 |
``` |
|
|
1426 |
|
|
|
1427 |
```{r} |
|
|
1428 |
bcell.nums <- read.csv("~/Dropbox/COVID19/Data/B_cellnumbers_by_sample_id.csv", |
|
|
1429 |
header=TRUE, stringsAsFactors=FALSE) |
|
|
1430 |
rownames(bcell.nums) <- bcell.nums$sample_id |
|
|
1431 |
bcell.nums <- bcell.nums[, -1] |
|
|
1432 |
|
|
|
1433 |
# add an aggreated plasmacell column |
|
|
1434 |
bcell.nums$Plasmacell <- rowSums(bcell.nums[, c("Plasma_cell_IgM", "Plasma_cell_IgA", "Plasma_cell_IgG")]) |
|
|
1435 |
bcell.nums <- bcell.nums[intersect(rownames(bcell.nums), names(n.cell.vecc)), ] |
|
|
1436 |
# b.cell.nums <- as.data.frame(t(sapply(rownames(bcell.nums), FUN=function(X) bcell.nums[X, ]/n.cell.vecc[X]))) |
|
|
1437 |
|
|
|
1438 |
# bcell.nums <- as.data.frame(t(apply(bcell.nums, 1, function(X) X/sum(X)))) |
|
|
1439 |
bcell.nums$sample_id <- rownames(bcell.nums) |
|
|
1440 |
bcell.num.df <- melt(bcell.nums, id.vars='sample_id') |
|
|
1441 |
bcell.num.df$sample_id <- as.character(bcell.num.df$sample_id) |
|
|
1442 |
bcell.num.df$variable <- as.character(bcell.num.df$variable) |
|
|
1443 |
colnames(bcell.num.df) <- c("sample_id", "Celltype", "Freq") |
|
|
1444 |
#bcell.num.df <- bcell.num.df[bcell.num.df$Celltype %in% c("B_cell", "Plasmablast"), ] |
|
|
1445 |
|
|
|
1446 |
bcell.num.cast <- dcast(bcell.num.df, sample_id ~ Celltype, value.var='Freq') |
|
|
1447 |
rownames(bcell.num.cast) <- bcell.num.cast$sample_id |
|
|
1448 |
``` |
|
|
1449 |
|
|
|
1450 |
|
|
|
1451 |
|
|
|
1452 |
```{r, warning=FALSE, message=FALSE} |
|
|
1453 |
tcell.freq.tab <- table(tcell.prot.merge$sample_id[!tcell.prot.merge$Sub.Annotation %in% c("NK", "ILCs", "Doublets", "Doublets:Bcell", "Doublets:Platelet")], |
|
|
1454 |
tcell.prot.merge$Sub.Annotation[!tcell.prot.merge$Sub.Annotation %in% c("NK", "ILCs", "Doublets", "Doublets:Bcell", "Doublets:Platelet")]) |
|
|
1455 |
tcell.freq.tab <- as.data.frame(tcell.freq.tab[!rownames(tcell.freq.tab) %in% |
|
|
1456 |
c("NK", "ILCs", "Doublets", "Doublets:Bcell", "Doublets:Platelet"), ]) |
|
|
1457 |
tcell.nums <- dcast(tcell.freq.tab, Var1 ~ Var2, value.var='Freq') |
|
|
1458 |
|
|
|
1459 |
colnames(tcell.nums) <- c("sample_id", colnames(tcell.nums)[-1]) |
|
|
1460 |
|
|
|
1461 |
tcell.bcell.nums <- merge(tcell.nums, bcell.num.cast, by='sample_id') |
|
|
1462 |
rownames(tcell.bcell.nums) <- tcell.bcell.nums$sample_id |
|
|
1463 |
tcell.bcell.nums <- as.matrix(tcell.bcell.nums[, -1]) |
|
|
1464 |
|
|
|
1465 |
tcell.bcell.props <- sapply(rownames(tcell.bcell.nums), FUN=function(X) as.numeric(tcell.bcell.nums[X, ]/n.cell.vecc[X]), |
|
|
1466 |
simplify=TRUE) |
|
|
1467 |
rownames(tcell.bcell.props) <- colnames(tcell.bcell.nums) |
|
|
1468 |
``` |
|
|
1469 |
|
|
|
1470 |
Cross-compare the T and B cell proportions. |
|
|
1471 |
|
|
|
1472 |
```{r, fig.height=5.95, fig.width=6.95} |
|
|
1473 |
bcell.types <- colnames(bcell.num.cast)[-1] |
|
|
1474 |
tcell.bcell.corr <- cor(t(tcell.bcell.props[c(bcell.types, "CD4.Tfh"), ])) |
|
|
1475 |
pheatmap(tcell.bcell.corr, show_rownames=TRUE, show_colnames=TRUE, |
|
|
1476 |
filename="~/Dropbox/COVID19/plot.dir/Tcell_Bcell_corr_heatmap.pdf", |
|
|
1477 |
height=5.95, width=6.95, useDingbats=FALSE) |
|
|
1478 |
|
|
|
1479 |
pheatmap(tcell.bcell.corr, show_rownames=TRUE, show_colnames=TRUE) |
|
|
1480 |
``` |
|
|
1481 |
|
|
|
1482 |
How different is this for each severity group, or final outcome? |
|
|
1483 |
|
|
|
1484 |
```{r, fig.height=5.95, fig.width=6.95} |
|
|
1485 |
control.samps <- covid.meta$sample_id[covid.meta$D0_status_summary %in% c("Healthy")] |
|
|
1486 |
asymp.samps <- covid.meta$sample_id[covid.meta$D0_status_summary %in% c("Asymptomatic")] |
|
|
1487 |
mild.samps <- covid.meta$sample_id[covid.meta$D0_status_summary %in% c("Mild")] |
|
|
1488 |
mod.samps <- covid.meta$sample_id[covid.meta$D0_status_summary %in% c("Moderate")] |
|
|
1489 |
severe.samps <- covid.meta$sample_id[covid.meta$D0_status_summary %in% c("Severe")] |
|
|
1490 |
critical.samps <- covid.meta$sample_id[covid.meta$D0_status_summary %in% c("Critical")] |
|
|
1491 |
``` |
|
|
1492 |
|
|
|
1493 |
Differential correlation? Should I do this across samples, ranked by severity? |
|
|
1494 |
|
|
|
1495 |
```{r, warning=FALSE, message=FALSE} |
|
|
1496 |
library(DCARS) |
|
|
1497 |
library(kableExtra) |
|
|
1498 |
library(knitr) |
|
|
1499 |
covid.meta$OrderedSeverity <- factor(covid.meta$D0_status_summary, |
|
|
1500 |
levels=c("Healthy", "Asymptomatic", "Mild", "Moderate", "Severe", "Critical")) |
|
|
1501 |
samp.order <- covid.meta[order(covid.meta$OrderedSeverity[!is.na(covid.meta$OrderedSeverity)]),]$sample_id |
|
|
1502 |
|
|
|
1503 |
bcell.tcell.dcars <- sapply(c(bcell.types, "CD4.Tfh"), FUN=function(XT) |
|
|
1504 |
DCARS(dat=tcell.bcell.props[c(bcell.types, "CD4.Tfh"), intersect(colnames(tcell.bcell.props), samp.order)], |
|
|
1505 |
xname=XT, yname="CD4.Tfh", plot=FALSE, niter=10000, |
|
|
1506 |
extractPermutationTestStatistics=FALSE)) |
|
|
1507 |
bcell.tcell.dcars.adjust <- p.adjust(bcell.tcell.dcars) |
|
|
1508 |
bcell.tcell.dcars.table <- data.frame("Pvalue"=bcell.tcell.dcars) |
|
|
1509 |
|
|
|
1510 |
kbl(bcell.tcell.dcars.table) %>% kable_paper(full_width=FALSE) %>% |
|
|
1511 |
save_kable("~/Dropbox/COVID19/plot.dir/DiffCor_Bcell-Tfh_props.pdf", |
|
|
1512 |
self_contained=TRUE, keep_tex=TRUE) |
|
|
1513 |
kbl(bcell.tcell.dcars.table) %>% kable_paper(full_width=FALSE) %>% |
|
|
1514 |
save_kable("~/Dropbox/COVID19/plot.dir/DiffCor_Bcell-Tfh_props.html", |
|
|
1515 |
self_contained=TRUE) |
|
|
1516 |
|
|
|
1517 |
kbl(bcell.tcell.dcars.table) %>% kable_paper(full_width=FALSE) |
|
|
1518 |
``` |
|
|
1519 |
|
|
|
1520 |
These are the differential correlations between CD4.Tfh cells and B cell proportions across severity groups. Plot the correlation difference |
|
|
1521 |
between asymptomatic and severe, or plot the correlation across all severity groups for each cell type? |
|
|
1522 |
|
|
|
1523 |
```{r, warning=FALSE, message=FALSE, fig.height=2.95, width=3.95} |
|
|
1524 |
bcell.cors.list <- list("Healthy"=cor(t(tcell.bcell.props[c(bcell.types, "CD4.Tfh"), colnames(tcell.bcell.props) %in% control.samps]))["CD4.Tfh", ], |
|
|
1525 |
"Asymptomatic"=cor(t(tcell.bcell.props[c(bcell.types, "CD4.Tfh"), colnames(tcell.bcell.props) %in% asymp.samps]))["CD4.Tfh", ], |
|
|
1526 |
"Mild"=cor(t(tcell.bcell.props[c(bcell.types, "CD4.Tfh"), colnames(tcell.bcell.props) %in% mild.samps]))["CD4.Tfh", ], |
|
|
1527 |
"Moderate"=cor(t(tcell.bcell.props[c(bcell.types, "CD4.Tfh"), colnames(tcell.bcell.props) %in% mod.samps]))["CD4.Tfh", ], |
|
|
1528 |
"Severe"=cor(t(tcell.bcell.props[c(bcell.types, "CD4.Tfh"), colnames(tcell.bcell.props) %in% severe.samps]))["CD4.Tfh", ], |
|
|
1529 |
"Critical"=cor(t(tcell.bcell.props[c(bcell.types, "CD4.Tfh"), colnames(tcell.bcell.props) %in% critical.samps]))["CD4.Tfh", ]) |
|
|
1530 |
|
|
|
1531 |
bcell.cors.df <- do.call(cbind.data.frame, bcell.cors.list) |
|
|
1532 |
bcell.cors.df$CellType <- rownames(bcell.cors.df) |
|
|
1533 |
bcell.cors.melt <- melt(bcell.cors.df, id.vars=c("CellType")) |
|
|
1534 |
bcell.cors.melt$CellType <- as.character(bcell.cors.melt$CellType) |
|
|
1535 |
colnames(bcell.cors.melt) <- c("CellType", "Severity", "Corr") |
|
|
1536 |
bcell.cors.melt$Severity <- factor(bcell.cors.melt$Severity, |
|
|
1537 |
levels=c("Healthy", "Asymptomatic", "Mild", "Moderate", "Severe", "Critical")) |
|
|
1538 |
|
|
|
1539 |
# maybe only plot the significant diff correlations. |
|
|
1540 |
diff.cors <- names(bcell.tcell.dcars)[bcell.tcell.dcars <= 0.05] |
|
|
1541 |
|
|
|
1542 |
plasma.cols <- c("#49006a", "#ae017e", "#d4b9da", "#fa9fb5", "blue") |
|
|
1543 |
names(plasma.cols) <- c("Plasmablast", "Plasma_cell_IgM", "Plasma_cell_IgG", "Plasma_cell_IgA") |
|
|
1544 |
|
|
|
1545 |
ggplot(bcell.cors.melt[bcell.cors.melt$CellType %in% diff.cors, ], |
|
|
1546 |
aes(x=Severity, y=Corr, fill=CellType, colour=CellType, group=CellType)) + |
|
|
1547 |
geom_hline(yintercept=0, lty=2, colour='grey80', alpha=0.75) + |
|
|
1548 |
geom_line() + |
|
|
1549 |
geom_point(shape=21, size=3) + |
|
|
1550 |
scale_fill_manual(values=plasma.cols) + |
|
|
1551 |
scale_colour_manual(values=plasma.cols) + |
|
|
1552 |
theme_cowplot() + |
|
|
1553 |
theme(axis.text.x=element_text(size=14, angle=90, vjust=0.5, hjust=1)) + |
|
|
1554 |
labs(x="Severity", y="Pearson\nCorrelation") + |
|
|
1555 |
ggsave("~/Dropbox/COVID19/plot.dir/Tfh_diff_corr.pdf", |
|
|
1556 |
height=2.95, width=3.95, useDingbats=FALSE) + |
|
|
1557 |
NULL |
|
|
1558 |
|
|
|
1559 |
``` |
|
|
1560 |
|
|
|
1561 |
|
|
|
1562 |
Also have the Ig isotypes from Kelvin to correlate. |
|
|
1563 |
|
|
|
1564 |
```{r, warning=FALSE, message=FALSE} |
|
|
1565 |
ig.isotypes <- read.csv("~/Dropbox/COVID19/Data/vdj_isotypes (1).csv", |
|
|
1566 |
header=TRUE, stringsAsFactors=FALSE) |
|
|
1567 |
isotype.counts <- dcast(ig.isotypes, sample_id ~ celltype + isotype, value.var='total') |
|
|
1568 |
isotype.counts[is.na(isotype.counts)] <- 0 |
|
|
1569 |
rownames(isotype.counts) <- isotype.counts$sample_id |
|
|
1570 |
isotype.counts <- isotype.counts[, -1] |
|
|
1571 |
isotype.counts <- isotype.counts[intersect(rownames(isotype.counts), names(n.cell.vecc)), ] |
|
|
1572 |
|
|
|
1573 |
bcell.counts <- rowSums(bcell.nums[, -ncol(bcell.nums)]) |
|
|
1574 |
|
|
|
1575 |
isotype.counts.props <- t(sapply(rownames(isotype.counts), FUN=function(X) as.numeric(isotype.counts[X, ]/n.cell.vecc[X]), |
|
|
1576 |
simplify=TRUE)) |
|
|
1577 |
colnames(isotype.counts.props) <- colnames(isotype.counts) |
|
|
1578 |
``` |
|
|
1579 |
|
|
|
1580 |
|
|
|
1581 |
```{r} |
|
|
1582 |
tcell.bcell.df <- as.data.frame(tcell.bcell.nums) |
|
|
1583 |
tcell.bcell.df$sample_id <- rownames(tcell.bcell.nums) |
|
|
1584 |
isotype.counts.props <- as.data.frame(isotype.counts.props) |
|
|
1585 |
isotype.counts.props$sample_id <- rownames(isotype.counts.props) |
|
|
1586 |
|
|
|
1587 |
tcell.isotype.df <- merge(isotype.counts.props, tcell.bcell.df[, colnames(tcell.nums)], by='sample_id') |
|
|
1588 |
rownames(tcell.isotype.df) <- tcell.isotype.df$sample_id |
|
|
1589 |
``` |
|
|
1590 |
|
|
|
1591 |
|
|
|
1592 |
```{r, fig.height=9.95, fig.width=9.95} |
|
|
1593 |
isotypes <- colnames(isotype.counts) |
|
|
1594 |
tcell.isotype.corr <- cor(tcell.isotype.df[, c(isotypes, "CD4.Tfh")]) |
|
|
1595 |
pheatmap(tcell.isotype.corr, show_rownames=TRUE, show_colnames=TRUE, |
|
|
1596 |
filename="~/Dropbox/COVID19/plot.dir/Tcell_isotype_corr_heatmap.pdf", |
|
|
1597 |
height=5.95, width=6.95, useDingbats=FALSE) |
|
|
1598 |
|
|
|
1599 |
pheatmap(tcell.isotype.corr, show_rownames=TRUE, show_colnames=TRUE) |
|
|
1600 |
``` |
|
|
1601 |
|
|
|
1602 |
|
|
|
1603 |
Differential correlation? Should I do this across samples, ranked by severity? |
|
|
1604 |
|
|
|
1605 |
```{r, warning=FALSE, message=FALSE} |
|
|
1606 |
# need to remove all 0 rows |
|
|
1607 |
isotype.dat <- t(tcell.isotype.df[intersect(rownames(tcell.isotype.df), samp.order), c(isotypes, "CD4.Tfh")]) |
|
|
1608 |
isotype.dat <- isotype.dat[rowSums(isotype.dat)> 1e-2, ] |
|
|
1609 |
|
|
|
1610 |
isotype.tcell.dcars <- sapply(rownames(isotype.dat), FUN=function(XT) |
|
|
1611 |
DCARS(dat=isotype.dat, |
|
|
1612 |
xname=XT, yname="CD4.Tfh", plot=FALSE, niter=10000, |
|
|
1613 |
extractPermutationTestStatistics=FALSE)) |
|
|
1614 |
|
|
|
1615 |
isotype.tcell.dcars.table <- data.frame("Pvalue"=isotype.tcell.dcars) |
|
|
1616 |
kbl(isotype.tcell.dcars.table) %>% kable_paper(full_width=FALSE) %>% |
|
|
1617 |
save_kable("~/Dropbox/COVID19/plot.dir/DiffCor_Isotype-Tfh_props.pdf", |
|
|
1618 |
self_contained=TRUE, keep_tex=TRUE) |
|
|
1619 |
kbl(isotype.tcell.dcars.table) %>% kable_paper(full_width=FALSE) %>% |
|
|
1620 |
save_kable("~/Dropbox/COVID19/plot.dir/DiffCor_Isotype-Tfh_props.html", |
|
|
1621 |
self_contained=TRUE) |
|
|
1622 |
|
|
|
1623 |
kbl(isotype.tcell.dcars.table) %>% kable_paper(full_width=FALSE) |
|
|
1624 |
``` |
|
|
1625 |
|
|
|
1626 |
These are the differential correlations between CD4.Tfh cells and B cell proportions across severity groups. Plot the correlation difference |
|
|
1627 |
between asymptomatic and severe, or plot the correlation across all severity groups for each cell type? |
|
|
1628 |
|
|
|
1629 |
```{r, warning=FALSE, message=FALSE, fig.height=2.95, width=3.95} |
|
|
1630 |
isotype.cors.list <- list("Healthy"=cor(tcell.isotype.df[intersect(rownames(tcell.isotype.df), control.samps), c(isotypes, "CD4.Tfh")])[, c("CD4.Tfh")], |
|
|
1631 |
"Asymptomatic"=cor(tcell.isotype.df[intersect(rownames(tcell.isotype.df), asymp.samps), c(isotypes, "CD4.Tfh")])[, c("CD4.Tfh")], |
|
|
1632 |
"Mild"=cor(tcell.isotype.df[intersect(rownames(tcell.isotype.df), mild.samps), c(isotypes, "CD4.Tfh")])[, c("CD4.Tfh")], |
|
|
1633 |
"Moderate"=cor(tcell.isotype.df[intersect(rownames(tcell.isotype.df), mod.samps), c(isotypes, "CD4.Tfh")])[, c("CD4.Tfh")], |
|
|
1634 |
"Severe"=cor(tcell.isotype.df[intersect(rownames(tcell.isotype.df), severe.samps), c(isotypes, "CD4.Tfh")])[, c("CD4.Tfh")], |
|
|
1635 |
"Critical"=cor(tcell.isotype.df[intersect(rownames(tcell.isotype.df), critical.samps), c(isotypes, "CD4.Tfh")])[, c("CD4.Tfh")]) |
|
|
1636 |
|
|
|
1637 |
isotype.cors.df <- do.call(cbind.data.frame, isotype.cors.list) |
|
|
1638 |
isotype.cors.df$CellType <- rownames(isotype.cors.df) |
|
|
1639 |
isotype.cors.melt <- melt(isotype.cors.df, id.vars=c("CellType")) |
|
|
1640 |
isotype.cors.melt$CellType <- as.character(isotype.cors.melt$CellType) |
|
|
1641 |
colnames(isotype.cors.melt) <- c("CellType", "Severity", "Corr") |
|
|
1642 |
isotype.cors.melt$Severity <- gsub(isotype.cors.melt$Severity, |
|
|
1643 |
pattern="^([A-Za-z]+)\\.(\\S*)", replacement="\\1") |
|
|
1644 |
isotype.cors.melt$Severity <- factor(isotype.cors.melt$Severity, |
|
|
1645 |
levels=c("Healthy", "Asymptomatic", "Mild", "Moderate", "Severe", "Critical")) |
|
|
1646 |
|
|
|
1647 |
# maybe only plot the significant diff correlations. |
|
|
1648 |
diff.cors <- names(isotype.tcell.dcars)[isotype.tcell.dcars <= 0.05] |
|
|
1649 |
|
|
|
1650 |
ggplot(isotype.cors.melt[isotype.cors.melt$CellType %in% setdiff(diff.cors, "CD4.Tfh"), ], |
|
|
1651 |
aes(x=Severity, y=Corr, fill=CellType, colour=CellType, group=CellType)) + |
|
|
1652 |
geom_hline(yintercept=0, lty=2, colour='grey80', alpha=0.75) + |
|
|
1653 |
geom_line() + |
|
|
1654 |
geom_point(shape=21, size=3) + |
|
|
1655 |
# scale_fill_manual(values=plasma.cols) + |
|
|
1656 |
# scale_colour_manual(values=plasma.cols) + |
|
|
1657 |
scale_fill_aaas() + |
|
|
1658 |
scale_colour_aaas() + |
|
|
1659 |
theme_cowplot() + |
|
|
1660 |
theme(axis.text.x=element_text(size=14, angle=90, vjust=0.5, hjust=1)) + |
|
|
1661 |
labs(x="Severity", y="Pearson\nCorrelation") + |
|
|
1662 |
ggsave("~/Dropbox/COVID19/plot.dir/Tfh_Isotype-diff_corr.pdf", |
|
|
1663 |
height=2.95, width=3.95, useDingbats=FALSE) + |
|
|
1664 |
NULL |
|
|
1665 |
``` |
|
|
1666 |
|
|
|
1667 |
|