|
a |
|
b/Cydar/03_Cydar_Figure.Rmd |
|
|
1 |
--- |
|
|
2 |
title: "Cydar on Covid ADT data" |
|
|
3 |
author: "Karsten Bach" |
|
|
4 |
date: '`r Sys.Date()`' |
|
|
5 |
output: |
|
|
6 |
html_notebook: |
|
|
7 |
toc: true |
|
|
8 |
toc_depth: 3 |
|
|
9 |
toc_float: true |
|
|
10 |
theme: united |
|
|
11 |
highlight: tango |
|
|
12 |
code_folding: hide |
|
|
13 |
--- |
|
|
14 |
*** |
|
|
15 |
|
|
|
16 |
# Load Data |
|
|
17 |
|
|
|
18 |
Assigning cells into hyperspheres was done already as this is time consuming. |
|
|
19 |
The "expression values" are the reconstructed values after fastMNN on the denoised counts. |
|
|
20 |
|
|
|
21 |
```{r, message=FALSE,warning=FALSE} |
|
|
22 |
|
|
|
23 |
# ---- Parsing ---- |
|
|
24 |
opt <- list() |
|
|
25 |
opt$cd <- "../data/HyperSpheres_8.RDS" |
|
|
26 |
|
|
|
27 |
# ---- Load Data ---- |
|
|
28 |
library(knitr) |
|
|
29 |
library(scran) |
|
|
30 |
library(scater) |
|
|
31 |
library(cydar) |
|
|
32 |
library(randomForest) |
|
|
33 |
library(ggplot2) |
|
|
34 |
library(cowplot) |
|
|
35 |
library(ggplot2) |
|
|
36 |
library(viridis) |
|
|
37 |
theme_set(theme_cowplot()) |
|
|
38 |
|
|
|
39 |
# Read in clinical meta data |
|
|
40 |
meta <- read.csv("../data/meta/Metadata FINAL 10122020v2.csv", |
|
|
41 |
stringsAsFactor=FALSE)[,-1] |
|
|
42 |
|
|
|
43 |
cd <- readRDS(opt$cd) |
|
|
44 |
|
|
|
45 |
#Setting up edgeR object |
|
|
46 |
library(edgeR) |
|
|
47 |
y <- DGEList(assay(cd), lib.size=cd$totals) |
|
|
48 |
|
|
|
49 |
# Remove sample that is "non_covid" |
|
|
50 |
meta <- meta[meta$Collection_Day=="D0" & meta$Status!="LPS" & meta$sample_id != "BGCV02_CV0902",] |
|
|
51 |
rownames(meta) <- meta$patient_id |
|
|
52 |
meta <- meta[rownames(y$samples),] |
|
|
53 |
|
|
|
54 |
y$samples$Age <- as.numeric(meta$Age) |
|
|
55 |
y$samples$Sex <- as.character(meta$Sex) |
|
|
56 |
y$samples$Severity <- factor(meta$Status_on_day_collection_summary, |
|
|
57 |
levels=c("Healthy","Asymptomatic","Mild", |
|
|
58 |
"Moderate","Severe","Critical")) |
|
|
59 |
y$samples$Center <- meta$Site |
|
|
60 |
|
|
|
61 |
# Removing lowly occupied spheres |
|
|
62 |
keep <- filterByExpr(y,group=y$samples$Severity,min.count=5) |
|
|
63 |
cd <- cd[keep,] |
|
|
64 |
y <- y[keep,] |
|
|
65 |
|
|
|
66 |
``` |
|
|
67 |
|
|
|
68 |
# The Samples |
|
|
69 |
- Overview over the samples that I included in the analysis. |
|
|
70 |
```{r, message=FALSE,warning=FALSE} |
|
|
71 |
kable(table(y$samples$Center,y$samples$Severity)) |
|
|
72 |
kable(table(y$samples$Severity,y$samples$Sex)) |
|
|
73 |
ggplot(y$samples, aes(x=Severity, y=Age, fill=Center)) + |
|
|
74 |
geom_boxplot() + |
|
|
75 |
geom_jitter() + |
|
|
76 |
theme(axis.text.x=element_text(angle=60,hjust=1)) + |
|
|
77 |
scale_fill_manual(values=c("blue","dodgerblue","salmon")) |
|
|
78 |
``` |
|
|
79 |
|
|
|
80 |
# The hyperspheres |
|
|
81 |
The cells were then grouped into `r nrow(cd)` hyperspheres containing a median of `r median(rowSums(y$counts))` cells. Note that a cell can be represented in multiple hyperspheres. |
|
|
82 |
We can take mean intensities for all of the hyperspheres and compute a UMAP on them to get an overview of the data. |
|
|
83 |
```{r, message=FALSE,warning=FALSE,fig.width=15,fig.height=10} |
|
|
84 |
# Plot a UMAP with the hyperspheres |
|
|
85 |
coords <- intensities(cd) |
|
|
86 |
colnames(coords) <- gsub("AB_","",colnames(coords)) |
|
|
87 |
#colnames(coords) <- gsub("-",".",colnames(coords)) |
|
|
88 |
#colnames(coords) <- gsub("/",".",colnames(coords)) |
|
|
89 |
|
|
|
90 |
library(irlba) |
|
|
91 |
set.seed(42) |
|
|
92 |
pcs <- prcomp_irlba(coords,n=20) |
|
|
93 |
library(umap) |
|
|
94 |
ump <- umap(pcs$x,random_state=42, |
|
|
95 |
n_neighbors=20, |
|
|
96 |
init='spectral', |
|
|
97 |
n_components=2, |
|
|
98 |
min_dist=0.1, |
|
|
99 |
mehtod='naive') |
|
|
100 |
|
|
|
101 |
|
|
|
102 |
fplot <- data.frame("UMAP1"=ump$layout[,1], |
|
|
103 |
"UMAP2"=ump$layout[,2]) |
|
|
104 |
|
|
|
105 |
for (gn in colnames(coords)) { |
|
|
106 |
fplot[,gn] <- coords[,gn] |
|
|
107 |
} |
|
|
108 |
|
|
|
109 |
fplot$Size <- rowMeans(cpm(y,log=TRUE)) |
|
|
110 |
|
|
|
111 |
p1 <- ggplot(fplot, aes(x=UMAP1, y=UMAP2, color=CD45)) + |
|
|
112 |
geom_point() + |
|
|
113 |
scale_color_viridis() |
|
|
114 |
p2 <- ggplot(fplot, aes(x=UMAP1, y=UMAP2, color=CD3)) + |
|
|
115 |
geom_point() + |
|
|
116 |
scale_color_viridis() |
|
|
117 |
p3 <- ggplot(fplot, aes(x=UMAP1, y=UMAP2, color=CD14)) + |
|
|
118 |
geom_point() + |
|
|
119 |
scale_color_viridis() |
|
|
120 |
p4 <- ggplot(fplot, aes(x=UMAP1, y=UMAP2, color=CD8)) + |
|
|
121 |
geom_point() + |
|
|
122 |
scale_color_viridis() |
|
|
123 |
p5 <- ggplot(fplot, aes(x=UMAP1, y=UMAP2, color=ITGAX)) + |
|
|
124 |
geom_point() + |
|
|
125 |
scale_color_viridis() |
|
|
126 |
library(cowplot) |
|
|
127 |
plot_grid(p1,p2,p3,p4,p5) |
|
|
128 |
|
|
|
129 |
p1 <- ggplot(fplot, aes(x=UMAP1, y=UMAP2, color=ITGAM)) + |
|
|
130 |
geom_point() + |
|
|
131 |
scale_color_viridis() |
|
|
132 |
p2 <- ggplot(fplot, aes(x=UMAP1, y=UMAP2, color=ITGAX)) + |
|
|
133 |
geom_point() + |
|
|
134 |
scale_color_viridis() |
|
|
135 |
p3 <- ggplot(fplot, aes(x=UMAP1, y=UMAP2, color=CD71)) + |
|
|
136 |
geom_point() + |
|
|
137 |
scale_color_viridis() |
|
|
138 |
p4 <- ggplot(fplot, aes(x=UMAP1, y=UMAP2, color=CD123)) + |
|
|
139 |
geom_point() + |
|
|
140 |
scale_color_viridis() |
|
|
141 |
p5 <- ggplot(fplot, aes(x=UMAP1, y=UMAP2, color=CD19)) + |
|
|
142 |
geom_point() + |
|
|
143 |
scale_color_viridis() |
|
|
144 |
p6 <- ggplot(fplot, aes(x=UMAP1, y=UMAP2, color=CD1C)) + |
|
|
145 |
geom_point() + |
|
|
146 |
scale_color_viridis() |
|
|
147 |
library(cowplot) |
|
|
148 |
plot_grid(p1,p2,p3,p4,p5,p6) |
|
|
149 |
``` |
|
|
150 |
|
|
|
151 |
# Setting up the model for 1-vs-1 comparison |
|
|
152 |
- Setting span to 1 because low number of points |
|
|
153 |
```{r, message=FALSE,warning=FALSE} |
|
|
154 |
desgn <- model.matrix(~ 0 + Sex + Age + Center + Severity, data=y$samples) |
|
|
155 |
y <- estimateDisp(y, desgn,span=1) |
|
|
156 |
fit <- glmQLFit(y, desgn, robust=TRUE) |
|
|
157 |
head(desgn) |
|
|
158 |
``` |
|
|
159 |
|
|
|
160 |
# Estimating LFCs versus healthy |
|
|
161 |
```{r, message=FALSE,warning=FALSE} |
|
|
162 |
# A |
|
|
163 |
resA <- glmQLFTest(fit, coef=ncol(desgn)-4) |
|
|
164 |
resA$table$qvals <- spatialFDR(intensities(cd), resA$table$PValue) |
|
|
165 |
fplot$LFCA <- resA$table$logFC |
|
|
166 |
fplot$LFCA[resA$table$qvals>0.05] <- 0 |
|
|
167 |
|
|
|
168 |
# B |
|
|
169 |
resB <- glmQLFTest(fit, coef=ncol(desgn)-3) |
|
|
170 |
resB$table$qvals <- spatialFDR(intensities(cd), resB$table$PValue) |
|
|
171 |
fplot$LFCB <- resB$table$logFC |
|
|
172 |
fplot$LFCB[resB$table$qvals>0.05] <- 0 |
|
|
173 |
|
|
|
174 |
# C |
|
|
175 |
resC <- glmQLFTest(fit, coef=ncol(desgn)-2) |
|
|
176 |
resC$table$qvals <- spatialFDR(intensities(cd), resC$table$PValue) |
|
|
177 |
fplot$LFCC <- resC$table$logFC |
|
|
178 |
fplot$LFCC[resC$table$qvals>0.05] <- 0 |
|
|
179 |
|
|
|
180 |
# D |
|
|
181 |
resD <- glmQLFTest(fit, coef=ncol(desgn)-1) |
|
|
182 |
resD$table$qvals <- spatialFDR(intensities(cd), resD$table$PValue) |
|
|
183 |
fplot$LFCD <- resD$table$logFC |
|
|
184 |
fplot$LFCD[resD$table$qvals>0.05] <- 0 |
|
|
185 |
|
|
|
186 |
# E |
|
|
187 |
resE <- glmQLFTest(fit, coef=ncol(desgn)) |
|
|
188 |
resE$table$qvals <- spatialFDR(intensities(cd), resE$table$PValue) |
|
|
189 |
fplot$LFCE <- resE$table$logFC |
|
|
190 |
fplot$LFCE[resE$table$qvals>0.05] <- 0 |
|
|
191 |
``` |
|
|
192 |
|
|
|
193 |
# Setting up the model for linear/quadratic changes |
|
|
194 |
```{r, message=FALSE,warning=FALSE} |
|
|
195 |
y$samples$Severity <- ordered(y$samples$Severity) |
|
|
196 |
desgn <- model.matrix(~ 0 + Sex + Center + Age + Severity, data=y$samples) |
|
|
197 |
y <- estimateDisp(y, desgn,span=1) |
|
|
198 |
fit <- glmQLFit(y, desgn, robust=TRUE) |
|
|
199 |
head(desgn) |
|
|
200 |
``` |
|
|
201 |
|
|
|
202 |
|
|
|
203 |
## Defining Significance |
|
|
204 |
- Setting a 1% spatial FDR |
|
|
205 |
```{r, message=FALSE,warning=FALSE} |
|
|
206 |
# Lin |
|
|
207 |
resLin <- glmQLFTest(fit, coef=ncol(desgn)-4) |
|
|
208 |
resLin$table$qvals <- spatialFDR(intensities(cd), resLin$table$PValue) |
|
|
209 |
fplot$LFCLin <- resLin$table$logFC |
|
|
210 |
fplot$LFCLin[resLin$table$qvals>0.05] <- 0 |
|
|
211 |
# B |
|
|
212 |
resQuad <- glmQLFTest(fit, coef=ncol(desgn)-3) |
|
|
213 |
resQuad$table$qvals <- spatialFDR(intensities(cd), resQuad$table$PValue) |
|
|
214 |
fplot$LFCQuad <- resQuad$table$logFC |
|
|
215 |
fplot$LFCQuad[resQuad$table$qvals>0.05] <- 0 |
|
|
216 |
``` |
|
|
217 |
|
|
|
218 |
```{r, message=FALSE,warning=FALSE} |
|
|
219 |
library(dplyr) |
|
|
220 |
ggplot(arrange(fplot,abs(LFCLin)), aes(x=UMAP1, y=UMAP2, fill=LFCLin)) + |
|
|
221 |
geom_point(pch=21,color="grey30",size=4) + |
|
|
222 |
scale_fill_gradient2(low="blue", mid="white", high="red",name="LFC") + |
|
|
223 |
theme(axis.text=element_text(size=14), |
|
|
224 |
strip.text.x = element_text(size=14, face="bold") , |
|
|
225 |
strip.text.y = element_text(size=14, face="bold"), |
|
|
226 |
strip.background = element_blank(), |
|
|
227 |
axis.text.x=element_blank(), |
|
|
228 |
axis.text.y=element_blank(), |
|
|
229 |
axis.ticks.y=element_blank(), |
|
|
230 |
axis.ticks.x=element_blank()) |
|
|
231 |
ggsave("../figures/LinearChanges.pdf",useDingbats=FALSE) |
|
|
232 |
``` |
|
|
233 |
|
|
|
234 |
## Individual plots |
|
|
235 |
```{r, message=FALSE,warning=FALSE,fig.width=16, fig.height=3} |
|
|
236 |
library(dplyr) |
|
|
237 |
p1 <- ggplot(arrange(fplot,abs(LFCA)), aes(x=UMAP1, y=UMAP2, fill=LFCA)) + |
|
|
238 |
geom_point(pch=21,color="grey30",size=2) + |
|
|
239 |
scale_fill_gradient2(low="blue", mid="white", high="red",name="LFC") + |
|
|
240 |
theme(axis.text=element_text(size=14), |
|
|
241 |
strip.text.x = element_text(size=14, face="bold") , |
|
|
242 |
strip.text.y = element_text(size=14, face="bold"), |
|
|
243 |
strip.background = element_blank(), |
|
|
244 |
axis.text.x=element_blank(), |
|
|
245 |
axis.text.y=element_blank(), |
|
|
246 |
axis.ticks.y=element_blank(), |
|
|
247 |
axis.ticks.x=element_blank()) + |
|
|
248 |
ggtitle("Asymptomatic") |
|
|
249 |
p2 <- ggplot(arrange(fplot,abs(LFCB)), aes(x=UMAP1, y=UMAP2, fill=LFCB)) + |
|
|
250 |
geom_point(pch=21,color="grey30",size=2) + |
|
|
251 |
scale_fill_gradient2(low="blue", mid="white", high="red",name="LFC") + |
|
|
252 |
theme(axis.text=element_text(size=14), |
|
|
253 |
strip.text.x = element_text(size=14, face="bold") , |
|
|
254 |
strip.text.y = element_text(size=14, face="bold"), |
|
|
255 |
strip.background = element_blank(), |
|
|
256 |
axis.text.x=element_blank(), |
|
|
257 |
axis.text.y=element_blank(), |
|
|
258 |
axis.ticks.y=element_blank(), |
|
|
259 |
axis.ticks.x=element_blank()) + |
|
|
260 |
ggtitle("Mild") |
|
|
261 |
p3 <- ggplot(arrange(fplot,abs(LFCC)), aes(x=UMAP1, y=UMAP2, fill=LFCC)) + |
|
|
262 |
geom_point(pch=21,color="grey30",size=2) + |
|
|
263 |
scale_fill_gradient2(low="blue", mid="white", high="red",name="LFC") + |
|
|
264 |
theme(axis.text=element_text(size=14), |
|
|
265 |
strip.text.x = element_text(size=14, face="bold") , |
|
|
266 |
strip.text.y = element_text(size=14, face="bold"), |
|
|
267 |
strip.background = element_blank(), |
|
|
268 |
axis.text.x=element_blank(), |
|
|
269 |
axis.text.y=element_blank(), |
|
|
270 |
axis.ticks.y=element_blank(), |
|
|
271 |
axis.ticks.x=element_blank()) + |
|
|
272 |
ggtitle("Moderate") |
|
|
273 |
p4 <- ggplot(arrange(fplot,abs(LFCD)), aes(x=UMAP1, y=UMAP2, fill=LFCD)) + |
|
|
274 |
geom_point(pch=21,color="grey30",size=2) + |
|
|
275 |
scale_fill_gradient2(low="blue", mid="white", high="red",name="LFC") + |
|
|
276 |
theme(axis.text=element_text(size=14), |
|
|
277 |
strip.text.x = element_text(size=14, face="bold") , |
|
|
278 |
strip.text.y = element_text(size=14, face="bold"), |
|
|
279 |
strip.background = element_blank(), |
|
|
280 |
axis.text.x=element_blank(), |
|
|
281 |
axis.text.y=element_blank(), |
|
|
282 |
axis.ticks.y=element_blank(), |
|
|
283 |
axis.ticks.x=element_blank()) + |
|
|
284 |
ggtitle("Severe") |
|
|
285 |
p5 <- ggplot(arrange(fplot,abs(LFCE)), aes(x=UMAP1, y=UMAP2, fill=LFCE)) + |
|
|
286 |
geom_point(pch=21,color="grey30",size=2) + |
|
|
287 |
scale_fill_gradient2(low="blue", mid="white", high="red",name="LFC") + |
|
|
288 |
theme(axis.text=element_text(size=14), |
|
|
289 |
strip.text.x = element_text(size=14, face="bold") , |
|
|
290 |
strip.text.y = element_text(size=14, face="bold"), |
|
|
291 |
strip.background = element_blank(), |
|
|
292 |
axis.text.x=element_blank(), |
|
|
293 |
axis.text.y=element_blank(), |
|
|
294 |
axis.ticks.y=element_blank(), |
|
|
295 |
axis.ticks.x=element_blank()) + |
|
|
296 |
ggtitle("Critical") |
|
|
297 |
plot_grid(p1,p2,p3,p4,p5,nrow=1) |
|
|
298 |
ggsave("../figures/LFC_v_Healthy_By_Stage.pdf",useDingbats=FALSE,width=16,height=3) |
|
|
299 |
``` |
|
|
300 |
|
|
|
301 |
# Heatmaps |
|
|
302 |
|
|
|
303 |
## For linear changes |
|
|
304 |
```{r, message=FALSE,warning=FALSE,fig.height=12,fig.width=9} |
|
|
305 |
library(circlize) |
|
|
306 |
library(ComplexHeatmap) |
|
|
307 |
mat <- intensities(cd) |
|
|
308 |
mat <- scale(t(mat)) |
|
|
309 |
#mat <- t(mat)-rowMeans(t(mat)) |
|
|
310 |
rownames(mat) <- gsub("AB_","",rownames(mat)) |
|
|
311 |
|
|
|
312 |
#show some columns |
|
|
313 |
col_fun <- colorRamp2(c(-7, 0, 4), c("blue", "grey80", "red")) |
|
|
314 |
cols <- which(fplot$LFCLin != 0) |
|
|
315 |
ha <- HeatmapAnnotation(LFC_Severity=fplot$LFCLin[cols], |
|
|
316 |
col=list(LFC_Severity=col_fun |
|
|
317 |
) |
|
|
318 |
) |
|
|
319 |
|
|
|
320 |
int <- which(rowMax(abs(mat))>1.3) |
|
|
321 |
#int <- c(which(rownames(mat)=="CD14"),int) |
|
|
322 |
mat[mat > 3] <- 3 |
|
|
323 |
mat[mat < -3] <- -3 |
|
|
324 |
pdf("../figures/Heatmap_LinearChanges.pdf",width=5,height=13) |
|
|
325 |
Heatmap(mat[int,cols], |
|
|
326 |
# show_col_names=FALSE, |
|
|
327 |
heatmap_legend_param = list(title="Scaled Intensity"), |
|
|
328 |
row_names_gp=gpar(fontsize=7), |
|
|
329 |
top_annotation=ha) |
|
|
330 |
dev.off() |
|
|
331 |
``` |