Diff of /figures/Figure3.Rmd [000000] .. [9905a0]

Switch to unified view

a b/figures/Figure3.Rmd
1
---
2
title: "Figure 3"
3
author: Tobias Roider
4
date: "Last compiled on `r format(Sys.time(), '%d %B, %Y, %X')`"
5
output: 
6
  rmdformats::readthedown: 
7
  
8
editor_options: 
9
  chunk_output_type: console
10
---
11
12
```{r options, include=FALSE, warning = FALSE}
13
14
library(knitr)
15
opts_chunk$set(echo=TRUE, tidy=FALSE, include=TRUE, message=FALSE,
16
               dpi = 100, cache = FALSE, warning = FALSE)
17
opts_knit$set(root.dir = "../")
18
options(bitmapType = "cairo")
19
20
```
21
22
# Read data, functions and packages
23
```{r read data}
24
25
source("R/ReadPackages.R")
26
source("R/Functions.R")
27
source("R/ReadData.R")
28
source("R/ThemesColors.R")
29
source("R/Helpers.R")
30
31
```
32
33
# Proportions overview
34
## Handle data and calculate p values
35
```{r create matrix}
36
37
df_pop 
38
39
mat_complete <- rbind(
40
  df_facs %>% 
41
    left_join(., df_meta %>% select(PatientID, `CITEseq`)) %>% 
42
    filter(`CITEseq`=="-") %>% 
43
    select(PatientID, Population, Prop=FACS),
44
  df_freq %>% 
45
    mutate(RNA=ifelse(is.nan(RNA), 0, RNA)) %>% 
46
    select(PatientID, Population, Prop=RNA)) %>% 
47
  filter(Population %in% df_pop$Population) %>% 
48
  left_join(., df_pop) %>% 
49
  select(-Population) %>% 
50
  pivot_wider(names_from = "IdentI", values_from = "Prop", values_fill = 0) %>% 
51
  column_to_rownames("PatientID") 
52
53
cl_order <- c(6,1,2,9,8,13,15,11,12,16,3,5,14,19)
54
names(labels_cl_parsed) <- as.character(cluster_order)
55
56
df_freqPlot <- 
57
  mat_complete %>% 
58
  rownames_to_column("PatientID") %>% 
59
  pivot_longer(cols=2:ncol(.), names_to = "IdentI", values_to = "Prop") %>% 
60
  add_entity() %>% 
61
  mutate(Entity=factor(Entity, levels = c("rLN", "DLBCL", "MCL", "FL", "MZL"))) %>% 
62
  mutate(IdentI=factor(IdentI, levels=cl_order)) %>% 
63
  mutate(Label=factor(IdentI, levels=cl_order, labels = labels_cl_parsed[as.character(cl_order)])) %>% 
64
  group_by(Entity, IdentI) %>% 
65
  mutate(outlier = (Prop > quantile(Prop, 0.75) + IQR(Prop) * 1.5) | (Prop < quantile(Prop, 0.25) - IQR(Prop) * 1.5))
66
67
df_medianLines <- df_freqPlot %>% 
68
  filter(Entity=="rLN") %>% 
69
  group_by(IdentI, Label) %>% 
70
  summarise(MedianProp=median(Prop))
71
72
df_freqPlot_pvalues <- 
73
  df_freqPlot %>% 
74
  group_by(IdentI) %>% 
75
  wilcox_test(data=., formula = Prop ~ Entity, detailed = T, ref.group = "rLN") %>% 
76
  adjust_pvalue(method = "BH") %>% 
77
  select(IdentI, Entity=group2, p.adj, estimate) %>% 
78
  mutate(Entity=factor(Entity, levels = c("rLN", "DLBCL", "MCL", "FL", "MZL"))) %>% 
79
  mutate(p.adj=ifelse(p.adj>0.05, NA, p.adj)) %>% 
80
  mutate(p.adj_s=format(p.adj, scientific = TRUE, digits=1)) %>% 
81
  mutate(p.adj_f=case_when(p.adj > 0.05 ~ "NA",
82
                           p.adj==0.05 ~ "0.05",
83
                           p.adj < 0.05 & p.adj > 0.001 ~ as.character(round(p.adj, 3)),
84
                           p.adj==0.001 ~ "0.001",
85
                           p.adj < 0.001 ~ p.adj_s)) %>% 
86
  filter(!is.na(p.adj)) %>% 
87
  left_join(., df_freqPlot %>% select(IdentI,  Label) %>% distinct) %>% 
88
  left_join(., data.frame(IdentI=factor(cl_order), height=c(52.5, 52.5, 21.5, 21.5, 22, 22, 20, 20, 26, 26, 80, 80, 21, 21)))
89
90
```
91
92
## Plot
93
```{r freq overview, fig.height=13, fig.width=5.2}
94
95
p <- list()
96
97
for(i in c(1:7)){
98
  
99
  y <- list(c(1,6),c(2,9),c(8,13),c(15,11),c(12,16),c(3,5),c(14,19))[[i]]
100
  ylim <- c(70,30,32,28,35,110,28)
101
  
102
  p[[i]] <- 
103
    ggplot(data=df_freqPlot %>% filter(IdentI %in% y) %>% 
104
             mutate(Label=factor(Label, levels = labels_cl_parsed[as.character(y)])), 
105
             aes(y=Prop, x=Entity, fill=IdentI))+
106
    geom_hline(data=df_medianLines %>%filter(IdentI %in% y), aes(yintercept=MedianProp),
107
               size=0.25, linetype="dashed", color="grey60")+
108
    geom_boxplot(width=0.4, outlier.shape = 21, outlier.size = 1, outlier.color = "white", 
109
                 outlier.alpha = 0, show.legend = F, size=0.25)+
110
    ggbeeswarm::geom_beeswarm(data = function(x) dplyr::filter_(x, ~ outlier), cex = 3, stroke=0.25, 
111
                              groupOnX = TRUE, shape = 21, size = 1, color = "white", alpha = 1)+
112
    geom_text(data=df_freqPlot_pvalues %>% filter(IdentI %in% y), 
113
              inherit.aes = F, aes(y=height, x=Entity, label=p.adj_f), hjust=0.1, size=2.3, angle=45)+
114
    scale_fill_manual(values = colors_umap_cl)+
115
    scale_y_continuous(name="% of total T-cells", limits=c(0,ylim[i]))+
116
    scale_x_discrete(expand = c(0.17,0.17))+
117
    facet_wrap(~Label, ncol = 2, labeller = label_parsed)+
118
    mytheme_1+
119
    theme(axis.title.x = element_blank(),
120
          strip.background = element_rect(color=NA),
121
          plot.margin = unit(c(0,0,0,0), units = "cm"),
122
          plot.title = element_text(margin = unit(c(0,0,0,0), units = "cm")),
123
          panel.border = element_rect(size = 0.5),
124
          axis.text.x = element_text(angle=45, hjust = 1))
125
  
126
  if(i!=7){
127
    p[[i]] <- p[[i]]+
128
      theme(axis.text.x = element_blank(),
129
            axis.ticks.x = element_blank())
130
  }
131
  
132
  if(i!=4){
133
    p[[i]] <- p[[i]]+
134
      theme(axis.title.y = element_blank())
135
    }
136
  
137
  if(i==1){
138
    p[[i]] <- p[[i]]+
139
      labs(tag = "A")+
140
      theme(plot.tag = element_text(vjust = -0.5))
141
}
142
  
143
  
144
}
145
146
plot_freq <- wrap_plots(p, ncol = 1)
147
plot_freq
148
ggsave(width = 9, height = 21, units = "cm", filename = "Figure3_p1.pdf")
149
150
```
151
152
# Principal component analysis (PCA)
153
```{r pca, fig.width=6, fig.height=4}
154
155
pca_seq <- prcomp(mat_complete, scale. = T, center = T)
156
157
p1 <- 
158
  pca_seq$x %>% 
159
  data.frame() %>% 
160
  rownames_to_column("PatientID") %>% 
161
  add_entity() %>% 
162
  ggplot(aes(x=PC1, y=-PC2, fill=Entity))+
163
  geom_point(size=1.75, shape=21, stroke=0.25, color="white")+
164
  scale_fill_brewer(palette = "Paired", limits=c("DLBCL", "MCL", "FL", "MZL", "rLN"))+
165
  guides(fill=guide_legend(override.aes = list(size=2)))+
166
  ylab("PC2")+
167
  xlab("PC1")+
168
  mytheme_1+
169
  coord_cartesian(clip = "off")+
170
  theme(legend.position = "top",
171
        legend.title = element_blank(),
172
        legend.spacing.x = unit("cm", x = 0.05),
173
        legend.box.margin = unit(c(0,0,-0.35,0), "cm"),
174
        plot.tag = element_text(vjust = -2.5),
175
        plot.margin = unit(c(0,0.25,0,-0.25), units = "cm"),
176
        plot.background = element_rect(fill = "transparent",
177
                                 colour = NA_character_),
178
        panel.border = element_rect(size=0.25),
179
        legend.key.height = unit("cm", x = 0.36),
180
        axis.title.x =  element_text(margin = unit(c(-1,0,0,0), units = "cm")),
181
        legend.key.width = unit("cm", x = 0.26))+
182
  labs(tag = "B")
183
184
pc1 <- 
185
  pca_seq$rotation %>% 
186
  data.frame %>% 
187
  select(PC1) %>% 
188
  rownames_to_column("IdentI") %>% 
189
  top_n(4, abs(PC1)) %>%
190
  arrange(-PC1)
191
192
p2 <- ggplot(pc1, aes(y=PC1, x=IdentI, fill=IdentI))+
193
  geom_bar(stat = "identity", width = 0.4, color=NA, alpha=0.5)+
194
  scale_fill_manual(values = colors_umap_cl, limits=factor(cluster_order),
195
                    labels=unlist(labels_cl))+
196
  scale_x_discrete(limits=pc1$IdentI, labels=labels_cl[pc1$IdentI] %>% unlist())+
197
  geom_hline(yintercept = 0, size=0.25)+
198
  scale_y_continuous(limits=c(-0.5, 0.5), breaks = c(-0.5, 0, 0.5), name = "PC1")+
199
  mytheme_1+
200
  coord_cartesian(clip = "off")+
201
  theme(axis.title.x = element_blank(),
202
        plot.margin = unit(c(0.25,0,0,0), units = "cm"),
203
        axis.text = element_text(size=6.5),
204
        plot.tag = element_text(vjust = -2.5),
205
        axis.text.x = element_text(angle=45, hjust = 1))+
206
  labs(tag = "C")
207
208
pc2 <- pca_seq$rotation %>% 
209
  data.frame %>% 
210
  select(PC2) %>% 
211
  rownames_to_column("IdentI") %>% 
212
  top_n(4, abs(PC2)) %>% 
213
  arrange(PC2)
214
215
p3 <- ggplot(pc2, aes(y=-PC2, x=IdentI, fill=IdentI))+
216
  geom_bar(stat = "identity", width = 0.4, color=NA, alpha=0.5)+
217
  scale_fill_manual(values = colors_umap_cl, limits=factor(cluster_order),
218
                    labels=unlist(labels_cl))+
219
  scale_x_discrete(limits=pc2$IdentI, labels=labels_cl[pc2$IdentI] %>% unlist())+
220
  geom_hline(yintercept = 0, size=0.25)+
221
  scale_y_continuous(limits=c(-0.5, 0.5), breaks = c(-0.5, 0, 0.5), name = "PC2")+
222
  coord_cartesian(clip = "off")+
223
  mytheme_1+
224
  theme(axis.title.x = element_blank(),
225
        plot.margin = unit(c(0,0,0.25,0), units = "cm"),
226
        axis.text.x = element_text(angle=45, hjust = 1))+
227
  labs(tag = "D")
228
229
p1+(p2/p3)+plot_layout(widths = c(1,0.5))
230
#ggsave(width = 9.75, height = 7.5, units = "cm", filename = "Figure3_p2.pdf")
231
232
```
233
234
# Lasso prediction
235
## Dendrogram
236
```{r dendrogram, fig.width=3, fig.height=3}
237
  
238
# Create data frame
239
data <- data.frame(
240
  level1="all",
241
  level2=c("rLN", 
242
           "MZL",
243
           "MCL",
244
           "FL",
245
           "DLBCL")
246
)
247
248
edges_level1_2 <- data %>% select(level1, level2) %>% unique %>% rename(from=level1, to=level2)
249
edge_list=rbind(edges_level1_2)
250
vert <- data.frame(
251
  name=unique(c(data$level1, data$level2))) %>% 
252
  mutate(label=c(NA, "rLN", "MZL", "MCL", "FL", "DLBCL"))
253
254
# Create graph object
255
mygraph_lasso <- graph_from_data_frame( edge_list ,vertices = vert)
256
257
# Plot dendrogramm
258
ggraph(mygraph_lasso, layout = 'tree', circular = FALSE)+ 
259
  geom_edge_diagonal(strength = 1.4, edge_width=0.25)+
260
  geom_node_point(shape=21, size=3.5, color="white", stroke=2, alpha=c(0,1,1,1,1,1),
261
                  fill=c(NA, brewer.pal(name = "Paired", 5)[c(5,4,3,2,1)]))+
262
  coord_flip(clip = "off")+
263
  scale_y_reverse()+
264
  theme_void()+
265
  theme(legend.position = "right",
266
        plot.margin = margin(0.25,0.25,0.25,0, unit = "cm"),  
267
        plot.title = element_text(hjust=0.4, size=7, face = "bold"),)
268
269
#ggsave(width = 3.5, height = 3.4, units = "cm", filename = "Figure3_p3.pdf")
270
271
```
272
273
## Model
274
```{r lasso model, fig.height=3, fig.width=3}
275
276
total <- mat_complete %>% 
277
  rownames_to_column("PatientID") %>% 
278
  left_join(., df_meta %>% select(PatientID, Tcells)) %>% 
279
  add_entity() %>% 
280
  mutate_if(is.numeric, ~./100)
281
282
cell_types <- total %>% select(-Entity, -PatientID) %>% colnames()
283
gt <- my_glmnet(total)
284
285
```
286
287
## Confusion matrix
288
```{r lasso plot, fig.height=3, fig.width=3}
289
290
entities <- c("DLBCL", "MCL", "FL", "MZL", "rLN")
291
292
tbl <- gt$confusion_table
293
class(tbl) = "matrix"
294
tbl = tbl / rowSums(tbl) # convert to probability estimates
295
tbl = tbl[entities, entities]
296
297
tbl %>% data.frame() %>% 
298
  rownames_to_column("truth") %>% 
299
  pivot_longer(cols = 2:ncol(.), names_to = "predicted", values_to = "Prop") %>% 
300
  ggplot(aes(x=truth, y=predicted, fill=Prop))+
301
  geom_tile()+
302
  scale_x_discrete(limits=rev(entities), expand = c(0,0), position = "top")+
303
  scale_y_discrete(limits=entities, expand = c(0,0), name="Truth")+
304
  geom_hline(yintercept = c(1.5,2.5,3.5,4.5),size=0.25, color="black")+
305
  geom_vline(xintercept = c(1.5,2.5,3.5,4.5),size=0.25, color="black")+
306
  scale_fill_gradientn(colours = colorRampPalette(RColorBrewer::brewer.pal(9, "BuPu"))(100), name="Prop", limits=c(0,0.8))+
307
  xlab("Reference")+
308
  mytheme_1+
309
  coord_fixed()+
310
  theme(panel.border = element_blank(),
311
        legend.position = "right",
312
        legend.key.height = unit(0.3, "cm"),
313
        legend.key.width = unit(0.3, "cm"),
314
        legend.box.spacing = unit(0.1, "cm"),
315
        legend.box.margin = unit(c(0,-0.25,0,0.05), units = "cm"),
316
        plot.margin = unit(c(0.1,0.1,0.1,0.1), units = "cm"),
317
        axis.text.x = element_text(angle=45, hjust = 0),
318
        axis.ticks = element_blank(),
319
        axis.title.y = element_blank())
320
321
#ggsave(width = 5.5, height = 5.5, units = "cm", filename = "Figure3_p4.pdf")
322
323
```
324
325
# Patient characteristics
326
```{r Patient characteristics, fig.width=5.25}
327
328
df_char <- mat_complete %>% 
329
  rownames_to_column("PatientID") %>% 
330
  pivot_longer(cols=2:ncol(.), names_to = "IdentI", values_to = "Prop") %>% 
331
  left_join(., df_meta %>% select(PatientID, Status, Pretreatment, Entity, Age) %>% distinct, by="PatientID") %>% 
332
  mutate(Entity=factor(Entity, levels = c("DLBCL", "MCL", "FL", "MZL", "rLN")))
333
334
plot_status_ident1 <- 
335
  df_char %>% 
336
  filter(IdentI %in% c(1), Entity!="rLN") %>% 
337
  ggplot(aes(x=Status, y=Prop))+
338
  geom_boxplot(width=0.35, size=0.25, aes(fill=Entity),  position = position_dodge(width = 0.6),
339
               outlier.shape = 21, outlier.size = 1, outlier.color = "white", outlier.alpha = 0.75)+
340
  stat_compare_means(comparisons = list(c("Initial diagnosis", "Relapse")), vjust = -0.35, label.y = c(28),
341
                     size=2.5, tip.length = 0.02, bracket.size = 0.25)+
342
  scale_fill_brewer(palette = "Paired", limits=c("DLBCL", "MCL", "FL", "MZL", "rLN"))+
343
  ggtitle(labels_cl[["1"]])+
344
  scale_y_continuous(name="% of total T-cells", expand = c(0.05,0.15), limits=c(0,35))+
345
  scale_x_discrete(labels=c("Initial \ndiagnosis", "Relapse"))+
346
  mytheme_1+
347
  theme_characteristics+
348
  labs(tag = "F")
349
350
plot_age_ident1 <- 
351
  df_char %>% 
352
  filter(IdentI %in% c(1), Entity!="rLN") %>% 
353
  ggplot(aes(x=Age, y=Prop))+
354
  geom_point(size=1.25, color="grey65", shape=21, stroke=0, aes(fill=Entity))+
355
  geom_smooth(method = "lm", color="black", size=0.25, linetype="dashed", alpha=0.25,  formula = 'y ~ x')+
356
  stat_cor(size=2.5)+
357
  scale_fill_brewer(palette = "Paired", limits=c("DLBCL", "MCL", "FL", "MZL", "rLN"))+
358
  scale_y_continuous(name="% of total T-cells", expand = c(0.05,0.15), limits=c(0,35))+
359
  ggtitle(labels_cl[["1"]])+
360
  mytheme_1+
361
  theme_characteristics+
362
  theme(axis.title.x = element_text(size=7, vjust = 4))+
363
  labs(tag = "G")
364
365
plot_status_ident9 <- 
366
  df_char %>% 
367
  filter(IdentI %in% c(9), Entity!="rLN") %>% 
368
  ggplot(aes(x=Status, y=Prop))+
369
  geom_boxplot(width=0.35, size=0.25, aes(fill=Entity),  position = position_dodge(width = 0.6),
370
               outlier.shape = 21, outlier.size = 1, outlier.color = "white", outlier.alpha = 0.75)+
371
  stat_compare_means(comparisons = list(c("Initial diagnosis", "Relapse")), vjust = -0.35, label.y = c(28),
372
                     size=2.5, tip.length = 0.02, bracket.size = 0.25)+
373
  scale_fill_brewer(palette = "Paired", limits=c("DLBCL", "MCL", "FL", "MZL", "rLN"))+
374
  ggtitle(labels_cl[["9"]])+
375
  scale_y_continuous(name="% of total T-cells", expand = c(0.05,0.15), limits=c(0,35))+
376
  scale_x_discrete(labels=c("Initial \ndiagnosis", "Relapse"))+
377
  mytheme_1+
378
  theme_characteristics+
379
  labs(tag = "H")
380
381
plot_status_ident13 <- 
382
  df_char %>% 
383
  filter(IdentI %in% c(13), Entity!="rLN") %>% 
384
  ggplot(aes(x=Status, y=Prop))+
385
  geom_boxplot(width=0.35, size=0.25, aes(fill=Entity),  position = position_dodge(width = 0.6),
386
               outlier.shape = 21, outlier.size = 1, outlier.color = "white", outlier.alpha = 0.75)+
387
  stat_compare_means(comparisons = list(c("Initial diagnosis", "Relapse")), vjust = -0.35, label.y = c(29),
388
                     size=2.5, tip.length = 0.02, bracket.size = 0.25)+
389
  scale_fill_brewer(palette = "Paired", limits=c("DLBCL", "MCL", "FL", "MZL", "rLN"))+
390
  ggtitle(labels_cl[["13"]])+
391
  scale_y_continuous(name="% of total T-cells", expand = c(0.05,0.15), limits=c(0,35))+
392
  scale_x_discrete(labels=c("Initial \ndiagnosis", "Relapse"))+
393
  mytheme_1+
394
  theme_characteristics+
395
  labs(tag = "I")
396
397
plot_status_ident1+plot_age_ident1+plot_status_ident9+plot_status_ident13
398
399
#ggsave(width = 9.5, height = 9.65, units = "cm", filename = "Figure3_p5.pdf")
400
401
```
402
403
# Session info
404
```{r session info}
405
406
sessionInfo()
407
408
```