Figure 3
Read data, functions and packages
Proportions overview
Handle data and calculate p values
## Population IdentI
## 1 TPR 14
## 2 THNaive 1
## 3 THCM1 2
## 4 THCM2 9
## 5 TFH 6
## 6 TREGCM1 8
## 7 TREGCM2 13
## 8 TREGEM1 15
## 9 TREGEM2 11
## 10 TTOXNaive 12
## 11 TTOXEM1 3
## 12 TTOXEM2 16
## 13 TTOXEM3 5
## 14 TDN 19
# Build the PatientID x IdentI matrix of subset proportions used below.
# Two sources are stacked:
#   * df_facs rows whose `CITEseq` value (looked up in df_meta) is "-",
#     taking the FACS column as the proportion;
#   * all df_freq rows, taking the RNA column as the proportion with NaN
#     replaced by 0.
facs_props <- df_facs %>%
  left_join(., select(df_meta, PatientID, `CITEseq`)) %>%
  filter(`CITEseq` == "-") %>%
  select(PatientID, Population, Prop = FACS)

rna_props <- df_freq %>%
  mutate(RNA = ifelse(is.nan(RNA), 0, RNA)) %>%
  select(PatientID, Population, Prop = RNA)

# Keep only populations listed in df_pop, swap the population name for its
# IdentI, and spread to one column per IdentI. Patient/IdentI combinations
# without a row are filled with 0.
mat_complete <- rbind(facs_props, rna_props) %>%
  filter(Population %in% df_pop$Population) %>%
  left_join(., df_pop) %>%
  select(-Population) %>%
  pivot_wider(
    names_from = "IdentI",
    values_from = "Prop",
    values_fill = 0
  ) %>%
  column_to_rownames("PatientID")
# Display order of the subset identifiers (IdentI) in panel A.
cl_order <- c(6, 1, 2, 9, 8, 13, 15, 11, 12, 16, 3, 5, 14, 19)
names(labels_cl_parsed) <- as.character(cluster_order)

# Long-format proportions (one row per patient and IdentI) with:
#   Entity  - factor, rLN first so it acts as the reference level
#   IdentI  - factor ordered by cl_order
#   Label   - same factor, labelled with the entries of labels_cl_parsed
#   outlier - TRUE when Prop lies beyond 1.5 * IQR from the quartiles of its
#             Entity x IdentI group
df_freqPlot <-
  mat_complete %>%
  rownames_to_column("PatientID") %>%
  pivot_longer(cols = -PatientID, names_to = "IdentI", values_to = "Prop") %>%
  add_entity() %>%
  mutate(
    Entity = factor(Entity, levels = c("rLN", "DLBCL", "MCL", "FL", "MZL")),
    IdentI = factor(IdentI, levels = cl_order),
    Label = factor(
      IdentI,
      levels = cl_order,
      labels = labels_cl_parsed[as.character(cl_order)]
    )
  ) %>%
  group_by(Entity, IdentI) %>%
  mutate(
    outlier = (Prop > quantile(Prop, 0.75) + IQR(Prop) * 1.5) |
      (Prop < quantile(Prop, 0.25) - IQR(Prop) * 1.5)
  ) %>%
  # Drop the grouping so it does not leak into later select()/join steps,
  # where dplyr would silently re-add the grouping columns.
  ungroup()
# Median proportion per subset among rLN samples; drawn as the dashed
# reference line in each facet of panel A.
df_medianLines <- df_freqPlot %>%
  filter(Entity == "rLN") %>%
  group_by(IdentI, Label) %>%
  # .groups = "drop" returns a plain ungrouped table and avoids the
  # "summarise() has grouped output" message.
  summarise(MedianProp = median(Prop), .groups = "drop")
# Per-subset Wilcoxon tests of every entity against rLN (ref.group), followed
# by adjust_pvalue(method = "BH"). Only comparisons with adjusted p <= 0.05
# are kept. The result carries the text label (p.adj_f) and the y position
# (height) used to print it in panel A.
df_freqPlot_pvalues <-
  df_freqPlot %>%
  group_by(IdentI) %>%
  wilcox_test(
    data = .,
    formula = Prop ~ Entity,
    detailed = TRUE,
    ref.group = "rLN"
  ) %>%
  adjust_pvalue(method = "BH") %>%
  select(IdentI, Entity = group2, p.adj, estimate) %>%
  mutate(Entity = factor(Entity, levels = c("rLN", "DLBCL", "MCL", "FL", "MZL"))) %>%
  # Drop missing and non-significant comparisons up front so the formatting
  # below only sees values in [0, 0.05].
  filter(!is.na(p.adj), p.adj <= 0.05) %>%
  mutate(
    p.adj_s = format(p.adj, scientific = TRUE, digits = 1),
    # Below 0.001: scientific notation; otherwise rounded to three decimals
    # (this also yields "0.05" and "0.001" for the exact boundary values).
    p.adj_f = if_else(p.adj < 0.001, p.adj_s, as.character(round(p.adj, 3)))
  ) %>%
  # ungroup() makes the lookup independent of any grouping still attached to
  # df_freqPlot; Label is determined by IdentI alone.
  left_join(distinct(ungroup(df_freqPlot), IdentI, Label), by = "IdentI") %>%
  # Text height per subset; values are listed in cl_order order.
  left_join(
    data.frame(
      IdentI = factor(cl_order, levels = cl_order),
      height = c(52.5, 52.5, 21.5, 21.5, 22, 22, 20, 20, 26, 26, 80, 80, 21, 21)
    ),
    by = "IdentI"
  )
Plot
# Panel A: seven stacked rows, each showing two subsets side by side.
# panel_pairs[[k]] holds the two IdentI values of row k and panel_ymax[k] the
# upper y-axis limit of that row.
panel_pairs <- list(
  c(1, 6), c(2, 9), c(8, 13), c(15, 11), c(12, 16), c(3, 5), c(14, 19)
)
panel_ymax <- c(70, 30, 32, 28, 35, 110, 28)

p <- vector("list", length(panel_pairs))
for (i in seq_along(panel_pairs)) {
  y <- panel_pairs[[i]]

  panel <-
    ggplot(
      data = df_freqPlot %>%
        filter(IdentI %in% y) %>%
        mutate(Label = factor(Label, levels = labels_cl_parsed[as.character(y)])),
      aes(y = Prop, x = Entity, fill = IdentI)
    ) +
    # Dashed line: median of the rLN samples for each subset.
    geom_hline(
      data = df_medianLines %>% filter(IdentI %in% y),
      aes(yintercept = MedianProp),
      size = 0.25, linetype = "dashed", color = "grey60"
    ) +
    # The boxplot's own outlier points are made invisible (alpha 0); outliers
    # are drawn by the beeswarm layer below instead.
    geom_boxplot(
      width = 0.4, outlier.shape = 21, outlier.size = 1, outlier.color = "white",
      outlier.alpha = 0, show.legend = FALSE, size = 0.25
    ) +
    # dplyr::filter() replaces the deprecated standard-evaluation filter_().
    ggbeeswarm::geom_beeswarm(
      data = function(x) dplyr::filter(x, outlier),
      cex = 3, stroke = 0.25, groupOnX = TRUE,
      shape = 21, size = 1, color = "white", alpha = 1
    ) +
    # Adjusted p-values versus rLN, printed at a fixed height per subset.
    geom_text(
      data = df_freqPlot_pvalues %>% filter(IdentI %in% y),
      inherit.aes = FALSE,
      aes(y = height, x = Entity, label = p.adj_f),
      hjust = 0.1, size = 2.3, angle = 45
    ) +
    scale_fill_manual(values = colors_umap_cl) +
    scale_y_continuous(name = "% of total T-cells", limits = c(0, panel_ymax[i])) +
    scale_x_discrete(expand = c(0.17, 0.17)) +
    facet_wrap(~Label, ncol = 2, labeller = label_parsed) +
    mytheme_1 +
    theme(
      axis.title.x = element_blank(),
      strip.background = element_rect(color = NA),
      plot.margin = unit(c(0, 0, 0, 0), units = "cm"),
      plot.title = element_text(margin = unit(c(0, 0, 0, 0), units = "cm")),
      panel.border = element_rect(size = 0.5),
      axis.text.x = element_text(angle = 45, hjust = 1)
    )

  # Only the bottom row keeps its x-axis text and ticks.
  if (i != 7) {
    panel <- panel +
      theme(
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank()
      )
  }
  # Only the middle row (4th of 7) keeps the y-axis title.
  if (i != 4) {
    panel <- panel +
      theme(axis.title.y = element_blank())
  }
  # The panel tag goes on the top row.
  if (i == 1) {
    panel <- panel +
      labs(tag = "A") +
      theme(plot.tag = element_text(vjust = -0.5))
  }

  p[[i]] <- panel
}

plot_freq <- wrap_plots(p, ncol = 1)
plot_freq
Principal component analysis (PCA)
# PCA of the patient x subset proportion matrix, with every column centred
# and scaled to unit variance.
pca_seq <- prcomp(mat_complete, center = TRUE, scale. = TRUE)

# Per-patient PC scores annotated with the entity via add_entity().
pca_scores <- add_entity(
  rownames_to_column(data.frame(pca_seq$x), "PatientID")
)

# Panel B: PC1 against sign-flipped PC2, one point per patient.
p1 <-
  ggplot(pca_scores, aes(x = PC1, y = -PC2, fill = Entity)) +
  geom_point(size = 1.75, shape = 21, stroke = 0.25, color = "white") +
  scale_fill_brewer(
    palette = "Paired",
    limits = c("DLBCL", "MCL", "FL", "MZL", "rLN")
  ) +
  guides(fill = guide_legend(override.aes = list(size = 2))) +
  # The axis title reads "PC2" although the plotted value is -PC2.
  labs(x = "PC1", y = "PC2", tag = "B") +
  coord_cartesian(clip = "off") +
  mytheme_1 +
  theme(
    legend.position = "top",
    legend.title = element_blank(),
    legend.spacing.x = unit(0.05, "cm"),
    legend.key.height = unit(0.36, "cm"),
    legend.key.width = unit(0.26, "cm"),
    legend.box.margin = unit(c(0, 0, -0.35, 0), "cm"),
    plot.tag = element_text(vjust = -2.5),
    plot.margin = unit(c(0, 0.25, 0, -0.25), units = "cm"),
    plot.background = element_rect(fill = "transparent", colour = NA_character_),
    panel.border = element_rect(size = 0.25),
    axis.title.x = element_text(margin = unit(c(-1, 0, 0, 0), units = "cm"))
  )
# The four subsets with the largest absolute PC1 loading, ordered from the
# most positive to the most negative loading.
pc1 <- data.frame(pca_seq$rotation) %>%
  rownames_to_column("IdentI") %>%
  select(IdentI, PC1) %>%
  top_n(4, abs(PC1)) %>%
  arrange(desc(PC1))

# Panel C: bar chart of those PC1 loadings.
p2 <- ggplot(pc1, aes(x = IdentI, y = PC1, fill = IdentI)) +
  geom_col(width = 0.4, color = NA, alpha = 0.5) +
  geom_hline(yintercept = 0, size = 0.25) +
  scale_fill_manual(
    values = colors_umap_cl,
    limits = factor(cluster_order),
    labels = unlist(labels_cl)
  ) +
  # Bars follow the row order of pc1; tick labels are looked up in labels_cl.
  scale_x_discrete(
    limits = pc1$IdentI,
    labels = unlist(labels_cl[pc1$IdentI])
  ) +
  scale_y_continuous(
    name = "PC1",
    limits = c(-0.5, 0.5),
    breaks = c(-0.5, 0, 0.5)
  ) +
  coord_cartesian(clip = "off") +
  mytheme_1 +
  theme(
    axis.title.x = element_blank(),
    axis.text = element_text(size = 6.5),
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.tag = element_text(vjust = -2.5),
    plot.margin = unit(c(0.25, 0, 0, 0), units = "cm")
  ) +
  labs(tag = "C")
# The four subsets with the largest absolute PC2 loading, sorted by
# ascending PC2 (i.e. descending -PC2, which is what gets plotted).
pc2 <- data.frame(pca_seq$rotation) %>%
  rownames_to_column("IdentI") %>%
  select(IdentI, PC2) %>%
  top_n(4, abs(PC2)) %>%
  arrange(PC2)

# Panel D: bar chart of the sign-flipped PC2 loadings, matching the flipped
# PC2 axis of the score plot p1.
p3 <- ggplot(pc2, aes(x = IdentI, y = -PC2, fill = IdentI)) +
  geom_col(width = 0.4, color = NA, alpha = 0.5) +
  geom_hline(yintercept = 0, size = 0.25) +
  scale_fill_manual(
    values = colors_umap_cl,
    limits = factor(cluster_order),
    labels = unlist(labels_cl)
  ) +
  # Bars follow the row order of pc2; tick labels are looked up in labels_cl.
  scale_x_discrete(
    limits = pc2$IdentI,
    labels = unlist(labels_cl[pc2$IdentI])
  ) +
  scale_y_continuous(
    name = "PC2",
    limits = c(-0.5, 0.5),
    breaks = c(-0.5, 0, 0.5)
  ) +
  coord_cartesian(clip = "off") +
  mytheme_1 +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.margin = unit(c(0, 0, 0.25, 0), units = "cm")
  ) +
  labs(tag = "D")

# Score plot on the left (relative width 1), the two loading plots stacked on
# the right (relative width 0.5).
p1 + (p2 / p3) + plot_layout(widths = c(1, 0.5))
Lasso prediction
Dendrogram
# Two-level hierarchy: a single root ("all") with one leaf per entity.
data <- data.frame(
  level1 = "all",
  level2 = c("rLN", "MZL", "MCL", "FL", "DLBCL")
)

# Edge table with from/to columns, one row per root -> entity edge.
edges_level1_2 <- data %>%
  select(from = level1, to = level2) %>%
  distinct()
edge_list <- edges_level1_2

# Vertex table: the root gets no label, each entity is labelled by its name.
vert <- data.frame(name = unique(c(data$level1, data$level2)))
vert$label <- c(NA, "rLN", "MZL", "MCL", "FL", "DLBCL")

# Build the graph object.
mygraph_lasso <- graph_from_data_frame(edge_list, vertices = vert)

# Draw the dendrogram. alpha and fill are given per vertex, in the row order
# of `vert`; the root is hidden with alpha 0.
# NOTE(review): vertices MCL and FL receive Paired colours 3 and 2 here,
# whereas scale_fill_brewer(limits = c("DLBCL", "MCL", "FL", "MZL", "rLN"))
# in the other panels gives MCL colour 2 and FL colour 3 - confirm that this
# swap is intended.
ggraph(mygraph_lasso, layout = "tree", circular = FALSE) +
  geom_edge_diagonal(strength = 1.4, edge_width = 0.25) +
  geom_node_point(
    shape = 21,
    size = 3.5,
    stroke = 2,
    color = "white",
    alpha = c(0, 1, 1, 1, 1, 1),
    fill = c(NA, brewer.pal(name = "Paired", 5)[c(5, 4, 3, 2, 1)])
  ) +
  coord_flip(clip = "off") +
  scale_y_reverse() +
  theme_void() +
  theme(
    legend.position = "right",
    plot.margin = margin(0.25, 0.25, 0.25, 0, unit = "cm"),
    plot.title = element_text(hjust = 0.4, size = 7, face = "bold")
  )
Model
Confusion matrix
# Entity order used for both axes of the heatmap.
entities <- c("DLBCL", "MCL", "FL", "MZL", "rLN")

# Take the confusion table from `gt`, treat it as a plain matrix, divide each
# row by its row sum (convert to probability estimates) and put rows and
# columns into the `entities` order.
tbl <- gt$confusion_table
class(tbl) <- "matrix"
tbl <- sweep(tbl, 1, rowSums(tbl), "/")
tbl <- tbl[entities, entities]

# Long format: matrix rows become `truth`, matrix columns become `predicted`.
confusion_long <- data.frame(tbl) %>%
  rownames_to_column("truth") %>%
  pivot_longer(cols = -truth, names_to = "predicted", values_to = "Prop")

# NOTE(review): x shows the matrix rows (`truth`) but is titled "Reference",
# while the y scale showing the matrix columns (`predicted`) is named "Truth"
# (that title is hidden by the theme). Confirm the orientation of
# gt$confusion_table against these axis names.
ggplot(confusion_long, aes(x = truth, y = predicted, fill = Prop)) +
  geom_tile() +
  # Black grid lines between the five tiles in each direction.
  geom_hline(yintercept = c(1.5, 2.5, 3.5, 4.5), size = 0.25, color = "black") +
  geom_vline(xintercept = c(1.5, 2.5, 3.5, 4.5), size = 0.25, color = "black") +
  scale_x_discrete(limits = rev(entities), expand = c(0, 0), position = "top") +
  scale_y_discrete(limits = entities, expand = c(0, 0), name = "Truth") +
  scale_fill_gradientn(
    colours = colorRampPalette(RColorBrewer::brewer.pal(9, "BuPu"))(100),
    name = "Prop",
    limits = c(0, 0.8)
  ) +
  labs(x = "Reference") +
  coord_fixed() +
  mytheme_1 +
  theme(
    panel.border = element_blank(),
    axis.ticks = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 0),
    legend.position = "right",
    legend.key.height = unit(0.3, "cm"),
    legend.key.width = unit(0.3, "cm"),
    legend.box.spacing = unit(0.1, "cm"),
    legend.box.margin = unit(c(0, -0.25, 0, 0.05), units = "cm"),
    plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), units = "cm")
  )
Patient characteristics
# One row per patient from df_meta with the clinical columns needed below.
patient_meta <- distinct(df_meta, PatientID, Status, Pretreatment, Entity, Age)

# Long-format proportions (one row per patient and IdentI) joined with the
# clinical columns; Entity is a factor in the order used for the fill legend.
df_char <- mat_complete %>%
  rownames_to_column("PatientID") %>%
  pivot_longer(cols = -PatientID, names_to = "IdentI", values_to = "Prop") %>%
  left_join(patient_meta, by = "PatientID") %>%
  mutate(Entity = factor(Entity, levels = c("DLBCL", "MCL", "FL", "MZL", "rLN")))
# Boxplot of one subset's proportion by Status (initial diagnosis vs relapse),
# filled by entity, excluding rLN samples.
#
# data       long-format table with columns IdentI, Entity, Status and Prop
# cluster_id IdentI value of the subset to plot (also used to look up the
#            plot title in labels_cl)
# bracket_y  y position of the comparison bracket drawn by
#            stat_compare_means()
# tag        panel tag
#
# Returns a ggplot object.
make_status_plot <- function(data, cluster_id, bracket_y, tag) {
  data %>%
    filter(IdentI %in% cluster_id, Entity != "rLN") %>%
    ggplot(aes(x = Status, y = Prop)) +
    geom_boxplot(
      aes(fill = Entity),
      width = 0.35, size = 0.25, position = position_dodge(width = 0.6),
      outlier.shape = 21, outlier.size = 1, outlier.color = "white",
      outlier.alpha = 0.75
    ) +
    stat_compare_means(
      comparisons = list(c("Initial diagnosis", "Relapse")),
      vjust = -0.35, label.y = bracket_y,
      size = 2.5, tip.length = 0.02, bracket.size = 0.25
    ) +
    scale_fill_brewer(
      palette = "Paired",
      limits = c("DLBCL", "MCL", "FL", "MZL", "rLN")
    ) +
    ggtitle(labels_cl[[cluster_id]]) +
    scale_y_continuous(
      name = "% of total T-cells",
      expand = c(0.05, 0.15),
      limits = c(0, 35)
    ) +
    scale_x_discrete(labels = c("Initial \ndiagnosis", "Relapse")) +
    mytheme_1 +
    theme_characteristics +
    labs(tag = tag)
}

# Panels F, H, I: proportion by disease status for subsets 1, 9 and 13.
plot_status_ident1 <- make_status_plot(df_char, "1", bracket_y = 28, tag = "F")
plot_status_ident9 <- make_status_plot(df_char, "9", bracket_y = 28, tag = "H")
plot_status_ident13 <- make_status_plot(df_char, "13", bracket_y = 29, tag = "I")

# Panel G: proportion of subset 1 against patient age (rLN excluded), with a
# linear fit and the correlation annotation added by stat_cor().
plot_age_ident1 <-
  df_char %>%
  filter(IdentI %in% "1", Entity != "rLN") %>%
  ggplot(aes(x = Age, y = Prop)) +
  geom_point(
    aes(fill = Entity),
    size = 1.25, color = "grey65", shape = 21, stroke = 0
  ) +
  geom_smooth(
    method = "lm", formula = "y ~ x",
    color = "black", size = 0.25, linetype = "dashed", alpha = 0.25
  ) +
  stat_cor(size = 2.5) +
  scale_fill_brewer(
    palette = "Paired",
    limits = c("DLBCL", "MCL", "FL", "MZL", "rLN")
  ) +
  scale_y_continuous(
    name = "% of total T-cells",
    expand = c(0.05, 0.15),
    limits = c(0, 35)
  ) +
  ggtitle(labels_cl[["1"]]) +
  mytheme_1 +
  theme_characteristics +
  theme(axis.title.x = element_text(size = 7, vjust = 4)) +
  labs(tag = "G")

plot_status_ident1 + plot_age_ident1 + plot_status_ident9 + plot_status_ident13
Session info
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Rocky Linux 8.8 (Green Obsidian)
##
## Matrix products: default
## BLAS/LAPACK: /g/easybuild/x86_64/Rocky/8/haswell/software/FlexiBLAS/3.0.4-GCC-11.2.0/lib64/libflexiblas.so.3.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] pamr_1.56.1 cluster_2.1.2 glmnet_4.1-2 Matrix_1.5-1 immunarch_0.7.0 data.table_1.14.2
## [7] dtplyr_1.2.2 rmdformats_1.0.4 ggplotify_0.1.0 ggraph_2.0.6 igraph_1.3.5 ggrastr_1.0.1
## [13] ggtext_0.1.1 ggalluvial_0.12.3 maxstat_0.7-25 survival_3.2-13 survminer_0.4.9 ggridges_0.5.3
## [19] cowplot_1.1.1 R.utils_2.11.0 R.oo_1.24.0 R.methodsS3_1.8.1 readxl_1.4.1 caret_6.0-90
## [25] lattice_0.20-45 patchwork_1.1.2 rstatix_0.7.0 ggpubr_0.4.0 ggrepel_0.9.1 matrixStats_0.61.0
## [31] scales_1.2.1 RColorBrewer_1.1-3 viridis_0.6.2 viridisLite_0.4.1 forcats_0.5.1 stringr_1.4.1
## [37] dplyr_1.0.10 purrr_0.3.4 readr_2.1.2 tidyr_1.2.1 tibble_3.1.8 ggplot2_3.3.6
## [43] tidyverse_1.3.1 SeuratObject_4.0.4 Seurat_4.1.0 knitr_1.40
##
## loaded via a namespace (and not attached):
## [1] scattermore_0.8 prabclus_2.3-2 ModelMetrics_1.2.2.2 exactRankTests_0.8-34 ragg_1.2.1
## [6] bit64_4.0.5 irlba_2.3.5 rpart_4.1-15 doParallel_1.0.17 generics_0.1.3
## [11] RANN_2.6.1 future_1.23.0 bit_4.0.4 tzdb_0.3.0 rlist_0.4.6.2
## [16] spatstat.data_2.1-2 xml2_1.3.2 lubridate_1.8.0 httpuv_1.6.6 assertthat_0.2.1
## [21] gower_0.2.2 xfun_0.33 hms_1.1.2 jquerylib_0.1.4 evaluate_0.16
## [26] promises_1.2.0.1 DEoptimR_1.0-11 fansi_1.0.3 dbplyr_2.1.1 km.ci_0.5-2
## [31] DBI_1.1.2 htmlwidgets_1.5.4 spatstat.geom_2.3-2 stringdist_0.9.8 stats4_4.1.2
## [36] ellipsis_0.3.2 backports_1.4.1 bookdown_0.29 deldir_1.0-6 vctrs_0.4.2
## [41] ROCR_1.0-11 abind_1.4-5 cachem_1.0.6 withr_2.5.0 ggforce_0.4.0
## [46] robustbase_0.95-0 vroom_1.5.7 sctransform_0.3.3 mclust_5.4.10 goftest_1.2-3
## [51] ape_5.6-2 lazyeval_0.2.2 crayon_1.5.2 labeling_0.4.2 recipes_0.1.17
## [56] pkgconfig_2.0.3 tweenr_2.0.2 nlme_3.1-153 vipor_0.4.5 nnet_7.3-16
## [61] rlang_1.0.6 globals_0.14.0 diptest_0.76-0 lifecycle_1.0.2 miniUI_0.1.1.1
## [66] modelr_0.1.8 cellranger_1.1.0 polyclip_1.10-0 lmtest_0.9-39 phangorn_2.10.0
## [71] ggseqlogo_0.1 KMsurv_0.1-5 carData_3.0-5 zoo_1.8-9 reprex_2.0.1
## [76] beeswarm_0.4.0 GlobalOptions_0.1.2 pheatmap_1.0.12 png_0.1-7 KernSmooth_2.23-20
## [81] pROC_1.18.0 shape_1.4.6 parallelly_1.30.0 spatstat.random_2.1-0 gridGraphics_0.5-1
## [86] ggsignif_0.6.3 magrittr_2.0.3 plyr_1.8.7 ica_1.0-2 compiler_4.1.2
## [91] factoextra_1.0.7 fitdistrplus_1.1-6 cli_3.4.1 listenv_0.8.0 pbapply_1.5-0
## [96] MASS_7.3-54 mgcv_1.8-38 tidyselect_1.1.2 stringi_1.7.8 textshaping_0.3.6
## [101] highr_0.9 yaml_2.3.5 survMisc_0.5.5 sass_0.4.2 fastmatch_1.1-3
## [106] tools_4.1.2 future.apply_1.8.1 parallel_4.1.2 circlize_0.4.15 rstudioapi_0.13
## [111] uuid_1.1-0 foreach_1.5.2 gridExtra_2.3 prodlim_2019.11.13 farver_2.1.1
## [116] Rtsne_0.16 digest_0.6.29 shiny_1.7.2 lava_1.6.10 quadprog_1.5-8
## [121] fpc_2.2-9 Rcpp_1.0.9 gridtext_0.1.4 car_3.1-0 broom_1.0.1
## [126] later_1.3.0 RcppAnnoy_0.0.19 httr_1.4.2 kernlab_0.9-31 colorspace_2.0-3
## [131] rvest_1.0.2 fs_1.5.2 tensor_1.5 reticulate_1.24 splines_4.1.2
## [136] uwot_0.1.11 yulab.utils_0.0.4 spatstat.utils_2.3-0 graphlayouts_0.8.2 xgboost_1.4.1.1
## [141] shinythemes_1.2.0 flexmix_2.3-18 systemfonts_1.0.4 plotly_4.10.0 xtable_1.8-4
## [146] jsonlite_1.8.0 tidygraph_1.2.2 timeDate_3043.102 UpSetR_1.4.0 modeltools_0.2-23
## [151] ipred_0.9-12 R6_2.5.1 pillar_1.8.1 htmltools_0.5.3 mime_0.12
## [156] glue_1.6.1 fastmap_1.1.0 class_7.3-19 codetools_0.2-18 mvtnorm_1.1-3
## [161] utf8_1.2.2 bslib_0.4.0 spatstat.sparse_2.1-0 ggbeeswarm_0.6.0 leiden_0.3.9
## [166] rmarkdown_2.17 munsell_0.5.0 iterators_1.0.14 haven_2.4.3 reshape2_1.4.4
## [171] gtable_0.3.1 spatstat.core_2.4-0