--- a +++ b/figures/Figure3.Rmd @@ -0,0 +1,408 @@ +--- +title: "Figure 3" +author: Tobias Roider +date: "Last compiled on `r format(Sys.time(), '%d %B, %Y, %X')`" +output: + rmdformats::readthedown: + +editor_options: + chunk_output_type: console +--- + +```{r options, include=FALSE, warning = FALSE} + +library(knitr) +opts_chunk$set(echo=TRUE, tidy=FALSE, include=TRUE, message=FALSE, + dpi = 100, cache = FALSE, warning = FALSE) +opts_knit$set(root.dir = "../") +options(bitmapType = "cairo") + +``` + +# Read data, functions and packages +```{r read data} + +source("R/ReadPackages.R") +source("R/Functions.R") +source("R/ReadData.R") +source("R/ThemesColors.R") +source("R/Helpers.R") + +``` + +# Proportions overview +## Handle data and calculate p values +```{r create matrix} + +df_pop + +mat_complete <- rbind( + df_facs %>% + left_join(., df_meta %>% select(PatientID, `CITEseq`)) %>% + filter(`CITEseq`=="-") %>% + select(PatientID, Population, Prop=FACS), + df_freq %>% + mutate(RNA=ifelse(is.nan(RNA), 0, RNA)) %>% + select(PatientID, Population, Prop=RNA)) %>% + filter(Population %in% df_pop$Population) %>% + left_join(., df_pop) %>% + select(-Population) %>% + pivot_wider(names_from = "IdentI", values_from = "Prop", values_fill = 0) %>% + column_to_rownames("PatientID") + +cl_order <- c(6,1,2,9,8,13,15,11,12,16,3,5,14,19) +names(labels_cl_parsed) <- as.character(cluster_order) + +df_freqPlot <- + mat_complete %>% + rownames_to_column("PatientID") %>% + pivot_longer(cols=2:ncol(.), names_to = "IdentI", values_to = "Prop") %>% + add_entity() %>% + mutate(Entity=factor(Entity, levels = c("rLN", "DLBCL", "MCL", "FL", "MZL"))) %>% + mutate(IdentI=factor(IdentI, levels=cl_order)) %>% + mutate(Label=factor(IdentI, levels=cl_order, labels = labels_cl_parsed[as.character(cl_order)])) %>% + group_by(Entity, IdentI) %>% + mutate(outlier = (Prop > quantile(Prop, 0.75) + IQR(Prop) * 1.5) | (Prop < quantile(Prop, 0.25) - IQR(Prop) * 1.5)) + +df_medianLines <- df_freqPlot %>% + filter(Entity=="rLN") %>% + group_by(IdentI, Label) %>% + summarise(MedianProp=median(Prop)) + +df_freqPlot_pvalues <- + df_freqPlot %>% + group_by(IdentI) %>% + wilcox_test(data=., formula = Prop ~ Entity, detailed = T, ref.group = "rLN") %>% + adjust_pvalue(method = "BH") %>% + select(IdentI, Entity=group2, p.adj, estimate) %>% + mutate(Entity=factor(Entity, levels = c("rLN", "DLBCL", "MCL", "FL", "MZL"))) %>% + mutate(p.adj=ifelse(p.adj>0.05, NA, p.adj)) %>% + mutate(p.adj_s=format(p.adj, scientific = TRUE, digits=1)) %>% + mutate(p.adj_f=case_when(p.adj > 0.05 ~ "NA", + p.adj==0.05 ~ "0.05", + p.adj < 0.05 & p.adj > 0.001 ~ as.character(round(p.adj, 3)), + p.adj==0.001 ~ "0.001", + p.adj < 0.001 ~ p.adj_s)) %>% + filter(!is.na(p.adj)) %>% + left_join(., df_freqPlot %>% select(IdentI, Label) %>% distinct) %>% + left_join(., data.frame(IdentI=factor(cl_order), height=c(52.5, 52.5, 21.5, 21.5, 22, 22, 20, 20, 26, 26, 80, 80, 21, 21))) + +``` + +## Plot +```{r freq overview, fig.height=13, fig.width=5.2} + +p <- list() + +for(i in c(1:7)){ + + y <- list(c(1,6),c(2,9),c(8,13),c(15,11),c(12,16),c(3,5),c(14,19))[[i]] + ylim <- c(70,30,32,28,35,110,28) + + p[[i]] <- + ggplot(data=df_freqPlot %>% filter(IdentI %in% y) %>% + mutate(Label=factor(Label, levels = labels_cl_parsed[as.character(y)])), + aes(y=Prop, x=Entity, fill=IdentI))+ + geom_hline(data=df_medianLines %>%filter(IdentI %in% y), aes(yintercept=MedianProp), + size=0.25, linetype="dashed", color="grey60")+ + geom_boxplot(width=0.4, outlier.shape = 21, outlier.size = 1, outlier.color = "white", + outlier.alpha = 0, show.legend = F, size=0.25)+ + ggbeeswarm::geom_beeswarm(data = function(x) dplyr::filter_(x, ~ outlier), cex = 3, stroke=0.25, + groupOnX = TRUE, shape = 21, size = 1, color = "white", alpha = 1)+ + geom_text(data=df_freqPlot_pvalues %>% filter(IdentI %in% y), + inherit.aes = F, aes(y=height, x=Entity, label=p.adj_f), hjust=0.1, size=2.3, angle=45)+ + scale_fill_manual(values = colors_umap_cl)+ + scale_y_continuous(name="% of total T-cells", limits=c(0,ylim[i]))+ + scale_x_discrete(expand = c(0.17,0.17))+ + facet_wrap(~Label, ncol = 2, labeller = label_parsed)+ + mytheme_1+ + theme(axis.title.x = element_blank(), + strip.background = element_rect(color=NA), + plot.margin = unit(c(0,0,0,0), units = "cm"), + plot.title = element_text(margin = unit(c(0,0,0,0), units = "cm")), + panel.border = element_rect(size = 0.5), + axis.text.x = element_text(angle=45, hjust = 1)) + + if(i!=7){ + p[[i]] <- p[[i]]+ + theme(axis.text.x = element_blank(), + axis.ticks.x = element_blank()) + } + + if(i!=4){ + p[[i]] <- p[[i]]+ + theme(axis.title.y = element_blank()) + } + + if(i==1){ + p[[i]] <- p[[i]]+ + labs(tag = "A")+ + theme(plot.tag = element_text(vjust = -0.5)) +} + + +} + +plot_freq <- wrap_plots(p, ncol = 1) +plot_freq +ggsave(width = 9, height = 21, units = "cm", filename = "Figure3_p1.pdf") + +``` + +# Principal component analysis (PCA) +```{r pca, fig.width=6, fig.height=4} + +pca_seq <- prcomp(mat_complete, scale. = T, center = T) + +p1 <- + pca_seq$x %>% + data.frame() %>% + rownames_to_column("PatientID") %>% + add_entity() %>% + ggplot(aes(x=PC1, y=-PC2, fill=Entity))+ + geom_point(size=1.75, shape=21, stroke=0.25, color="white")+ + scale_fill_brewer(palette = "Paired", limits=c("DLBCL", "MCL", "FL", "MZL", "rLN"))+ + guides(fill=guide_legend(override.aes = list(size=2)))+ + ylab("PC2")+ + xlab("PC1")+ + mytheme_1+ + coord_cartesian(clip = "off")+ + theme(legend.position = "top", + legend.title = element_blank(), + legend.spacing.x = unit("cm", x = 0.05), + legend.box.margin = unit(c(0,0,-0.35,0), "cm"), + plot.tag = element_text(vjust = -2.5), + plot.margin = unit(c(0,0.25,0,-0.25), units = "cm"), + plot.background = element_rect(fill = "transparent", + colour = NA_character_), + panel.border = element_rect(size=0.25), + legend.key.height = unit("cm", x = 0.36), + axis.title.x = element_text(margin = unit(c(-1,0,0,0), units = "cm")), + legend.key.width = unit("cm", x = 0.26))+ + labs(tag = "B") + +pc1 <- + pca_seq$rotation %>% + data.frame %>% + select(PC1) %>% + rownames_to_column("IdentI") %>% + top_n(4, abs(PC1)) %>% + arrange(-PC1) + +p2 <- ggplot(pc1, aes(y=PC1, x=IdentI, fill=IdentI))+ + geom_bar(stat = "identity", width = 0.4, color=NA, alpha=0.5)+ + scale_fill_manual(values = colors_umap_cl, limits=factor(cluster_order), + labels=unlist(labels_cl))+ + scale_x_discrete(limits=pc1$IdentI, labels=labels_cl[pc1$IdentI] %>% unlist())+ + geom_hline(yintercept = 0, size=0.25)+ + scale_y_continuous(limits=c(-0.5, 0.5), breaks = c(-0.5, 0, 0.5), name = "PC1")+ + mytheme_1+ + coord_cartesian(clip = "off")+ + theme(axis.title.x = element_blank(), + plot.margin = unit(c(0.25,0,0,0), units = "cm"), + axis.text = element_text(size=6.5), + plot.tag = element_text(vjust = -2.5), + axis.text.x = element_text(angle=45, hjust = 1))+ + labs(tag = "C") + +pc2 <- pca_seq$rotation %>% + data.frame %>% + select(PC2) %>% + rownames_to_column("IdentI") %>% + top_n(4, abs(PC2)) %>% + arrange(PC2) + +p3 <- ggplot(pc2, aes(y=-PC2, x=IdentI, fill=IdentI))+ + geom_bar(stat = "identity", width = 0.4, color=NA, alpha=0.5)+ + scale_fill_manual(values = colors_umap_cl, limits=factor(cluster_order), + labels=unlist(labels_cl))+ + scale_x_discrete(limits=pc2$IdentI, labels=labels_cl[pc2$IdentI] %>% unlist())+ + geom_hline(yintercept = 0, size=0.25)+ + scale_y_continuous(limits=c(-0.5, 0.5), breaks = c(-0.5, 0, 0.5), name = "PC2")+ + coord_cartesian(clip = "off")+ + mytheme_1+ + theme(axis.title.x = element_blank(), + plot.margin = unit(c(0,0,0.25,0), units = "cm"), + axis.text.x = element_text(angle=45, hjust = 1))+ + labs(tag = "D") + +p1+(p2/p3)+plot_layout(widths = c(1,0.5)) +#ggsave(width = 9.75, height = 7.5, units = "cm", filename = "Figure3_p2.pdf") + +``` + +# Lasso prediction +## Dendrogram +```{r dendrogram, fig.width=3, fig.height=3} + +# Create data frame +data <- data.frame( + level1="all", + level2=c("rLN", + "MZL", + "MCL", + "FL", + "DLBCL") +) + +edges_level1_2 <- data %>% select(level1, level2) %>% unique %>% rename(from=level1, to=level2) +edge_list=rbind(edges_level1_2) +vert <- data.frame( + name=unique(c(data$level1, data$level2))) %>% + mutate(label=c(NA, "rLN", "MZL", "MCL", "FL", "DLBCL")) + +# Create graph object +mygraph_lasso <- graph_from_data_frame( edge_list ,vertices = vert) + +# Plot dendrogramm +ggraph(mygraph_lasso, layout = 'tree', circular = FALSE)+ + geom_edge_diagonal(strength = 1.4, edge_width=0.25)+ + geom_node_point(shape=21, size=3.5, color="white", stroke=2, alpha=c(0,1,1,1,1,1), + fill=c(NA, brewer.pal(name = "Paired", 5)[c(5,4,3,2,1)]))+ + coord_flip(clip = "off")+ + scale_y_reverse()+ + theme_void()+ + theme(legend.position = "right", + plot.margin = margin(0.25,0.25,0.25,0, unit = "cm"), + plot.title = element_text(hjust=0.4, size=7, face = "bold"),) + +#ggsave(width = 3.5, height = 3.4, units = "cm", filename = "Figure3_p3.pdf") + +``` + +## Model +```{r lasso model, fig.height=3, fig.width=3} + +total <- mat_complete %>% + rownames_to_column("PatientID") %>% + left_join(., df_meta %>% select(PatientID, Tcells)) %>% + add_entity() %>% + mutate_if(is.numeric, ~./100) + +cell_types <- total %>% select(-Entity, -PatientID) %>% colnames() +gt <- my_glmnet(total) + +``` + +## Confusion matrix +```{r lasso plot, fig.height=3, fig.width=3} + +entities <- c("DLBCL", "MCL", "FL", "MZL", "rLN") + +tbl <- gt$confusion_table +class(tbl) = "matrix" +tbl = tbl / rowSums(tbl) # convert to probability estimates +tbl = tbl[entities, entities] + +tbl %>% data.frame() %>% + rownames_to_column("truth") %>% + pivot_longer(cols = 2:ncol(.), names_to = "predicted", values_to = "Prop") %>% + ggplot(aes(x=truth, y=predicted, fill=Prop))+ + geom_tile()+ + scale_x_discrete(limits=rev(entities), expand = c(0,0), position = "top")+ + scale_y_discrete(limits=entities, expand = c(0,0), name="Truth")+ + geom_hline(yintercept = c(1.5,2.5,3.5,4.5),size=0.25, color="black")+ + geom_vline(xintercept = c(1.5,2.5,3.5,4.5),size=0.25, color="black")+ + scale_fill_gradientn(colours = colorRampPalette(RColorBrewer::brewer.pal(9, "BuPu"))(100), name="Prop", limits=c(0,0.8))+ + xlab("Reference")+ + mytheme_1+ + coord_fixed()+ + theme(panel.border = element_blank(), + legend.position = "right", + legend.key.height = unit(0.3, "cm"), + legend.key.width = unit(0.3, "cm"), + legend.box.spacing = unit(0.1, "cm"), + legend.box.margin = unit(c(0,-0.25,0,0.05), units = "cm"), + plot.margin = unit(c(0.1,0.1,0.1,0.1), units = "cm"), + axis.text.x = element_text(angle=45, hjust = 0), + axis.ticks = element_blank(), + axis.title.y = element_blank()) + +#ggsave(width = 5.5, height = 5.5, units = "cm", filename = "Figure3_p4.pdf") + +``` + +# Patient characteristics +```{r Patient characteristics, fig.width=5.25} + +df_char <- mat_complete %>% + rownames_to_column("PatientID") %>% + pivot_longer(cols=2:ncol(.), names_to = "IdentI", values_to = "Prop") %>% + left_join(., df_meta %>% select(PatientID, Status, Pretreatment, Entity, Age) %>% distinct, by="PatientID") %>% + mutate(Entity=factor(Entity, levels = c("DLBCL", "MCL", "FL", "MZL", "rLN"))) + +plot_status_ident1 <- + df_char %>% + filter(IdentI %in% c(1), Entity!="rLN") %>% + ggplot(aes(x=Status, y=Prop))+ + geom_boxplot(width=0.35, size=0.25, aes(fill=Entity), position = position_dodge(width = 0.6), + outlier.shape = 21, outlier.size = 1, outlier.color = "white", outlier.alpha = 0.75)+ + stat_compare_means(comparisons = list(c("Initial diagnosis", "Relapse")), vjust = -0.35, label.y = c(28), + size=2.5, tip.length = 0.02, bracket.size = 0.25)+ + scale_fill_brewer(palette = "Paired", limits=c("DLBCL", "MCL", "FL", "MZL", "rLN"))+ + ggtitle(labels_cl[["1"]])+ + scale_y_continuous(name="% of total T-cells", expand = c(0.05,0.15), limits=c(0,35))+ + scale_x_discrete(labels=c("Initial \ndiagnosis", "Relapse"))+ + mytheme_1+ + theme_characteristics+ + labs(tag = "F") + +plot_age_ident1 <- + df_char %>% + filter(IdentI %in% c(1), Entity!="rLN") %>% + ggplot(aes(x=Age, y=Prop))+ + geom_point(size=1.25, color="grey65", shape=21, stroke=0, aes(fill=Entity))+ + geom_smooth(method = "lm", color="black", size=0.25, linetype="dashed", alpha=0.25, formula = 'y ~ x')+ + stat_cor(size=2.5)+ + scale_fill_brewer(palette = "Paired", limits=c("DLBCL", "MCL", "FL", "MZL", "rLN"))+ + scale_y_continuous(name="% of total T-cells", expand = c(0.05,0.15), limits=c(0,35))+ + ggtitle(labels_cl[["1"]])+ + mytheme_1+ + theme_characteristics+ + theme(axis.title.x = element_text(size=7, vjust = 4))+ + labs(tag = "G") + +plot_status_ident9 <- + df_char %>% + filter(IdentI %in% c(9), Entity!="rLN") %>% + ggplot(aes(x=Status, y=Prop))+ + geom_boxplot(width=0.35, size=0.25, aes(fill=Entity), position = position_dodge(width = 0.6), + outlier.shape = 21, outlier.size = 1, outlier.color = "white", outlier.alpha = 0.75)+ + stat_compare_means(comparisons = list(c("Initial diagnosis", "Relapse")), vjust = -0.35, label.y = c(28), + size=2.5, tip.length = 0.02, bracket.size = 0.25)+ + scale_fill_brewer(palette = "Paired", limits=c("DLBCL", "MCL", "FL", "MZL", "rLN"))+ + ggtitle(labels_cl[["9"]])+ + scale_y_continuous(name="% of total T-cells", expand = c(0.05,0.15), limits=c(0,35))+ + scale_x_discrete(labels=c("Initial \ndiagnosis", "Relapse"))+ + mytheme_1+ + theme_characteristics+ + labs(tag = "H") + +plot_status_ident13 <- + df_char %>% + filter(IdentI %in% c(13), Entity!="rLN") %>% + ggplot(aes(x=Status, y=Prop))+ + geom_boxplot(width=0.35, size=0.25, aes(fill=Entity), position = position_dodge(width = 0.6), + outlier.shape = 21, outlier.size = 1, outlier.color = "white", outlier.alpha = 0.75)+ + stat_compare_means(comparisons = list(c("Initial diagnosis", "Relapse")), vjust = -0.35, label.y = c(29), + size=2.5, tip.length = 0.02, bracket.size = 0.25)+ + scale_fill_brewer(palette = "Paired", limits=c("DLBCL", "MCL", "FL", "MZL", "rLN"))+ + ggtitle(labels_cl[["13"]])+ + scale_y_continuous(name="% of total T-cells", expand = c(0.05,0.15), limits=c(0,35))+ + scale_x_discrete(labels=c("Initial \ndiagnosis", "Relapse"))+ + mytheme_1+ + theme_characteristics+ + labs(tag = "I") + +plot_status_ident1+plot_age_ident1+plot_status_ident9+plot_status_ident13 + +#ggsave(width = 9.5, height = 9.65, units = "cm", filename = "Figure3_p5.pdf") + +``` + +# Session info +```{r session info} + +sessionInfo() + +```