409 lines (343 with data), 15.0 kB
---
title: "Figure 3"
author: Tobias Roider
date: "Last compiled on `r format(Sys.time(), '%d %B, %Y, %X')`"
output:
rmdformats::readthedown:
editor_options:
chunk_output_type: console
---
```{r options, include=FALSE, warning = FALSE}
library(knitr)
opts_chunk$set(echo=TRUE, tidy=FALSE, include=TRUE, message=FALSE,
dpi = 100, cache = FALSE, warning = FALSE)
opts_knit$set(root.dir = "../")
options(bitmapType = "cairo")
```
# Read data, functions and packages
```{r read data}
source("R/ReadPackages.R")
source("R/Functions.R")
source("R/ReadData.R")
source("R/ThemesColors.R")
source("R/Helpers.R")
```
# Proportions overview
## Handle data and calculate p values
```{r create matrix}
df_pop
mat_complete <- rbind(
df_facs %>%
left_join(., df_meta %>% select(PatientID, `CITEseq`)) %>%
filter(`CITEseq`=="-") %>%
select(PatientID, Population, Prop=FACS),
df_freq %>%
mutate(RNA=ifelse(is.nan(RNA), 0, RNA)) %>%
select(PatientID, Population, Prop=RNA)) %>%
filter(Population %in% df_pop$Population) %>%
left_join(., df_pop) %>%
select(-Population) %>%
pivot_wider(names_from = "IdentI", values_from = "Prop", values_fill = 0) %>%
column_to_rownames("PatientID")
cl_order <- c(6,1,2,9,8,13,15,11,12,16,3,5,14,19)
names(labels_cl_parsed) <- as.character(cluster_order)
df_freqPlot <-
mat_complete %>%
rownames_to_column("PatientID") %>%
pivot_longer(cols=2:ncol(.), names_to = "IdentI", values_to = "Prop") %>%
add_entity() %>%
mutate(Entity=factor(Entity, levels = c("rLN", "DLBCL", "MCL", "FL", "MZL"))) %>%
mutate(IdentI=factor(IdentI, levels=cl_order)) %>%
mutate(Label=factor(IdentI, levels=cl_order, labels = labels_cl_parsed[as.character(cl_order)])) %>%
group_by(Entity, IdentI) %>%
mutate(outlier = (Prop > quantile(Prop, 0.75) + IQR(Prop) * 1.5) | (Prop < quantile(Prop, 0.25) - IQR(Prop) * 1.5))
df_medianLines <- df_freqPlot %>%
filter(Entity=="rLN") %>%
group_by(IdentI, Label) %>%
summarise(MedianProp=median(Prop))
df_freqPlot_pvalues <-
df_freqPlot %>%
group_by(IdentI) %>%
wilcox_test(data=., formula = Prop ~ Entity, detailed = T, ref.group = "rLN") %>%
adjust_pvalue(method = "BH") %>%
select(IdentI, Entity=group2, p.adj, estimate) %>%
mutate(Entity=factor(Entity, levels = c("rLN", "DLBCL", "MCL", "FL", "MZL"))) %>%
mutate(p.adj=ifelse(p.adj>0.05, NA, p.adj)) %>%
mutate(p.adj_s=format(p.adj, scientific = TRUE, digits=1)) %>%
mutate(p.adj_f=case_when(p.adj > 0.05 ~ "NA",
p.adj==0.05 ~ "0.05",
p.adj < 0.05 & p.adj > 0.001 ~ as.character(round(p.adj, 3)),
p.adj==0.001 ~ "0.001",
p.adj < 0.001 ~ p.adj_s)) %>%
filter(!is.na(p.adj)) %>%
left_join(., df_freqPlot %>% select(IdentI, Label) %>% distinct) %>%
left_join(., data.frame(IdentI=factor(cl_order), height=c(52.5, 52.5, 21.5, 21.5, 22, 22, 20, 20, 26, 26, 80, 80, 21, 21)))
```
## Plot
```{r freq overview, fig.height=13, fig.width=5.2}
p <- list()
for(i in c(1:7)){
y <- list(c(1,6),c(2,9),c(8,13),c(15,11),c(12,16),c(3,5),c(14,19))[[i]]
ylim <- c(70,30,32,28,35,110,28)
p[[i]] <-
ggplot(data=df_freqPlot %>% filter(IdentI %in% y) %>%
mutate(Label=factor(Label, levels = labels_cl_parsed[as.character(y)])),
aes(y=Prop, x=Entity, fill=IdentI))+
geom_hline(data=df_medianLines %>%filter(IdentI %in% y), aes(yintercept=MedianProp),
size=0.25, linetype="dashed", color="grey60")+
geom_boxplot(width=0.4, outlier.shape = 21, outlier.size = 1, outlier.color = "white",
outlier.alpha = 0, show.legend = F, size=0.25)+
ggbeeswarm::geom_beeswarm(data = function(x) dplyr::filter_(x, ~ outlier), cex = 3, stroke=0.25,
groupOnX = TRUE, shape = 21, size = 1, color = "white", alpha = 1)+
geom_text(data=df_freqPlot_pvalues %>% filter(IdentI %in% y),
inherit.aes = F, aes(y=height, x=Entity, label=p.adj_f), hjust=0.1, size=2.3, angle=45)+
scale_fill_manual(values = colors_umap_cl)+
scale_y_continuous(name="% of total T-cells", limits=c(0,ylim[i]))+
scale_x_discrete(expand = c(0.17,0.17))+
facet_wrap(~Label, ncol = 2, labeller = label_parsed)+
mytheme_1+
theme(axis.title.x = element_blank(),
strip.background = element_rect(color=NA),
plot.margin = unit(c(0,0,0,0), units = "cm"),
plot.title = element_text(margin = unit(c(0,0,0,0), units = "cm")),
panel.border = element_rect(size = 0.5),
axis.text.x = element_text(angle=45, hjust = 1))
if(i!=7){
p[[i]] <- p[[i]]+
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank())
}
if(i!=4){
p[[i]] <- p[[i]]+
theme(axis.title.y = element_blank())
}
if(i==1){
p[[i]] <- p[[i]]+
labs(tag = "A")+
theme(plot.tag = element_text(vjust = -0.5))
}
}
plot_freq <- wrap_plots(p, ncol = 1)
plot_freq
ggsave(width = 9, height = 21, units = "cm", filename = "Figure3_p1.pdf")
```
# Principal component analysis (PCA)
```{r pca, fig.width=6, fig.height=4}
pca_seq <- prcomp(mat_complete, scale. = T, center = T)
p1 <-
pca_seq$x %>%
data.frame() %>%
rownames_to_column("PatientID") %>%
add_entity() %>%
ggplot(aes(x=PC1, y=-PC2, fill=Entity))+
geom_point(size=1.75, shape=21, stroke=0.25, color="white")+
scale_fill_brewer(palette = "Paired", limits=c("DLBCL", "MCL", "FL", "MZL", "rLN"))+
guides(fill=guide_legend(override.aes = list(size=2)))+
ylab("PC2")+
xlab("PC1")+
mytheme_1+
coord_cartesian(clip = "off")+
theme(legend.position = "top",
legend.title = element_blank(),
legend.spacing.x = unit("cm", x = 0.05),
legend.box.margin = unit(c(0,0,-0.35,0), "cm"),
plot.tag = element_text(vjust = -2.5),
plot.margin = unit(c(0,0.25,0,-0.25), units = "cm"),
plot.background = element_rect(fill = "transparent",
colour = NA_character_),
panel.border = element_rect(size=0.25),
legend.key.height = unit("cm", x = 0.36),
axis.title.x = element_text(margin = unit(c(-1,0,0,0), units = "cm")),
legend.key.width = unit("cm", x = 0.26))+
labs(tag = "B")
pc1 <-
pca_seq$rotation %>%
data.frame %>%
select(PC1) %>%
rownames_to_column("IdentI") %>%
top_n(4, abs(PC1)) %>%
arrange(-PC1)
p2 <- ggplot(pc1, aes(y=PC1, x=IdentI, fill=IdentI))+
geom_bar(stat = "identity", width = 0.4, color=NA, alpha=0.5)+
scale_fill_manual(values = colors_umap_cl, limits=factor(cluster_order),
labels=unlist(labels_cl))+
scale_x_discrete(limits=pc1$IdentI, labels=labels_cl[pc1$IdentI] %>% unlist())+
geom_hline(yintercept = 0, size=0.25)+
scale_y_continuous(limits=c(-0.5, 0.5), breaks = c(-0.5, 0, 0.5), name = "PC1")+
mytheme_1+
coord_cartesian(clip = "off")+
theme(axis.title.x = element_blank(),
plot.margin = unit(c(0.25,0,0,0), units = "cm"),
axis.text = element_text(size=6.5),
plot.tag = element_text(vjust = -2.5),
axis.text.x = element_text(angle=45, hjust = 1))+
labs(tag = "C")
pc2 <- pca_seq$rotation %>%
data.frame %>%
select(PC2) %>%
rownames_to_column("IdentI") %>%
top_n(4, abs(PC2)) %>%
arrange(PC2)
p3 <- ggplot(pc2, aes(y=-PC2, x=IdentI, fill=IdentI))+
geom_bar(stat = "identity", width = 0.4, color=NA, alpha=0.5)+
scale_fill_manual(values = colors_umap_cl, limits=factor(cluster_order),
labels=unlist(labels_cl))+
scale_x_discrete(limits=pc2$IdentI, labels=labels_cl[pc2$IdentI] %>% unlist())+
geom_hline(yintercept = 0, size=0.25)+
scale_y_continuous(limits=c(-0.5, 0.5), breaks = c(-0.5, 0, 0.5), name = "PC2")+
coord_cartesian(clip = "off")+
mytheme_1+
theme(axis.title.x = element_blank(),
plot.margin = unit(c(0,0,0.25,0), units = "cm"),
axis.text.x = element_text(angle=45, hjust = 1))+
labs(tag = "D")
p1+(p2/p3)+plot_layout(widths = c(1,0.5))
#ggsave(width = 9.75, height = 7.5, units = "cm", filename = "Figure3_p2.pdf")
```
# Lasso prediction
## Dendrogram
```{r dendrogram, fig.width=3, fig.height=3}
# Create data frame
data <- data.frame(
level1="all",
level2=c("rLN",
"MZL",
"MCL",
"FL",
"DLBCL")
)
edges_level1_2 <- data %>% select(level1, level2) %>% unique %>% rename(from=level1, to=level2)
edge_list=rbind(edges_level1_2)
vert <- data.frame(
name=unique(c(data$level1, data$level2))) %>%
mutate(label=c(NA, "rLN", "MZL", "MCL", "FL", "DLBCL"))
# Create graph object
mygraph_lasso <- graph_from_data_frame( edge_list ,vertices = vert)
# Plot dendrogramm
ggraph(mygraph_lasso, layout = 'tree', circular = FALSE)+
geom_edge_diagonal(strength = 1.4, edge_width=0.25)+
geom_node_point(shape=21, size=3.5, color="white", stroke=2, alpha=c(0,1,1,1,1,1),
fill=c(NA, brewer.pal(name = "Paired", 5)[c(5,4,3,2,1)]))+
coord_flip(clip = "off")+
scale_y_reverse()+
theme_void()+
theme(legend.position = "right",
plot.margin = margin(0.25,0.25,0.25,0, unit = "cm"),
plot.title = element_text(hjust=0.4, size=7, face = "bold"),)
#ggsave(width = 3.5, height = 3.4, units = "cm", filename = "Figure3_p3.pdf")
```
## Model
```{r lasso model, fig.height=3, fig.width=3}
total <- mat_complete %>%
rownames_to_column("PatientID") %>%
left_join(., df_meta %>% select(PatientID, Tcells)) %>%
add_entity() %>%
mutate_if(is.numeric, ~./100)
cell_types <- total %>% select(-Entity, -PatientID) %>% colnames()
gt <- my_glmnet(total)
```
## Confusion matrix
```{r lasso plot, fig.height=3, fig.width=3}
entities <- c("DLBCL", "MCL", "FL", "MZL", "rLN")
tbl <- gt$confusion_table
class(tbl) = "matrix"
tbl = tbl / rowSums(tbl) # convert to probability estimates
tbl = tbl[entities, entities]
tbl %>% data.frame() %>%
rownames_to_column("truth") %>%
pivot_longer(cols = 2:ncol(.), names_to = "predicted", values_to = "Prop") %>%
ggplot(aes(x=truth, y=predicted, fill=Prop))+
geom_tile()+
scale_x_discrete(limits=rev(entities), expand = c(0,0), position = "top")+
scale_y_discrete(limits=entities, expand = c(0,0), name="Truth")+
geom_hline(yintercept = c(1.5,2.5,3.5,4.5),size=0.25, color="black")+
geom_vline(xintercept = c(1.5,2.5,3.5,4.5),size=0.25, color="black")+
scale_fill_gradientn(colours = colorRampPalette(RColorBrewer::brewer.pal(9, "BuPu"))(100), name="Prop", limits=c(0,0.8))+
xlab("Reference")+
mytheme_1+
coord_fixed()+
theme(panel.border = element_blank(),
legend.position = "right",
legend.key.height = unit(0.3, "cm"),
legend.key.width = unit(0.3, "cm"),
legend.box.spacing = unit(0.1, "cm"),
legend.box.margin = unit(c(0,-0.25,0,0.05), units = "cm"),
plot.margin = unit(c(0.1,0.1,0.1,0.1), units = "cm"),
axis.text.x = element_text(angle=45, hjust = 0),
axis.ticks = element_blank(),
axis.title.y = element_blank())
#ggsave(width = 5.5, height = 5.5, units = "cm", filename = "Figure3_p4.pdf")
```
# Patient characteristics
```{r Patient characteristics, fig.width=5.25}
df_char <- mat_complete %>%
rownames_to_column("PatientID") %>%
pivot_longer(cols=2:ncol(.), names_to = "IdentI", values_to = "Prop") %>%
left_join(., df_meta %>% select(PatientID, Status, Pretreatment, Entity, Age) %>% distinct, by="PatientID") %>%
mutate(Entity=factor(Entity, levels = c("DLBCL", "MCL", "FL", "MZL", "rLN")))
plot_status_ident1 <-
df_char %>%
filter(IdentI %in% c(1), Entity!="rLN") %>%
ggplot(aes(x=Status, y=Prop))+
geom_boxplot(width=0.35, size=0.25, aes(fill=Entity), position = position_dodge(width = 0.6),
outlier.shape = 21, outlier.size = 1, outlier.color = "white", outlier.alpha = 0.75)+
stat_compare_means(comparisons = list(c("Initial diagnosis", "Relapse")), vjust = -0.35, label.y = c(28),
size=2.5, tip.length = 0.02, bracket.size = 0.25)+
scale_fill_brewer(palette = "Paired", limits=c("DLBCL", "MCL", "FL", "MZL", "rLN"))+
ggtitle(labels_cl[["1"]])+
scale_y_continuous(name="% of total T-cells", expand = c(0.05,0.15), limits=c(0,35))+
scale_x_discrete(labels=c("Initial \ndiagnosis", "Relapse"))+
mytheme_1+
theme_characteristics+
labs(tag = "F")
plot_age_ident1 <-
df_char %>%
filter(IdentI %in% c(1), Entity!="rLN") %>%
ggplot(aes(x=Age, y=Prop))+
geom_point(size=1.25, color="grey65", shape=21, stroke=0, aes(fill=Entity))+
geom_smooth(method = "lm", color="black", size=0.25, linetype="dashed", alpha=0.25, formula = 'y ~ x')+
stat_cor(size=2.5)+
scale_fill_brewer(palette = "Paired", limits=c("DLBCL", "MCL", "FL", "MZL", "rLN"))+
scale_y_continuous(name="% of total T-cells", expand = c(0.05,0.15), limits=c(0,35))+
ggtitle(labels_cl[["1"]])+
mytheme_1+
theme_characteristics+
theme(axis.title.x = element_text(size=7, vjust = 4))+
labs(tag = "G")
plot_status_ident9 <-
df_char %>%
filter(IdentI %in% c(9), Entity!="rLN") %>%
ggplot(aes(x=Status, y=Prop))+
geom_boxplot(width=0.35, size=0.25, aes(fill=Entity), position = position_dodge(width = 0.6),
outlier.shape = 21, outlier.size = 1, outlier.color = "white", outlier.alpha = 0.75)+
stat_compare_means(comparisons = list(c("Initial diagnosis", "Relapse")), vjust = -0.35, label.y = c(28),
size=2.5, tip.length = 0.02, bracket.size = 0.25)+
scale_fill_brewer(palette = "Paired", limits=c("DLBCL", "MCL", "FL", "MZL", "rLN"))+
ggtitle(labels_cl[["9"]])+
scale_y_continuous(name="% of total T-cells", expand = c(0.05,0.15), limits=c(0,35))+
scale_x_discrete(labels=c("Initial \ndiagnosis", "Relapse"))+
mytheme_1+
theme_characteristics+
labs(tag = "H")
plot_status_ident13 <-
df_char %>%
filter(IdentI %in% c(13), Entity!="rLN") %>%
ggplot(aes(x=Status, y=Prop))+
geom_boxplot(width=0.35, size=0.25, aes(fill=Entity), position = position_dodge(width = 0.6),
outlier.shape = 21, outlier.size = 1, outlier.color = "white", outlier.alpha = 0.75)+
stat_compare_means(comparisons = list(c("Initial diagnosis", "Relapse")), vjust = -0.35, label.y = c(29),
size=2.5, tip.length = 0.02, bracket.size = 0.25)+
scale_fill_brewer(palette = "Paired", limits=c("DLBCL", "MCL", "FL", "MZL", "rLN"))+
ggtitle(labels_cl[["13"]])+
scale_y_continuous(name="% of total T-cells", expand = c(0.05,0.15), limits=c(0,35))+
scale_x_discrete(labels=c("Initial \ndiagnosis", "Relapse"))+
mytheme_1+
theme_characteristics+
labs(tag = "I")
plot_status_ident1+plot_age_ident1+plot_status_ident9+plot_status_ident13
#ggsave(width = 9.5, height = 9.65, units = "cm", filename = "Figure3_p5.pdf")
```
# Session info
```{r session info}
sessionInfo()
```