[9905a0]: / figures / Figure3.Rmd

Download this file

409 lines (343 with data), 15.0 kB

---
title: "Figure 3"
author: Tobias Roider
date: "Last compiled on `r format(Sys.time(), '%d %B, %Y, %X')`"
output: 
  rmdformats::readthedown: 
  
editor_options: 
  chunk_output_type: console
---

```{r options, include=FALSE, warning = FALSE}

library(knitr)
opts_chunk$set(echo=TRUE, tidy=FALSE, include=TRUE, message=FALSE,
               dpi = 100, cache = FALSE, warning = FALSE)
opts_knit$set(root.dir = "../")
options(bitmapType = "cairo")

```

# Read data, functions and packages
```{r read data}

source("R/ReadPackages.R")
source("R/Functions.R")
source("R/ReadData.R")
source("R/ThemesColors.R")
source("R/Helpers.R")

```

# Proportions overview
## Handle data and calculate p values
```{r create matrix}

df_pop 

mat_complete <- rbind(
  df_facs %>% 
    left_join(., df_meta %>% select(PatientID, `CITEseq`)) %>% 
    filter(`CITEseq`=="-") %>% 
    select(PatientID, Population, Prop=FACS),
  df_freq %>% 
    mutate(RNA=ifelse(is.nan(RNA), 0, RNA)) %>% 
    select(PatientID, Population, Prop=RNA)) %>% 
  filter(Population %in% df_pop$Population) %>% 
  left_join(., df_pop) %>% 
  select(-Population) %>% 
  pivot_wider(names_from = "IdentI", values_from = "Prop", values_fill = 0) %>% 
  column_to_rownames("PatientID") 

cl_order <- c(6,1,2,9,8,13,15,11,12,16,3,5,14,19)
names(labels_cl_parsed) <- as.character(cluster_order)

df_freqPlot <- 
  mat_complete %>% 
  rownames_to_column("PatientID") %>% 
  pivot_longer(cols=2:ncol(.), names_to = "IdentI", values_to = "Prop") %>% 
  add_entity() %>% 
  mutate(Entity=factor(Entity, levels = c("rLN", "DLBCL", "MCL", "FL", "MZL"))) %>% 
  mutate(IdentI=factor(IdentI, levels=cl_order)) %>% 
  mutate(Label=factor(IdentI, levels=cl_order, labels = labels_cl_parsed[as.character(cl_order)])) %>% 
  group_by(Entity, IdentI) %>% 
  mutate(outlier = (Prop > quantile(Prop, 0.75) + IQR(Prop) * 1.5) | (Prop < quantile(Prop, 0.25) - IQR(Prop) * 1.5))

df_medianLines <- df_freqPlot %>% 
  filter(Entity=="rLN") %>% 
  group_by(IdentI, Label) %>% 
  summarise(MedianProp=median(Prop))

df_freqPlot_pvalues <- 
  df_freqPlot %>% 
  group_by(IdentI) %>% 
  wilcox_test(data=., formula = Prop ~ Entity, detailed = T, ref.group = "rLN") %>% 
  adjust_pvalue(method = "BH") %>% 
  select(IdentI, Entity=group2, p.adj, estimate) %>% 
  mutate(Entity=factor(Entity, levels = c("rLN", "DLBCL", "MCL", "FL", "MZL"))) %>% 
  mutate(p.adj=ifelse(p.adj>0.05, NA, p.adj)) %>% 
  mutate(p.adj_s=format(p.adj, scientific = TRUE, digits=1)) %>% 
  mutate(p.adj_f=case_when(p.adj > 0.05 ~ "NA",
                           p.adj==0.05 ~ "0.05",
                           p.adj < 0.05 & p.adj > 0.001 ~ as.character(round(p.adj, 3)),
                           p.adj==0.001 ~ "0.001",
                           p.adj < 0.001 ~ p.adj_s)) %>% 
  filter(!is.na(p.adj)) %>% 
  left_join(., df_freqPlot %>% select(IdentI,  Label) %>% distinct) %>% 
  left_join(., data.frame(IdentI=factor(cl_order), height=c(52.5, 52.5, 21.5, 21.5, 22, 22, 20, 20, 26, 26, 80, 80, 21, 21)))

```

## Plot
```{r freq overview, fig.height=13, fig.width=5.2}

p <- list()

for(i in c(1:7)){
  
  y <- list(c(1,6),c(2,9),c(8,13),c(15,11),c(12,16),c(3,5),c(14,19))[[i]]
  ylim <- c(70,30,32,28,35,110,28)
  
  p[[i]] <- 
    ggplot(data=df_freqPlot %>% filter(IdentI %in% y) %>% 
             mutate(Label=factor(Label, levels = labels_cl_parsed[as.character(y)])), 
             aes(y=Prop, x=Entity, fill=IdentI))+
    geom_hline(data=df_medianLines %>%filter(IdentI %in% y), aes(yintercept=MedianProp),
               size=0.25, linetype="dashed", color="grey60")+
    geom_boxplot(width=0.4, outlier.shape = 21, outlier.size = 1, outlier.color = "white", 
                 outlier.alpha = 0, show.legend = F, size=0.25)+
    ggbeeswarm::geom_beeswarm(data = function(x) dplyr::filter_(x, ~ outlier), cex = 3, stroke=0.25, 
                              groupOnX = TRUE, shape = 21, size = 1, color = "white", alpha = 1)+
    geom_text(data=df_freqPlot_pvalues %>% filter(IdentI %in% y), 
              inherit.aes = F, aes(y=height, x=Entity, label=p.adj_f), hjust=0.1, size=2.3, angle=45)+
    scale_fill_manual(values = colors_umap_cl)+
    scale_y_continuous(name="% of total T-cells", limits=c(0,ylim[i]))+
    scale_x_discrete(expand = c(0.17,0.17))+
    facet_wrap(~Label, ncol = 2, labeller = label_parsed)+
    mytheme_1+
    theme(axis.title.x = element_blank(),
          strip.background = element_rect(color=NA),
          plot.margin = unit(c(0,0,0,0), units = "cm"),
          plot.title = element_text(margin = unit(c(0,0,0,0), units = "cm")),
          panel.border = element_rect(size = 0.5),
          axis.text.x = element_text(angle=45, hjust = 1))
  
  if(i!=7){
    p[[i]] <- p[[i]]+
      theme(axis.text.x = element_blank(),
            axis.ticks.x = element_blank())
  }
  
  if(i!=4){
    p[[i]] <- p[[i]]+
      theme(axis.title.y = element_blank())
    }
  
  if(i==1){
    p[[i]] <- p[[i]]+
      labs(tag = "A")+
      theme(plot.tag = element_text(vjust = -0.5))
}
  
  
}

plot_freq <- wrap_plots(p, ncol = 1)
plot_freq
ggsave(width = 9, height = 21, units = "cm", filename = "Figure3_p1.pdf")

```

# Principal component analysis (PCA)
```{r pca, fig.width=6, fig.height=4}

pca_seq <- prcomp(mat_complete, scale. = T, center = T)

p1 <- 
  pca_seq$x %>% 
  data.frame() %>% 
  rownames_to_column("PatientID") %>% 
  add_entity() %>% 
  ggplot(aes(x=PC1, y=-PC2, fill=Entity))+
  geom_point(size=1.75, shape=21, stroke=0.25, color="white")+
  scale_fill_brewer(palette = "Paired", limits=c("DLBCL", "MCL", "FL", "MZL", "rLN"))+
  guides(fill=guide_legend(override.aes = list(size=2)))+
  ylab("PC2")+
  xlab("PC1")+
  mytheme_1+
  coord_cartesian(clip = "off")+
  theme(legend.position = "top",
        legend.title = element_blank(),
        legend.spacing.x = unit("cm", x = 0.05),
        legend.box.margin = unit(c(0,0,-0.35,0), "cm"),
        plot.tag = element_text(vjust = -2.5),
        plot.margin = unit(c(0,0.25,0,-0.25), units = "cm"),
        plot.background = element_rect(fill = "transparent",
                                 colour = NA_character_),
        panel.border = element_rect(size=0.25),
        legend.key.height = unit("cm", x = 0.36),
        axis.title.x =  element_text(margin = unit(c(-1,0,0,0), units = "cm")),
        legend.key.width = unit("cm", x = 0.26))+
  labs(tag = "B")

pc1 <- 
  pca_seq$rotation %>% 
  data.frame %>% 
  select(PC1) %>% 
  rownames_to_column("IdentI") %>% 
  top_n(4, abs(PC1)) %>%
  arrange(-PC1)

p2 <- ggplot(pc1, aes(y=PC1, x=IdentI, fill=IdentI))+
  geom_bar(stat = "identity", width = 0.4, color=NA, alpha=0.5)+
  scale_fill_manual(values = colors_umap_cl, limits=factor(cluster_order),
                    labels=unlist(labels_cl))+
  scale_x_discrete(limits=pc1$IdentI, labels=labels_cl[pc1$IdentI] %>% unlist())+
  geom_hline(yintercept = 0, size=0.25)+
  scale_y_continuous(limits=c(-0.5, 0.5), breaks = c(-0.5, 0, 0.5), name = "PC1")+
  mytheme_1+
  coord_cartesian(clip = "off")+
  theme(axis.title.x = element_blank(),
        plot.margin = unit(c(0.25,0,0,0), units = "cm"),
        axis.text = element_text(size=6.5),
        plot.tag = element_text(vjust = -2.5),
        axis.text.x = element_text(angle=45, hjust = 1))+
  labs(tag = "C")

pc2 <- pca_seq$rotation %>% 
  data.frame %>% 
  select(PC2) %>% 
  rownames_to_column("IdentI") %>% 
  top_n(4, abs(PC2)) %>% 
  arrange(PC2)

p3 <- ggplot(pc2, aes(y=-PC2, x=IdentI, fill=IdentI))+
  geom_bar(stat = "identity", width = 0.4, color=NA, alpha=0.5)+
  scale_fill_manual(values = colors_umap_cl, limits=factor(cluster_order),
                    labels=unlist(labels_cl))+
  scale_x_discrete(limits=pc2$IdentI, labels=labels_cl[pc2$IdentI] %>% unlist())+
  geom_hline(yintercept = 0, size=0.25)+
  scale_y_continuous(limits=c(-0.5, 0.5), breaks = c(-0.5, 0, 0.5), name = "PC2")+
  coord_cartesian(clip = "off")+
  mytheme_1+
  theme(axis.title.x = element_blank(),
        plot.margin = unit(c(0,0,0.25,0), units = "cm"),
        axis.text.x = element_text(angle=45, hjust = 1))+
  labs(tag = "D")

p1+(p2/p3)+plot_layout(widths = c(1,0.5))
#ggsave(width = 9.75, height = 7.5, units = "cm", filename = "Figure3_p2.pdf")

```

# Lasso prediction
## Dendrogram
```{r dendrogram, fig.width=3, fig.height=3}
  
# Create data frame
data <- data.frame(
  level1="all",
  level2=c("rLN", 
           "MZL",
           "MCL",
           "FL",
           "DLBCL")
)

edges_level1_2 <- data %>% select(level1, level2) %>% unique %>% rename(from=level1, to=level2)
edge_list=rbind(edges_level1_2)
vert <- data.frame(
  name=unique(c(data$level1, data$level2))) %>% 
  mutate(label=c(NA, "rLN", "MZL", "MCL", "FL", "DLBCL"))

# Create graph object
mygraph_lasso <- graph_from_data_frame( edge_list ,vertices = vert)

# Plot dendrogramm
ggraph(mygraph_lasso, layout = 'tree', circular = FALSE)+ 
  geom_edge_diagonal(strength = 1.4, edge_width=0.25)+
  geom_node_point(shape=21, size=3.5, color="white", stroke=2, alpha=c(0,1,1,1,1,1),
                  fill=c(NA, brewer.pal(name = "Paired", 5)[c(5,4,3,2,1)]))+
  coord_flip(clip = "off")+
  scale_y_reverse()+
  theme_void()+
  theme(legend.position = "right",
        plot.margin = margin(0.25,0.25,0.25,0, unit = "cm"),  
        plot.title = element_text(hjust=0.4, size=7, face = "bold"),)

#ggsave(width = 3.5, height = 3.4, units = "cm", filename = "Figure3_p3.pdf")

```

## Model
```{r lasso model, fig.height=3, fig.width=3}

total <- mat_complete %>% 
  rownames_to_column("PatientID") %>% 
  left_join(., df_meta %>% select(PatientID, Tcells)) %>% 
  add_entity() %>% 
  mutate_if(is.numeric, ~./100)

cell_types <- total %>% select(-Entity, -PatientID) %>% colnames()
gt <- my_glmnet(total)

```

## Confusion matrix
```{r lasso plot, fig.height=3, fig.width=3}

entities <- c("DLBCL", "MCL", "FL", "MZL", "rLN")

tbl <- gt$confusion_table
class(tbl) = "matrix"
tbl = tbl / rowSums(tbl) # convert to probability estimates
tbl = tbl[entities, entities]

tbl %>% data.frame() %>% 
  rownames_to_column("truth") %>% 
  pivot_longer(cols = 2:ncol(.), names_to = "predicted", values_to = "Prop") %>% 
  ggplot(aes(x=truth, y=predicted, fill=Prop))+
  geom_tile()+
  scale_x_discrete(limits=rev(entities), expand = c(0,0), position = "top")+
  scale_y_discrete(limits=entities, expand = c(0,0), name="Truth")+
  geom_hline(yintercept = c(1.5,2.5,3.5,4.5),size=0.25, color="black")+
  geom_vline(xintercept = c(1.5,2.5,3.5,4.5),size=0.25, color="black")+
  scale_fill_gradientn(colours = colorRampPalette(RColorBrewer::brewer.pal(9, "BuPu"))(100), name="Prop", limits=c(0,0.8))+
  xlab("Reference")+
  mytheme_1+
  coord_fixed()+
  theme(panel.border = element_blank(),
        legend.position = "right",
        legend.key.height = unit(0.3, "cm"),
        legend.key.width = unit(0.3, "cm"),
        legend.box.spacing = unit(0.1, "cm"),
        legend.box.margin = unit(c(0,-0.25,0,0.05), units = "cm"),
        plot.margin = unit(c(0.1,0.1,0.1,0.1), units = "cm"),
        axis.text.x = element_text(angle=45, hjust = 0),
        axis.ticks = element_blank(),
        axis.title.y = element_blank())

#ggsave(width = 5.5, height = 5.5, units = "cm", filename = "Figure3_p4.pdf")

```

# Patient characteristics
```{r Patient characteristics, fig.width=5.25}

df_char <- mat_complete %>% 
  rownames_to_column("PatientID") %>% 
  pivot_longer(cols=2:ncol(.), names_to = "IdentI", values_to = "Prop") %>% 
  left_join(., df_meta %>% select(PatientID, Status, Pretreatment, Entity, Age) %>% distinct, by="PatientID") %>% 
  mutate(Entity=factor(Entity, levels = c("DLBCL", "MCL", "FL", "MZL", "rLN")))

plot_status_ident1 <- 
  df_char %>% 
  filter(IdentI %in% c(1), Entity!="rLN") %>% 
  ggplot(aes(x=Status, y=Prop))+
  geom_boxplot(width=0.35, size=0.25, aes(fill=Entity),  position = position_dodge(width = 0.6),
               outlier.shape = 21, outlier.size = 1, outlier.color = "white", outlier.alpha = 0.75)+
  stat_compare_means(comparisons = list(c("Initial diagnosis", "Relapse")), vjust = -0.35, label.y = c(28),
                     size=2.5, tip.length = 0.02, bracket.size = 0.25)+
  scale_fill_brewer(palette = "Paired", limits=c("DLBCL", "MCL", "FL", "MZL", "rLN"))+
  ggtitle(labels_cl[["1"]])+
  scale_y_continuous(name="% of total T-cells", expand = c(0.05,0.15), limits=c(0,35))+
  scale_x_discrete(labels=c("Initial \ndiagnosis", "Relapse"))+
  mytheme_1+
  theme_characteristics+
  labs(tag = "F")

plot_age_ident1 <- 
  df_char %>% 
  filter(IdentI %in% c(1), Entity!="rLN") %>% 
  ggplot(aes(x=Age, y=Prop))+
  geom_point(size=1.25, color="grey65", shape=21, stroke=0, aes(fill=Entity))+
  geom_smooth(method = "lm", color="black", size=0.25, linetype="dashed", alpha=0.25,  formula = 'y ~ x')+
  stat_cor(size=2.5)+
  scale_fill_brewer(palette = "Paired", limits=c("DLBCL", "MCL", "FL", "MZL", "rLN"))+
  scale_y_continuous(name="% of total T-cells", expand = c(0.05,0.15), limits=c(0,35))+
  ggtitle(labels_cl[["1"]])+
  mytheme_1+
  theme_characteristics+
  theme(axis.title.x = element_text(size=7, vjust = 4))+
  labs(tag = "G")

plot_status_ident9 <- 
  df_char %>% 
  filter(IdentI %in% c(9), Entity!="rLN") %>% 
  ggplot(aes(x=Status, y=Prop))+
  geom_boxplot(width=0.35, size=0.25, aes(fill=Entity),  position = position_dodge(width = 0.6),
               outlier.shape = 21, outlier.size = 1, outlier.color = "white", outlier.alpha = 0.75)+
  stat_compare_means(comparisons = list(c("Initial diagnosis", "Relapse")), vjust = -0.35, label.y = c(28),
                     size=2.5, tip.length = 0.02, bracket.size = 0.25)+
  scale_fill_brewer(palette = "Paired", limits=c("DLBCL", "MCL", "FL", "MZL", "rLN"))+
  ggtitle(labels_cl[["9"]])+
  scale_y_continuous(name="% of total T-cells", expand = c(0.05,0.15), limits=c(0,35))+
  scale_x_discrete(labels=c("Initial \ndiagnosis", "Relapse"))+
  mytheme_1+
  theme_characteristics+
  labs(tag = "H")

plot_status_ident13 <- 
  df_char %>% 
  filter(IdentI %in% c(13), Entity!="rLN") %>% 
  ggplot(aes(x=Status, y=Prop))+
  geom_boxplot(width=0.35, size=0.25, aes(fill=Entity),  position = position_dodge(width = 0.6),
               outlier.shape = 21, outlier.size = 1, outlier.color = "white", outlier.alpha = 0.75)+
  stat_compare_means(comparisons = list(c("Initial diagnosis", "Relapse")), vjust = -0.35, label.y = c(29),
                     size=2.5, tip.length = 0.02, bracket.size = 0.25)+
  scale_fill_brewer(palette = "Paired", limits=c("DLBCL", "MCL", "FL", "MZL", "rLN"))+
  ggtitle(labels_cl[["13"]])+
  scale_y_continuous(name="% of total T-cells", expand = c(0.05,0.15), limits=c(0,35))+
  scale_x_discrete(labels=c("Initial \ndiagnosis", "Relapse"))+
  mytheme_1+
  theme_characteristics+
  labs(tag = "I")

plot_status_ident1+plot_age_ident1+plot_status_ident9+plot_status_ident13

#ggsave(width = 9.5, height = 9.65, units = "cm", filename = "Figure3_p5.pdf")

```

# Session info
```{r session info}

sessionInfo()

```