[9905a0]: / figures / SupplementaryFigures.Rmd

Download this file

2140 lines (1836 with data), 90.1 kB

---
title: "Supplementary Figures"
author: Tobias Roider
date: "Last compiled on `r format(Sys.time(), '%d %B, %Y, %X')`"
output: 
  rmdformats::readthedown:
  
editor_options: 
  chunk_output_type: console
---

```{r options, include=FALSE, warning = FALSE}

library(knitr)

# Chunk defaults applied to every code chunk of this document.
opts_chunk$set(
  echo = TRUE,
  include = TRUE,
  tidy = FALSE,
  message = FALSE,
  warning = FALSE,
  cache = FALSE,
  cache.lazy = FALSE,
  dpi = 100
)

# Chunks are evaluated with "../" as their root directory, so the relative
# paths used below (e.g. "R/...") resolve from there.
opts_knit$set(root.dir = "../")

# Select the cairo backend for bitmap graphics devices.
options(bitmapType = "cairo")

```

# Read data, functions and packages
```{r read data}

source("R/ReadPackages.R")
source("R/Functions.R")
source("R/ReadData.R")
source("R/ThemesColors.R")
source("R/Helpers.R")

```

# Supplementary Figure 1
## Patient characteristics
```{r patient characteristics SF1 part 1}

# Panel A, upper part: one bar per sample (x = Order) with the T-cell
# percentage as bar height, filled by Entity.
p1 <- ggplot(df_meta, aes(x = Order, y = Tcells, fill = Entity)) +
  geom_bar(stat = "identity", color = "white", width = 0.5, size = 0.25) +
  # The sample count is mapped as a constant for every row of df_meta;
  # check_overlap drops the identical texts that would be drawn on top of
  # each other.
  geom_text(
    aes(x = 6, y = 84, label = paste0("n = ", nrow(df_meta))),
    check_overlap = TRUE,
    size = 2.75
  ) +
  scale_y_continuous(
    name = "% T-cells",
    limits = c(0, 90),
    expand = c(0.025, 0.025)
  ) +
  # The x positions are fixed to the labels "1" ... "101".
  scale_x_discrete(limits = as.character(1:101)) +
  scale_fill_manual(values = colors_characteristics) +
  mytheme_1 +
  # Do not clip drawing to the panel area.
  coord_cartesian(clip = "off") +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.y = element_text(size = 7),
    plot.tag = element_text(margin = unit(c(0, 0, 0, -0.75), "cm")),
    plot.margin = unit(c(0.1, 0.1, 0, 1), "lines")
  )

# Sizing and position constants of the annotation strip.
cex_ <- 0.6    # text scale of the row labels
pos_ <- -0.25  # x position of the row labels
size_ <- 2.4   # text size of the per-sample assay symbols

# Row label -> y position of the corresponding annotation row.
ann_rows <- c(
  "Entity" = 0,
  "Collection" = -1.75,
  "Sex" = -3.5,
  "CITE-seq" = -5.25,
  "TCR-seq" = -7,
  "Multi-IF" = -8.75,
  "Multi-flow" = -10.5
)

# One right-justified text grob per row, anchored at x = pos_.
ann_row_labels <- unname(Map(
  function(row_name, row_y) {
    annotation_custom(
      grob = textGrob(label = row_name, gp = gpar(cex = cex_), just = "right"),
      xmin = pos_, xmax = pos_, ymin = row_y, ymax = row_y
    )
  },
  names(ann_rows),
  ann_rows
))

# Panel A, lower part: coloured tiles for Entity / Status / Sex and one text
# row per assay column, aligned to the same x positions as the bar chart.
p_ann <-
  ggplot() +
  geom_tile(data = df_meta, aes(y = 0, x = Order, fill = Entity)) +
  geom_tile(data = df_meta, aes(y = -1.75, x = Order, fill = Status)) +
  geom_tile(data = df_meta, aes(y = -3.5, x = Order, fill = Sex)) +
  geom_text(data = df_meta, aes(y = -5.25, x = Order, label = `CITEseq`), size = size_) +
  geom_text(data = df_meta, aes(y = -7, x = Order, label = `TCRseq`), size = size_) +
  geom_text(data = df_meta, aes(y = -8.75, x = Order, label = `MultiIF`), size = size_) +
  geom_text(data = df_meta, aes(y = -10.5, x = Order, label = `MultiFlow`), size = size_) +
  ann_row_labels +
  scale_x_discrete(limits = as.character(1:101)) +
  scale_y_continuous(expand = c(0, 0), name = NULL) +
  # White vertical lines drawn over the tiles separate neighbouring samples.
  geom_vline(xintercept = seq(0.5, nrow(df_meta), 1), size = 1, color = "white") +
  scale_fill_manual(values = colors_characteristics) +
  coord_cartesian(clip = "off") +
  theme_void() +
  theme(
    legend.position = "none",
    plot.margin = unit(c(0, 0.1, 0, 0.1), "lines")
  )

# Build a tile plot that exists only to carry a horizontal fill legend for one
# column of df_meta; the legend is extracted from it with get_legend() below.
#
# fill_var      name (string) of the df_meta column mapped to fill
# legend_keys   values listed in the legend (passed as scale limits)
# legend_title  legend title; defaults to the column name
make_legend_plot <- function(fill_var, legend_keys, legend_title = fill_var) {
  ggplot() +
    geom_tile(data = df_meta, aes(y = 4, x = Order, fill = .data[[fill_var]])) +
    scale_x_discrete(expand = c(0, 0), name = NULL) +
    scale_y_discrete(expand = c(0, 0), name = NULL) +
    geom_vline(xintercept = seq(0.5, 51.5, 1), color = "white") +
    scale_fill_manual(
      values = colors_characteristics,
      name = legend_title,
      limits = legend_keys
    ) +
    guides(fill = guide_legend(direction = "horizontal", keywidth = 0.3, keyheight = 0.4)) +
    theme_void() +
    mytheme_1 +
    theme(
      legend.position = "bottom",
      legend.title = element_text(size = 7, face = "bold"),
      legend.text = element_text(size = 7)
    )
}

# Entity legend
plot_legend1 <- make_legend_plot("Entity", unique(df_meta$Entity))
legend1 <- get_legend(plot_legend1)

# Sex legend
plot_legend2 <- make_legend_plot("Sex", unique(df_meta$Sex))
legend2 <- get_legend(plot_legend2)

# Status legend, titled "Collection"; rows whose Status equals the string "NA"
# (or is missing) are left out of the legend keys.
plot_legend3 <- make_legend_plot(
  "Status",
  unique(filter(df_meta, Status != "NA")$Status),
  legend_title = "Collection"
)
legend3 <- get_legend(plot_legend3)

```

## Associations with overall T-cell frequencies
```{r patient characteristics SF1 part 2}

# Samples used for the association panels: CLL and rLN are excluded, a missing
# Tcells value is replaced by TcellsDx, and only samples with Tcells > 1 are
# kept. Built once and shared by panels B-D and by the test in panel E.
df_tmp <- df_meta %>%
  filter(!Entity %in% c("CLL", "rLN")) %>%
  mutate(Tcells = ifelse(is.na(Tcells), TcellsDx, Tcells)) %>%
  filter(Tcells > 1)

# Sex
p2 <- ggplot(df_tmp, aes(x = Sex, y = Tcells)) +
  geom_boxplot(width = 0.35, size = 0.25) +
  #ggbeeswarm::geom_beeswarm(size=0.8, shape=21, stroke=0.25, cex = 2.75, aes(fill=Entity))+
  stat_compare_means(
    comparisons = list(c("F", "M")),
    label.y = 78, size = 2.5, bracket.size = 0.25
  ) +
  scale_fill_manual(values = hue_pal()(5)) +
  scale_x_discrete(labels = c("Female", "Male")) +
  ylim(0, 90) +
  ylab("T-cells in %") +
  mytheme_1 +
  theme(
    axis.title.x = element_blank(),
    plot.tag = element_text(margin = unit(c(0, 0, 0, -0.75), "cm"))
  ) +
  labs(tag = "B")

# Status
p3 <- ggplot(df_tmp, aes(x = Status, y = Tcells)) +
  geom_boxplot(width = 0.35, size = 0.25) +
  #ggbeeswarm::geom_beeswarm(size=0.8, shape=21, stroke=0.25, cex = 2.75, aes(fill=Entity))+
  stat_compare_means(
    comparisons = list(c("Initial diagnosis", "Relapse")),
    label.y = 78, bracket.size = 0.25, size = 2.5
  ) +
  scale_x_discrete(labels = c("Initial \ndiagnosis", "Relapse")) +
  scale_fill_manual(values = hue_pal()(5)) +
  ylim(0, 90) +
  ylab("T-cells in %") +
  mytheme_1 +
  theme(axis.title.x = element_blank()) +
  labs(tag = "C")

# Age
p5 <- ggplot(df_tmp, aes(x = Age, y = Tcells)) +
  geom_point(size = 1.25, shape = 21, stroke = 0.25, color = "white", aes(fill = Entity)) +
  # Only the correlation coefficient label is shown.
  stat_cor(aes(label = ..r.label..), size = 2.5, label.y = 82) +
  scale_fill_manual(
    values = colors_characteristics, name = NULL,
    limits = c("DLBCL", "FL", "MCL", "MZL")
  ) +
  ylim(0, 90) +
  ylab("T-cells in %") +
  mytheme_1 +
  theme(
    legend.position = "right",
    axis.title.x = element_text(vjust = 6.5),
    legend.box.margin = unit(c(0, 0, 0, -0.38), "cm"),
    legend.key.width = unit(0.15, "cm"),
    legend.key.height = unit(0.35, "cm")
  ) +
  labs(tag = "D")

# Entity
# NOTE(review): boxes and points are drawn from the unfiltered df_meta while
# the global test is computed on df_tmp (which has no rLN / CLL and uses the
# TcellsDx fallback) - presumably so that rLN is displayed but not tested;
# confirm this is intended.
p6 <- ggplot(df_meta, aes(x = Entity, y = Tcells)) +
  geom_boxplot(width = 0.4, size = 0.25) +
  ggbeeswarm::geom_beeswarm(size = 0.75, shape = 21, stroke = 0.25, cex = 2.25, aes(fill = Entity)) +
  # df_tmp already excludes rLN, so no further filtering is needed here.
  stat_compare_means(
    data = df_tmp,
    label.y = 83, label.x = 2.5, size = 2.5, hjust = 0.5, bracket.size = 0.25
  ) +
  scale_fill_manual(
    values = colors_characteristics, name = NULL,
    limits = c("DLBCL", "FL", "MCL", "MZL", "rLN")
  ) +
  # Horizontal bar spanning x positions 1 to 4 underneath the test label.
  geom_segment(data = NULL, aes(x = 1, xend = 4, y = 80, yend = 80), size = 0.1) +
  ylim(0, 90) +
  ylab("T-cells in %") +
  mytheme_1 +
  theme(axis.title.x = element_blank()) +
  labs(tag = "E")

```

## Assemble plot
```{r assemble SF1, fig.height=5.5}

# Top block: bar chart (tagged "A") stacked on the annotation strip.
sf1_top <- wrap_plots(
  p1 + labs(tag = "A") + p_ann + plot_layout(nrow = 2, heights = c(1, 0.5))
)

# Bottom block: the four association panels in a single row.
sf1_bottom <- wrap_plots(
  p2 + p3 + p5 + p6 + plot_layout(nrow = 1, widths = c(0.55, 0.55, 1, 1))
)

# Stack top block, a thin spacer and bottom block, and print the figure.
sf1_top / plot_spacer() / sf1_bottom +
  plot_layout(heights = c(1.25, 0.075, 0.7))

#ggsave(width = 18.5, height = 12, units = "cm", filename = "SF1.pdf")

```

## Legend
```{r legend SF1, fig.height=0.5}

# Arrange the three extracted legends in one row, print the row and write it
# to a PDF.
legend_row <- as_ggplot(legend1) + as_ggplot(legend2) + as_ggplot(legend3) +
  plot_layout(nrow = 1, widths = c(1, 0.5, 0.75))
legend_row
ggsave(
  filename = "S1_p1_legend.pdf",
  plot = legend_row,
  width = 13, height = 1, units = "cm"
)

```

# Supplementary Figure 2
## Confusion Matrix
```{r, confusion SF2, fig.height=4.5}

# Turn the table of a confusionMatrix() result into a long data frame and add
# Prop: each count divided by the total count of its Reference class.
# The returned frame stays grouped by Reference.
confusion_props <- function(cm) {
  cm$table %>%
    data.frame() %>%
    group_by(Reference) %>%
    mutate(Prop = Freq / sum(Freq))
}

# Heatmap of Prop (Reference on x, Prediction on y). Cells with Prop > 0.4 are
# annotated with the rounded value. Legend/tag styling is added by the caller.
confusion_heatmap <- function(conf_df, title) {
  ggplot(conf_df, aes(x = Reference, y = Prediction, fill = Prop, label = round(Prop, 2))) +
    geom_tile() +
    geom_text(data = conf_df %>% filter(Prop > 0.4), color = "white", size = 1.75) +
    scale_fill_gradientn(
      colours = colorRampPalette(RColorBrewer::brewer.pal(9, "BuPu"))(100),
      name = "Sensitivity",
      limits = c(0, 0.9), breaks = seq(0, 0.8, 0.2)
    ) +
    # Thin grid lines between neighbouring classes.
    geom_hline(yintercept = seq(1.5, 13.5), color = "black", linetype = "solid", size = 0.25, alpha = 0.25) +
    geom_vline(xintercept = seq(1.5, 13.5), color = "black", linetype = "solid", size = 0.25, alpha = 0.25) +
    scale_y_discrete(expand = c(0, 0), limits = factor(cluster_order), labels = unlist(labels_cl)) +
    scale_x_discrete(expand = c(0, 0), limits = factor(cluster_order), labels = unlist(labels_cl)) +
    coord_fixed() +
    ggtitle(title) +
    mytheme_1
}

# NOTE(review): `$test` is passed as the first and `$predict` as the second
# argument of confusionMatrix(); confirm this matches the data/reference order
# the function expects, since Prop is normalised per Reference class.

# Classifier using surface markers only
confmat <- confusionMatrix(GBclasses_surface$test, GBclasses_surface$predict)
conf_freq <- confusion_props(confmat)

plot_surface <- confusion_heatmap(conf_freq, "Surface marker only") +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  labs(tag = "A")

# Classifier using surface markers plus FoxP3, IKZF3 and Ki67
confmat_plus <- confusionMatrix(GBclasses_surfaceplus$test, GBclasses_surfaceplus$predict)
conf_freq <- confusion_props(confmat_plus)

plot_surfaceplus <- confusion_heatmap(conf_freq, "Surface plus FoxP3, IKZF3, and Ki67") +
  theme(
    legend.position = "right",
    legend.key.height = unit(0.35, "cm"),
    legend.key.width = unit(0.28, "cm"),
    legend.margin = margin(c(0, 0.2, 0, 0), unit = "cm"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  labs(tag = "B")

plot_surface + plot_surfaceplus + plot_layout(guides = "collect", widths = c(1, 1))

#ggsave(width = 19, height = 9, units = "cm", filename = "SF2.pdf")

```

# Supplementary Figure 3
## Frequencies overview
```{r freq overview SF3}

# Prints df_pop into the rendered report. df_pop supplies the
# Population -> IdentI mapping used in the join below.
df_pop

# Patient x cluster matrix of subset frequencies relative to all cells.
# Flow-based proportions are used for patients whose CITEseq entry is "-",
# RNA-based proportions come from df_freq (NaN replaced by 0). Each proportion
# is multiplied by Tcells / 100.
mat_complete <- rbind(
  df_facs %>%
    left_join(., df_meta %>% select(PatientID, `CITEseq`)) %>%
    filter(`CITEseq` == "-") %>%
    select(PatientID, Population, Prop = FACS),
  df_freq %>%
    mutate(RNA = ifelse(is.nan(RNA), 0, RNA)) %>%
    select(PatientID, Population, Prop = RNA)
) %>%
  filter(Population %in% df_pop$Population) %>%
  left_join(., df_pop) %>%
  select(-Population) %>%
  left_join(., df_meta %>% select(PatientID, Tcells)) %>%
  mutate(Prop = Tcells * Prop / 100) %>%
  select(-Tcells) %>%
  pivot_wider(names_from = "IdentI", values_from = "Prop", values_fill = 0) %>%
  column_to_rownames("PatientID")

# Display order of the clusters; consecutive pairs share one plot panel row.
cl_order <- c(6, 1, 2, 9, 8, 13, 15, 11, 12, 16, 3, 5, 14, 19)
names(labels_cl_parsed) <- as.character(cluster_order)

# Long format with entity, ordered cluster factor, parsed facet label and an
# outlier flag (outside 1.5 x IQR of the quartiles, per Entity and cluster).
# The result stays grouped by Entity and IdentI.
df_freqPlot <-
  mat_complete %>%
  rownames_to_column("PatientID") %>%
  pivot_longer(cols = 2:ncol(.), names_to = "IdentI", values_to = "Prop") %>%
  add_entity() %>%
  mutate(Entity = factor(Entity, levels = c("rLN", "DLBCL", "MCL", "FL", "MZL"))) %>%
  mutate(IdentI = factor(IdentI, levels = cl_order)) %>%
  mutate(Label = factor(IdentI, levels = cl_order, labels = labels_cl_parsed[as.character(cl_order)])) %>%
  group_by(Entity, IdentI) %>%
  mutate(outlier = (Prop > quantile(Prop, 0.75) + IQR(Prop) * 1.5) | (Prop < quantile(Prop, 0.25) - IQR(Prop) * 1.5))

# Median frequency in rLN per cluster (drawn as a dashed reference line).
df_medianLines <- df_freqPlot %>%
  filter(Entity == "rLN") %>%
  group_by(IdentI, Label) %>%
  summarise(MedianProp = median(Prop))

# Per cluster: Wilcoxon tests of every entity against rLN, BH-adjusted.
# Only adjusted p values <= 0.05 are kept, together with a display string and
# the y position (height) at which the string is drawn.
df_freqPlot_pvalues <- df_freqPlot %>%
  group_by(IdentI) %>%
  wilcox_test(data = ., formula = Prop ~ Entity, detailed = TRUE, ref.group = "rLN") %>%
  adjust_pvalue(method = "BH") %>%
  select(IdentI, Entity = group2, p.adj, estimate) %>%
  mutate(Entity = factor(Entity, levels = c("rLN", "DLBCL", "MCL", "FL", "MZL"))) %>%
  mutate(p.adj = ifelse(p.adj > 0.05, NA, p.adj)) %>%
  mutate(p.adj_s = format(p.adj, scientific = TRUE, digits = 1)) %>%
  # Values above 0.05 are already NA at this point, so no branch is needed
  # for them; NA falls through case_when() and is removed by the filter below.
  mutate(p.adj_f = case_when(
    p.adj == 0.05 ~ "0.05",
    p.adj < 0.05 & p.adj > 0.001 ~ as.character(round(p.adj, 3)),
    p.adj == 0.001 ~ "0.001",
    p.adj < 0.001 ~ p.adj_s
  )) %>%
  filter(!is.na(p.adj)) %>%
  # df_freqPlot is grouped by Entity/IdentI; ungroup first so that select()
  # does not silently re-add Entity and the join key is IdentI only.
  left_join(
    .,
    df_freqPlot %>% ungroup() %>% select(IdentI, Label) %>% distinct(),
    by = "IdentI"
  ) %>%
  left_join(
    .,
    data.frame(
      IdentI = factor(cl_order),
      height = c(28, 28, 21, 21, 10, 10, 12, 12, 14, 14, 50, 50, 12, 12)
    ),
    by = "IdentI"
  )

```

## Plot
```{r freq overview, fig.height=6, fig.width=5.2}

# One plot per pair of clusters; each plot holds two facets (one per cluster)
# with box plots of the frequencies per entity.
cluster_pairs <- list(c(1, 6), c(2, 9), c(8, 13), c(15, 11), c(12, 16), c(3, 5), c(14, 19))
# Upper y-axis limit of each pair's plot (same order as cluster_pairs).
panel_ylim <- c(38, 30, 13, 15.5, 20, 65, 15)

p <- vector("list", length(cluster_pairs))

for (i in seq_along(cluster_pairs)) {

  y <- cluster_pairs[[i]]

  p[[i]] <-
    ggplot(data = df_freqPlot %>% filter(IdentI %in% y) %>%
             mutate(Label = factor(Label, levels = labels_cl_parsed[as.character(y)])),
           aes(y = Prop, x = Entity, fill = IdentI)) +
    # Dashed reference line at the rLN median of each cluster.
    geom_hline(data = df_medianLines %>% filter(IdentI %in% y), aes(yintercept = MedianProp),
               size = 0.25, linetype = "dashed", color = "grey60") +
    # Box plot outliers are made invisible here and drawn by the beeswarm
    # layer below instead.
    geom_boxplot(width = 0.4, outlier.shape = 21, outlier.size = 1, outlier.color = "white",
                 outlier.alpha = 0, show.legend = FALSE, size = 0.25) +
    # Only rows flagged as outliers are plotted as points.
    ggbeeswarm::geom_beeswarm(data = function(x) dplyr::filter(x, outlier), cex = 3, stroke = 0.25,
                              groupOnX = TRUE, shape = 21, size = 1, color = "white", alpha = 1) +
    # Adjusted p values versus rLN, placed at the precomputed height.
    geom_text(data = df_freqPlot_pvalues %>% filter(IdentI %in% y),
              inherit.aes = FALSE, aes(y = height, x = Entity, label = p.adj_f), hjust = 0.1, size = 2.3, angle = 45) +
    scale_fill_manual(values = colors_umap_cl) +
    scale_y_continuous(name = "% of total cells", limits = c(0, panel_ylim[i])) +
    scale_x_discrete(expand = c(0.17, 0.17)) +
    facet_wrap(~Label, ncol = 2, labeller = label_parsed) +
    mytheme_1 +
    theme(axis.title.x = element_blank(),
          strip.background = element_rect(color = NA),
          plot.margin = unit(c(0.1, 0.25, 0, 0), units = "cm"),
          panel.border = element_rect(size = 0.5),
          axis.text.x = element_text(angle = 45, hjust = 1))

  # Only the last plot keeps its x-axis text and ticks.
  if (i != 7) {
    p[[i]] <- p[[i]] +
      theme(axis.text.x = element_blank(),
            axis.ticks.x = element_blank())
  }

  # Only plots 4 and 5 keep the y-axis title.
  if (!i %in% c(4, 5)) {
    p[[i]] <- p[[i]] +
      theme(axis.title.y = element_blank())
  }

}

plot_freq <- wrap_plots(p, ncol = 2)
plot_freq
ggsave(plot = plot_freq, width = 18.5, height = 15, units = "cm", filename = "SF3.pdf")

```


# Supplementary Figure 4
## Lasso model (beta coefficients)
```{r lasso model SF4}

# Flow-based proportions, restricted to patients whose CITEseq entry is "-".
facs_props <- df_facs %>%
  left_join(df_meta %>% select(PatientID, `CITEseq`)) %>%
  filter(`CITEseq` == "-") %>%
  select(PatientID, Population, Prop = FACS)

# RNA-based proportions; NaN is treated as 0.
rna_props <- df_freq %>%
  mutate(RNA = ifelse(is.nan(RNA), 0, RNA)) %>%
  select(PatientID, Population, Prop = RNA)

# Patient x cluster matrix of proportions. Unlike the matrix built for
# Supplementary Figure 3, Prop is not multiplied by Tcells here.
mat_complete <- rbind(facs_props, rna_props) %>%
  filter(Population %in% df_pop$Population) %>%
  left_join(df_pop) %>%
  select(-Population) %>%
  pivot_wider(names_from = "IdentI", values_from = "Prop", values_fill = 0) %>%
  column_to_rownames("PatientID")

# Model input: cluster columns plus Tcells and Entity; every numeric column
# is divided by 100.
total <- mat_complete %>%
  rownames_to_column("PatientID") %>%
  left_join(df_meta %>% select(PatientID, Tcells)) %>%
  add_entity() %>%
  mutate_if(is.numeric, .funs = ~./100)

cell_types <- colnames(select(total, -Entity, -PatientID))

# my_glmnet() is a project helper; the plot below reads its `coefs` element.
gt <- my_glmnet(total)

# Order of the rows in the coefficient plot.
limits_coefs <- c(cluster_order, "Tcells", "(Intercept)")

# Reversed limits, with black for the two non-cluster terms followed by the
# cluster colours.
coef_limits <- rev(limits_coefs)
coef_colors <- c("black", "black", rev(unname(colors_umap_cl[as.character(cluster_order)])))

# Horizontal bars of the beta coefficients, one facet per entity.
plot_coefs <- gt$coefs %>%
  mutate(Entity = factor(Entity, levels = c("rLN", "DLBCL", "MCL", "FL", "MZL"))) %>%
  ggplot(aes(x = beta, y = cell_type, fill = cell_type, color = cell_type)) +
  geom_hline(
    yintercept = c(1.5, 5.5, 9.5, 10.5, 13.5) + 2,
    linetype = "solid", size = 0.25, alpha = 0.1
  ) +
  geom_bar(stat = "identity", width = 0.5) +
  scale_y_discrete(
    limits = coef_limits,
    labels = c("Intercept", "T cells", rev(unlist(unname(labels_cl))))
  ) +
  scale_color_manual(limits = coef_limits, values = coef_colors) +
  scale_fill_manual(limits = coef_limits, values = coef_colors) +
  facet_wrap(~Entity, ncol = 5) +
  mytheme_1 +
  theme(
    axis.title.y = element_blank(),
    panel.border = element_rect(size = 0.5)
  )

```

## Multivariate Model
```{r multivariate model SF4, fig.height=7}

entities <- c("Overall", "FL", "MZL", "MCL")

# Long-format proportions joined with the patient metadata. This does not
# depend on the cluster or entity being modelled, so it is built only once.
df_model_input <- mat_complete %>%
  rownames_to_column("PatientID") %>%
  pivot_longer(cols = 2:ncol(.), names_to = "IdentI", values_to = "Prop") %>%
  left_join(
    .,
    df_meta %>%
      select(PatientID, Status, Pretreatment, Sex, Entity, Department, Age, Subtype, Tcells, `CITEseq`) %>%
      distinct(),
    by = "PatientID"
  )

# Fit a gaussian glm for one cluster within a set of entities and return its
# coefficient table as a data frame.
#
# cluster_id     cluster to model (a column name of mat_complete)
# entity_set     character vector of entities whose samples are included
# model_formula  model formula with Prop as response
#
# Returns one row per coefficient with the columns Parameter, Estimate,
# Std.error, t.value, p.value and IdentI.
fit_cluster_model <- function(cluster_id, entity_set, model_formula) {
  model_data <- df_model_input %>%
    filter(Entity %in% entity_set) %>%
    filter(IdentI == cluster_id)

  fit_summary <- summary(
    glm(formula = model_formula, family = "gaussian", data = model_data)
  )

  fit_summary$coefficients %>%
    `colnames<-`(c("Estimate", "Std.error", "t.value", "p.value")) %>%
    data.frame() %>%
    dplyr::mutate(IdentI = as.character(cluster_id)) %>%
    rownames_to_column("Parameter")
}

models <- list()

# Overall (all four lymphoma entities pooled) and FL / MZL / MCL separately.
for (i in colnames(mat_complete)) {
  for (e in entities) {
    ent <- if (e == "Overall") c("FL", "MZL", "MCL", "DLBCL") else e
    models[[paste0(i, "_", e)]] <-
      fit_cluster_model(i, ent, Prop ~ Status + Age + Sex)
  }
}

# DLBCL is modelled separately because Subtype is added as a covariate.
for (i in colnames(mat_complete)) {
  models[[paste0(i, "_DLBCL")]] <-
    fit_cluster_model(i, "DLBCL", Prop ~ Status + Age + Sex + Subtype)
}

# Combine all coefficient tables. The entity is recovered from the second
# "_"-separated part of the model name, p values are BH-adjusted within each
# cluster/entity model, and `sign` marks the direction of significant effects.
# The result stays grouped by IdentI and Entity.
p.value <- bind_rows(models, .id = "model") %>%
  filter(Parameter != "(Intercept)") %>%
  mutate(Entity = vapply(
    strsplit(model, split = "_"),
    function(parts) parts[[2]],
    character(1)
  )) %>%
  group_by(IdentI, Entity) %>%
  mutate(p.adj = p.adjust(p.value, method = "BH")) %>%
  mutate(sign = ifelse(Estimate < 0 & p.adj < 0.05, "-", NA)) %>%
  mutate(sign = ifelse(Estimate > 0 & p.adj < 0.05, "+", sign)) %>%
  mutate(Entity = factor(Entity, levels = c("Overall", "DLBCL", "MCL", "FL", "MZL")))


# Rows of p.value for the pooled model and for the entity-specific models.
pv_overall <- p.value %>% filter(Entity == "Overall")
pv_overall_sig <- p.value %>% filter(Entity == "Overall", p.adj < 0.05)
pv_entity <- p.value %>% filter(Entity != "Overall")

# Model terms shown on the y axis (top to bottom) and their markdown labels.
param_order <- c("StatusRelapse", "Age", "SexM", "Subtypenon-GCB")
param_labels <- c(
  "<b style='color:#B2182B'>Initial diagnosis </b><br>vs. <b style='color:#2166AC'>Relapse</b>",
  "<b style='color:#B2182B'>Younger </b>vs.<br><b style='color:#2166AC'>Older</b>",
  "<b style='color:#B2182B'>Female </b>vs.<br><b style='color:#2166AC'>Male</b>",
  "<b style='color:#B2182B'>GCB</b> vs.<br><b style='color:#2166AC'>non-GCB</b><br>[only DLBCL]"
)

# Dot plot of the model terms per cluster: point size encodes -log10 of the
# adjusted p value, fill encodes the t statistic. The pooled model is nudged
# upwards; the entity-specific models are dodged side by side below it.
plot_multi <- ggplot() +
  geom_point(
    data = pv_overall,
    aes(x = IdentI, y = Parameter, size = -log10(p.adj), fill = t.value),
    shape = 21, stroke = 0.25,
    position = position_nudge(y = 0.16)
  ) +
  geom_point(
    data = pv_entity,
    aes(x = IdentI, y = Parameter, size = -log10(p.adj), fill = t.value, group = Entity),
    shape = 21, stroke = 0.25,
    position = ggpp::position_dodgenudge(y = -0.14, width = 0.78)
  ) +
  # Rounded adjusted p value and effect direction for significant pooled terms.
  geom_text(
    data = pv_overall_sig,
    aes(x = IdentI, y = Parameter, label = round(p.adj, 3)),
    hjust = 0.5, nudge_y = 0.4, size = 2.5
  ) +
  geom_text(
    data = pv_overall_sig,
    aes(x = IdentI, y = Parameter, label = sign),
    hjust = 0.5, nudge_y = 0.18, size = 2.5
  ) +
  # Effect direction for the entity-specific models.
  geom_text(
    data = pv_entity,
    aes(x = IdentI, y = Parameter, label = sign, group = Entity),
    size = 2.5,
    position = ggpp::position_dodgenudge(y = -0.11, width = 0.78)
  ) +
  scale_x_discrete(limits = factor(cluster_order), labels = unlist(labels_cl)) +
  scale_size_continuous(range = c(0.25, 3.5), limits = c(0, 3), name = expression('-log'[10]~'p')) +
  scale_fill_gradientn(
    colors = brewer.pal(9, name = "RdBu"),
    limits = c(-5, 5), breaks = c(-3, 0, 3), name = "Statistic"
  ) +
  guides(fill = guide_colorbar(barwidth = 2.5)) +
  guides(size = guide_legend(keywidth = 0.3)) +
  geom_hline(yintercept = seq(1, 6, 1), color = "grey90", size = 0.25) +
  geom_vline(xintercept = seq(1.5, 13.5, 1), color = "grey90", size = 0.25) +
  scale_y_discrete(limits = rev(param_order), labels = rev(param_labels)) +
  mytheme_1 +
  theme(
    axis.title = element_blank(),
    legend.position = "top",
    plot.title = element_text(margin = unit(c(0, 0, 0, 0), units = "cm")),
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.border = element_rect(size = 0.5),
    legend.key.height = unit(x = 0.3, units = "cm"),
    legend.box.margin = unit(c(0, -8.5, -0.35, 0), "cm"),
    plot.margin = unit(c(0, 0.25, 0, 0.25), "cm"),
    legend.key.width = unit(x = 0.35, units = "cm"),
    axis.text.y = element_markdown(hjust = 1, lineheight = 1.2)
  ) +
  labs(tag = "B")

# Combine coefficient plot (A) and multivariate plot (B), print and save.
# NOTE(review): no nrow/ncol is given, so the arrangement is left to
# patchwork's default while `heights` has two entries - confirm the panels
# are laid out as intended.
sf4 <- plot_coefs + labs(tag = "A") + plot_multi + plot_layout(heights = c(1.2, 1))
sf4

ggsave(plot = sf4, width = 18, height = 15, units = "cm", filename = "SF4.pdf")

```

# Supplementary Figure 5
## Proportions of T-cell subsets in 5' scRNA versus CITE-seq
### Data handling
```{r data handling SF5}

# Patients present in each of the two data sets.
patients_3prime <- unique(df_comb$PatientID)
patients_5prime <- DFtotal_5prime$PatientID

# 5' data: one row per barcode/cluster/patient, then proportions via the
# project helper add_prop() (presumably per patient and cluster - confirm
# against its definition). Only patients also found in df_comb are kept.
df_freq_5prime <- DFtotal_5prime %>%
  select(Barcode_full, IdentI, PatientID) %>%
  distinct() %>%
  add_prop(vars = c("PatientID", "IdentI"), group.vars = 1) %>%
  mutate(Class = "5prime") %>%
  filter(PatientID %in% patients_3prime)

# 3' data: same proportions from df_comb, restricted to patients that also
# have 5' data.
df_freq_3prime <- df_comb %>%
  add_prop(vars = c("PatientID", "IdentI"), group.vars = 1) %>%
  mutate(Class = "3prime") %>%
  filter(PatientID %in% patients_5prime)

# Stack both, convert Prop to percent and spread into one column per class
# (`3prime`, `5prime`); combinations missing in one class become 0.
df_freq_prime <- rbind(df_freq_3prime, df_freq_5prime) %>%
  mutate(Prop = 100 * Prop) %>%
  pivot_wider(names_from = "Class", values_from = "Prop", values_fill = 0)

```

### Correlation plots
```{r cor plots SF5}

this_theme <- theme(plot.margin = unit(c(0,0.25,0.1,0), units = "cm"),
  plot.title = element_text(vjust = -1),
  axis.title = element_blank(),
  axis.text = element_text(size=7, color="black"))

cor_plots_prime <- list()

cor_plots_prime[["TFH"]] <- 
  df_freq_prime %>% 
  filter(IdentI==6) %>% 
  ggplot(aes(y=`5prime`, x=`3prime`))+
  geom_point(color=colors_umap_cl[["6"]], size=0.65, alpha=0.75)+
  geom_smooth(method = "lm", linetype="dashed", size=0.25, formula = y ~ x, na.rm = T,
              color=colors_umap_cl[["6"]], se=F, fullrange=T)+
  stat_cor(aes(label=..r.label..), method = "pearson", size=2.5, label.x.npc = c(0.45), label.y.npc = c(0.1))+
  scale_x_continuous(limits = c(0,60), breaks = c(0, 20, 40, 60))+
  scale_y_continuous(limits = c(0,60), breaks = c(0, 20, 40, 60))+
  labs(x="3' scRNA - %", y="5' scRNA - %")+
  ggtitle(labels_cl[["6"]])+
  theme_bw()+
  coord_fixed()+
  mytheme_1+
  this_theme

cor_plots_prime[["TPR"]] <- df_freq_prime %>% 
   filter(IdentI==14) %>% 
  ggplot(aes(y=`5prime`, x=`3prime`))+
  geom_point(color=colors_umap_cl[["14"]], size=0.65, alpha=0.75)+
  geom_smooth(method = "lm", linetype="dashed", size=0.25, formula = y ~ x, na.rm = T,
              color=colors_umap_cl[["14"]], se=F, fullrange=T)+
  stat_cor(aes(label=..r.label..), method = "pearson",size=2.5, label.x.npc = c(0.45), label.y.npc = c(0.1))+
  scale_x_continuous(limits = c(0,12), breaks = c(0, 4, 8, 12))+
  scale_y_continuous(limits = c(0,12), breaks = c(0, 4, 8, 12))+
  labs(x="3' scRNA - %", y="5' scRNA - %")+
  ggtitle(labels_cl[["14"]])+
  theme_bw()+
  coord_fixed()+
  mytheme_1+
  this_theme+
  theme(axis.title.y = element_text(size=7))

cor_plots_prime[["TDN"]] <- df_freq_prime %>% 
  filter(IdentI==19) %>% 
  ggplot(aes(y=`5prime`, x=`3prime`))+
  geom_point(color=colors_umap_cl[["19"]], size=0.65, alpha=0.75)+
  geom_smooth(method = "lm", linetype="dashed", size=0.25, formula = y ~ x, na.rm = T,
              color=colors_umap_cl[["19"]], se=F, fullrange=T)+
  stat_cor(aes(label=..r.label..), method = "pearson",size=2.5, label.x.npc = c(0.45), label.y.npc = c(0.1))+
  scale_x_continuous(limits = c(0,8), breaks = c(0, 2, 4, 6, 8))+
  scale_y_continuous(limits = c(0,8), breaks = c(0, 2, 4, 6, 8))+
  labs(x="3' scRNA - %", y="5' scRNA - %")+
  ggtitle(labels_cl[["19"]])+
  theme_bw()+
  coord_fixed()+
  mytheme_1+
  theme_axis_sub3

cor_plots_prime[["THCM2"]] <- df_freq_prime %>% 
  filter(IdentI==9) %>% 
  ggplot(aes(y=`5prime`, x=`3prime`))+
  geom_point(color=colors_umap_cl[["9"]], size=0.65, alpha=0.75)+
  geom_smooth(method = "lm", linetype="dashed", size=0.25, formula = y ~ x, na.rm = T,
              color=colors_umap_cl[["9"]], se=F, fullrange=T)+
  stat_cor(aes(label=..r.label..), method = "pearson",size=2.5, label.x.npc = c(0.45), label.y.npc = c(0.1))+
  scale_x_continuous(limits = c(0,15), breaks = c(0, 5, 10, 15))+
  scale_y_continuous(limits = c(0,15), breaks = c(0, 5, 10, 15))+
  labs(x="3' scRNA - %", y="5' scRNA - %")+
  ggtitle(labels_cl[["9"]])+
  theme_bw()+
  coord_fixed()+
  mytheme_1+
  this_theme

cor_plots_prime[["THCM1"]] <- df_freq_prime %>% 
  filter(IdentI==2) %>% 
  ggplot(aes(y=`5prime`, x=`3prime`))+
  geom_point(color=colors_umap_cl[["2"]], size=0.65, alpha=0.75)+
  geom_smooth(method = "lm", linetype="dashed", size=0.25, formula = y ~ x, na.rm = T,
              color=colors_umap_cl[["2"]], se=F, fullrange=T)+
  stat_cor(aes(label=..r.label..), method = "pearson",size=2.5, label.x.npc = c(0.45), label.y.npc = c(0.1))+
  scale_x_continuous(limits = c(0,20), breaks = c(0, 6, 12, 18))+
  scale_y_continuous(limits = c(0,20), breaks = c(0, 6, 12, 18))+
  labs(x="3' scRNA - %", y="5' scRNA - %")+
  ggtitle(labels_cl[["2"]])+
  theme_bw()+
  coord_fixed()+
  mytheme_1+
  this_theme


cor_plots_prime[["THNaive"]] <- df_freq_prime %>% 
  filter(IdentI==1) %>% 
  ggplot(aes(y=`5prime`, x=`3prime`))+
  geom_point(color=colors_umap_cl[["1"]], size=0.65, alpha=0.75)+
  geom_smooth(method = "lm", linetype="dashed", size=0.25, formula = y ~ x, na.rm = T,
              color=colors_umap_cl[["1"]], se=F, fullrange=T)+
  stat_cor(aes(label=..r.label..), method = "pearson",size=2.5, label.x.npc = c(0.45), label.y.npc = c(0.1))+
  scale_x_continuous(limits = c(0,45), breaks = c(0, 15, 30, 45))+
  scale_y_continuous(limits = c(0,45), breaks = c(0, 15, 30, 45))+
  labs(x="3' scRNA - %", y="5' scRNA - %")+
  ggtitle(labels_cl[["1"]])+
  theme_bw()+
  coord_fixed()+
  mytheme_1+
  this_theme

cor_plots_prime[["TREGCM1"]] <- df_freq_prime %>% 
  filter(IdentI==8) %>% 
  ggplot(aes(y=`5prime`, x=`3prime`))+
  geom_point(color=colors_umap_cl[["8"]], size=0.65, alpha=0.75)+
  geom_smooth(method = "lm", linetype="dashed", size=0.25, formula = y ~ x, na.rm = T,
              color=colors_umap_cl[["8"]], se=F, fullrange=T)+
  stat_cor(aes(label=..r.label..), method = "pearson",size=2.5, label.x.npc = c(0.45), label.y.npc = c(0.1))+
  labs(x="3' scRNA - %", y="5' scRNA - %")+
  ggtitle(labels_cl[["8"]])+
    scale_x_continuous(limits = c(0,25), breaks = c(0, 8, 16, 24))+
  scale_y_continuous(limits = c(0,25), breaks = c(0, 8, 16, 24))+
  theme_bw()+
  coord_fixed()+
  mytheme_1+
  this_theme

cor_plots_prime[["TREGEM2"]] <- df_freq_prime %>% 
  filter(IdentI==11) %>% 
  ggplot(aes(y=`5prime`, x=`3prime`))+
  geom_point(color=colors_umap_cl[["11"]],size=0.65, alpha=0.75)+
  geom_smooth(method = "lm", linetype="dashed", size=0.25, formula = y ~ x, na.rm = T,
              color=colors_umap_cl[["11"]], se=F, fullrange=T)+
  stat_cor(aes(label=..r.label..), method = "pearson",size=2.5, label.x.npc = c(0.45), label.y.npc = c(0.1))+
  scale_x_continuous(limits = c(0,16), breaks = c(0, 5, 10, 15))+
  scale_y_continuous(limits = c(0,16), breaks = c(0, 5, 10, 15))+
  labs(x="3' scRNA - %", y="5' scRNA - %")+
  ggtitle(labels_cl[["11"]])+
  theme_bw()+
  coord_fixed()+
  mytheme_1+
  theme_axis_sub3

cor_plots_prime[["TREGEM1"]] <- df_freq_prime %>% 
  filter(IdentI==15) %>% 
  ggplot(aes(y=`5prime`, x=`3prime`))+
  geom_point(color=colors_umap_cl[["15"]], size=0.65, alpha=0.75)+
  geom_smooth(method = "lm", linetype="dashed", size=0.25, formula = y ~ x, na.rm = T,
              color=colors_umap_cl[["15"]], se=F, fullrange=T)+
  stat_cor(aes(label=..r.label..), method = "pearson",size=2.5, label.x.npc = c(0.45), label.y.npc = c(0.1))+
  scale_x_continuous(limits = c(0,20), breaks = c(0, 6, 12, 18))+
  scale_y_continuous(limits = c(0,20), breaks = c(0, 6, 12, 18))+
  labs(x="3' scRNA - %", y="5' scRNA - %")+
  ggtitle(labels_cl[["15"]])+
  theme_bw()+
  coord_fixed()+
  mytheme_1+
  this_theme+
  theme(axis.title.y = element_text(size=7),
        axis.title.x = element_text(size=7))

cor_plots_prime[["TREGCM2"]] <- df_freq_prime %>% 
  filter(IdentI==13) %>% 
  ggplot(aes(y=`5prime`, x=`3prime`))+
  geom_point(color=colors_umap_cl[["13"]], size=0.65, alpha=0.75)+
  geom_smooth(method = "lm", linetype="dashed", size=0.25, formula = y ~ x, na.rm = T,
              color=colors_umap_cl[["13"]], se=F, fullrange=T)+
  stat_cor(aes(label=..r.label..), method = "pearson",size=2.5, label.x.npc = c(0.45), label.y.npc = c(0.1))+
  scale_x_continuous(limits = c(0,20), breaks = c(0, 6, 12, 18))+
  scale_y_continuous(limits = c(0,20), breaks = c(0, 6, 12, 18))+
  labs(x="3' scRNA - %", y="5' scRNA - %")+
  ggtitle(labels_cl[["13"]])+
  theme_bw()+
  coord_fixed()+
  mytheme_1+
  this_theme

cor_plots_prime[["TTOXNaive"]] <-df_freq_prime %>% 
  filter(IdentI==12) %>% 
  ggplot(aes(y=`5prime`, x=`3prime`))+
  geom_point(color=colors_umap_cl[["12"]], size=0.65, alpha=0.75)+
  geom_smooth(method = "lm", linetype="dashed", size=0.25, formula = y ~ x, na.rm = T,
              color=colors_umap_cl[["12"]], se=F, fullrange=T)+
  stat_cor(aes(label=..r.label..), method = "pearson",size=2.5, label.x.npc = c(0.45), label.y.npc = c(0.1))+
  scale_x_continuous(limits = c(0,27), breaks = c(0, 8, 16, 24))+
  scale_y_continuous(limits = c(0,27), breaks = c(0, 8, 16, 24))+
  labs(x="3' scRNA - %", y="5' scRNA - %")+
  ggtitle(labels_cl[["12"]])+
  theme_bw()+
  coord_fixed()+
  mytheme_1+
  theme_axis_sub3
 
# Cluster 5: `3prime` (x) against `5prime` (y) values from df_freq_prime,
# with a dashed linear fit and the Pearson r label from stat_cor().
# The colour is given as a literal hex code rather than taken from colors_umap_cl.
cor_plots_prime[["TTOXEM3"]] <-
  df_freq_prime %>%
  filter(IdentI == 5) %>%
  ggplot(aes(x = `3prime`, y = `5prime`)) +
  geom_point(size = 0.65, alpha = 0.75, color = "#b50923") +
  geom_smooth(
    formula = y ~ x, method = "lm", se = FALSE, fullrange = TRUE, na.rm = TRUE,
    linetype = "dashed", size = 0.25, color = "#b50923"
  ) +
  stat_cor(
    aes(label = ..r.label..), method = "pearson", size = 2.5,
    label.x.npc = 0.45, label.y.npc = 0.1
  ) +
  # identical limits on both axes so coord_fixed() yields a square panel
  scale_x_continuous(breaks = c(0, 25, 50, 75), limits = c(0, 80)) +
  scale_y_continuous(breaks = c(0, 25, 50, 75), limits = c(0, 80)) +
  labs(x = "3' scRNA - %", y = "5' scRNA - %", title = labels_cl[["5"]]) +
  theme_bw() +
  coord_fixed() +
  mytheme_1 +
  theme_axis_sub3

# Cluster 3: `3prime` (x) against `5prime` (y) values from df_freq_prime,
# with a dashed linear fit and the Pearson r label from stat_cor().
# The colour is given as a literal hex code rather than taken from colors_umap_cl.
cor_plots_prime[["TTOXEM1"]] <-
  df_freq_prime %>%
  filter(IdentI == 3) %>%
  ggplot(aes(x = `3prime`, y = `5prime`)) +
  geom_point(size = 0.65, alpha = 0.75, color = "#fea044") +
  geom_smooth(
    formula = y ~ x, method = "lm", se = FALSE, fullrange = TRUE, na.rm = TRUE,
    linetype = "dashed", size = 0.25, color = "#fea044"
  ) +
  stat_cor(
    aes(label = ..r.label..), method = "pearson", size = 2.5,
    label.x.npc = 0.45, label.y.npc = 0.1
  ) +
  # identical limits on both axes so coord_fixed() yields a square panel
  scale_x_continuous(breaks = c(0, 15, 30, 45), limits = c(0, 50)) +
  scale_y_continuous(breaks = c(0, 15, 30, 45), limits = c(0, 50)) +
  labs(x = "3' scRNA - %", y = "5' scRNA - %", title = labels_cl[["3"]]) +
  theme_bw() +
  coord_fixed() +
  mytheme_1 +
  theme_axis_sub3

# Cluster 16: `3prime` (x) against `5prime` (y) values from df_freq_prime,
# with a dashed linear fit and the Pearson r label from stat_cor().
# Unlike the neighbouring panels, the r label sits near the top (label.y.npc = 0.9).
cor_plots_prime[["TTOXEM2"]] <-
  df_freq_prime %>%
  filter(IdentI == 16) %>%
  ggplot(aes(x = `3prime`, y = `5prime`)) +
  geom_point(size = 0.65, alpha = 0.75, color = colors_umap_cl[["16"]]) +
  geom_smooth(
    formula = y ~ x, method = "lm", se = FALSE, fullrange = TRUE, na.rm = TRUE,
    linetype = "dashed", size = 0.25, color = colors_umap_cl[["16"]]
  ) +
  stat_cor(
    aes(label = ..r.label..), method = "pearson", size = 2.5,
    label.x.npc = 0.45, label.y.npc = 0.9
  ) +
  # identical limits on both axes so coord_fixed() yields a square panel
  scale_x_continuous(breaks = c(0, 4, 8, 12), limits = c(0, 15)) +
  scale_y_continuous(breaks = c(0, 4, 8, 12), limits = c(0, 15)) +
  labs(x = "3' scRNA - %", y = "5' scRNA - %", title = labels_cl[["16"]]) +
  theme_bw() +
  coord_fixed() +
  mytheme_1 +
  theme_axis_sub3

# Lay out all 14 per-cluster correlation panels in two rows.
# The tag "A" is added to the first panel (TPR) before the others are appended.
cor_plots_prime_all <-
  cor_plots_prime$TPR + labs(tag = "A") +
  cor_plots_prime$THNaive +
  cor_plots_prime$THCM1 +
  cor_plots_prime$THCM2 +
  cor_plots_prime$TFH +
  cor_plots_prime$TREGCM1 +
  cor_plots_prime$TREGCM2 +
  cor_plots_prime$TREGEM1 +
  cor_plots_prime$TREGEM2 +
  cor_plots_prime$TTOXNaive +
  cor_plots_prime$TTOXEM1 +
  cor_plots_prime$TTOXEM2 +
  cor_plots_prime$TTOXEM3 +
  cor_plots_prime$TDN +
  plot_layout(nrow = 2)

# Median over clusters of the cor.test() estimate between `3prime` and
# `5prime`; the value is printed, not stored.
df_freq_prime %>%
  group_by(IdentI) %>%
  summarise(R = cor.test(`3prime`, `5prime`)$estimate) %>%
  pull(R) %>%
  median()

```

## CD4 and CD8 expression in clonal T-cells
```{r CD4 and CD8 expression in SF5}

# Expression cut-offs used below to call cells CD4- and/or CD8A-positive
lim_CD4 <- 0.3
lim_CD8 <- 0.6

# CD4 and CD8A values per cell, fetched from every object in sobjs_T_5prime
# and stacked into a single table keyed by Barcode_full
df_clone_expr <- sobjs_T_5prime %>%
  lapply(function(sobj) {
    expr <- FetchData(sobj, vars = c("CD4", "CD8A"))
    rownames_to_column(data.frame(expr), "Barcode_full")
  }) %>%
  bind_rows() %>%
  remove_rownames()

# One row per cell of clusters 3 and 5 with its clonotype size (n), CD4/CD8A
# expression and the resulting quadrant label.
# NOTE(review): add_count() also counts rows whose raw_clonotype_id is NA as one
# group per cluster/patient; such rows are only removed (drop_na) for the
# frequency table below, not for df_clone_CD4 itself - confirm this is intended.
df_clone_CD4 <- DFtotal_5prime %>%
  select(
    Barcode_full, PatientID, refUMAP_1, refUMAP_2,
    Entity, IdentI, raw_clonotype_id
  ) %>%
  distinct() %>%
  add_count(IdentI, PatientID, raw_clonotype_id) %>%
  left_join(df_clone_expr) %>%
  mutate(IdentI_new = factor(IdentI, levels = cluster_order, labels = labels_cl_parsed)) %>%
  filter(IdentI %in% c(3, 5)) %>%
  mutate(
    # clonotypes seen in more than two cells are flagged as expanded
    Expanded = n > 2,
    # strict inequalities: a value exactly on a cut-off matches no branch (NA)
    CD4CD8 = case_when(
      CD4 > lim_CD4 & CD8A < lim_CD8 ~ "CD4pos",
      CD4 < lim_CD4 & CD8A > lim_CD8 ~ "CD8pos",
      CD4 > lim_CD4 & CD8A > lim_CD8 ~ "CD4CD8pos",
      CD4 < lim_CD4 & CD8A < lim_CD8 ~ "CD4CD8neg"
    )
  )

# Quadrant proportions (via the project helpers add_prop / fill_zeros), rounded
# to two digits, plus fixed CD4/CD8 coordinates at which each quadrant's
# proportion is printed in the scatter plots.
df_clone_CD4_freq <- df_clone_CD4 %>%
  drop_na() %>%
  add_prop(vars = c("IdentI", "Expanded", "CD4CD8"), group.vars = c(1, 2)) %>%
  fill_zeros(names_from = "IdentI", values_from = "Prop") %>%
  mutate(Prop = round(Prop, 2)) %>%
  left_join(
    data.frame(
      CD4CD8 = c("CD4CD8neg", "CD4pos", "CD4CD8pos", "CD8pos"),
      CD8 = c(-0.35, -0.35, 3.7, 3.7),
      CD4 = c(-0.35, 1.9, 1.9, -0.35)
    )
  ) %>%
  mutate(IdentI_new = factor(IdentI, levels = cluster_order, labels = labels_cl_parsed))
  
# Panel B: CD4 vs CD8A expression of cells from non-expanded clonotypes (n < 3),
# one facet per cluster, with the proportion of cells per quadrant printed at
# the coordinates stored in df_clone_CD4_freq.
plot_expr1 <- ggplot()+
  geom_point(data=df_clone_CD4 %>% filter(n<3), inherit.aes = FALSE, aes(x=CD4, y=CD8A, color=IdentI), 
             position = position_jitter(width = 0.1, height = 0.1),  na.rm = TRUE,
             size=0.25, alpha=0.25, stroke=0)+
  scale_color_manual(values = colors_umap_cl, guide="none")+
  scale_fill_manual(values = colors_umap_cl, guide="none")+
  geom_text(data=df_clone_CD4_freq %>% filter(Expanded==FALSE), inherit.aes = FALSE, aes(x=CD4, y=CD8, label=Prop), size=2.5, alpha=1)+
  # Quadrant boundaries: drawn from the same cut-offs that define CD4CD8 so the
  # lines cannot drift away from the gating if the limits are changed
  geom_hline(yintercept = lim_CD8, linetype="dashed", size=0.25)+
  geom_vline(xintercept = lim_CD4, linetype="dashed", size=0.25)+
  scale_x_continuous(breaks = c(0,0.5,1,1.5,2), limits=c(-0.5, 2.4))+
  scale_y_continuous(breaks = c(0,1,2,3), limits=c(-0.5, 4))+
  labs(x="<i>CD4</i> - RNA expression",
       y="<i>CD8</i> - RNA expression",
       title="Clone size < 3",
       tag = "B")+
  facet_wrap(~IdentI_new, nrow = 1, labeller = label_parsed)+
  mytheme_1+
  # element_textbox is used for the axis titles because they contain <i> markup
  theme(axis.title.x = element_textbox(size=7, halign = 0.5, margin = unit(units = "cm", c(0.1,0,0,0))),
        axis.title.y = element_textbox(size=7, orientation = "left-rotated", margin = unit(units = "cm", c(0,0,0.1,0))),
        panel.border = element_rect(size=0.4))

# Panel C: CD4 vs CD8A expression of cells from expanded clonotypes (n > 2),
# one facet per cluster, with the proportion of cells per quadrant printed at
# the coordinates stored in df_clone_CD4_freq.
# Changes against the previous version: the facet_wrap() call that was added
# twice is kept once, and the size scale was dropped because no layer maps the
# `size` aesthetic (point size is a constant).
plot_expr2 <- ggplot()+
  geom_point(data=df_clone_CD4 %>% filter(n>2), inherit.aes = FALSE, aes(x=CD4, y=CD8A, color=IdentI),
             position =  position_jitter(width = 0.1, height = 0.1), na.rm = TRUE,
             size=0.25, alpha=0.25, stroke=0)+
  scale_color_manual(values = colors_umap_cl, guide="none")+
  scale_fill_manual(values = colors_umap_cl, guide="none")+
  # Quadrant boundaries: drawn from the same cut-offs that define CD4CD8 so the
  # lines cannot drift away from the gating if the limits are changed
  geom_hline(yintercept = lim_CD8, linetype="dashed", size=0.25)+
  geom_vline(xintercept = lim_CD4, linetype="dashed", size=0.25)+
  geom_text(data=df_clone_CD4_freq %>% filter(Expanded==TRUE), inherit.aes = FALSE, aes(x=CD4, y=CD8, label=Prop), size=2.5, alpha=1)+
  scale_x_continuous(breaks = c(0,0.5,1,1.5,2), limits=c(-0.5, 2.4))+
  scale_y_continuous(breaks = c(0,1,2,3), limits=c(-0.5, 4))+
  labs(x="<i>CD4</i> - RNA expression",
       y="<i>CD8</i> - RNA expression",
       title="Clone size > 2",
       tag = "C")+
  facet_wrap(~IdentI_new, nrow = 1, labeller = label_parsed)+
  mytheme_1+
  # element_textbox is used for the axis titles because they contain <i> markup
  theme(axis.title.x = element_textbox(size=7, halign = 0.5, margin = unit(units = "cm", c(0.1,0,0,0))),
        axis.title.y = element_textbox(size=7, orientation = "left-rotated", margin = unit(units = "cm", c(0,0,0.1,0))),
        panel.border = element_rect(size=0.4))

```

## Assemble plot
```{r assemble SF5}

# Top: correlation panels. Bottom: the two CD4/CD8 expression panels with a
# narrow spacer between them. Height ratio top:bottom is 1.6:1.
cor_plots_prime_all /
  wrap_plots(
    plot_expr1 + plot_spacer() + plot_expr2 +
      plot_layout(widths = c(1, 0.1, 1))
  ) +
  plot_layout(heights = c(1.6, 1))

#ggsave(width = 18.5, height = 12.5, units = "cm", filename = "SF5.pdf")

```

## TCR diversity
### Read
```{r TCR diversity read}

# Single cell T-cell receptor data read by immunarch package
# RNA–seq, epitope and TCR raw and processed data have been deposited in the Gene Expression Omnibus (GEO) under accession codes GSE252608 and GSE252455.
DF_immunarchTCR <- repLoad(list.files(path = "countMatrices", pattern = "TCRrep", full.names = TRUE))

# Keep only the part of each sample name before the first underscore.
# vapply() guarantees a character vector of the same length as the input
# (sapply() would silently return a list for empty input).
DF_immunarchTCR$meta$Sample <- vapply(
  strsplit(DF_immunarchTCR$meta$Sample, split = "_"),
  function(parts) parts[[1]],
  character(1)
)

# Name the repertoires after the shortened sample IDs
names(DF_immunarchTCR$data) <- DF_immunarchTCR$meta$Sample

```

### Plot
```{r TCR diversity plot, fig.height=3.5}

# One lollipop-style plot per patient showing the sizes of the largest
# clonotypes (those whose cumulative cell count stays below 10 % of all cells
# with a clonotype). Plots are stored in `plots`, named by PatientID.
plots <- list()
for(i in unique(DFtotal_5prime$PatientID)){
# Patient-specific seed derived from the digits in the PatientID, so the random
# x-axis order drawn by sample() below is reproducible per patient
set.seed(as.numeric(substr(i, 4, 7)) + 5)
plots[[i]] <- 
  DFtotal_5prime %>% filter(PatientID==i) %>% 
  select(Barcode_full, PatientID, raw_clonotype_id) %>% 
  distinct() %>% 
  dplyr::count(raw_clonotype_id) %>% 
  drop_na() %>% 
  mutate(Prop=n/sum(n)) %>% 
  dplyr::arrange(-n) %>% 
  mutate(Cumsum=cumsum(n)) %>% 
  mutate(Max=sum(n)) %>% 
  filter(Cumsum < 0.1*Max) %>% 
  # seq_len() instead of 1:nrow(.): yields an empty id vector (not c(1, 0))
  # when no clonotype passes the filter above
  mutate(new_id=as.character(seq_len(nrow(.)))) %>% 
  mutate(PatientID=i) %>% 
  add_entity() %>% 
  mutate(PatientID_new=paste0(PatientID, " (", Entity, ")")) %>% 
  ggplot(aes(x=new_id, y=n))+
  geom_segment(aes(x=new_id, xend=new_id, y=0, yend=n), size=0.2)+
  facet_wrap(~PatientID_new)+
  xlab("Clonotype ID")+
  scale_y_continuous(name =  "Clonotype size")+
  # 195 fixed x positions in random order
  # NOTE(review): 195 is hard-coded; presumably an upper bound on the number of
  # clonotypes shown per patient - confirm
  scale_x_discrete(limits=sample(as.character(1:195), 195), name="Unique clonotype ID")+
  mytheme_1+
  theme(legend.position = "none",
        panel.border = element_rect(size=0.4),
        strip.background = element_rect(colour = NA),
        axis.title.x = element_text(vjust = 5),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank())

}

# Rarefaction output of immunarch::repDiversity() for all repertoires, with the
# sample column renamed to PatientID, entity annotation added by the project
# helper add_entity(), and two patients excluded.
imm_raref <- DF_immunarchTCR$data %>%
  repDiversity("raref", .verbose = FALSE) %>%
  rename(PatientID = Sample) %>%
  add_entity() %>%
  filter(!PatientID %in% c("LN0256", "LN0367"))

# Label anchor per patient for the rarefaction plot: start from the row with
# the largest Size of each PatientID, then shift Size (x) and/or Mean (y) by
# hand for individual patients. The overrides are applied sequentially, so a
# later line sees the values set by earlier lines. `tops` is used as the data
# for the geom_label()/geom_text() layers of plot_rare.
tops <- imm_raref %>% 
  group_by(PatientID) %>% 
  top_n(n=1, Size) %>% 
  #mutate(Mean=ifelse(PatientID=="LN0302", 5.0, Mean)) %>% 
  mutate(Size=ifelse(PatientID=="LN0132", 88, Size)) %>% 
  mutate(Size=ifelse(PatientID=="LN0110", 72, Size)) %>% 
  mutate(Mean=ifelse(PatientID=="LN0110", 19, Mean)) %>% 
  mutate(Size=ifelse(PatientID=="LN0259", 90, Size)) %>% 
  mutate(Mean=ifelse(PatientID=="LN0259", 20.75, Mean)) %>% 
  mutate(Size=ifelse(PatientID=="LN0264", 55, Size)) %>% 
  mutate(Mean=ifelse(PatientID=="LN0264", 13.5, Mean)) %>% 
  mutate(Size=ifelse(PatientID=="LN0217", Size-10, Size)) %>% 
  mutate(Size=ifelse(PatientID=="LN0144", 145, Size)) %>% 
  mutate(Mean=ifelse(PatientID=="LN0193", Mean+1, Mean)) %>% 
  mutate(Size=ifelse(PatientID=="LN0198", 115, Size)) %>% 
  mutate(Mean=ifelse(PatientID=="LN0198", Mean+0.2, Mean)) %>% 
  mutate(Mean=ifelse(PatientID=="LN0278", Mean+1, Mean)) %>% 
  mutate(Mean=ifelse(PatientID=="LN0144", Mean+0.75, Mean)) %>% 
  mutate(Mean=ifelse(PatientID=="LN0302", Mean-1.5, Mean)) %>% 
  mutate(Size=ifelse(PatientID=="LN0302", 50, Size)) %>% 
  filter(!PatientID %in% c("LN0417", "LN0104", "LN0249")) # Manual labelling in Powerpoint

# Panel D: one rarefaction curve per patient, coloured by entity. Patient IDs
# are written at the positions stored in `tops`; a borderless white label is
# drawn underneath each ID so the text stays readable on top of the curves.
plot_rare <- ggplot(imm_raref, aes(x = Size, y = Mean, group = PatientID, color = Entity)) +
  geom_line(size = 0.25, linetype = "solid") +
  geom_label(
    data = tops, aes(x = Size, y = Mean, label = PatientID),
    size = 2.25, fill = "white", color = "white", show.legend = FALSE,
    label.padding = unit(0.02, "cm"), label.size = 0
  ) +
  geom_text(
    data = tops, aes(x = Size, y = Mean, label = PatientID),
    size = 2, show.legend = FALSE
  ) +
  guides(color = guide_legend(override.aes = list(size = 0.35, linetype = "solid"))) +
  scale_color_brewer(palette = "Paired", limits = c("DLBCL", "MCL", "FL", "MZL", "rLN")) +
  labs(x = "Clone size", y = "Estimated diversity", tag = "D") +
  #xlim(0, 70)+
  mytheme_1 +
  theme(
    #legend.position = "top",
    legend.title = element_blank(),
    legend.spacing.x = unit(0.05, "cm"),
    legend.box.margin = unit(c(0, 0, -0.95, 0), "cm"),
    legend.key.height = unit(0.36, "cm"),
    legend.key.width = unit(0.5, "cm"),
    panel.grid = element_blank(),
    panel.border = element_rect(size = 0.4)
  )

# Left: rarefaction curves (D). Right: clonotype-size plots of LN0132 (E, x-axis
# title removed) stacked on LN0217; labs(tag = "F") is added to the stacked
# patchwork after LN0217.
plot_rare +
  wrap_plots(
    (plots$LN0132 + theme(axis.title.x = element_blank()) + labs(tag = "E")) /
      plots$LN0217 +
      labs(tag = "F")
  ) +
  plot_layout(widths = c(1, 1.7))

#ggsave(width = 18, height = 8, units = "cm", filename = "SF5.pdf")

```

# Supplementary Figure 6
## T-cell exhaustion UMAP
### Calculate exhaustion module
```{r calculate exhaustion module}

# Row names of ttox cells with Pseudotime >= 24; these are treated as exhausted
exhausted_cells <- ttox@meta.data %>%
  filter(Pseudotime >= 24) %>%
  rownames()

# Flag the same cells in the combined object (matched on Barcode_full) and use
# the flag as the active identity
Combined_T@meta.data$Exhausted <- Combined_T@meta.data$Barcode_full %in% exhausted_cells
Idents(Combined_T) <- "Exhausted"

# ROC-based markers of flagged vs all other cells on the RNA assay; each gene
# gets a "+" suffix when myAUC > 0.5 and "-" otherwise
module_exhausted <- Combined_T %>%
  FindMarkers(ident.1 = "TRUE", assay = "integratedRNA", test.use = "roc") %>%
  rownames_to_column("Gene") %>%
  mutate(
    Module = paste0(Gene, ifelse(myAUC > 0.5, "+", "-")),
    Assay = "RNA"
  )

# Same comparison on the ADT (protein) assay
module_exhausted_prot <- Combined_T %>%
  FindMarkers(ident.1 = "TRUE", assay = "integratedADT", test.use = "roc") %>%
  rownames_to_column("Gene") %>%
  mutate(
    Module = paste0(Gene, ifelse(myAUC > 0.5, "+", "-")),
    Assay = "Protein"
  )

#module_exhausted --> Supplementary Table 5
#WriteXLS::WriteXLS(rbind(module_exhausted, module_exhausted_prot), ExcelFileName = "SuppTable5.xlsx")

# Only the RNA module is scored; UCell stores the result as "exhausted_UCell"
# (that column is fetched in the plotting chunk)
module_exhausted <- list(exhausted = module_exhausted$Module)

Combined_T <- UCell::AddModuleScore_UCell(Combined_T, features = module_exhausted)

```

### Plot exhaustion module
```{r plot exhaustion module}

# Panel A: wnnUMAP of a reproducible 20 % subsample of cells, filled by the
# exhausted_UCell score and rasterised at 600 dpi.
set.seed(1)
plot_exh <- Combined_T %>%
  FetchData(vars = c("wnnUMAP_1", "wnnUMAP_2", "exhausted_UCell")) %>%
  sample_frac(0.2) %>%
  ggplot(aes(x = wnnUMAP_1, y = wnnUMAP_2, fill = exhausted_UCell)) +
  ggrastr::geom_point_rast(
    shape = 21, stroke = 0, size = 0.25, alpha = 0.75, raster.dpi = 600
  ) +
  # colours 2 to 9 of the 9-class YlOrRd palette
  scale_fill_gradientn(
    name = "Score",
    colours = brewer.pal(n = 9, name = "YlOrRd")[2:9]
  ) +
  labs(
    x = "wnnUMAP-1", y = "wnnUMAP-2",
    title = "Exhaustion signature", tag = "A"
  ) +
  mytheme_1 +
  theme(
    plot.title = element_text(face = "plain", vjust = -0.5),
    panel.border = element_rect(size = 0.2),
    panel.background = element_rect(fill = NA),
    axis.title.x = element_text(margin = unit(c(-0.75, 0, 0, 0), "cm")),
    legend.position = "right",
    legend.title = element_text(size = 6),
    legend.text = element_text(size = 6),
    legend.key.height = unit(0.2, "cm"),
    legend.key.width = unit(0.15, "cm"),
    #legend.margin = margin(c(0,0,0,-0.35), unit = "cm"),
    legend.box.margin = unit(c(0, 0, 0, -0.35), "cm"),
    legend.box.background = element_rect(fill = NA, color = NA),
    legend.background = element_rect(fill = NA, color = NA)
  )

```

## Association with cell-of-origin in DLBCL
```{r SF6 part 1}

### Schmitz
# Panel B: Exhausted/Absolute ratio by Subtype (Schmitz tables); a global test
# label plus a pairwise ABC vs GCB comparison from stat_compare_means()
p1 <- df_surv_schmitz %>%
  left_join(df_ttoxcompl_schmitz) %>%
  drop_na() %>%
  ggplot(aes(x = Subtype, y = Exhausted / Absolute, group = Subtype)) +
  geom_boxplot(width = 0.4, size = 0.25, outlier.alpha = 0) +
  stat_compare_means(aes(label = paste0("p = ", ..p.format..)), size = 2.25, vjust = 1) +
  stat_compare_means(comparisons = list(c("ABC", "GCB")), size = 2.25) +
  scale_y_continuous(name = "Exhausted T-cells", limits = c(0, 0.65)) +
  #ggtitle("Schmitz et al. 2018")+
  labs(x = "Cell-of-origin", tag = "B") +
  mytheme_1 +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    #axis.title.x = element_text(margin = unit(units = "cm", c(-0.1,0,0,0))),
    plot.title = element_text(face = "plain", vjust = -0.5)
  )

### Chapuy
# Panel C: Exhausted/Absolute ratio by Subtype (Chapuy tables) with the global
# test label from stat_compare_means()
p2 <- df_surv_chapuy %>%
  left_join(df_ttoxcompl_chapuy) %>%
  drop_na() %>%
  ggplot(aes(x = Subtype, y = Exhausted / Absolute, group = Subtype)) +
  geom_boxplot(width = 0.4, size = 0.25, outlier.alpha = 0) +
  stat_compare_means(aes(label = paste0("p = ", ..p.format..)), size = 2.25, vjust = 1) +
  scale_y_continuous(name = "Exhausted T-cells", limits = c(0, 0.65)) +
  #ggtitle("Chapuy et al. 2018")+
  labs(x = "Cell-of-origin", tag = "C") +
  mytheme_1 +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    #axis.title.x = element_text(margin = unit(units = "cm", c(-0.1,0,0,0))),
    plot.title = element_text(face = "plain", vjust = -0.5)
  )

```

## Association with genetic subtype
```{r SF6 part 2}

### Schmitz
# Exhausted/Absolute ratio by GenSubtype (Schmitz tables). The y-axis title,
# text and ticks are blanked; in the assembled figure this panel is placed
# directly after p1, which uses the same y limits.
p3 <- df_surv_schmitz %>%
  left_join(df_ttoxcompl_schmitz) %>%
  drop_na() %>%
  ggplot(aes(x = GenSubtype, y = Exhausted / Absolute, group = GenSubtype)) +
  geom_boxplot(width = 0.4, size = 0.25, outlier.alpha = 0) +
  stat_compare_means(aes(label = paste0("p = ", ..p.format..)), size = 2.25, vjust = 1) +
  scale_y_continuous(name = "Exhausted T-cells", limits = c(0, 0.65)) +
  #ggtitle("Schmitz et al. 2018")+
  xlab("Cluster") +
  mytheme_1 +
  theme(
    plot.title = element_text(face = "plain", vjust = -0.5),
    axis.text.x = element_text(angle = 45, hjust = 1),
    #axis.title.x = element_text(margin = unit(units = "cm", c(-0.1,0,0,0))),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )

### Chapuy
# Exhausted/Absolute ratio by Cluster (Chapuy tables; cluster IDs prefixed with
# "C"). The y-axis title, text and ticks are blanked; in the assembled figure
# this panel is placed directly after p2, which uses the same y limits.
p4 <- df_surv_chapuy %>%
  left_join(df_ttoxcompl_chapuy) %>%
  drop_na() %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  ggplot(aes(x = Cluster, y = Exhausted / Absolute, group = Cluster)) +
  geom_boxplot(width = 0.4, size = 0.25, outlier.alpha = 0) +
  stat_compare_means(aes(label = paste0("p = ", ..p.format..)), size = 2.25, vjust = 1) +
  scale_y_continuous(name = "Exhausted T-cells", limits = c(0, 0.65)) +
  #ggtitle("Chapuy et al. 2018")+
  xlab("Cluster") +
  mytheme_1 +
  theme(
    plot.title = element_text(face = "plain", vjust = -0.5),
    axis.text.x = element_text(angle = 45, hjust = 1),
    #axis.title.x = element_text(margin = unit(units = "cm", c(-0.65,0,0,0))),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )

```

## Assemble plot
### Part 1
```{r assemble plot SF6 part 1, fig.height=2.5}

# Single row: UMAP, then the Schmitz pair (p1, p3), then the Chapuy pair (p2, p4)
plot_exh + p1 + p3 + p2 + p4 +
  plot_layout(nrow = 1, widths = c(1.4, 0.75, 1, 0.75, 1))

#ggsave(width = 19, height = 6, units = "cm", filename = "SF6_p1.pdf")

```


## Associations with genetic features
### Mutations
```{r mutations SF6}

# Index ranges (into the unique gene names) that split the mutation heatmap
# into two facet columns
pairs <- list(1:42, 43:85)

# Mean Exhausted/Absolute ratio per gene and mutation status (value 0 = wt,
# 2 = mut). Means above 0.15 are capped at 0.15, and every gene is assigned to
# "group1" or "group2" according to its position among the unique names.
df_mut <- df_snvs_chapuy %>%
  filter(Description == "Mutation", value %in% c(0, 2)) %>%
  mutate(value = factor(value, levels = c("0", "2"), labels = c("wt", "mut"))) %>%
  group_by(Name, value) %>%
  dplyr::summarise(MeanExhausted = mean(Exhausted / Absolute)) %>%
  mutate(
    MeanExhausted = ifelse(MeanExhausted > 0.15, 0.15, MeanExhausted),
    # `.` is the whole piped table, so unique(.$Name) covers all genes
    Group = case_when(
      Name %in% unique(.$Name)[pairs[[1]]] ~ "group1",
      Name %in% unique(.$Name)[pairs[[2]]] ~ "group2"
    )
  )

# Per-gene comparison of the Exhausted/Absolute ratio between wt (0) and
# mutated (2) samples; rows with p < 0.05 are kept (the filter uses the `p`
# column returned by compare_means()).
# The facet group of each gene is attached from df_mut. df_mut holds one row
# per gene AND mutation status, so the lookup is reduced to one row per gene
# first; otherwise every p-value row is duplicated by the join and its label
# is drawn twice on the heatmap.
p.values <- df_snvs_chapuy %>% 
  filter(Description=="Mutation", value %in% c(0,2)) %>% 
  mutate(Exhausted=Exhausted/Absolute) %>% 
  compare_means(formula = Exhausted ~ value, group.by = "Name", p.adjust.method = "BH") %>% 
  filter(p<0.05) %>% 
  left_join(., df_mut %>% ungroup() %>% distinct(Name, Group), by="Name")

# Panel E: heatmap of MeanExhausted per gene (rows) and mutation status
# (columns), split into two facet columns by Group. White horizontal segments
# separate the rows; p-values from `p.values` are printed at x = 2.8, which
# lies right of the tiles and is visible because clipping is switched off.
p5 <- ggplot(df_mut, aes(x = value, y = Name, fill = MeanExhausted)) +
  geom_tile() +
  scale_fill_gradientn(
    name = "Expression",
    colours = brewer.pal(5, "PRGn"),
    limits = c(0, 0.17)
  ) +
  geom_vline(xintercept = 1.5, size = 0.25, color = "black") +
  geom_segment(
    data = data.frame(y = seq(1.5, 42.5, by = 1)), inherit.aes = FALSE,
    aes(x = 0.05, xend = 2.8, y = y, yend = y),
    size = 0.25, color = "white"
  ) +
  geom_text(
    data = p.values, inherit.aes = FALSE,
    aes(x = 2.8, y = Name, label = round(p, 2)),
    size = 2.5
  ) +
  facet_wrap(~Group, ncol = 3, scales = "free_y") +
  coord_cartesian(clip = "off") +
  labs(title = "Mutations", tag = "E") +
  mytheme_1 +
  theme(
    strip.background = element_blank(),
    strip.text = element_blank(),
    panel.border = element_blank(),
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 7),
    axis.text.y = element_text(size = 7, margin = unit(c(0, -0.4, 0, 0), "cm")),
    plot.margin = unit(c(0, 1.25, 0, 0), "cm")
  )

```

### Copy number gain
```{r copy number SF6}

# Mean Exhausted/Absolute ratio per region and copy-number status; values 1 and
# 2 are both labelled "gain"
df_gain <- df_snvs_chapuy %>%
  filter(Description == "CN gain") %>%
  mutate(
    value = factor(value, levels = c("0", "1", "2"), labels = c("wt", "gain", "gain"))
  ) %>%
  group_by(Name, value) %>%
  dplyr::summarise(MeanExhausted = mean(Exhausted / Absolute))

# Per-region wt vs gain comparison; rows with p < 0.05 are kept.
# This overwrites the `p.values` object created for the mutation panel.
p.values <- df_snvs_chapuy %>%
  filter(Description == "CN gain") %>%
  mutate(
    Exhausted = Exhausted / Absolute,
    value = factor(value, levels = c("0", "1", "2"), labels = c("wt", "gain", "gain"))
  ) %>%
  compare_means(formula = Exhausted ~ value, group.by = "Name", p.adjust.method = "BH") %>%
  filter(p < 0.05)

# Panel F: heatmap of MeanExhausted per region (rows) and copy-number gain
# status (columns). White segments separate the rows; p-values from `p.values`
# are printed at x = 2.8, right of the tiles (clipping is switched off).
p6 <- ggplot(df_gain, aes(x = value, y = Name, fill = MeanExhausted)) +
  geom_tile() +
  scale_fill_gradientn(
    name = "Expression",
    colours = brewer.pal(5, "PRGn"),
    limits = c(0, 0.17)
  ) +
  geom_vline(xintercept = 1.5, size = 0.25, color = "black") +
  geom_segment(
    data = data.frame(y = seq(1.5, 31.5, by = 1)), inherit.aes = FALSE,
    aes(x = 0.25, xend = 2.8, y = y, yend = y),
    size = 0.25, color = "white"
  ) +
  geom_text(
    data = p.values, inherit.aes = FALSE,
    aes(x = 2.8, y = Name, label = round(p, 2)),
    size = 2.5
  ) +
  coord_cartesian(clip = "off") +
  labs(title = "CN gain", tag = "F") +
  mytheme_1 +
  theme(
    strip.background = element_blank(),
    strip.text = element_blank(),
    panel.border = element_blank(),
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 7),
    axis.text.y = element_text(size = 7, margin = unit(c(0, -0.15, 0, 0), "cm")),
    plot.margin = unit(c(0, 1.25, 0, 0), "cm")
  )

```

### Copy number loss
```{r copy number loss SF6}

# Mean Exhausted/Absolute ratio per region and copy-number status; values 1 and
# 2 are both labelled "loss"
df_loss <- df_snvs_chapuy %>%
  filter(Description == "CN loss") %>%
  mutate(
    value = factor(value, levels = c("0", "1", "2"), labels = c("wt", "loss", "loss"))
  ) %>%
  group_by(Name, value) %>%
  dplyr::summarise(MeanExhausted = mean(Exhausted / Absolute))

# Per-region wt vs loss comparison; rows with p < 0.05 are kept.
# p-values below 0.005 are replaced by 0.0051 so that the label, which is
# rounded to two digits in the plot, reads 0.01 instead of 0.
# This overwrites the `p.values` object created for the previous panel.
p.values <- df_snvs_chapuy %>%
  filter(Description == "CN loss") %>%
  mutate(
    Exhausted = Exhausted / Absolute,
    value = factor(value, levels = c("0", "1", "2"), labels = c("wt", "loss", "loss"))
  ) %>%
  compare_means(formula = Exhausted ~ value, group.by = "Name", p.adjust.method = "BH") %>%
  filter(p < 0.05) %>%
  mutate(p = ifelse(p < 0.005, 0.0051, p))

# Panel G: heatmap of MeanExhausted per region (rows) and copy-number loss
# status (columns). White segments separate the rows; p-values from `p.values`
# are printed at x = 2.8, right of the tiles (clipping is switched off).
# Unlike the other heatmap panels, no plot.margin is set here.
p7 <- ggplot(df_loss, aes(x = value, y = Name, fill = MeanExhausted)) +
  geom_tile() +
  scale_fill_gradientn(
    name = "Expression",
    colours = brewer.pal(5, "PRGn"),
    limits = c(0, 0.17)
  ) +
  geom_vline(xintercept = 1.5, size = 0.25, color = "black") +
  geom_segment(
    data = data.frame(y = seq(1.5, 32.5, by = 1)), inherit.aes = FALSE,
    aes(x = 0.25, xend = 2.8, y = y, yend = y),
    size = 0.25, color = "white"
  ) +
  geom_text(
    data = p.values, inherit.aes = FALSE,
    aes(x = 2.8, y = Name, label = round(p, 2)),
    size = 2.5
  ) +
  coord_cartesian(clip = "off") +
  labs(title = "CN loss", tag = "G") +
  mytheme_1 +
  theme(
    strip.background = element_blank(),
    strip.text = element_blank(),
    panel.border = element_blank(),
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 7),
    axis.text.y = element_text(size = 7, margin = unit(c(0, -0.15, 0, 0), "cm"))
  )

```

### Structural variants
```{r structural variants SF6}

# Mean Exhausted/Absolute ratio per structural variant and status
# (value 0 = wt, 3 = mut)
df_struct <- df_snvs_chapuy %>%
  filter(Description == "SV") %>%
  mutate(value = factor(value, levels = c("0", "3"), labels = c("wt", "mut"))) %>%
  group_by(Name, value) %>%
  dplyr::summarise(MeanExhausted = mean(Exhausted / Absolute))

# Per-variant wt vs mut comparison; rows with p < 0.05 are kept.
# This overwrites the `p.values` object of the previous panel. The structural
# variant heatmap (p8) has no text layer that uses it.
p.values <- df_snvs_chapuy %>%
  filter(Description == "SV") %>%
  mutate(
    Exhausted = Exhausted / Absolute,
    value = factor(value, levels = c("0", "3"), labels = c("wt", "mut"))
  ) %>%
  compare_means(formula = Exhausted ~ value, group.by = "Name", p.adjust.method = "BH") %>%
  filter(p < 0.05)

# Panel H: heatmap of MeanExhausted per structural variant (rows) and status
# (columns). White segments separate the rows; no p-value labels are drawn.
p8 <- ggplot(df_struct, aes(x = value, y = Name, fill = MeanExhausted)) +
  geom_tile() +
  scale_fill_gradientn(
    name = "Expression",
    colours = brewer.pal(5, "PRGn"),
    limits = c(0, 0.17)
  ) +
  geom_vline(xintercept = 1.5, size = 0.25, color = "black") +
  geom_segment(
    data = data.frame(y = seq(1.5, 7.5, by = 1)), inherit.aes = FALSE,
    aes(x = 0.25, xend = 2.75, y = y, yend = y),
    size = 0.25, color = "white"
  ) +
  coord_cartesian(clip = "off") +
  labs(title = "Structural variants", tag = "H") +
  mytheme_1 +
  theme(
    strip.background = element_blank(),
    strip.text = element_blank(),
    panel.border = element_blank(),
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 7),
    axis.text.y = element_text(size = 7, margin = unit(c(0, -0.15, 0, 0), "cm")),
    plot.margin = unit(c(0, 1.25, 0, 0), "cm")
  )

```


### Part 2
```{r assemble plot SF6 part 2, fig.height=7}

# Three columns: mutations (p5) | CN gain over structural variants (p6 / p8) |
# CN loss over an empty spacer (p7 / spacer). Both stacked columns use the same
# 2 : 0.5 height split.
p5 +
  wrap_plots(p6 / p8 + plot_layout(heights = c(2, 0.5))) +
  (p7 / plot_spacer() + plot_layout(heights = c(2, 0.5))) +
  plot_layout(nrow = 1, widths = c(2.9, 1, 1))

#ggsave(width = 18, height = 14, units = "cm", filename = "SF6_p2.pdf")

```

### Part 3 (Legend)
```{r assemble plot SF6 part 3, fig.height=1}

# Draw the fill legend of p8 on its own, retitled "Exhausted T-cells", and save
# it. ggsave() has no `plot` argument here, so it writes the plot printed above.
as_ggplot(
  get_legend(
    p8 +
      guides(fill = guide_colorbar(nrow = 2, title = "Exhausted\nT-cells")) +
      theme(
        legend.position = "right",
        legend.key.height = unit(0.3, "cm"),
        legend.key.width = unit(0.3, "cm")
      )
  )
)

ggsave(width = 2, height = 2.4, units = "cm", filename = "SF6_legend.pdf")

```

# Supplementary Figure 7
Panel A was generated using FlowJo. 

## Flow cytometry: IKZF3
```{r, fig.height=3}

# Median of the `FoxP3+/IKZF3+` column among rLN samples; drawn as a dashed
# reference line in the plot below
med <- df_ikzf3 %>%
  filter(Entity == "rLN") %>%
  pull(`FoxP3+/IKZF3+`) %>%
  median()

# Comparison of every entity against rLN. The column is renamed first so it can
# be used in a formula; only comparisons with p < 0.05 are kept, rounded to
# three digits.
pvalues <- df_ikzf3 %>%
  rename(IKZF3 = `FoxP3+/IKZF3+`) %>%
  data.frame() %>%
  compare_means(data = ., formula = IKZF3 ~ Entity, ref.group = "rLN") %>%
  select(Entity = group2, p) %>%
  filter(p < 0.05) %>%
  mutate(p = round(p, 3))

# Number of samples (printed)
nrow(df_ikzf3)

# Box plot with beeswarm points of the `FoxP3+/IKZF3+` column per entity. The
# dashed grey line marks the rLN median (`med`); p-values of the significant
# comparisons against rLN (`pvalues`) are printed at y = 75.
plot_aiolos <- df_ikzf3 %>% 
  ggplot(aes(x=Entity,y=`FoxP3+/IKZF3+`))+
  geom_hline(yintercept = med, size=0.25, linetype="dashed", color="grey60")+
  geom_boxplot(width=0.5, outlier.alpha = 0, size=0.25)+
  ggbeeswarm::geom_beeswarm(size=0.75, shape=21, stroke=0.25, cex = 2.25, aes(fill=Entity))+
  # Scalar Y instead of c(75, 75): works for any number of significant
  # comparisons, not only when exactly two rows remain in `pvalues`
  geom_text(inherit.aes = FALSE, data = pvalues %>% mutate(Y=75),
            aes(x=Entity, y=Y, label=p), size=2.5, check_overlap = TRUE)+
  scale_fill_brewer(palette = "Paired", limits=c("DLBCL", "MCL", "FL", "MZL", "rLN"))+
  scale_y_continuous(limits = c(0,80), name=expression('% IKZF3'^'+'~'of FoxP3'^'+'))+
  # rLN is shown first on the x-axis; the fill scale keeps its own entity order
  scale_x_discrete(limits=c("rLN", "DLBCL", "MCL", "FL", "MZL"))+
  ggtitle("Flow cytometry")+
  mytheme_1+
  theme(legend.position = "none",
        strip.background = element_rect(color=NA),
        axis.title.x = element_blank(),
        plot.title = element_text(face = "plain", size=7),
        panel.border = element_rect(size=0.5),
        axis.text.x = element_text(angle=45, hjust = 1, size=7),
        axis.text.y = element_text(size=7),
        axis.title.y = element_text(size=7),
        panel.background = element_rect(fill=NA),
        plot.margin = unit(c(0,0.25,0,0.25), "cm"))

# Empty area on the left, 3x as wide as the IKZF3 plot on the right (the
# section text states that panel A was generated using FlowJo).
# ggsave() has no `plot` argument, so it writes the plot printed here.
plot_spacer() + plot_aiolos +
  plot_layout(widths = c(3, 1))

ggsave(width = 19, height = 5.7, units = "cm", filename = "SF7.pdf")

```

## Treg clonotypes
```{r treg clonotypes SF7, fig.height=3}

# Self-join of all cells that have a clonotype: every pair (a, b) of distinct
# cells sharing the same columns of both sides (PatientID and raw_clonotype_id)
# and lying in different clusters. Each pair appears in both directions.
df_clonotypes_shared <-
  left_join(
    DFtotal_5prime %>%
      filter(!is.na(raw_clonotype_id)) %>%
      select(
        Barcode_fulla = Barcode_full, PatientID,
        refUMAP_1a = refUMAP_1, refUMAP_2a = refUMAP_2,
        IdentIa = IdentI, raw_clonotype_id
      ) %>%
      distinct(),
    DFtotal_5prime %>%
      filter(!is.na(raw_clonotype_id)) %>%
      select(
        Barcode_fullb = Barcode_full, PatientID,
        refUMAP_1b = refUMAP_1, refUMAP_2b = refUMAP_2,
        IdentIb = IdentI, raw_clonotype_id
      ) %>%
      distinct()
  ) %>%
  filter(Barcode_fulla != Barcode_fullb, IdentIa != IdentIb)

# One reference-UMAP panel per cluster in c(8, 13, 15): all cells are drawn
# faintly, cells of cluster i more opaque, and curves connect cells of cluster
# i with cells of other clusters that share a clonotype within the same
# patient. Panels are stored at numeric positions 8, 13 and 15 of `treg_shared`.
treg_shared <- list()
for(i in c(8,13,15)){

# Clonotype pairs whose "b" cell lies in cluster i
df_subset <- 
  df_clonotypes_shared %>% 
  add_entity() %>% 
  filter(IdentIb==i) 

treg_shared[[i]] <- ggplot()+
  # Background cells: alpha 0.4 for cluster i, 0.04 for every other cluster
  geom_point_rast(data=DFtotal_5prime,
                  aes(x=refUMAP_1, y=refUMAP_2, fill=IdentI), size=0.25, 
                  alpha=ifelse(DFtotal_5prime$IdentI==i, 0.4, 0.04), stroke=0, shape=21)+
  # Curves from the partner cell (a) to the cell in cluster i (b), coloured by
  # the partner's cluster
  geom_curve(data= df_subset, 
             aes(x=refUMAP_1a, y=refUMAP_2a, xend=refUMAP_1b, yend=refUMAP_2b, color=IdentIa,
                 group=paste(raw_clonotype_id, PatientID)), curvature = -0.4, size=0.15, alpha=0.4)+
  scale_color_manual(values = colors_umap_cl, limits=factor(cluster_order),
                    labels=unlist(labels_cl), guide="none")+
  scale_fill_manual(values = colors_umap_cl, limits=factor(cluster_order), guide="none",
                    labels=unlist(labels_cl))+
  # guides() is added after the scales and requests a fill legend although the
  # fill scale above was created with guide="none"
  guides(fill=guide_legend(nrow = 7, byrow = F, override.aes = list(size=1.75, stroke=0, shape=21, alpha=1, color="white")))+
  coord_cartesian(clip = "off")+
  xlab("refUMAP-1")+
  ylab("refUMAP-2")+
  mytheme_1+
  theme(legend.position = "right",
        panel.border = element_rect(size=0.25),
        # textbox element because the titles set below contain HTML markup
        plot.title = element_textbox_simple(size=7, width = NULL, face = "plain",
                                            padding = margin(1.25, 0, 1, 0),
                                            lineheight = 1.25,
                                            halign=0.5),
        legend.text = element_text(size=7),
        legend.spacing.x = unit("cm", x = 0.13),
        axis.title.x = element_text(size=7),
        axis.title.y = element_text(size=7),
        axis.text = element_text(size=7),
        legend.spacing.y = unit("cm", x = 0.001),
        legend.key.width = unit("cm", x = 0.05),
        legend.key.height = unit("cm", x = 0.5),
        legend.box.margin = margin(unit = "cm",c(0,-0.35,0,-1)),
        legend.title = element_blank())

# Cluster-specific title; the panel for cluster 8 also gets the tag "C" and
# keeps its y-axis
if(i==8)
  treg_shared[[i]] <- treg_shared[[i]]+
  labs(title="Paired clonotypes of <span style='color:#C6DBEF'>T<sub>REG</sub> CM<sub>1</sub></span>",
       tag = "C")
  
# The panels for clusters 13 and 15 additionally drop the y-axis ticks, title
# and text
if(i==13)
 treg_shared[[i]] <- treg_shared[[i]]+
 labs(title="Paired clonotypes of <span style='color:#6BAED6'>T<sub>REG</sub> CM<sub>2</sub></span>")+
  theme(axis.ticks.y = element_blank(),
        axis.title.y = element_blank(),
        axis.text.y = element_blank())

if(i==15)
 treg_shared[[i]] <- treg_shared[[i]]+
 labs(title="Paired clonotypes of <span style='color:#2171B5'>T<sub>REG</sub> EM<sub>1</sub></span>")+
    theme(axis.ticks.y = element_blank(),
        axis.title.y = element_blank(),
        axis.text.y = element_blank())
  
}

# The three clonotype panels side by side with one collected legend
plot_treg <-
  treg_shared[[8]] + treg_shared[[13]] + treg_shared[[15]] +
  plot_layout(guides = "collect")
#plot_treg
```

## Survival analysis
```{r survival plots}

# Stored survival plot for T_FH (panel D). The legend is moved inside the
# panel, its background is removed and its text colour is set to NA; the two
# group labels are then drawn as text grobs with subscript formatting.
kmplot_tfh <- readRDS("data/SurvPlot_Tfh_Gallium.rds")

kmplot_tfh$plot$plot_env$legend <- c(0.32, 0.2)
kmplot_tfh$plot$theme$legend.position <- c(0.32, 0.2)
kmplot_tfh$plot$theme$legend.background$fill <- NA
kmplot_tfh$plot$theme$legend.text$colour <- NA
kmplot_tfh$plot$theme$legend.text$size <- 7

kmplot_tfh$plot <- kmplot_tfh$plot +
  annotation_custom(
    grob = textGrob(
      label = expression('T'[FH]~'High'),
      gp = gpar(cex = 0.5), x = 0.36, y = 0.215
    )
  ) +
  annotation_custom(
    grob = textGrob(
      label = expression('T'[FH]~'Low'),
      gp = gpar(cex = 0.5), x = 0.36, y = 0.125
    )
  ) +
  labs(tag = "D")

# Stored survival plot for T_REG EM2 (panel E). The legend is moved inside the
# panel, its background is removed and its text colour is set to NA; the two
# group labels are then drawn as text grobs with subscript formatting.
kmplot_treg <- readRDS("data/SurvPlot_TregEff2_Gallium.rds")
kmplot_treg$plot$plot_env$legend <- c(0.32, 0.2)
kmplot_treg$plot$theme$legend.position <- c(0.32, 0.2)
kmplot_treg$plot$theme$legend.background$fill <- NA
kmplot_treg$plot$theme$legend.text$colour <- NA
# Fixed: this line used to modify kmplot_tfh (copy-paste slip), so the legend
# text size of the T_REG plot was never set
kmplot_treg$plot$theme$legend.text$size <- 7
kmplot_treg$plot <- kmplot_treg$plot+annotation_custom(grob = textGrob(label = expression('T'[REG]~'EM'[2]~'High'), gp = gpar(cex=0.5), x=0.36, y=0.215))+
  annotation_custom(grob = textGrob(label = expression('T'[REG]~'EM'[2]~'Low'), gp = gpar(cex=0.5), x=0.36, y=0.125))+
  labs(tag = "E")


```

## Assemble plot
```{r}

# Top row: clonotype panels. Bottom row: the two survival plots followed by an
# empty spacer. Height ratio top:bottom is 1.2:1.
plot_treg /
  wrap_plots(kmplot_tfh$plot + kmplot_treg$plot + plot_spacer()) +
  plot_layout(heights = c(1.2, 1))

#ggsave(width = 18, height = 11, units = "cm", filename = "SF7.pdf")

```

# Supplementary Figure 8
## Dendrogram T-cells
```{r dendrogram}

# Three-level hierarchy of T-cell subsets, one row per leaf (level3).
# Every name has the form "<prefix>_<plotmath label>"; only the part after the
# underscore is displayed, the prefix merely keeps names unique.
data <- data.frame(
  level1 = "_Tcells",
  level2 = c(
    "_'T'[Pr]",
    "_'T'[H]", "_'T'[H]",
    "_'T'[FH]",
    "_'T'[REG]",
    "_'T'[TOX]", "_'T'[TOX]", "_'T'[TOX]"
  ),
  level3 = c(
    "_'T'[Pr]",
    "TH_'CD4'^'+'*' Naive'",
    "TH_'non-Naive (CM'[1]*' + CM'[2]*')'",
    "_'T'[FH]",
    "_'T'[REG]",
    "TTOX_'CD8'^'+'*' Naive'",
    "TTOX_'non-Naive (EM'[1]*' + EM'[2]*')'",
    "TTOX_'   Exhausted (EM'[3]*')'"
  )
)

# Edge list: level1 -> level2 and level2 -> level3, duplicates removed
edges_level1_2 <- rename(unique(select(data, level1, level2)), from = level1, to = level2)
edges_level2_3 <- rename(unique(select(data, level2, level3)), from = level2, to = level3)
edge_list <- rbind(edges_level1_2, edges_level2_3)

# Vertex table: one row per distinct node name, in order of first appearance.
# `cluster` is assigned by position (11 values for 11 distinct names);
# `label` is the text after the underscore in `name`.
vert <- data.frame(name = unique(c(data$level1, data$level2, data$level3)))
vert$cluster <- c(NA, "14", "TH", "6", "TREG", "TTOX", "1", "2", "12", "3", "5")
vert$label <- vapply(
  strsplit(vert$name, split = "_"),
  function(parts) parts[[2]],
  character(1)
)

# Build the graph object used for the dendrogram
mygraph_codex <- graph_from_data_frame(edge_list, vertices = vert)

# Small CODEX dendrogram: tree layout of mygraph_codex with plotmath node labels.
ggraph(mygraph_codex, layout = 'tree', circular = FALSE) + 
  geom_edge_diagonal(strength = 1.4, edge_width=0.25)+
  # Background boxes behind the labels: the label text itself is invisible (color = NA);
  # `fill` is given per node by position (11 values) -- the 5th node gets a black box,
  # all others white. NOTE(review): relies on the node order of the graph; confirm which
  # node is 5th.
  geom_node_label(aes(label=label), 
                  parse = T, nudge_y=0.11, label.padding =  unit(units = "cm", 0.12),
                  size=2.35, alpha=1, 
                  fill=c(rep("white", 4), "black", rep("white", 6)), vjust=1, color=NA,
                  label.size = 0, label.r = unit(units = "cm", 0))+
  # Visible label text, coloured by `cluster`; `alpha` is also positional (11 values):
  # node 1 and nodes 7-11 are fully transparent, nodes 2-6 are drawn.
  geom_node_text(aes(label=label, color=cluster), 
                 vjust=1, nudge_y=0.05, 
                 parse = T, 
                 alpha=c(0,rep(1,5),rep(0,5)),
                 size=2.35)+
  scale_color_manual(values = colors_dendrogramm_codex)+
  # clip = "off" lets labels extend beyond the panel area
  coord_cartesian(clip = "off")+
  ggtitle("T-cell subsets \nidentified by mIF")+
  theme_void()+
  theme(legend.position = "none")+
  theme(plot.margin = unit(c(-0.5,0.35,0.25,0.25), units = "cm"),
        plot.title = element_text(hjust = 0.4, vjust=-7.5, size=7))

#ggsave(filename = "Figure8_p2.pdf", width = 12.6, height = 4.25, units = "cm")

```

## Handle data
```{r handle data SF8}
  
# Read CODEX expression data (column V1 of the CSV is used as the cell identifier)
# Available at BioStudies database (https://www.ebi.ac.uk/biostudies/) under accession number S-BIAD565
codex_expression <- rename(
  tibble(data.table::fread("data/cells_expression.csv")),
  unique_cell_id = V1
)

# Markers shown in the heatmap; the order here is reused as the x-axis order
proteins_selected <- c(
  "PAX5", "CD20", "CD79a", "CD21", "PDPN", "CD38", "MCT", "GRZB", "CD56", "CD163",
  "CD206", "CD11c", "CD15", "CD34", "CD31", "CD90", "Ki67", "PD1", "CXCR5", "ICOS",
  "CD69", "CD45RO", "TIM3", "LAG3", "CD57", "CD8", "CD45RA", "FOXP3", "CD4", "CD3"
)

# Mean expression per annotated cell type and marker, min-max scaled to [0, 1]
# within each marker across cell types. The scaling is done before the marker
# filter; since it is computed per marker, the filter does not change the values.
codex_meanExp <- codex_expression %>%
  left_join(select(codex_annotation, unique_cell_id, Merged_final)) %>%
  filter(!is.na(Merged_final)) %>%
  select(-unique_cell_id) %>%
  group_by(Merged_final) %>%
  summarise_all(mean) %>%
  pivot_longer(cols = -Merged_final, names_to = "Protein", values_to = "Expression") %>%
  group_by(Protein) %>%
  mutate(Expression = (Expression - min(Expression)) / diff(range(Expression))) %>%
  filter(Protein %in% proteins_selected)

```

## Markers and cell types
```{r plot markers SF8}

# Heatmap (panel C) of scaled mean marker expression: markers on x, cell types on y.
p2 <- codex_meanExp %>% 
  ggplot(aes(x=Protein, y=Merged_final, fill=Expression))+
  geom_tile()+
  scale_fill_gradientn(name="Expression", colours = brewer.pal(5, "GnBu"), limits=c(0,1), breaks=c(0,0.5,1))+
  # White separator lines between tiles, placed at half-integer positions.
  # NOTE(review): the vertical sequence runs to 32.5 although proteins_selected
  # has 30 entries -- confirm the extra positions are harmless.
  geom_hline(yintercept = seq(1.5, 17.5, 1), size=0.25, color="white")+
  geom_vline(xintercept = seq(1.5, 32.5, 1), size=0.25, color="white")+
  # Axis order is fixed explicitly: x follows proteins_selected, y follows the list below.
  scale_x_discrete( expand = c(0,0), limits=c(proteins_selected))+
  scale_y_discrete(expand = c(0,0), limits=c("B", "FDC", "PC", "MC", "NK", "NKT",  "Macro", "DC", "Granulo", "Stromal cells",
                                             "TPR", "TFH",  "TTOX_exh", "TTOX", "TTOXNaive",   "Treg",  "CD4T", "CD4TNaive"),
                   # Display labels, given in the same order as `limits`; the T-cell labels come from labels_codex
                   labels=unlist(list("B-cells", "FDC", "Plasma cells", "Mast cells", "NK cells", "NK T-cells", "Macrophages", "Dendritic cells", "Granulocytes",
                                      "Stromal cells", labels_codex$TPR, labels_codex$TFH, labels_codex$TTOX_exh, labels_codex$TTOX, labels_codex$TTOXNaive, 
                                      labels_codex$Treg, labels_codex$CD4T, labels_codex$CD4TNaive)))+
  guides(fill=guide_colorbar(ticks.colour = "black"))+
  theme_bw()+
  mytheme_1+
  # Overrides applied after mytheme_1, so they take precedence over it
   theme(axis.title = element_blank(),
         legend.position = "top",
         axis.text.y = element_text(size=7),
         legend.text = element_text(size = 7, color="black"),
         legend.title = element_text(size = 7, color="black", vjust = 0.8, margin = unit(units = "cm", c(0,0.2,0,0))),
         legend.key.height = unit(0.25, "cm"),
         plot.margin = unit(c(0,0,0,0), units = "cm"),
         legend.key.width = unit(0.2, "cm"),
         legend.box.spacing = unit(0.1, "cm"),
         legend.box.margin = unit(c(0,0,0,0), units = "cm"),
         plot.title = element_text(face = "plain", vjust = -1),
         plot.tag = element_text(margin = unit(c(0,-0.5,-0.25,0), units = "cm")),
         axis.text.x = element_text(size=6.5, angle = 45, hjust = 1))+
  labs(tag = "C")

```

## T-cell numbers in codex
```{r T-cell numbers SF8}

# Number of T cells per patient, T-cell population and tissue region.
# `Sum` is the total T-cell count per patient (over all its regions and populations);
# `No` ranks patients by that total (1 = largest) and is used as the x position in the plot.
# Fix: the last population was misspelled "CDT4Naive", which matched no cells and
# silently dropped the naive CD4 T cells; the label used everywhere else in this
# file is "CD4TNaive".
df_codex_no <- 
  codex_annotation %>% 
  filter(Merged_final %in% c("TTOXNaive", "TTOX_exh", "TTOX", "Treg", "TPR", "TFH", "CD4T", "CD4TNaive")) %>% 
  count(PatientID, Merged_final, unique_region) %>% 
  group_by(PatientID) %>% 
  mutate(Sum=sum(n)) %>% 
  ungroup() %>% 
  mutate(No=dense_rank(desc(Sum)))

# Pick one region per patient at random; in the plot below these regions form the
# left-hand bar of each patient and all remaining regions the right-hand bar.
# NOTE(review): no seed is set in this chunk, so the selection (and hence the
# left/right assignment) may differ between runs unless a seed is set elsewhere.
regions_random <- df_codex_no %>% 
  select(PatientID, unique_region) %>% 
  distinct() %>% 
  group_by(PatientID) %>% 
  sample_n(1)

# Stacked bars (panel D) of absolute T-cell numbers per patient, split by population.
p3 <- ggplot()+
  # Dashed horizontal reference line at 47280 cells.
  # NOTE(review): the origin of this value is not visible here -- confirm.
  geom_hline(yintercept = 47280, size=0.25, linetype="dashed")+
  # Left bar per patient: the randomly selected region (shifted by -0.15)
  geom_bar(data=df_codex_no %>% filter(unique_region %in% regions_random$unique_region), 
           aes(x=No-0.15, y=n, fill=Merged_final), color="white",
           stat = "identity",  width=0.2, size=0.25, alpha=0.7)+
  # Right bar per patient: all other regions (shifted by +0.15).
  # NOTE(review): width is 0.25 here but 0.2 for the left bar -- confirm this is intended.
  geom_bar(data=df_codex_no %>% filter(!unique_region %in% regions_random$unique_region), 
           aes(x=No+0.15, y=n, fill=Merged_final), color="white",
           stat = "identity",  width=0.25, size=0.25, alpha=0.7)+
  scale_y_continuous(name = "Absolute number of cells - mIF", limits = c(0, 100000))+
  # Elements 2 to 9 of the CODEX colour/limit/label vectors are used for the fill scale
 scale_fill_manual(values = colors_codex[c(2:9)],
                   limits=limits_codex[c(2:9)],
                   labels=labels_codex[c(2:9)],
                   name=NULL)+
  guides(fill=guide_legend(nrow = 2, default.unit = "cm", override.aes = list(color="white"), 
                           keywidth = 0.3, keyheight = 0.3, byrow = T))+
  # The numeric rank `No` is the x position; the axis shows the patient IDs instead.
  # Breaks and labels are both taken in order of first appearance in df_codex_no.
  scale_x_continuous(name="Patients", 
                     breaks=unique(df_codex_no$No), 
                     labels=unique(df_codex_no$PatientID), 
                     expand = c(0.02,0.02))+
  mytheme_1+
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        plot.margin = unit(c(0,0,0,0), units = "cm"),
        legend.box.margin = unit(c(0,0,-0.3,0), units = "cm"),
        axis.title.y = element_text(size=7, vjust = -15),
        axis.title.x = element_blank(),
        legend.spacing.y = unit(0.15, units = "cm"),
        plot.tag = element_text(margin = unit(c(0,-0.5,0,0), units = "cm")),
        legend.position = "top")+
  labs(tag = "D")

```

## Correlations
```{r correlations SF8}

# mIF (CODEX) side: proportions of the eight T-cell populations, with four
# regions excluded; columns are renamed to match the CITE-seq table.
freq_codex <- codex_annotation %>%
  filter(
    Merged_final %in% c("TTOXNaive", "TTOX_exh", "TTOX", "Treg", "TPR", "TFH", "CD4T", "CD4TNaive"),
    !unique_region %in% c("191_1reg004", "191_4reg004", "191_4reg005", "191_1reg006")
  ) %>%
  add_prop(vars = c("Merged_final", "PatientID"), group.vars = 2) %>%
  rename(Prop_codex = Prop, IdentI = Merged_final)

# CITE-seq side: cluster proportions, with the numeric clusters collapsed onto
# the CODEX population names and summed per population and patient.
# Clusters not listed in the mapping get NA as population.
freq_citeseq <- Combined_T@meta.data %>%
  add_prop(vars = c("IdentI", "PatientID"), group.vars = 2) %>%
  mutate(IdentII = case_when(
    IdentI %in% 1 ~ "CD4TNaive",
    IdentI %in% c(2, 9) ~ "CD4T",
    IdentI %in% 14 ~ "TPR",
    IdentI %in% 6 ~ "TFH",
    IdentI %in% c(8, 11, 13, 15) ~ "Treg",
    IdentI %in% 12 ~ "TTOXNaive",
    IdentI %in% c(3, 16) ~ "TTOX",
    IdentI %in% 5 ~ "TTOX_exh"
  )) %>%
  group_by(IdentII, PatientID) %>%
  summarise(Prop_citeseq = sum(Prop)) %>%
  rename(IdentI = IdentII) %>%
  fill_zeros(names_from = "IdentI", values_from = "Prop_citeseq")

# Join both modalities on their shared columns and express proportions in percent
freq_joined <- freq_codex %>%
  left_join(freq_citeseq) %>%
  mutate(Prop_codex = Prop_codex * 100, Prop_citeseq = Prop_citeseq * 100)

# Shared theme for the correlation panels below: theme_bw plus mytheme_1, with both
# axis titles blank by default (individual panels re-enable them where needed) and
# an element_textbox_simple title so that HTML-style markup in titles is rendered.
this_theme <- 
  theme_bw()+
  mytheme_1+
  theme(plot.margin = unit(c(0,0.2,0.2,0.35), units = "cm"),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        plot.title = element_textbox_simple(face = "plain", halign=0.5, width = 2, padding = margin(0, 0, 0, 0)),
        plot.tag = element_text(margin = unit(c(0,-0.5,0,0), units = "cm")),
        axis.text = element_text(size=7, color="black"))

# Container for the per-population correlation panels (mIF vs CITE-seq percentages).
cor_plots_codex <- list()

# TFH panel: points from freq_joined, dashed linear fit without confidence band,
# Pearson R label in the lower part of the panel; both axes limited to 0-40 with
# a fixed 1:1 aspect ratio. Axis titles stay blank (this_theme default).
cor_plots_codex[["TFH"]] <- 
  freq_joined %>% 
  filter(IdentI=="TFH") %>% 
  ggplot(aes(x=Prop_codex, y=Prop_citeseq))+
  geom_point(fill=colors_codex[["TFH"]], size=1, alpha=0.75, shape=21, stroke=0.1)+
  geom_smooth(method = "lm", linetype="dashed", size=0.25, formula = y ~ x, na.rm = T,
              color=colors_codex[["TFH"]], se=F, fullrange=T)+
  stat_cor(aes(label=..r.label..), method = "pearson", size=2.5, label.x.npc = c(0.5), label.y.npc = c(0.1))+
  scale_x_continuous(limits = c(0,40), breaks = c(0, 12, 24, 36), name = "mIF")+
  scale_y_continuous(limits = c(0,40), breaks = c(0, 12, 24, 36), name = "CITE-seq")+
  labs(title="T<sub>FH</sub>")+
  coord_fixed()+
  this_theme+
  theme(plot.margin = unit(c(0,0,0.2,0), units = "cm"))

# TREG panel (list key "TREG", data label "Treg"): axes limited to 0-56; the y-axis
# title is re-enabled. Unlike the other panels the fit line is drawn in black
# rather than in the population colour.
cor_plots_codex[["TREG"]] <- 
  freq_joined %>% 
  filter(IdentI=="Treg") %>% 
  ggplot(aes(x=Prop_codex, y=Prop_citeseq))+
  geom_point(fill=colors_codex[["Treg"]], size=1, alpha=0.75, shape=21, stroke=0.1)+
  geom_smooth(method = "lm", linetype="dashed", size=0.25, formula = y ~ x, na.rm = T,
              color="black", se=F, fullrange=T)+
  stat_cor(aes(label=..r.label..), method = "pearson", size=2.5, label.x.npc = c(0.5), label.y.npc = c(0.1))+
  scale_x_continuous(limits = c(0,56), breaks = c(0, 16, 32, 48), name = "mIF")+
  scale_y_continuous(limits = c(0,56), breaks = c(0, 16, 32, 48), name = "CITE-seq")+
  labs(title="T<sub>REG</sub>")+
  coord_fixed()+
  this_theme+
  theme(axis.title.y = element_text(size=7, angle = 90, vjust = 2.5))

# CD8+ naive panel: axes limited to 0-13; axis titles stay blank (this_theme default).
cor_plots_codex[["TTOXNaive"]] <- 
  freq_joined %>% 
  filter(IdentI=="TTOXNaive") %>% 
  ggplot(aes(x=Prop_codex, y=Prop_citeseq))+
  geom_point(fill=colors_codex[["TTOXNaive"]], size=1, alpha=0.75, shape=21, stroke=0.1)+
  geom_smooth(method = "lm", linetype="dashed", size=0.25, formula = y ~ x, na.rm = T,
              color=colors_codex[["TTOXNaive"]], se=F, fullrange=T)+
  stat_cor(aes(label=..r.label..), method = "pearson", size=2.5, label.x.npc = c(0.5), label.y.npc = c(0.1))+
  scale_x_continuous(limits = c(0,13), breaks = c(0, 4, 8, 12), name = "mIF")+
  scale_y_continuous(limits = c(0,13), breaks = c(0, 4, 8, 12), name = "CITE-seq")+
  labs(title="CD8<sup>+</sup> Naive")+
  coord_fixed()+
  this_theme+
  theme(plot.margin = unit(c(0,0,0.2,0), units = "cm"))

# CD4+ naive panel (list key "THNaive", data label "CD4TNaive"): axes limited to 0-30.
# The title box gets a negative bottom margin (-1.75 cm), which shifts the title
# relative to the default in this_theme.
cor_plots_codex[["THNaive"]] <- 
  freq_joined %>% 
  filter(IdentI=="CD4TNaive") %>% 
  ggplot(aes(x=Prop_codex, y=Prop_citeseq))+
  geom_point(fill=colors_codex[["CD4TNaive"]], size=1, alpha=0.75, shape=21, stroke=0.1)+
  geom_smooth(method = "lm", linetype="dashed", size=0.25, formula = y ~ x, na.rm = T,
              color=colors_codex[["CD4TNaive"]], se=F, fullrange=T)+
  stat_cor(aes(label=..r.label..), method = "pearson", size=2.5, label.x.npc = c(0.53), label.y.npc = c(0.1))+
  scale_x_continuous(limits = c(0,30), breaks = c(0, 10, 20, 30), name = "mIF")+
  scale_y_continuous(limits = c(0,30), breaks = c(0, 10, 20, 30), name = "CITE-seq")+
  labs(title="CD4<sup>+</sup> Naive")+
  coord_fixed()+
  this_theme+
  theme(plot.title = element_textbox_simple(face = "plain", halign=0.5, margin = unit(units = "cm", c(0,0,-1.75,0)), 
                                            width = 2, padding = margin(0, 0, 0, 0)),
        plot.margin = unit(c(0,0,0.2,0), units = "cm"))

# TPR panel: axes limited to 0-13; the R label sits near the top of the panel
# (label.y.npc = 0.93) instead of the bottom. The y-axis title is re-enabled and
# the title box and plot tag get their own margins.
cor_plots_codex[["TPR"]] <- 
  freq_joined %>% 
  filter(IdentI=="TPR") %>% 
  ggplot(aes(x=Prop_codex, y=Prop_citeseq))+
  geom_point(fill=colors_codex[["TPR"]], size=1, alpha=0.75, shape=21, stroke=0.1)+
  geom_smooth(method = "lm", linetype="dashed", size=0.25, formula = y ~ x, na.rm = T,
              color=colors_codex[["TPR"]], se=F, fullrange=T)+
  stat_cor(aes(label=..r.label..), method = "pearson", size=2.5, label.x.npc = c(0.5), label.y.npc = c(0.93))+
  scale_x_continuous(limits = c(0,13), breaks = c(0, 4, 8, 12), name = "mIF")+
  scale_y_continuous(limits = c(0,13), breaks = c(0, 4, 8, 12), name = "CITE-seq")+
  labs(title="T<sub>Pr</sub>")+
  coord_fixed()+
  this_theme+
  theme(axis.title.y = element_text(size=7, angle = 90, vjust = 2.5),
        plot.title = element_textbox_simple(face = "plain", halign=0.5, margin = unit(units = "cm", c(0,0,-1.75,0)), 
                                            width = 2, padding = margin(0, 0, 0, 0)),
        plot.tag = element_text(margin = unit(c(0,0,-0.25,0), units = "cm")))

# Memory TH panel (list key "TH", data label "CD4T"): axes limited to 0-60; the R
# label sits near the top of the panel and the y-axis title is re-enabled.
cor_plots_codex[["TH"]] <- 
  freq_joined %>% 
  filter(IdentI=="CD4T") %>% 
  ggplot(aes(x=Prop_codex, y=Prop_citeseq))+
  geom_point(fill=colors_codex[["CD4T"]], size=1, alpha=0.75, shape=21, stroke=0.1)+
  geom_smooth(method = "lm", linetype="dashed", size=0.25, formula = y ~ x, na.rm = T,
              color=colors_codex[["CD4T"]], se=F, fullrange=T)+
  stat_cor(aes(label=..r.label..), method = "pearson", size=2.5, label.x.npc = c(0.5), label.y.npc = c(0.93))+
  scale_x_continuous(limits = c(0,60), breaks = c(0, 20, 40, 60), name = "mIF")+
  scale_y_continuous(limits = c(0,60), breaks = c(0, 20, 40, 60), name = "CITE-seq")+
  labs(title="Memory T<sub>H</sub>")+
  coord_fixed()+
  this_theme+
  theme(axis.title.y = element_text(size=7, angle = 90, vjust = 2.5))

# Memory TTOX panel: axes limited to 0-45; both the x- and y-axis titles are re-enabled.
cor_plots_codex[["TTOX"]] <- 
  freq_joined %>% 
  filter(IdentI=="TTOX") %>% 
  ggplot(aes(x=Prop_codex, y=Prop_citeseq))+
  geom_point(fill=colors_codex[["TTOX"]], size=1, alpha=0.75, shape=21, stroke=0.1)+
  geom_smooth(method = "lm", linetype="dashed", size=0.25, formula = y ~ x, na.rm = T,
              color=colors_codex[["TTOX"]], se=F, fullrange=T)+
  stat_cor(aes(label=..r.label..), method = "pearson", size=2.5, label.x.npc = c(0.5), label.y.npc = c(0.1))+
  scale_x_continuous(limits = c(0,45), breaks = c(0, 15, 30, 45), name = "mIF")+
  scale_y_continuous(limits = c(0,45), breaks = c(0, 15, 30, 45), name = "CITE-seq")+
  labs(title="Memory T<sub>TOX</sub>")+
  coord_fixed()+
  this_theme+
  theme(axis.title.y = element_text(size=7, angle = 90, vjust = 2.5),
        axis.title.x = element_text(size=7))

# PD1+ TIM3+ TTOX panel (data label "TTOX_exh"): axes limited to 0-65; only the
# x-axis title is re-enabled.
cor_plots_codex[["TTOX_exh"]] <- 
  freq_joined %>% 
  filter(IdentI=="TTOX_exh") %>% 
  ggplot(aes(x=Prop_codex, y=Prop_citeseq))+
  geom_point(fill=colors_codex[["TTOX_exh"]], size=1, alpha=0.75, shape=21, stroke=0.1)+
  geom_smooth(method = "lm", linetype="dashed", size=0.25, formula = y ~ x, na.rm = T,
              color=colors_codex[["TTOX_exh"]], se=F, fullrange=T)+
  stat_cor(aes(label=..r.label..), method = "pearson", size=2.5, label.x.npc = c(0.5), label.y.npc = c(0.1))+
  scale_x_continuous(limits = c(0,65), breaks = c(0, 20, 40, 60), name = "mIF")+
  scale_y_continuous(limits = c(0,65), breaks = c(0, 20, 40, 60), name = "CITE-seq")+
  labs(title="PD1<sup>+</sup> TIM3<sup>+</sup> T<sub>TOX</sub>")+
  coord_fixed()+
  this_theme+
  theme(axis.title.x = element_text(size=7),
        plot.margin = unit(c(0,0,0.2,0), units = "cm"))

# Median, across T-cell populations, of the correlation estimate between mIF and
# CITE-seq percentages (cor.test with its default method). The value is only
# printed, not stored.
freq_joined %>% 
  group_by(IdentI) %>% 
  summarise(R=cor.test(Prop_codex, Prop_citeseq)$estimate) %>% pull(R) %>% median()

```

## Assemble plot
```{r assemble SF9, fig.height=5.5}

# Final layout: left column = heatmap (p2) stacked over the cell-number bars (p3);
# right column = the eight correlation panels arranged in two columns, with tag "E"
# attached to the first of them (TPR). Column widths are 2.25 : 1.15.
# NOTE(review): the chunk label says "SF9", but the section header and the output
# file name below refer to Supplementary Figure 8.
p_full <- wrap_plots(p2/p3+plot_layout(heights = c(1.25,1)))+wrap_plots(cor_plots_codex$TPR+labs(tag = "E")+cor_plots_codex$THNaive+cor_plots_codex$TH+cor_plots_codex$TFH+
  cor_plots_codex$TREG+cor_plots_codex$TTOXNaive+cor_plots_codex$TTOX+cor_plots_codex$TTOX_exh+
  plot_layout(ncol = 2))+
  plot_layout(widths = c(2.25,1.15))
p_full
#ggsave(p_full, width = 18, height = 14, units = "cm", filename = "SF8.pdf")


```

# Supplementary Figure 9
## Immunofluorescence images
```{r images SF9, fig.height=3}

# One cell-position scatter plot per selected tissue region, stored by region id.
plots_codex <- list()

# The last id, "empty", presumably matches no cells and yields a blank panel -- confirm.
for(r in c("191_3reg008", "191_4reg004", "191_2reg007", "191_5reg002", "191_1reg003", "empty")) {
  
  # Cells of this region with a simplified annotation: Granulo/Macro/DC become
  # "Myeloid", MC/NKT/PC/NK become "Other"; then only cells within a radius of
  # 2500 (in x/y units) around the mean position of the region are kept.
  df_tmp <- codex_annotation %>% filter(unique_region== r) %>% 
    mutate(Merged_all_simple=ifelse(Merged_final %in% c("Granulo", "Macro", "DC"), "Myeloid", Merged_final)) %>% 
    mutate(Merged_all_simple=ifelse(Merged_all_simple %in% c("MC", "NKT", "PC", "NK"), "Other", Merged_all_simple)) %>% 
    filter(((x-mean(.$x))^2+(y-mean(.$y))^2)<2500^2) 
  
  # B cells are drawn first with a fixed colour; all other cell types are drawn
  # on top and coloured through the manual colour/fill scales.
  plots_codex[[r]] <- ggplot()+
    geom_point_rast(data=df_tmp %>% filter(Merged_all_simple=="B"), aes(x=x,y=y), shape=21, size=0.25, stroke=0, alpha=1, raster.dpi =300,
                    color=colors_codex[["B"]], fill=colors_codex[["B"]])+
    geom_point_rast(data=df_tmp %>% filter(Merged_all_simple!="B"), aes(x=x,y=y, fill=Merged_all_simple, color=Merged_all_simple), 
                    shape=21, size=0.25, stroke=0, alpha=1, raster.dpi=300)+
    scale_color_manual(values = colors_codex, limits=limits_codex, labels=labels_codex, name="Cell type")+
    scale_fill_manual(values = colors_codex, limits=limits_codex, labels=labels_codex, name="Cell type")+
    # Panel title = entity of the region, printed in white on the black background
    ggtitle(unique(df_tmp$Entity))+
    coord_fixed()+
    theme_void()+
    theme(legend.position = "none",
          plot.title = element_text(color="white", hjust=0.1, size=8, 
                                    margin = unit(units = "cm", c(0,0,-0.6,0)), face = "bold"),
          plot.margin = unit(units = "cm", c(0.1, 0.1, 0.1, 0.1)),
          panel.background = element_rect(fill = "black", color="black"))
}

# Print all panels
plots_codex

#ggsave(wrap_plots(plots_codex), width = 19, height = 12.5, units = "cm", filename = "SFigure9_p2.pdf")

# Helper plot whose only purpose is to provide the cell-type legend: the points are
# fully transparent (alpha = 0) and the legend keys are made visible via override.aes.
# NOTE: df_tmp is whatever the preceding loop left behind, i.e. the data of its
# last iteration (region id "empty").
legend_plot_codex <- ggplot()+
  geom_point_rast(data=df_tmp %>% filter(Merged_all_simple!="B"), aes(x=x,y=y, fill=Merged_all_simple, color=Merged_all_simple), 
                  shape=21, size=0.25, stroke=0, alpha=0, raster.dpi=300)+
  scale_color_manual(values = colors_codex, limits=limits_codex, labels=labels_codex, name="Cell type")+
  scale_fill_manual(values = colors_codex, limits=limits_codex, labels=labels_codex, name="Cell type")+
  guides(fill=guide_legend(ncol = 2, override.aes = list(size=1.75, stroke=0, shape=21, alpha=1, color=NA)))+
  guides(color=guide_legend(ncol = 2))+
  ggtitle("")+
  coord_fixed()+
  mytheme_codex+
  # In the unit() calls below the value is passed by name (x = ...) and "cm" is
  # matched positionally to `units`.
  theme(panel.background = element_rect(fill = "black", color="black"),
        legend.position = "right",
        legend.text = element_text(size=6, color="white"),
        legend.box.background = element_rect(fill = "black", color="black"),
        legend.spacing.x = unit("cm", x = 0.13),
        legend.spacing.y = unit("cm", x = 0.001),
        legend.key.width = unit("cm", x = 0.05),
        legend.key.height = unit("cm", x = 0.5),
        legend.title = element_blank())

# Extract the legend and draw it as a stand-alone plot
as_ggplot(get_legend(legend_plot_codex))
#ggsave(width = 4, height = 4, units = "cm", filename = "SFigure9_p2_legend.pdf")

```

# Supplementary Figure 10
## Load analysis
```{r run analysis SF10}

# Read results from neighborhood analysis
# Please run file: analysis/NeighborhoodAnalysis.Rmd
# load() restores the saved objects into the current environment; nn_classes, used
# below, is presumably one of them -- confirm.
load("output/Neighborhood_results.RData")

# Add the neighborhood classes to the codex annotation (joined per cell).
# Note that this overwrites codex_annotation, so re-running the chunk joins the
# columns a second time.
codex_annotation <- left_join(codex_annotation, nn_classes, by="unique_cell_id")
codex_annotation

```

## Tissue cores
### Images
```{r plots SF10, fig.height=6}

# One neighborhood map per tissue region. `df` keeps the filtered cells and
# `plots` the corresponding ggplot, both keyed by region id. The loop variable
# `r` is deliberately left as is: later code reads its final value.
regions <- unique(pull(codex_annotation, unique_region))
plots <- list()
df <- list()

for (r in regions) {

  # Cells of this region with an assigned neighborhood, restricted to
  # 500 < x, y < 7500 and then to a disc of radius 2500 around the mean
  # position of the remaining cells.
  df[[r]] <- codex_annotation %>%
    filter(
      !is.na(Region), unique_region %in% r,
      x > 500, x < 7500,
      y > 500, y < 7500
    ) %>%
    filter((x - mean(.$x))^2 + (y - mean(.$y))^2 < 2500^2)

  # Rasterised scatter plot coloured by neighborhood; title = patient id
  plots[[r]] <- ggplot(df[[r]]) +
    ggrastr::geom_point_rast(
      aes(x = x, y = y, color = Region, fill = Region),
      shape = 21, size = 0.25, stroke = 0, alpha = 1, raster.dpi = 400
    ) +
    scale_color_manual(values = colors_nn) +
    scale_fill_manual(values = colors_nn) +
    guides(color = guide_legend(override.aes = list(size = 3))) +
    ggtitle(unique(df[[r]]$PatientID)) +
    coord_fixed(clip = "off") +
    theme_void() +
    theme(
      legend.position = "none",
      plot.margin = unit(units = "cm", c(0.1, 0.1, 0.1, 0.1)),
      plot.subtitle = element_text(
        size = 7, face = "bold", hjust = 0.5,
        margin = unit(units = "cm", c(0, 0, 0, 0))
      ),
      plot.title = element_text(
        size = 6.5, face = "plain",
        margin = unit(units = "cm", c(-0.1, 0, -0.6, -0.1))
      ),
      panel.background = element_rect(fill = NA, color = NA),
      plot.background = element_rect(fill = NA, color = NA)
    )
}

# Placeholder panel used to fill the last grid cell of the figure layout: it is
# built from rows whose unique_region is "empty" and draws its points fully
# transparent (alpha = 0).
empty <- codex_annotation %>% 
  filter(!is.na(Region), unique_region %in% "empty") %>% 
  ggplot()+
  ggrastr::geom_point_rast(aes(x=x,y=y,color=Region, fill=Region), alpha=0, raster.dpi =400)+
  guides(color=guide_legend(override.aes = list(size=3, alpha=1)))+
  ggtitle("")+
  coord_fixed(clip = "off")+
  theme_void()+
  theme(legend.position = "right",
        plot.margin = unit(units = "cm", c(0.1,0.1,0.1,0.1)),
        plot.title = element_text(size=8, face = "plain", margin = unit(units = "cm", c(0,0,-0.75,0))),
        panel.background = element_rect(fill = NA, color = NA),
        plot.background = element_rect(fill = NA, color = NA))


# Five columns of three stacked region maps, one column per entity
# (rLN, DLBCL, MCL, FL, MZL). The top plot of each column carries the panel tag
# (A-E) and the entity name as subtitle; the MZL column is completed with the
# `empty` placeholder.
p_full <- 
  wrap_plots(plots$`191_1reg006`+labs(tag = "A", subtitle = "rLN")+
               plots$`191_3reg007`+
               plots$`191_5reg005`+plot_layout(ncol = 1))+
  
  wrap_plots(plots$`191_4reg004`+labs(tag = "B",  subtitle = "DLBCL")+#
               plots$`191_3reg001`+
               plots$`191_4reg006`+plot_layout(ncol = 1))+
  
  wrap_plots(plots$`191_2reg007`+labs(tag = "C",  subtitle = "MCL")+
               plots$`191_2reg002`+
               plots$`191_3reg006`+plot_layout(ncol = 1))+
  
  wrap_plots(plots$`191_5reg002`+labs(tag = "D",  subtitle = "FL")+
               plots$`191_3reg002`+
               plots$`191_5reg001`+plot_layout(ncol = 1))+
  
  wrap_plots(plots$`191_1reg003`+labs(tag = "E",  subtitle = "MZL")+
               plots$`191_1reg004`+
               empty+plot_layout(ncol = 1))+
  plot_layout(ncol = 5)

p_full

# NOTE(review): the file name says "SF11" while this section is Supplementary Figure 10 -- confirm.
#ggsave(p_full, width = 18, height = 11.5, units = "cm", filename = "SF11.pdf")

```

### Legend
```{r legend SF10, fig.height=1}

# Legend labels for the ten neighborhoods (N1-N10), in order.
# Entries written as 'text'[SUB]~'' are formulas (unevaluated language objects),
# not strings, so c() returns a list mixing plain strings and formulas. They are
# presumably rendered as plotmath by the legend, giving the subscripts -- confirm.
labels_nn <- c(
  "N1: B-cells / FDC" ,
  'N2: B-cells / FDC / T'[FH]~'',
  'N3: T'[Pr]~'/ T'[REG]~'',
  'N4: Macrophages / B-cells / Exh. T'[TOX]~'',
  "N5: B-cells",
  "N6: T-cell area I" ,
  "N7: T-cell area II" ,
  'N8: PC / NK / Memory T'[TOX]~'' ,
  "N9: T-cell area III" ,
  "N10: Stromal cells / Macrophages"
   )

# Helper plot built only to extract the neighborhood legend from it.
# NOTE: `r` and `df[[r]]` are not defined in this chunk; they are whatever the
# for loop over `regions` in the preceding chunk left behind (its last region).
# The legend itself is defined by the manual scales (limits 1-10, labels_nn).
p_legend <- codex_annotation %>% 
  filter(!is.na(Region), unique_region %in% r) %>% 
  ggplot()+
  ggrastr::geom_point_rast(aes(x=x,y=y,color=Region, fill=Region), shape=21, size=0.25, 
                           stroke=0, alpha=1, raster.dpi =400)+
  scale_color_manual(values = colors_nn, limits=factor(1:10), labels=labels_nn)+
  scale_fill_manual(values = colors_nn, limits=factor(1:10), labels=labels_nn)+
  # Legend keys in two rows, drawn larger and with a thin black outline
  guides(color=guide_legend(nrow = 2, override.aes = list(size=2, color="black", stroke=0.25)))+
  ggtitle(unique(df[[r]]$PatientID))+
  coord_fixed(clip = "off")+
  theme_void()+
  theme(legend.position = "right",
        plot.margin = unit(units = "cm", c(0.1,0.1,0.1,0.1)),
        legend.text = element_text(size=6.5),
        legend.title = element_text(size=7, face = "bold"),
        plot.subtitle = element_text(size=7, face = "bold", hjust=0.5, margin = unit(units = "cm", c(0,0,0,0))),
        plot.title = element_text(size=6.5, face = "plain", margin = unit(units = "cm", c(-0.1,0,-0.6,-0.1))),
        panel.background = element_rect(fill = NA, color = NA),
        plot.background = element_rect(fill = NA, color = NA),
        legend.box.margin = unit(c(0,0,0,-0.38), "cm"),
        legend.key.width = unit("cm", x = 0.1),
        legend.spacing.x = unit("cm", x = 0.1),
        legend.key.height = unit("cm", x = 0.35))

# Extract the legend and draw it as a stand-alone plot
as_ggplot(get_legend(p_legend))

# NOTE(review): the file name says "SF11" while this section is Supplementary Figure 10 -- confirm.
#ggsave(width = 19, height = 1.5, units = "cm", filename = "SF11_legend.pdf")

```

## Composition of neighborhoods
```{r neighborhood composition, fig.height=3}

# Proportion of each neighborhood (Region) per tissue region, with missing
# combinations filled with zeros (add_prop / fill_zeros are project helpers;
# presumably Prop is the share within each unique_region -- confirm).
# `Max` = 0.06 above the largest proportion per Entity x Region; it is used as the
# y position of the p-value labels. `Region_label` is "N<Region>" as an ordered
# factor N1..N10.
df_freq_nh <- codex_annotation %>% 
  add_prop(vars = c("Entity", "Region", "unique_region"), group.vars = 3) %>% 
  fill_zeros(names_from = "Region", values_from = "Prop") %>%
  group_by(Entity, Region) %>% 
  mutate(Max=0.06+max(Prop),
         Region_label=paste0("N", Region)) %>% 
  mutate(Region_label=factor(Region_label, levels = paste0("N", 1:10))) %>% 
  # Manual override of the label height for N8 in MCL
  mutate(Max=ifelse(Region_label=="N8" & Entity=="MCL", 0.18, Max)) %>% 
  ungroup()

# Per neighborhood, compare each entity against rLN (compare_means defaults,
# BH-adjusted) and keep the comparisons with adjusted p < 0.05.
# p.adj_f is the label to print: rounded to two decimals for p.adj >= 0.01,
# scientific notation with one significant digit below that.
# The y position (Max) of each label is looked up from df_freq_nh.
pvalues <-
  compare_means(
    formula = Prop ~ Entity, data = df_freq_nh, ref.group = "rLN",
    group.by = "Region_label", p.adjust.method = "BH"
  ) %>%
  filter(p.adj < 0.05) %>%
  mutate(
    p.adj_s = format(p.adj, scientific = TRUE, digits = 1),
    p.adj_f = case_when(
      p.adj >= 0.01 ~ as.character(round(p.adj, 2)),
      p.adj < 0.01 ~ p.adj_s
    ),
    Entity = factor(group2, levels = c("rLN", "DLBCL", "MCL", "FL", "MZL"))
  ) %>%
  left_join(
    distinct(select(df_freq_nh, Region_label, Max, Entity)),
    by = c("Region_label", "Entity")
  )

# Median proportion per neighborhood in the rLN samples; drawn as a dashed
# reference line in each facet of the plot below.
df_medianLines <- df_freq_nh %>% 
  filter(Entity=="rLN") %>% 
  group_by(Region_label) %>% 
  dplyr::summarise(MedianProp=median(Prop)) 

# Panel F: neighborhood proportions per entity, one facet per neighborhood (two rows,
# free y scales). Box plots with outliers hidden, overlaid with the individual
# points, the rLN median as dashed line, and adjusted p-values as text at height Max.
df_freq_nh %>% 
  ggplot(aes(x=Entity, y=Prop)) +
  geom_hline(data=df_medianLines, aes(yintercept=MedianProp),
             size=0.25, linetype="dashed", color="grey60")+
  geom_boxplot(width=0.5, outlier.alpha = 0, size=0.25)+
  ggbeeswarm::geom_beeswarm(size=1, shape=21, stroke=0.1, cex = 2, aes(fill=Region))+
  geom_text(data=pvalues, inherit.aes = F, aes(y=Max, x=Entity, label=p.adj_f), size=2.5)+
  scale_fill_manual(values = colors_nn)+
  scale_x_discrete(limits=c("rLN", "DLBCL", "MCL", "FL", "MZL"))+
  facet_wrap(~Region_label, strip.position = "top", scales = "free_y", nrow = 2)+
  scale_y_continuous(name = "% of total area", expand = c(0,0.075))+
  mytheme_1+
  theme(strip.text.y = element_text(angle = 0, size=6),
        axis.text.x = element_text(angle=45, hjust=1),
        axis.title.x = element_blank(),
        strip.background = element_blank(),
        plot.margin = unit(c(0,0.1,0,0.1), "cm"))+
  labs(tag = "F")

# Saves the last plot to SF10.pdf.
# NOTE(review): this is the only ggsave call in this part of the file that is not
# commented out, so knitting writes this file -- confirm that this is intended.
ggsave(width = 18, height = 7, units = "cm", filename = "SF10.pdf")

```

# Session info
```{r session}

# Record R version and attached/loaded package versions for reproducibility
sessionInfo()

```