Diff of /scripts/CD3-_space.Rmd [000000] .. [fb5156]

Switch to side-by-side view

--- a
+++ b/scripts/CD3-_space.Rmd
@@ -0,0 +1,1161 @@
+---
+title: "CD3- space"
+author: "Ming Tang"
+date: '2023-07-26'
+output: html_document
+---
+
+cluster_id	annotation	hex
+0	Activated_monocytes	#3366cc
+1	Classical_monocytes	#78537f
+2	Classical-like_monocytes	#be4031
+3	Mature_IgK_B	#e44f0c
+4	Intermediate_monocytes	#f27708
+5	CD56_dim_NK	#ef9802
+6	Mature_IgL_B	#8c970d
+7	Non-classical_monocytes	#2a9615
+8	Terminally_differentiated_NK	#38683f
+9	Memory_B	#722974
+10	Conventional_dendritic	#85129e
+11	CD56_bright_NK	#4553b1
+12	Mature_IgK_V1(D)-39_B	#0692c4
+13	Other_CD14+_PF4+	#5179a8
+14	IFN-responsive_monocytes	#ab5688
+15	Other_low_IgD_B	#c5565f
+17	IFN-responsive_NK	#67a700
+18	Plasmacytoid_dendritic	#897414
+19	Follicular_B	#ab4026
+20a	Other_CD14+_CD79b+	#943a48
+20b	Other_Cytotoxic_monocytes	#943a48
+21	NK/NKT	#5d5172
+22	Activated_non-classical_monocytes	#3a6095
+24	Cytotoxic_monocytes	#8e4698
+
+```{r}
+library(devtools)
+library(Seurat)
+library(tidyverse)
+#library(presto)
+library(here)
+library(tictoc)
+library(ComplexHeatmap)
+# presto is much faster than Seurat::FindAllMarkers
+# install_github('immunogenomics/presto')
+```
+
+
+```{r}
+cd3_minus_annotated<- readRDS(here("data/cd3_minus_annotated_seurat.rds"))
+```
+
+
+### change to the new annotation
+```{r}
+
+table(cd3_minus_annotated$clusters_anno_sub)
+
+old_annotations<- sort(unique(cd3_minus_annotated$clusters_anno_sub))
+new_annotations<- c("00_Activated_monocytes", "01_Classical_monocytes", "02_Classical-like_monocytes", "03_Mature_IgK_B", "04_Intermediate_monocytes",
+                    "05_CD56_dim_NK", "06_Mature_IgL_B", "07_Non-classical_monocytes",
+                    "08_Terminally_differentiated_NK", "09_Memory_B", "10_Conventional_dendritic", "11_CD56_bright_NK", "12_Mature_IgK_V1(D)-39_B", "13_Other_CD14+_PF4+", "14_IFN-responsive_monocytes", "15_Other_low_IgD_B", "17_IFN-responsive_NK", "18_Plasmacytoid_dendritic",
+                    "19_Follicular_B", "20_a_Other_CD14+_CD79b+", "20_b_Other_Cytotoxic_monocytes", "21_NK/NKT",
+                    "22_Activated_non-classical_monocytes", "24_Cytotoxic_monocytes")
+
+names(new_annotations)<- old_annotations
+
+new_annotations
+
+cd3_minus_annotated$clusters_anno_sub<- new_annotations[cd3_minus_annotated$clusters_anno_sub] %>% unname() 
+
+cd3_minus_annotated$annotation<- str_replace(cd3_minus_annotated$clusters_anno_sub,
+                                             "[0-9]+_", "")
+```
+
+### whole cd3- marker gene heatmaps 
+
+Relate to Suppl Figure 10.
+
+```{r}
+set.seed(123)
+library(ComplexHeatmap)
+
+
+clusters<- cd3_minus_annotated@meta.data$clusters_anno_sub%>%unique()
+
+markers<- presto::wilcoxauc(cd3_minus_annotated, 'clusters_anno_sub', assay = 'data',
+                               groups_use = clusters)
+
+markers %>% 
+  arrange(desc(auc)) %>% 
+  filter(pct_in> 20, pct_out < 30) %>%
+  head()
+
+top_markers_unique<- filter(markers, padj <=0.05, abs(logFC) >=0.8) %>% 
+  pull(feature) %>%
+  unique()
+## for plotting heatmap, subsample the seurat object, 100000 or 5000 cells
+cd3_minus_annotated_sub<- cd3_minus_annotated[, sample.int(ncol(cd3_minus_annotated), size = 10000)]
+
+all_cells<- cd3_minus_annotated_sub@meta.data$clusters_anno_sub %in% clusters
+
+all_mat<- cd3_minus_annotated_sub[["RNA"]]@data[top_markers_unique, all_cells ] %>% as.matrix()
+
+## scale the rows
+all_mat<- t(scale(t(all_mat)))
+
+cluster_anno<- cd3_minus_annotated_sub@meta.data$clusters_anno_sub[all_cells]
+
+annotation_colors<- c("#3366cc", "#78537f", "#be4031", "#e44f0c", "#f27708", "#ef9802", "#8c970d", "#2a9615", "#38683f", "#722974", "#85129e", "#4553b1", "#0692c4", "#5179a8", "#ab5688", "#c5565f", "#67a700", "#897414", "#ab4026", "#943a48", "#943a48", "#5d5172", "#3a6095", "#8e4698")
+
+
+
+cluster_anno<- cd3_minus_annotated_sub@meta.data$clusters_anno_sub[all_cells]
+
+column_ha2<- HeatmapAnnotation(
+    cluster_anno = cluster_anno,
+    col = list(cluster_anno = setNames(annotation_colors, sort(clusters))),
+    na_col = "grey",
+    show_annotation_name = TRUE
+)
+
+
+quantile(all_mat, c(0.1, 0.5, 0.95, 0.99))
+
+col_fun<-  circlize::colorRamp2(c(-1, 0, 2), c("#440154FF", "#238A8DFF", "#FDE725FF"))
+
+## for big matrix, raster helps. https://jokergoo.github.io/2020/06/30/rasterization-in-complexheatmap/
+## cluster the slice (without specifiying the factor level) and the columns.
+
+# can not directly rotate column titles (cluster annotation) for 45, has to use decorate() function, see
+# https://github.com/jokergoo/ComplexHeatmap/issues/189
+
+
+pdf(here("results/figures/2023-04-01_cd3_minus_all_clusters_marker_gene_heatmap_remake_new_color.pdf"), width = 16, height = 20)
+Heatmap(all_mat, name = "Expression",  
+        column_split = factor(cluster_anno),
+        cluster_columns = TRUE,
+        show_column_dend = FALSE,
+        cluster_column_slices = TRUE,
+        row_dend_reorder = TRUE,
+        column_title_gp = gpar(fontsize = 8),
+        column_title_rot = 90,
+        column_gap = unit(0.5, "mm"),
+        cluster_rows = TRUE,
+        column_dend_reorder = TRUE,
+        show_row_dend = FALSE,
+        col = col_fun,
+        row_km = 20,
+        row_gap = unit(0.5, "mm"),
+        row_title = NULL,
+        row_names_gp = gpar(fontsize = 4.5),
+        top_annotation = column_ha2,
+        show_column_names = FALSE,
+        use_raster = TRUE,
+        raster_by_magick = TRUE,
+        raster_quality = 10)
+
+dev.off()
+
+```
+
+### Make a function 
+
+```{r}
+
+make_marker_heatmap<- function(obj, annotation_colors, row_km, row_name_size =8){
+  clusters<- obj@meta.data$clusters_anno_sub%>%unique()
+  markers<- presto::wilcoxauc(obj, 'clusters_anno_sub', assay = 'data',
+                               groups_use = clusters)
+  
+  top_markers_unique<- filter(markers, padj <=0.05, abs(logFC) >=0.8) %>% pull(feature) %>%
+  unique()
+## for plotting heatmap, subsample the seurat object, 100000 or 5000 cells
+  obj_sub<- obj[, sample.int(ncol(obj), size = 10000)]
+  all_cells<- obj_sub@meta.data$clusters_anno_sub %in% clusters
+  all_mat<- obj_sub[["RNA"]]@data[top_markers_unique, all_cells ] %>% as.matrix()
+
+## scale the rows
+  all_mat<- t(scale(t(all_mat)))
+  cluster_anno<- obj_sub@meta.data$clusters_anno_sub[all_cells]
+  
+  column_ha2<- HeatmapAnnotation(
+    cluster_anno = cluster_anno,
+    col = list(cluster_anno = setNames(annotation_colors, sort(clusters))),
+    na_col = "grey",
+    show_annotation_name = TRUE
+)
+  
+  col_fun<-  circlize::colorRamp2(c(-1, 0, 2), c("#440154FF", "#238A8DFF", "#FDE725FF"))
+  
+  Heatmap(all_mat, name = "Expression",  
+        column_split = factor(cluster_anno),
+        cluster_columns = TRUE,
+        show_column_dend = FALSE,
+        cluster_column_slices = TRUE,
+        row_dend_reorder = TRUE,
+        column_title_gp = gpar(fontsize = 8),
+        column_title_rot = 90,
+        column_gap = unit(0.5, "mm"),
+        cluster_rows = TRUE,
+        column_dend_reorder = TRUE,
+        show_row_dend = FALSE,
+        col = col_fun,
+        row_km = row_km,
+        row_gap = unit(0.5, "mm"),
+        row_title = NULL,
+        row_names_gp = gpar(fontsize = row_name_size),
+        top_annotation = column_ha2,
+        show_column_names = FALSE,
+        use_raster = TRUE,
+        raster_by_magick = TRUE,
+        raster_quality = 10)
+
+}
+
+```
+
+### NK cells marker gene heatmaps 
+
+Relate to Suppl Figure 11b
+```{r}
+NK_cells_obj<- cd3_minus_annotated[, cd3_minus_annotated$seurat_clusters %in% c(5,8,11,17,21)]
+NK_colors<- c("#ef9802", "#38683f", "#4553b1", "#67a700", "#5d5172")
+
+pdf(here("results/figures/2023-04-01_cd3_minus_NK_marker_gene_heatmap_remake_new_color.pdf"), width = 16, height = 8)
+make_marker_heatmap(obj=NK_cells_obj, annotation_colors = NK_colors, row_km = 5)
+dev.off()
+```
+
+### B cells marker gene heatmaps 
+
+Relate to Suppl Figure 12b
+```{r}
+B_cells_obj<- cd3_minus_annotated[, cd3_minus_annotated@meta.data$clusters_anno_sub%>% str_detect("^03|^06|^09|^12|^15|^19")]
+
+B_colors<- c("#e44f0c", "#8c970d", "#722974", "#0692c4", "#c5565f", "#ab4026")
+
+pdf(here("results/figures/2023-04-01_cd3_minus_B_marker_gene_heatmap_remake_new_color.pdf"), width = 16, height = 8)
+make_marker_heatmap(obj=B_cells_obj, annotation_colors = B_colors, row_km = 6)
+dev.off()
+```
+
+### monocytes marker gene heatmaps 
+
+Relate to Suppl Figure 13
+```{r}
+
+mono_obj<- cd3_minus_annotated[, cd3_minus_annotated@meta.data$clusters_anno_sub%>% str_detect("^00|^01|^02|^04|^07|^10|^14|^18|^20_b|^22|^24")]
+
+mono_colors<- c("#3366cc", "#78537f", "#be4031", "#f27708", "#2a9615", "#85129e", "#ab5688",
+                "#897414", "#943a48", "#3a6095", "#8e4698")
+
+pdf(here("results/figures/2023-04-01_cd3_minus_monocyte_marker_gene_heatmap_remake_new_color.pdf"), width = 16, height = 18)
+make_marker_heatmap(obj=mono_obj, annotation_colors = mono_colors, row_km = 11, row_name_size = 6)
+dev.off()
+
+```
+
+
+### clustered dotplots by group
+
+```{r}
+healthy_indx<- cd3_minus_annotated$group == "Healthy_donors"
+cd3_minus_annotated_no_health<- cd3_minus_annotated[, !healthy_indx]
+cd3_minus_annotated_health<- cd3_minus_annotated[, healthy_indx]
+```
+
+
+### NK cells dotplot
+```{r}
+NK_lineage_markers<- c("NCAM1", "B3GAT1", "FCGR3A")
+NK_cytotoxic_markers<- c("GZMA", "GZMB", "GZMH", "GZMK", "GZMM", "PRF1", "GNLY", "CTSW", "LAMP1")
+NK_cytokine_chemokines<- c("CCL3", "CCL4", "CCL5", "XCL1", "XCL2", "IFNG", "TNF")
+NK_receptors<- c("KLRF1", "NCR1", "NCR3", "KLRK1", "KLRD1", "KLRG1", "KLRB1", "KLRC1",
+                 "KIR2DL1", "KIR2DL2", "KIR2DL3", "KIR3DL1", "KIR3DL2")
+
+NK_adhesion_molecules<- c("CD2", "SELL", "ITGB2", "CD44", "ITGAL")
+NK_TF<- "EOMES"
+NK_INF_genes<- c("IFIT2", "IFIT3", "OASL", "HERC5")
+NK_others<- c("PMAIP1", "COTL1", "LTB","IL7R", "CD8A", "CD8B")
+
+NK_genes<- c(NK_lineage_markers, NK_cytotoxic_markers, NK_cytokine_chemokines,
+             NK_receptors, NK_adhesion_molecules, NK_TF, NK_INF_genes, NK_others)
+
+NK_genes[! NK_genes %in% rownames(cd3_minus_annotated)]
+
+cd3_minus_annotation<- 
+  select(cd3_minus_annotated@meta.data, seurat_clusters, annotation) %>%
+  distinct() %>%
+  mutate(annotation = str_replace_all(annotation, "-", "_")) %>%
+  mutate(annotation = str_replace_all(annotation, "\\+", "")) %>%
+  mutate(seurat_clusters = as.character(seurat_clusters))
+
+```
+
+
+```{r}
+source(here("src/cluster_dotplot_complexheatmap.R"))
+```
+
+### Make dotplot
+
+Relate to Main Figure 3c, Suppl Figure 11c
+
+```{r}
+NK_cells_health<- cd3_minus_annotated_health$seurat_clusters %in% c(5,8,11,17,21) 
+NK_mats_health<- GetMatrixFromSeurat(obj = cd3_minus_annotated_health[, NK_cells_health], features = NK_genes, group.by ="clusters_anno_sub" )
+
+
+NK_cells_no_health<- cd3_minus_annotated_no_health$seurat_clusters %in% c(5,8,11,17,21)
+NK_mats_no_health<- GetMatrixFromSeurat(obj = cd3_minus_annotated_no_health[, NK_cells_no_health], features = NK_genes, group.by ="clusters_anno_sub"  )
+
+
+NK_cells<- cd3_minus_annotated$seurat_clusters %in% c(5,8,11,17,21)
+NK_mats_all<- GetMatrixFromSeurat(obj = cd3_minus_annotated[, NK_cells], features = NK_genes, group.by ="clusters_anno_sub")
+
+
+colnames(NK_mats_health$exp_mat)
+
+colnames(NK_mats_no_health$exp_mat)
+
+library(viridis)
+#Polychrome::swatch(viridis(20))
+#viridis(20)[10]
+
+
+col_fun<-  circlize::colorRamp2(c(-1, 0, 2), c("#440154FF", "#238A8DFF", "#FDE725FF"))
+
+# NK_anno<- left_join(tibble( seurat_clusters = colnames(NK_mats_health$exp_mat)), cd3_minus_annotation) %>%
+#   mutate(cluster_anno = paste0(str_pad(seurat_clusters, width = 2, side = "left", pad = "0"), "_", annotation)) %>%
+#   pull(cluster_anno)
+
+NK_anno<- colnames(NK_mats_health$exp_mat)
+ 
+
+#library(RColorBrewer)
+column_ha1<- HeatmapAnnotation(
+    cluster_anno = NK_anno,
+    col = list(cluster_anno = setNames(c("#ef9802", "#38683f", "#4553b1", "#67a700", "#5d5172"), NK_anno)),
+    na_col = "grey",
+    show_annotation_name = FALSE
+)
+
+column_ha2<- HeatmapAnnotation(
+    cluster_anno = NK_anno,
+    col = list(cluster_anno = setNames(c("#ef9802", "#38683f", "#4553b1", "#67a700", "#5d5172"), NK_anno)),
+    na_col = "grey",
+    show_annotation_name = TRUE
+)
+
+## legend for the dot size
+lgd_list = list(
+    Legend( labels = c(0, 10, 25, 50 , 75, 100), title = "percentage",
+            graphics = list(
+              function(x, y, w, h) grid.circle(x = x, y = y, r = 0  * unit(2, "mm"),
+                                               gp = gpar(fill = "black")),
+              function(x, y, w, h) grid.circle(x = x, y = y, r = sqrt(0.1)  * unit(2, "mm"),
+                                               gp = gpar(fill = "black")),
+              function(x, y, w, h) grid.circle(x = x, y = y, r = sqrt(0.25) * unit(2, "mm"),
+                                               gp = gpar(fill = "black")),
+              function(x, y, w, h) grid.circle(x = x, y = y, r = sqrt(0.5) * unit(2, "mm"),
+                                               gp = gpar(fill = "black")),
+              function(x, y, w, h) grid.circle(x = x, y = y, r = sqrt(0.75) * unit(2, "mm"),
+                                               gp = gpar(fill = "black")),
+              function(x, y, w, h) grid.circle(x = x, y = y, r = 1 * unit(2, "mm"),
+                                               gp = gpar(fill = "black")))
+            ))
+  
+  
+#### scale across different groups
+
+NK_mats_combine<- cbind(NK_mats_no_health$exp_mat, NK_mats_health$exp_mat, NK_mats_all$exp_mat)
+NK_mats_combine_scaled<- t(scale(t(NK_mats_combine)))
+
+ncol(NK_mats_no_health$exp_mat)
+NK_mats_no_health_scaled<-  NK_mats_combine_scaled[, 1:5]
+NK_mats_health_scaled<-  NK_mats_combine_scaled[, 6:10]
+NK_mats_all_scaled<-  NK_mats_combine_scaled[, 11:15]
+
+quantile(NK_mats_combine_scaled, c(0, 0.1, 0.5, 0.8, 0.9, 0.95, 1))
+set.seed(1234)  
+pdf(here("results/figures/2021_11_07_NK_cells_health_vs_RR_vs_all_scale_across_dotplot_complexheatmap.pdf"), width = 8, height = 9)
+NK_hp1<- MakeClusterDotPlot(exp_mat = NK_mats_no_health_scaled, 
+                   percent_mat = NK_mats_no_health$percent_mat, 
+                   col_fun = col_fun,
+                   column_title = "cHLs",
+                   column_title_side = "bottom",
+                   row_km = 4,
+                   row_names_font_size = 7, 
+                   column_ha = column_ha1,
+                   legend_dot_size = unit(2, "mm"),
+                   show_row_dend =FALSE,
+                   show_column_dend = FALSE,
+                   column_names_side = "top",
+                   column_names_gp = gpar(fontsize = 10))
+
+NK_hp2<- MakeClusterDotPlot(exp_mat = NK_mats_health_scaled, 
+                   percent_mat = NK_mats_health$percent_mat, 
+                   col_fun = col_fun,
+                   column_title = "healthy donors",
+                   column_title_side = "bottom",
+                   row_km = 4,
+                   row_names_font_size = 7, 
+                   column_ha = column_ha1,
+                   legend_dot_size = unit(2, "mm"),
+                   row_order = row_order(NK_hp1) %>% unlist(use.names = FALSE),
+                   column_order = column_order(NK_hp1),
+                   column_names_side = "top",
+                   column_names_gp = gpar(fontsize = 10))
+
+
+NK_hp<- MakeClusterDotPlot(exp_mat = NK_mats_all_scaled, 
+                   percent_mat = NK_mats_all$percent_mat, 
+                   col_fun = col_fun,
+                   column_title = "all",
+                   column_title_side = "bottom",
+                   row_km = 4,
+                   row_names_font_size = 7, 
+                   column_ha = column_ha1,
+                   legend_dot_size = unit(2, "mm"),
+                   row_order = row_order(NK_hp1) %>% unlist(use.names = FALSE),
+                   column_order = column_order(NK_hp1),
+                   column_names_side = "top",
+                   column_names_gp = gpar(fontsize = 10))
+
+draw(NK_hp1 + NK_hp2 + NK_hp , annotation_legend_list = lgd_list, ht_gap = unit(1, "cm") )
+dev.off()
+                   
+```
+
+
+### B cells dotplot 
+
+Relate to Main Figure 4c,  Suppl Figure 12c. 
+
+```{r}
+
+## CD10 is MME, CD20 is MS4A1, CD21 is CR2, CD23 is FCER2, CD138 is SDC1
+immunoglobuin_genes<- c("IGKV1-39", "IGKV1D-39", "IGLC2", "IGLC3", "IGHA1", "IGHG2")
+ontogeny_markers<- c("CD19", "MS4A1", "MME", "CR2", "CD22", "FCER2", "CD24", "CD27", "CD38", "CD79B",  "CXCR5", "TNFRSF13B", "CLECL1", "TCL1A", "IGLL5", "CD72", "MZB1", "PLD4", "ITM2C", "IGKC", "IGKV1-39", "IGLC2", "IGLC3", "IGHD", "IGHM", "IGHA1", "IGHA2", "IGHE", "IGHG1", "IGHG2", "IGHG3", "IGHG4", "CD19", "CD79A", "CD83", "CD69", "CD80", "CD86")
+
+B_others<- c("PPP1R14A", "IGLV2-14", "JCHAIN", "CD82", "VIM", "HLA-C")
+
+B_genes<- c(immunoglobuin_genes, ontogeny_markers, B_others)
+B_genes<- unique(B_genes)
+B_genes[!B_genes %in% rownames(cd3_minus_annotated)]
+
+
+```
+
+
+```{r}
+B_cells_health<- cd3_minus_annotated_health@meta.data$clusters_anno_sub%>% str_detect("^03|^06|^09|^12|^15|^19")
+B_mats_health<- GetMatrixFromSeurat(obj = cd3_minus_annotated_health[, B_cells_health], features = B_genes, group.by = "clusters_anno_sub" )
+
+
+B_cells_no_health<- cd3_minus_annotated_no_health@meta.data$clusters_anno_sub%>% str_detect("^03|^06|^09|^12|^15|^19")
+B_mats_no_health<- GetMatrixFromSeurat(obj = cd3_minus_annotated_no_health[, B_cells_no_health], features = B_genes, group.by = "clusters_anno_sub"  )
+
+
+B_cells<- cd3_minus_annotated@meta.data$clusters_anno_sub%>% str_detect("^03|^06|^09|^12|^15|^19")
+B_mats_all<- GetMatrixFromSeurat(obj = cd3_minus_annotated[, B_cells], features = B_genes, 
+                                 group.by = "clusters_anno_sub" )
+
+
+colnames(B_mats_health$exp_mat)
+rownames(B_mats_health$exp_mat)
+colnames(B_mats_no_health$exp_mat)
+
+col_fun<-  circlize::colorRamp2(c(-1, 0, 2), c("#440154FF", "#238A8DFF", "#FDE725FF"))
+
+B_anno<- colnames(B_mats_health$exp_mat)
+ 
+
+library(RColorBrewer)
+column_ha1<- HeatmapAnnotation(
+    cluster_anno = B_anno,
+    col = list(cluster_anno = setNames(c("#e44f0c", "#8c970d", "#722974", "#0692c4", "#c5565f","#ab4026"), B_anno)),
+    na_col = "grey",
+    show_annotation_name = FALSE
+)
+
+column_ha2<- HeatmapAnnotation(
+    cluster_anno = B_anno,
+    col = list(cluster_anno = setNames(c("#e44f0c", "#8c970d", "#722974", "#0692c4", "#c5565f", "#ab4026"), B_anno)),
+    na_col = "grey",
+    show_annotation_name = TRUE
+)
+
+## legend for the dot size
+lgd_list = list(
+    Legend( labels = c(0, 10, 25, 50 , 75, 100), title = "percentage",
+            graphics = list(
+              function(x, y, w, h) grid.circle(x = x, y = y, r = 0  * unit(2, "mm"),
+                                               gp = gpar(fill = "black")),
+              function(x, y, w, h) grid.circle(x = x, y = y, r = sqrt(0.1)  * unit(2, "mm"),
+                                               gp = gpar(fill = "black")),
+              function(x, y, w, h) grid.circle(x = x, y = y, r = sqrt(0.25) * unit(2, "mm"),
+                                               gp = gpar(fill = "black")),
+              function(x, y, w, h) grid.circle(x = x, y = y, r = sqrt(0.5) * unit(2, "mm"),
+                                               gp = gpar(fill = "black")),
+              function(x, y, w, h) grid.circle(x = x, y = y, r = sqrt(0.75) * unit(2, "mm"),
+                                               gp = gpar(fill = "black")),
+              function(x, y, w, h) grid.circle(x = x, y = y, r = 1 * unit(2, "mm"),
+                                               gp = gpar(fill = "black")))
+            ))
+  
+  
+set.seed(123)  
+pdf(here("results/figures/2021_11_07_B_cells_health_vs_RR_vs_all_dotplot_complexheatmap.pdf"), width = 10, height = 8)
+B_hp1<- MakeClusterDotPlot(exp_mat = t(scale(t(B_mats_no_health$exp_mat))), 
+                   percent_mat = B_mats_no_health$percent_mat, 
+                   col_fun = col_fun,
+                   column_title = "no health",
+                   row_km = 4,
+                   row_names_font_size = 7, 
+                   column_ha = column_ha1,
+                   legend_dot_size = unit(2, "mm"))
+
+B_hp2<- MakeClusterDotPlot(exp_mat = t(scale(t(B_mats_health$exp_mat))), 
+                   percent_mat = B_mats_health$percent_mat, 
+                   col_fun = col_fun,
+                   column_title = "health",
+                   row_km = 4,
+                   row_names_font_size = 7, 
+                   column_ha = column_ha1,
+                   legend_dot_size = unit(2, "mm"),
+                   row_order = row_order(B_hp1) %>% unlist(use.names = FALSE),
+                   column_order = column_order(B_hp1))
+
+B_hp<- MakeClusterDotPlot(exp_mat = t(scale(t(B_mats_all$exp_mat))), 
+                   percent_mat = B_mats_all$percent_mat, 
+                   col_fun = col_fun,
+                   column_title = "all",
+                   row_km = 4,
+                   row_names_font_size = 7, 
+                   column_ha = column_ha2,
+                   legend_dot_size = unit(2, "mm"),
+                   row_order = row_order(B_hp1) %>% unlist(use.names = FALSE),
+                   column_order = column_order(B_hp1))
+
+draw(B_hp1 + B_hp2 + B_hp , annotation_legend_list = lgd_list, ht_gap = unit(1, "cm") )
+dev.off()
+                   
+  
+#### scale across different groups
+
+B_mats_combine<- cbind(B_mats_no_health$exp_mat, B_mats_health$exp_mat, B_mats_all$exp_mat)
+B_mats_combine_scaled<- t(scale(t(B_mats_combine)))
+
+ncol(B_mats_no_health$exp_mat)
+B_mats_no_health_scaled<-  B_mats_combine_scaled[, 1:6]
+B_mats_health_scaled<-  B_mats_combine_scaled[, 7:12]
+B_mats_all_scaled<-  B_mats_combine_scaled[, 13:18]  
+
+set.seed(123)  
+pdf(here("results/figures/2021_11_07_B_cells_health_vs_RR_vs_all_scale_across_dotplot_complexheatmap3.pdf"), width = 10, height = 8.5)
+B_hp1<- MakeClusterDotPlot(exp_mat = B_mats_no_health_scaled, 
+                   percent_mat = B_mats_no_health$percent_mat, 
+                   col_fun = col_fun,
+                   column_title = "cHLs",
+                   column_title_side = "bottom",
+                   row_km = 4,
+                   row_names_font_size = 7, 
+                   column_ha = column_ha1,
+                   legend_dot_size = unit(2, "mm"),
+                   show_row_dend =FALSE,
+                   show_column_dend = FALSE,
+                   column_names_side = "top",
+                   column_names_gp = gpar(fontsize = 10))
+
+B_hp2<- MakeClusterDotPlot(exp_mat = B_mats_health_scaled, 
+                   percent_mat = B_mats_health$percent_mat, 
+                   col_fun = col_fun,
+                   column_title = "healthy donors",
+                   column_title_side = "bottom",
+                   row_km = 4,
+                   row_names_font_size = 7, 
+                   column_ha = column_ha1,
+                   legend_dot_size = unit(2, "mm"),
+                   row_order = row_order(B_hp1) %>% unlist(use.names = FALSE),
+                   column_order = column_order(B_hp1),
+                   column_names_side = "top",
+                   column_names_gp = gpar(fontsize = 10))
+
+B_hp<- MakeClusterDotPlot(exp_mat = B_mats_all_scaled, 
+                   percent_mat = B_mats_all$percent_mat, 
+                   col_fun = col_fun,
+                   column_title = "all",
+                   column_title_side = "bottom",
+                   row_km = 4,
+                   row_names_font_size = 7, 
+                   column_ha = column_ha2,
+                   legend_dot_size = unit(2, "mm"),
+                   row_order = row_order(B_hp1) %>% unlist(use.names = FALSE),
+                   column_order = column_order(B_hp1),
+                   column_names_side = "top",column_names_gp = gpar(fontsize = 10))
+
+draw(B_hp1 + B_hp2 + B_hp , annotation_legend_list = lgd_list, ht_gap = unit(1, "cm") )
+dev.off()
+```
+
+
+### Myeloid cells dot plot 
+
+Relate to Main Figure 5c, Suppl Figure 14b
+```{r}
+myeloid_genes<- c("CD14", "FCGR3A", "LYZ", "SIGLEC10", "SIRPA", "CD274", "RPL27A", "RPSA", "IFI6", "ISG15", "IFI44L", "RHOC", "IFITM1", "IFITM2", "IFITM3", "C1QA", "C1QB", "C1QC", "CLEC10A", "FCER1A", "HLA-DQA1", "HLA-DQB1", "HLA-DRB1", "CD74", "HLA-DRA", "HLA-DPA1", "HLA-DPB1", "HLA-DMA", "CCL4", "CCL5", "CTSW", "GZMA", "GNLY", "KLRB1", "NKG7", "GZMH", "GZMM", "KLRD1", "KLRB1", "CD247", "TCF4", "GZMB", "SERPINF1", "SELENOS", "XBP1", "BCL11A", "IRF4", "TRAF4", "SOX4", "PPP1R14B", "IL1RN", "CCL2", "CTSL", "CCL7", "CD63", "BZW1", "FABP5", "JARID2", "THBS1", "CXCL8", "SERPINB2", "PHLDA1", "PID1", "TIMP1", "EREG", "CXCL2", "CXCL3", "IL1B", "IER3", "PRF1")
+
+myeloid_genes<- unique(myeloid_genes)
+
+myeloid_genes[!myeloid_genes %in% rownames(cd3_minus_annotated)]
+
+
+myeloid_cells_health<- cd3_minus_annotated_health@meta.data$clusters_anno_sub%>% str_detect("^00|^01|^02|^04|^07|^10|^14|^18|^20_b|^22|^24")
+myeloid_mats_health<- GetMatrixFromSeurat(obj = cd3_minus_annotated_health[, myeloid_cells_health], features = myeloid_genes, group.by = "clusters_anno_sub" )
+
+
+myeloid_cells_no_health<- cd3_minus_annotated_no_health@meta.data$clusters_anno_sub%>% str_detect("^00|^01|^02|^04|^07|^10|^14|^18|^20_b|^22|^24")
+myeloid_mats_no_health<- GetMatrixFromSeurat(obj = cd3_minus_annotated_no_health[, myeloid_cells_no_health], features = myeloid_genes, group.by = "clusters_anno_sub"  )
+
+myeloid_cells<- cd3_minus_annotated@meta.data$clusters_anno_sub%>% str_detect("^00|^01|^02|^04|^07|^10|^14|^18|^20_b|^22|^24")
+myeloid_mats_all<- GetMatrixFromSeurat(obj = cd3_minus_annotated[, myeloid_cells], features = myeloid_genes, group.by = "clusters_anno_sub"  )
+
+
+all.equal(colnames(myeloid_mats_health$exp_mat), colnames(myeloid_mats_no_health$exp_mat))
+
+
+rownames(myeloid_mats_health$exp_mat)
+colnames(myeloid_mats_no_health$exp_mat)
+
+col_fun<-  circlize::colorRamp2(c(-1, 0, 2), c("#440154FF", "#238A8DFF", "#FDE725FF"))
+
+myeloid_anno<- colnames(myeloid_mats_no_health$exp_mat)
+ 
+
+library(RColorBrewer)
+column_ha1<- HeatmapAnnotation(
+    cluster_anno = myeloid_anno,
+    col = list(cluster_anno = setNames(c("#3366cc", "#78537f", "#be4031", "#f27708","#2a9615",
+                                         "#85129e", "#ab5688", "#897414", "#943a48", "#3a6095",
+                                         "#8e4698"), myeloid_anno)),
+    na_col = "grey",
+    show_annotation_name = FALSE
+)
+
+column_ha2<- HeatmapAnnotation(
+    cluster_anno =myeloid_anno,
+    col = list(cluster_anno = setNames(c("#3366cc", "#78537f", "#be4031", "#f27708","#2a9615",
+                                         "#85129e", "#ab5688", "#897414", "#943a48", "#3a6095",
+                                         "#8e4698"), myeloid_anno)),
+    na_col = "grey",
+    show_annotation_name = TRUE
+)
+
+## legend for the dot size
+lgd_list = list(
+    Legend( labels = c(0, 10, 25, 50 , 75, 100), title = "percentage",
+            graphics = list(
+              function(x, y, w, h) grid.circle(x = x, y = y, r = 0  * unit(2, "mm"),
+                                               gp = gpar(fill = "black")),
+              function(x, y, w, h) grid.circle(x = x, y = y, r = sqrt(0.1)  * unit(2, "mm"),
+                                               gp = gpar(fill = "black")),
+              function(x, y, w, h) grid.circle(x = x, y = y, r = sqrt(0.25) * unit(2, "mm"),
+                                               gp = gpar(fill = "black")),
+              function(x, y, w, h) grid.circle(x = x, y = y, r = sqrt(0.5) * unit(2, "mm"),
+                                               gp = gpar(fill = "black")),
+              function(x, y, w, h) grid.circle(x = x, y = y, r = sqrt(0.75) * unit(2, "mm"),
+                                               gp = gpar(fill = "black")),
+              function(x, y, w, h) grid.circle(x = x, y = y, r = 1 * unit(2, "mm"),
+                                               gp = gpar(fill = "black")))
+            ))
+  
+  
+set.seed(123)  
+pdf(here("results/figures/myeloid_cells_health_vs_RR_vs_all_dotplot_complexheatmap2.pdf"), width = 12, height = 10)
+myeloid_hp1<- MakeClusterDotPlot(exp_mat = t(scale(t(myeloid_mats_no_health$exp_mat))), 
+                   percent_mat = myeloid_mats_no_health$percent_mat, 
+                   col_fun = col_fun,
+                   column_title = "no health",
+                   row_km = 4,
+                   row_names_font_size = 7, 
+                   column_ha = column_ha1,
+                   legend_dot_size = unit(2, "mm"))
+
+myeloid_hp2<- MakeClusterDotPlot(exp_mat = t(scale(t(myeloid_mats_health$exp_mat))), 
+                   percent_mat = myeloid_mats_health$percent_mat, 
+                   col_fun = col_fun,
+                   column_title = "health",
+                   row_km = 4,
+                   row_names_font_size = 7, 
+                   column_ha = column_ha1,
+                   legend_dot_size = unit(2, "mm"),
+                   row_order = row_order(myeloid_hp1) %>% unlist(use.names = FALSE),
+                   column_order = column_order(myeloid_hp1))
+
+myeloid_hp<- MakeClusterDotPlot(exp_mat = t(scale(t(myeloid_mats_all$exp_mat))), 
+                   percent_mat = myeloid_mats_all$percent_mat, 
+                   col_fun = col_fun,
+                   column_title = "all",
+                   row_km = 4,
+                   row_names_font_size = 7, 
+                   column_ha = column_ha2,
+                   legend_dot_size = unit(2, "mm"),
+                   row_order = row_order(myeloid_hp1) %>% unlist(use.names = FALSE),
+                   column_order = column_order(myeloid_hp1))
+
+
+draw(myeloid_hp1 + myeloid_hp2 + myeloid_hp , annotation_legend_list = lgd_list, ht_gap = unit(1, "cm") )
+dev.off()
+
+
+#### scale across different groups
+
+myeloid_mats_combine<- cbind(myeloid_mats_no_health$exp_mat, myeloid_mats_health$exp_mat, myeloid_mats_all$exp_mat)
+myeloid_mats_combine_scaled<- t(scale(t(myeloid_mats_combine)))
+
+ncol(myeloid_mats_no_health$exp_mat)
+myeloid_mats_no_health_scaled<-  myeloid_mats_combine_scaled[, 1:11]
+myeloid_mats_health_scaled<-  myeloid_mats_combine_scaled[, 12:22]
+myeloid_mats_all_scaled<-  myeloid_mats_combine_scaled[, 23:33]  
+
+
+set.seed(12345)  
+pdf(here("results/figures/2021_11_07_myeloid_cells_health_vs_RR_vs_all_scale_across_dotplot_complexheatmap.pdf"), width = 12, height = 12)
+myeloid_hp1<- MakeClusterDotPlot(exp_mat = myeloid_mats_no_health_scaled, 
+                   percent_mat = myeloid_mats_no_health$percent_mat, 
+                   col_fun = col_fun,
+                   column_title = "cHLs",
+                   column_title_side = "bottom",
+                   row_km = 4,
+                   row_names_font_size = 7, 
+                   column_ha = column_ha1,
+                   legend_dot_size = unit(2, "mm"),
+                   show_row_dend =FALSE,
+                   show_column_dend = FALSE,
+                   column_names_side = "top")
+
+myeloid_hp2<- MakeClusterDotPlot(exp_mat = myeloid_mats_health_scaled, 
+                   percent_mat = myeloid_mats_health$percent_mat, 
+                   col_fun = col_fun,
+                   column_title = "healthy donors",
+                   column_title_side = "bottom",
+                   row_km = 4,
+                   row_names_font_size = 7, 
+                   column_ha = column_ha1,
+                   legend_dot_size = unit(2, "mm"),
+                   row_order = row_order(myeloid_hp1) %>% unlist(use.names = FALSE),
+                   column_order = column_order(myeloid_hp1),
+                   column_names_side = "top")
+
+myeloid_hp<- MakeClusterDotPlot(exp_mat = myeloid_mats_all_scaled, 
+                   percent_mat = myeloid_mats_all$percent_mat, 
+                   col_fun = col_fun,
+                   column_title = "all",
+                   column_title_side = "bottom",
+                   row_km = 4,
+                   row_names_font_size = 7, 
+                   column_ha = column_ha2,
+                   legend_dot_size = unit(2, "mm"),
+                   row_order = row_order(myeloid_hp1) %>% unlist(use.names = FALSE),
+                   column_order = column_order(myeloid_hp1),
+                   column_names_side = "top")
+
+
+draw(myeloid_hp1 + myeloid_hp2 + myeloid_hp , annotation_legend_list = lgd_list, ht_gap = unit(1, "cm") )
+dev.off()
+```
+
+
+
+When all the cells have 0 expression, `scale` will give NaN
+
+```{r}
+# https://stackoverflow.com/questions/15363610/why-does-scale-return-nan-for-zero-variance-columns
+apply(mat, 1, function(y) (y - mean(y)) / sd(y) ^ as.logical(sd(y)))
+```
+
+
+### dot plot for myeloid populations split by groups
+
+borrow some functions from 
+https://github.com/satijalab/seurat/issues/371
+
+```{r}
+
+PrctCellExpringGene <- function(object, genes, group.by = "all"){
+    if(group.by == "all"){
+        prct = unlist(lapply(genes,calc_percentage_helper, object=object))
+        result = data.frame(Markers = genes, Cell_proportion = prct)
+        return(result)
+    }
+
+    else{        
+        list = SplitObject(object, group.by)
+        factors = names(list)
+
+        results = lapply(list, PrctCellExpringGene, genes=genes)
+        for(i in 1:length(factors)){
+        results[[i]]$Feature = factors[i]
+        }
+        combined = do.call("rbind", results)
+        return(combined)
+    }
+}
+
+calc_percentage_helper <- function(object,genes){
+    counts = object[['RNA']]@counts
+    ncells = ncol(counts)
+    if(genes %in% row.names(counts)){
+    sum(counts[genes,]>0)/ncells
+    }else{return(NA)}
+}
+
+
+
+```
+
+```{r}
+PrctCellExpringGene(object = myeloid_cells_no_health_obj, genes = myeloid_genes, group.by = 
+                      "all")
+
+myeloid_cells_no_health<- cd3_minus_annotated_no_health@meta.data$clusters_anno_sub%>% str_detect("^00|^01|^02|^04|^07|^10|^14|^18|^20_b|^22|^24")
+
+myeloid_cells_no_health_obj<- cd3_minus_annotated_no_health[, myeloid_cells_no_health]
+
+
+myeloid_cells<- cd3_minus_annotated@meta.data$clusters_anno_sub%>% str_detect("^00|^01|^02|^04|^07|^10|^14|^18|^20_b|^22|^24")
+
+#PR_cells<- cd3_minus_annotated@meta.data$group %in% c("refractory_PR_C1D1",
+#                                                      "refractory_PR_C4D1")
+
+# also remove healthy donors and newly diagnosed 
+PR_cells<- cd3_minus_annotated@meta.data$group %in% c("refractory_PR_C1D1",
+                                                      "refractory_PR_C4D1",
+                                                      "Healthy_donors",
+                                                      "New_C1D1")
+
+myeloid_cells_obj<- cd3_minus_annotated[, myeloid_cells]
+
+cluster0_cells<- cd3_minus_annotated$seurat_clusters == 0
+cluster0_cells_obj_noPR<- cd3_minus_annotated[, cluster0_cells & !PR_cells]
+  
+```
+
+
+```{r}
+GetMatrixFromSeuratByGroup<- function(obj, features){
+  exp_mat<- obj@assays$RNA@data[features, ]
+  count_mat<- obj@assays$RNA@counts[features, ]
+  
+  meta<- obj@meta.data %>%
+  tibble::rownames_to_column(var = "cell")
+
+  exp_df<- as.matrix(exp_mat) %>% as.data.frame() %>% 
+    tibble::rownames_to_column(var="gene") %>%
+    tidyr::pivot_longer(!gene, names_to = "cell", values_to = "expression") %>%
+    left_join(meta) %>%
+    group_by(gene, clusters_anno_sub, group) %>%
+    summarise(average_expression = mean(expression)) %>%
+    tidyr::pivot_wider(names_from = c(clusters_anno_sub, group), values_from= average_expression,
+                       names_sep="|") 
+  
+  exp_mat<- exp_df[, -1] %>% as.matrix()
+  rownames(exp_mat)<- exp_df %>% pull(gene)
+  
+  count_df<- as.matrix(count_mat) %>% as.data.frame() %>% 
+    tibble::rownames_to_column(var="gene") %>%
+    tidyr::pivot_longer(!gene, names_to = "cell", values_to = "count") %>%
+    left_join(meta) %>%
+    group_by(gene, clusters_anno_sub, group) %>%
+    summarise(percentage = mean(count >0)) %>%
+    tidyr::pivot_wider(names_from = c(clusters_anno_sub, group), values_from= percentage,
+                       names_sep="|") 
+
+  percent_mat<- count_df[, -1] %>% as.matrix()
+  rownames(percent_mat)<- count_df %>% pull(gene)
+  if (!identical(dim(exp_mat), dim(percent_mat))) {
+    stop("the dimension of the two matrice should be the same!")
+  }
+  
+  if(! all.equal(colnames(exp_mat), colnames(percent_mat))) {
+    stop("column names of the two matrice should be the same!")
+  }
+  
+  if(! all.equal(rownames(exp_mat), rownames(percent_mat))) {
+    stop("column names of the two matrice should be the same!")
+  }
+  return(list(exp_mat = exp_mat, percent_mat = percent_mat))
+}
+
+```
+
+Relate to Main Figure 6a
+
+```{r}
+mats<- GetMatrixFromSeuratByGroup(myeloid_cells_no_health_obj, features = myeloid_genes)
+
+mats$exp_mat
+
+mats$percent_mat
+
+mats<- GetMatrixFromSeuratByGroup(myeloid_cells_no_health_obj, features = myeloid_genes)
+
+mats<- GetMatrixFromSeuratByGroup(myeloid_cells_obj, features = myeloid_genes)
+
+
+PD_up_genes<- c("CSF2", "IL6", "CXCL5", "CXCL1", "CCL20", "CCL3", "CXCL3", "CCL4",
+                "IL1B", "IL1A", "CXCL2", "CCL2", "CXCL8")
+
+mats<- GetMatrixFromSeuratByGroup(cluster0_cells_obj_noPR, 
+                                  features = PD_up_genes )
+mats$exp_mat
+```
+
+
+
+```{r}
+source(here("src/cluster_dotplot_complexheatmap.R"))
+
+col_fun<-  circlize::colorRamp2(c(-1, 0, 1.5), c("#440154FF", "#238A8DFF", "#FDE725FF"))
+
+colnames(mats$exp_mat)
+quantile(scale(t(mats$exp_mat)), c(0.1, 0.2, 0.5, 0.9, 0.95))
+
+annotation_df<- colnames(mats$exp_mat) %>%
+  as_tibble() %>%
+  separate(value, into= c("cluster", "group"), sep= "\\|") %>%
+  mutate(treatment_cycle = str_replace(group, ".+(C[14]D1)", "\\1" )) %>%
+  mutate(response = str_replace(group, ".+(CR|PD).+", "\\1")) %>%
+  mutate(treatment_cycle = if_else(treatment_cycle=="Healthy_donors",NA_character_, treatment_cycle)) %>%
+  mutate(response = if_else(response=="Healthy_donors",NA_character_, response)) %>%
+  mutate(response = if_else(response=="New_C1D1",NA_character_, response))
+  
+  
+column_ha<- HeatmapAnnotation(
+    df = as.data.frame(annotation_df),
+    col = list(cluster = setNames(brewer.pal(length(unique(annotation_df$cluster)), "Set3"), unique(annotation_df$cluster)),
+               group = setNames(scales::hue_pal()(length(unique(annotation_df$group))), unique(annotation_df$group)),
+               treatment_cycle = c("C1D1" = "brown", "C4D1" = "purple"),
+               response = c("CR"= "red", "PD" = "blue")
+    ),
+    na_col = "grey"
+)
+draw(column_ha)
+
+column_ha1<- HeatmapAnnotation(
+    df = as.data.frame(annotation_df),
+    col = list(cluster = c("00_Activated_monocytes" = "#00563F"),
+               group = setNames(scales::hue_pal()(length(unique(annotation_df$group))), unique(annotation_df$group)),
+               treatment_cycle = c("C1D1" = "brown", "C4D1" = "purple"),
+               response = c("CR"= "red", "PD" = "blue")
+    ),
+    na_col = "grey"
+)
+
+draw(column_ha1)
+
+
+library(RColorBrewer)
+
+
+colnames(mats$exp_mat)
+
+pdf(here("results/figures/2021-11-19_myeloid_dotplot_by_group_cluster0_cHL.pdf"), width = 6, height = 3.5)
+cluster0_dotplot<- MakeClusterDotPlot(exp_mat = t(scale(t(mats$exp_mat)))[PD_up_genes,], percent_mat = mats$percent_mat[PD_up_genes,] *100,
+                   col_fun = col_fun,
+                   column_labels = colnames(mats$exp_mat) %>% str_replace("(^[0-9]+)_.+", "\\1"),
+                   cluster_columns = FALSE,
+                   cluster_rows = FALSE,
+                   column_title = "cHL",
+                   column_title_side = "bottom",
+                   row_km = NULL,
+                   row_names_font_size = 7, 
+                   column_ha = column_ha1,
+                   legend_dot_size = unit(2, "mm"),
+                   show_row_dend =FALSE,
+                   show_column_dend = FALSE,
+                   column_names_side = "top")
+
+draw(cluster0_dotplot , annotation_legend_list = lgd_list, ht_gap = unit(1, "cm") )
+dev.off()
+```
+
+
+### cell type percentage by groups
+
+Relate to Main Figure 4e, Figure 11d, 12d, 14c
+
+
+some samples for some small clusters do not have any cells and this will not be computed when group_by. and some clusters disappear in PD, the percentage of those clusters should be 0.
+
+The solution is to get all factor levels by `expand` and use `.drop =FALSE` when `count`
+
+```{r}
+
+number_cells_per_sample<- cd3_minus_annotated@meta.data %>%
+  group_by(tigl_id) %>%
+  summarise(total_cells = n()) %>%
+  ungroup()
+
+
+## total 24 clusters
+clusters<- cd3_minus_annotated@meta.data %>% 
+  pull(clusters_anno_sub) %>%
+  unique() %>%
+  sort()
+
+## total 64 samples
+samples<- cd3_minus_annotated@meta.data %>%
+  select(tigl_id, group) %>%
+  distinct() 
+
+## total should be 64 * 23 =1600
+cluster_levels<- tidyr::expand_grid(samples, clusters_anno = clusters) %>%
+  mutate(cluster_levels = paste(tigl_id, group, clusters_anno, sep = "|")) %>%
+  pull(cluster_levels)
+
+number_cells_per_seurat_cluster<- cd3_minus_annotated@meta.data %>%
+  mutate(clusters_anno = paste(tigl_id, group, clusters_anno_sub, sep = "|")) %>%
+  mutate(clusters_anno= factor(clusters_anno, levels = cluster_levels )) %>%
+  count(clusters_anno,name = "cells_each_cluster",  .drop = FALSE) %>% 
+  tidyr::separate(col = clusters_anno, into =c("tigl_id", "group", "clusters_anno"), sep = "\\|") 
+
+seurat_cells_percentage<- number_cells_per_seurat_cluster %>%
+  left_join(number_cells_per_sample) %>%
+  mutate(percentage = cells_each_cluster/total_cells) %>%
+  mutate(group = factor(group, levels = c("Healthy_donors", "New_C1D1", "refractory_CR_C1D1","refractory_PR_C1D1", "refractory_PD_C1D1","refractory_CR_C4D1","refractory_PR_C4D1", "refractory_PD_C4D1" )))
+
+seurat_cells_percentage$clusters_anno
+
+write_csv(seurat_cells_percentage, here("results/cd3_minus_seurat_annotated_cluster_percentage_subcluster20.csv"))
+
+### only for cluster0
+
+seurat_cells_percentage %>%
+  filter(clusters_anno == "00_Activated_monocytes_macrophages") %>%
+  arrange(group, cells_each_cluster) %>% 
+  mutate(cells_each_cluster = as.numeric(cells_each_cluster)) %>%
+  mutate(tigl_id = factor(tigl_id, levels = tigl_id)) %>%
+  ggplot(aes(x= tigl_id, y = cells_each_cluster)) +
+  geom_col(aes(fill = group)) +
+  geom_hline(yintercept = 200, linetype = 2, color = "red") +
+  theme_classic(base_size = 14) +
+  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
+  ylab("cell number in cluster0 mon/mac") +
+  xlab("")
+ggsave(here("results/figures/cluster0_cellnumber.pdf"), width = 10, height = 6)
+
+seurat_cells_percentage %>%
+  filter(clusters_anno == "00_Activated_monocytes_macrophages") %>%
+  arrange(group, cells_each_cluster) %>% 
+  mutate(tigl_id = factor(tigl_id, levels = tigl_id)) %>%
+  ggplot(aes(x= tigl_id, y = percentage)) +
+  geom_col(aes(fill = group)) +
+  geom_hline(yintercept = 0.2, linetype = 2, color = "red") +
+  theme_classic(base_size = 14) +
+  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
+  xlab("")
+
+ggsave(here("results/figures/cluster0_percentage.pdf"), width = 10, height = 6)
+#############
+
+
+ggplot(seurat_cells_percentage, aes(x = group, y = percentage)) +
+  geom_boxplot(aes(fill = group), outlier.shape = NA) +
+  geom_jitter(width = 0.2, size = 1) +
+  scale_y_continuous(labels = scales::percent) + 
+  facet_wrap(~clusters_anno, scales = "free") +
+  theme_bw(base_size = 14) + 
+  theme(axis.text.x = element_blank(),
+        axis.ticks.x = element_blank(),
+        strip.text = element_text(size = 8)) +
+  xlab("")
+
+ggplot(seurat_cells_percentage, aes(x = group, y = percentage)) +
+  geom_beeswarm(aes(color = group), size = 0.5) +
+  scale_y_continuous(labels = scales::percent) + 
+  facet_wrap(~clusters_anno, scales = "free") +
+  theme_bw(base_size = 14) + 
+  theme(axis.text.x = element_blank(),
+        axis.ticks.x = element_blank(),
+        strip.text = element_text(size = 8)) +
+  xlab("")
+
+ggsave(here("results/figures/cd3_minus_seurat_annotaed_cluster_percentage_boxplot_fix_0_subcluster20_cells.pdf"), width = 16, height = 10)
+```
+
+cuzick trend test https://www.rdocumentation.org/packages/PMCMRplus/versions/1.4.4/topics/cuzickTest
+to se if there is a increasing trend in cell percentage in a cluster for pre-CR--> pre-PR -->pre-PD
+
+```{r}
+library(tidyr)
+#install.packages("PMCMRplus")
+library(PMCMRplus)
+seurat_cells_percentage<- read_csv(here("results/cd3_minus_seurat_annotated_cluster_percentage_subcluster20.csv"))
+
+## pre-treatment
+cells_percentage_nest<- seurat_cells_percentage %>% 
+  filter(group %in% c("refractory_CR_C1D1", "refractory_PR_C1D1", "refractory_PD_C1D1")) %>%
+  mutate(group = factor(group, levels =c("refractory_CR_C1D1", "refractory_PR_C1D1", "refractory_PD_C1D1"))) %>%
+  group_by(clusters_anno) %>%
+  nest()
+
+## post-treatment
+cells_percentage_nest<- seurat_cells_percentage %>% 
+  filter(group %in% c("refractory_CR_C4D1", "refractory_PR_C4D1", "refractory_PD_C4D1")) %>%
+  mutate(group = factor(group, levels =c("refractory_CR_C4D1", "refractory_PR_C4D1", "refractory_PD_C4D1"))) %>%
+  group_by(clusters_anno) %>%
+  nest()
+
+cuzickTest(cells_percentage_nest$data[[1]]$percentage, cells_percentage_nest$data[[1]]$group)
+
+cells_percentage_nest<- cells_percentage_nest %>%
+  mutate(cuzick_test = map_dbl(data, ~ cuzickTest(.x$percentage, .x$group)$p.value))
+
+cells_percentage_nest$cuzick_test
+
+View(cells_percentage_nest %>% select(-data)) 
+### healthy donor vs newly diagnosed
+
+cells_percentage_nest<- seurat_cells_percentage %>% 
+  filter(group %in% c("Healthy_donors", "New_C1D1")) %>%
+  mutate(group = factor(group, levels =c("Healthy_donors", "New_C1D1"))) %>%
+  group_by(clusters_anno) %>%
+  nest()
+
+wilcox.test(percentage~ group, data = cells_percentage_nest$data[[1]]) 
+cells_percentage_nest<- cells_percentage_nest %>%
+  mutate(wilcox_test = map_dbl(data, function(x) wilcox.test(percentage ~group, data=x)$p.value))
+
+  
+seurat_cells_percentage %>% 
+  group_by(group, clusters_anno) %>%
+  summarise(median_percentage = median(percentage)) %>%
+  write_tsv(here("data/cd3_minus_seurat_annotated_cluster_median_percentage_subcluster20.csv"))
+
+
+seurat_cells_percentage %>% 
+  distinct(tigl_id, group, total_cells) %>%
+  arrange(group, total_cells) %>%
+  mutate(tigl_id = factor(tigl_id, levels = .$tigl_id) ) %>%
+  ggplot(aes(x= tigl_id, y = total_cells)) +
+  geom_bar(stat ="identity",aes(fill = group)) +theme_classic(base_size = 14) +
+  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
+  xlab("")
+
+ggsave(here("results/figures/cd3_minus_final_seurat_obj_cells_number.pdf"), width =12, height = 6)
+
+
+seurat_cells_percentage %>%
+  filter(str_detect(clusters_anno,"B_cell")) %>%
+  filter(group == "refractory_PD_C1D1") %>%
+  arrange(clusters_anno) %>% View()
+```
+
+
+
+
+
+
+