Switch to side-by-side view

--- a
+++ b/RNA-seq/AnalysisPipeline/2.AnnotatedCluster.R
@@ -0,0 +1,235 @@
+#' @description: annotated the cell type
+
+library(Seurat)
+library(harmony)
+library(clustree)
+library(ggpubr)
+library(dplyr)
+library(patchwork)
+library(ComplexHeatmap)
+library(circlize)
+library(vegan)
+set.seed(101)
+library(openxlsx)
+library(future)
+plan("multiprocess", workers = 10) 
+options(future.globals.maxSize = 50000 * 1024^2) # set 50G RAM
+setwd(dir = "/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scRNA")
+source(file = "/home/longzhilin/Analysis_Code/code/ratio.plot.R")
+
+#### Harmony corrected result
+data.merge <- readRDS("data.merge.harmony.PC40.SCT.feature3000.rds")
+DefaultAssay(data.merge) <- "RNA"
+
+pdf("2.Cluster/cluster.pdf")
+DimPlot(object = data.merge, reduction = 'tsne',label = TRUE, group.by = "seurat_clusters")+NoLegend()
+DimPlot(object = data.merge, reduction = 'umap',label = TRUE, group.by = "seurat_clusters")+NoLegend()
+dev.off()
+
+#### Plot the ratio of each cell type and sample situation
+pdf("2.Cluster/cluster.number.ratio.pdf", height = 4, width = 7)
+ratio.plot(seurat.object = data.merge, id.vars1 = "orig.ident", id.vars2 = "seurat_clusters")
+dev.off()
+
+expMatrix <- GetAssayData(data.merge, slot = "scale.data")
+highVariableGenes <- VariableFeatures(data.merge)
+expMatrix.high <- expMatrix[highVariableGenes,]
+meanExpCluster <- apply(expMatrix.high, 1, function(x){
+    mean.value <- tapply(x, data.merge$seurat_clusters, mean)
+    return(mean.value)
+})
+
+corrMatrix <- (1- cor(t(meanExpCluster), method="pearson"))/2
+library(ape)
+## dd <- dist(M)
+hc <- hclust(as.dist(corrMatrix),method="complete")
+pdf("2.Cluster/clustersimilarity.HVG.pdf")
+plot.phylo(as.phylo(hc), type = "fan", cex = 0.8,no.margin = FALSE)
+dev.off()
+
+#### Differential expression
+Idents(data.merge) <- data.merge$seurat_clusters
+cluster.all.markers <- FindAllMarkers(data.merge, only.pos = TRUE, group.by = "seurat_clusters", test.use = "MAST", latent.vars = "orig.ident")
+cluster.sig.markers <- cluster.all.markers[which(cluster.all.markers$p_val_adj<0.05),]
+saveRDS(cluster.sig.markers, file = "2.Cluster/AnnotateCellType/cluster.sig.markers.rds")
+write.table(cluster.sig.markers, file = "2.Cluster/AnnotateCellType/cluster.sig.markers.txt", quote = F, sep = "\t", row.names = F)
+cluster.sig.markers.top <- cluster.sig.markers %>% group_by(cluster) %>% top_n(n = 30, wt = avg_log2FC)
+write.table(cluster.sig.markers.top, file = "2.Cluster/AnnotateCellType/cluster.sig.markers.top30.txt", col.names = T, row.names = F, sep = "\t", quote = F)
+top.genes <- cluster.sig.markers %>% group_by(cluster) %>% top_n(n = 5, wt = avg_log2FC)
+pdf("2.Cluster/AnnotateCellType/cluster.topgenes.pdf")
+DoHeatmap(data.merge, features = unique(top.genes$gene), size = 2) + NoLegend()
+dev.off()
+
+####Method 1: The expression of the classic marker
+cell.type.markers <- read.table(file = "2.Cluster/AnnotateCellType/cellMarker.txt", header = T, stringsAsFactors = F, sep = "\t")
+exp.matrix <- GetAssayData(data.merge, slot = "data")
+index <- match(cell.type.markers$Gene, rownames(exp.matrix))
+gene.matrix <- exp.matrix[na.omit(index),]
+cell.type.markers <- cell.type.markers[which(!is.na(index)),]
+
+# Calculate the average expression of the cell type of each cluster
+cluster.score <- apply(gene.matrix, 1, function(x){
+    a <- tapply(x, data.merge$seurat_clusters, mean)
+    return(a)
+})
+cluster.score.normailzed <- decostand(cluster.score, "range", 2) 
+cellType.cluster.score <- apply(cluster.score, 1, function(x){
+    a <- tapply(x, cell.type.markers$CellType, mean)
+    return(a)
+})
+cellType.cluster.score.normailzed <- decostand(cellType.cluster.score, "range", 1)
+annotation.colors <- Palettes$stallion2[1:length(unique(cell.type.markers$CellType))]
+names(annotation.colors) <- unique(cell.type.markers$CellType)
+row.annotations <- rowAnnotation(Type = factor(cell.type.markers$CellType, 
+                                 levels = unique(cell.type.markers$CellType)),
+                                 col = list(Type = annotation.colors))
+pdf("2.Cluster/AnnotateCellType/cluster.signature.expression.pdf")
+col_fun1 <- colorRamp2(c(0, 3), c("white", "#ff5a36"))
+col_fun2 <- colorRamp2(c(0, 0.5, 1), c("#1e90ff", "white", "#ff5a36"))
+
+row_split <- factor(cell.type.markers$CellType, levels = unique(cell.type.markers$CellType))
+Heatmap(t(cluster.score.normailzed), col = col_fun2, row_split = row_split, width = unit(10, "cm"), height = unit(12, "cm"), cluster_columns = F, cluster_rows = F, show_column_names = T, show_row_names = T, row_names_gp = gpar(fontsize = 6), column_names_gp = gpar(fontsize = 6),
+        heatmap_legend_param = list(
+                title = "Expression", at = c(0, 1), 
+                labels = c("min", "max")))
+Heatmap(cellType.cluster.score, name = "Expression", col = col_fun1, width = unit(8, "cm"), height = unit(8, "cm"), cluster_columns = T , cluster_rows = T, show_column_names = T, show_row_names = T, row_names_gp = gpar(fontsize = 8), column_names_gp = gpar(fontsize = 6))
+Heatmap(cellType.cluster.score.normailzed, col = col_fun2, width = unit(8, "cm"), height = unit(8, "cm"), cluster_columns = T , cluster_rows = F, show_column_names = T, show_row_names = T, row_names_gp = gpar(fontsize = 8), column_names_gp = gpar(fontsize = 6), 
+        heatmap_legend_param = list(
+                title = "Expression", at = c(0, 1), 
+                labels = c("min", "max")))
+
+a <- data.merge
+a$seurat_clusters <- factor(a$seurat_clusters, levels = rownames(cluster.score.normailzed))
+DotPlot(a, cols = c("#1e90ff", "#ff5a36"), features = cell.type.markers$Gene, group.by = "seurat_clusters", dot.scale = 4) + RotatedAxis() + theme(axis.text.x = element_text(size = 6)) + theme(axis.text.y = element_text(size = 6))
+DotPlot(a, cols = c("#1e90ff", "#ff5a36"), features = cell.type.markers$Gene, group.by = "seurat_clusters", dot.scale = 4) + RotatedAxis() + NoLegend()+ theme(axis.text.x = element_text(size = 8)) + theme(axis.text.y = element_text(size = 8))
+dev.off()
+
+#####annotated the cell type
+#RNA low resolution
+cluster.label <- data.merge@meta.data$seurat_clusters
+cluster.label <- gsub("^0$", "NK/NKT cell", cluster.label)
+cluster.label <- gsub("^1$", "Macrophage", cluster.label)
+cluster.label <- gsub("^2$", "CD8+ T cell", cluster.label)
+cluster.label <- gsub("^3$", "CD4+ T cell", cluster.label)
+cluster.label <- gsub("^4$", "Tumor", cluster.label)
+cluster.label <- gsub("^5$", "Endothelium (VCAM1-)", cluster.label)
+cluster.label <- gsub("^6$", "Mesangial cell", cluster.label)
+cluster.label <- gsub("^7$", "Monocyte", cluster.label) # --- monocyte CD14
+cluster.label <- gsub("^8$", "Mast cell", cluster.label)
+cluster.label <- gsub("^9$", "B cell", cluster.label)
+cluster.label <- gsub("^10$", "B cell", cluster.label)
+cluster.label <- gsub("^11$", "Monocyte", cluster.label) # --- monocyte CD16
+cluster.label <- gsub("^12$", "Endothelium (VCAM1+)", cluster.label)
+cluster.label <- gsub("^13$", "Treg", cluster.label)
+cluster.label <- gsub("^14$", "Dendritic cell", cluster.label)
+cluster.label <- gsub("^15$", "Unknown1", cluster.label)
+cluster.label <- gsub("^16$", "Proliferative CD8+ T cell", cluster.label)
+cluster.label <- gsub("^17$", "Neutrophil", cluster.label)
+cluster.label <- gsub("^18$", "Unknown2", cluster.label)
+data.merge <- AddMetaData(data.merge, cluster.label, col.name = "cellType_low")
+# Remove cells that cannot be annotated or the cell number less than 100 cells
+data.merge.pro <- subset(data.merge, subset = seurat_clusters %in% c(0:14, 16,17))
+data.merge.pro$seurat_clusters <- factor(data.merge.pro$seurat_clusters, levels = c(0:14, 16,17))
+saveRDS(data.merge.pro, "data.merge.pro.rds")
+
+pdf("2.Cluster/AnnotateCellType/cellType.pro.pdf")
+DimPlot(object = data.merge, reduction = 'tsne',label = TRUE, group.by = "cellType_low")+NoLegend()
+DimPlot(object = data.merge, reduction = 'tsne',label = TRUE, group.by = "cellType_low")
+DimPlot(object = data.merge, reduction = 'umap',label = TRUE, group.by = "cellType_low")+NoLegend()
+DimPlot(object = data.merge, reduction = 'umap',label = TRUE, group.by = "cellType_low")
+dev.off()
+
+##Plot--- celltype marker plot
+plot.cellType <- c("CD4+ T cell", "Treg", "CD8+ T cell", "NK/NKT cell", "Proliferative CD8+ T cell", "B cell", "Macrophage", "Monocyte", "Dendritic cell", "Neutrophil", "Mast cell", "Endothelium (VCAM1+)", "Endothelium (VCAM1-)", "Mesangial cell", "Tumor")
+pdf("2.Cluster/AnnotateCellType/cellType.ratio.pdf", height = 4, width = 7)
+ratio.plot(seurat.object = data.merge, id.vars1 = "orig.ident", id.vars2 = "cellType_low", angle = 60)
+dev.off()
+
+pdf("2.Cluster/AnnotateCellType/cellType.marker.dotplot.pdf", height = 4)
+data.merge$cellType_low <- factor(data.merge$cellType_low, levels = plot.cellType)
+DotPlot(data.merge, cols = c("#1e90ff", "#F15F30"), features = cell.type.markers$Gene, group.by = "cellType_low", dot.scale = 3.5)  + NoLegend() + theme(axis.text.y = element_text(size = 4)) + labs(x= "", y = "") + theme(axis.text.x = element_text(size = 6, angle = 90))
+DotPlot(data.merge, cols = c("#1e90ff", "#F15F30"), features = cell.type.markers$Gene, group.by = "cellType_low", dot.scale = 3.5)  + theme(axis.text.y = element_text(size = 4)) + labs(x= "", y = "") + theme(axis.text.x = element_text(size = 5, angle = 90))
+dev.off()
+
+#### Cell type specific gene
+Idents(data.merge) <- data.merge$cellType_low
+idents <- as.character(levels(data.merge))
+cellType.all.markers <- FindAllMarkers(data.merge, 
+                                       group.by = "cellType_low", 
+                                       logfc.threshold = 0, 
+                                       min.pct = 0.1, 
+                                       test.use = "MAST", 
+                                       latent.vars = "orig.ident")
+saveFormat <- lapply(idents, function(x){
+  index <- which(cellType.all.markers$cluster == x)
+  DEGs <- cellType.all.markers[index,]
+  DEGs.up <- DEGs %>% filter(avg_log2FC>0) %>% arrange(desc(avg_log2FC))
+  DEGs.down <- DEGs %>% filter(avg_log2FC<0) %>% arrange(avg_log2FC)
+  DEGs <- rbind(DEGs.up, DEGs.down)
+  return(DEGs)
+})
+write.xlsx(saveFormat, file = "2.Cluster/AnnotateCellType/celltype.all.DEGs.xlsx", sheetName = idents, rowNames = F)
+saveRDS(cellType.all.markers, file = "2.Cluster/AnnotateCellType/cellType.all.DEGs.rds")
+
+#require logfc.threshold >= 0.25 & p_val_adj < 0.05
+cellType.sig.DEGs <- cellType.all.markers %>% filter(avg_log2FC >=0.25 & p_val_adj < 0.05) %>% arrange(desc(avg_log2FC))
+saveFormat <- lapply(idents, function(x){
+  index <- which(cellType.sig.DEGs$cluster == x)
+  DEGs <- cellType.sig.DEGs[index,]
+  DEGs <- DEGs %>% arrange(desc(avg_log2FC))
+  return(DEGs)
+})
+write.xlsx(saveFormat, file = "2.Cluster/AnnotateCellType/cellType.sig.pos.DEGs.xlsx", sheetName = idents, rowNames = F)
+saveRDS(cellType.sig.DEGs, file = "2.Cluster/AnnotateCellType/cellType.sig.pos.DEGs.rds")
+top.genes <- cellType.sig.DEGs %>% group_by(cluster) %>% top_n(n = 5, wt = avg_log2FC)
+pdf("2.Cluster/AnnotateCellType/cellType.topgenes.pdf")
+DoHeatmap(data.merge, features = unique(top.genes$gene), size = 2) + NoLegend()
+dev.off()
+
+Tumor.info <- data.merge@meta.data[which(data.merge@meta.data$cellType_low=="Tumor"),]
+cell.number <- as.data.frame(table(Tumor.info$orig.ident))
+pdf("2.Cluster/AnnotateCellType/Tumor.patient.pdf")
+ggbarplot(cell.number, x="Var1", y="Freq", fill = "Var1", color = "Var1", palette = Palettes$group_pal[1:length(unique(Tumor.info$orig.ident))],
+          sort.by.groups=FALSE,
+          label = T, xlab = "", ylab = "Cell Number") + theme(legend.position="none") 
+dev.off()
+
+cellType.ratio <- as.data.frame(table(data.merge$cellType_low))
+cellType.ratio$Type <- rep("Lymphoid cells", nrow(cellType.ratio))
+idx <- which(cellType.ratio$Var1 %in% c("Macrophage", "Monocyte", "Dendritic cell", "Neutrophil", "Mast cell"))
+cellType.ratio$Type[idx] <- "Myeloid cells"
+idx <- which(cellType.ratio$Var1 %in% c("Endothelium (VCAM1+)", "Endothelium (VCAM1-)", "Mesangial cell"))
+cellType.ratio$Type[idx] <- "Other cells"
+idx <- which(cellType.ratio$Var1 == "Tumor")
+cellType.ratio$Type[idx] <- "Tumor cells"
+group.ratio <- tapply(cellType.ratio$Freq, cellType.ratio$Type, sum)
+group.ratio <- data.frame(Type = names(group.ratio), Ratio = group.ratio)
+labs <- paste0(group.ratio$Type, " (", round(group.ratio$Ratio/sum(group.ratio$Ratio), 4)*100, "%)")
+pdf("2.Cluster/AnnotateCellType/lineage.ratio.pdf")
+p <- ggpie(group.ratio, "Ratio", label = labs,
+      fill = "Type", color = "white", lab.pos = "in",
+      palette = "npgs")
+print(p)
+dev.off()
+
+#### functional enrichment
+## GSEA software --- recalculate the result
+a <- data.frame(geneName = Tumor.DEGs$gene, Rank = Tumor.DEGs$avg_log2FC)
+write.csv(a, file = "2.Cluster/AnnotateCellType/GSEA/Tumor.DEGs.csv")
+
+#### The proportion of various cell types in each patient
+library(plotly)
+pdf("2.Cluster/AnnotateCellType/cellType.patient.ratio.pdf")
+patients <- unique(data.merge$orig.ident)
+res <- sapply(patients, function(x){
+  a <- subset(data.merge, subset = orig.ident==x)
+  cellType.ratio <- as.data.frame(table(as.character(a$cellType_low)))
+  group.ratio <- cellType.ratio$Freq/sum(cellType.ratio$Freq)
+  group.ratio <- data.frame(Type = cellType.ratio$Var1, Ratio = group.ratio)
+  labs <- paste0(round(group.ratio$Ratio, 4)*100, "%")
+  p <- ggpie(group.ratio, "Ratio", label = labs,
+        fill = "Type", color = "white", lab.pos = "out",
+        palette = "npgs")
+  print(p)
+})
+dev.off()