a b/RNA-seq/AnalysisPipeline/2.AnnotatedCluster.R
1
#' @description: annotated the cell type
2
3
library(Seurat)
4
library(harmony)
5
library(clustree)
6
library(ggpubr)
7
library(dplyr)
8
library(patchwork)
9
library(ComplexHeatmap)
10
library(circlize)
11
library(vegan)
12
set.seed(101)
13
library(openxlsx)
14
library(future)
15
plan("multiprocess", workers = 10) 
16
options(future.globals.maxSize = 50000 * 1024^2) # set 50G RAM
17
setwd(dir = "/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scRNA")
18
source(file = "/home/longzhilin/Analysis_Code/code/ratio.plot.R")
19
20
#### Harmony corrected result
21
data.merge <- readRDS("data.merge.harmony.PC40.SCT.feature3000.rds")
22
DefaultAssay(data.merge) <- "RNA"
23
24
pdf("2.Cluster/cluster.pdf")
25
DimPlot(object = data.merge, reduction = 'tsne',label = TRUE, group.by = "seurat_clusters")+NoLegend()
26
DimPlot(object = data.merge, reduction = 'umap',label = TRUE, group.by = "seurat_clusters")+NoLegend()
27
dev.off()
28
29
#### Plot the ratio of each cell type and sample situation
30
pdf("2.Cluster/cluster.number.ratio.pdf", height = 4, width = 7)
31
ratio.plot(seurat.object = data.merge, id.vars1 = "orig.ident", id.vars2 = "seurat_clusters")
32
dev.off()
33
34
expMatrix <- GetAssayData(data.merge, slot = "scale.data")
35
highVariableGenes <- VariableFeatures(data.merge)
36
expMatrix.high <- expMatrix[highVariableGenes,]
37
meanExpCluster <- apply(expMatrix.high, 1, function(x){
38
    mean.value <- tapply(x, data.merge$seurat_clusters, mean)
39
    return(mean.value)
40
})
41
42
corrMatrix <- (1- cor(t(meanExpCluster), method="pearson"))/2
43
library(ape)
44
## dd <- dist(M)
45
hc <- hclust(as.dist(corrMatrix),method="complete")
46
pdf("2.Cluster/clustersimilarity.HVG.pdf")
47
plot.phylo(as.phylo(hc), type = "fan", cex = 0.8,no.margin = FALSE)
48
dev.off()
49
50
#### Differential expression
51
Idents(data.merge) <- data.merge$seurat_clusters
52
cluster.all.markers <- FindAllMarkers(data.merge, only.pos = TRUE, group.by = "seurat_clusters", test.use = "MAST", latent.vars = "orig.ident")
53
cluster.sig.markers <- cluster.all.markers[which(cluster.all.markers$p_val_adj<0.05),]
54
saveRDS(cluster.sig.markers, file = "2.Cluster/AnnotateCellType/cluster.sig.markers.rds")
55
write.table(cluster.sig.markers, file = "2.Cluster/AnnotateCellType/cluster.sig.markers.txt", quote = F, sep = "\t", row.names = F)
56
cluster.sig.markers.top <- cluster.sig.markers %>% group_by(cluster) %>% top_n(n = 30, wt = avg_log2FC)
57
write.table(cluster.sig.markers.top, file = "2.Cluster/AnnotateCellType/cluster.sig.markers.top30.txt", col.names = T, row.names = F, sep = "\t", quote = F)
58
top.genes <- cluster.sig.markers %>% group_by(cluster) %>% top_n(n = 5, wt = avg_log2FC)
59
pdf("2.Cluster/AnnotateCellType/cluster.topgenes.pdf")
60
DoHeatmap(data.merge, features = unique(top.genes$gene), size = 2) + NoLegend()
61
dev.off()
62
63
####Method 1: The expression of the classic marker
64
cell.type.markers <- read.table(file = "2.Cluster/AnnotateCellType/cellMarker.txt", header = T, stringsAsFactors = F, sep = "\t")
65
exp.matrix <- GetAssayData(data.merge, slot = "data")
66
index <- match(cell.type.markers$Gene, rownames(exp.matrix))
67
gene.matrix <- exp.matrix[na.omit(index),]
68
cell.type.markers <- cell.type.markers[which(!is.na(index)),]
69
70
# Calculate the average expression of the cell type of each cluster
71
cluster.score <- apply(gene.matrix, 1, function(x){
72
    a <- tapply(x, data.merge$seurat_clusters, mean)
73
    return(a)
74
})
75
cluster.score.normailzed <- decostand(cluster.score, "range", 2) 
76
cellType.cluster.score <- apply(cluster.score, 1, function(x){
77
    a <- tapply(x, cell.type.markers$CellType, mean)
78
    return(a)
79
})
80
cellType.cluster.score.normailzed <- decostand(cellType.cluster.score, "range", 1)
81
annotation.colors <- Palettes$stallion2[1:length(unique(cell.type.markers$CellType))]
82
names(annotation.colors) <- unique(cell.type.markers$CellType)
83
row.annotations <- rowAnnotation(Type = factor(cell.type.markers$CellType, 
84
                                 levels = unique(cell.type.markers$CellType)),
85
                                 col = list(Type = annotation.colors))
86
pdf("2.Cluster/AnnotateCellType/cluster.signature.expression.pdf")
87
col_fun1 <- colorRamp2(c(0, 3), c("white", "#ff5a36"))
88
col_fun2 <- colorRamp2(c(0, 0.5, 1), c("#1e90ff", "white", "#ff5a36"))
89
90
row_split <- factor(cell.type.markers$CellType, levels = unique(cell.type.markers$CellType))
91
Heatmap(t(cluster.score.normailzed), col = col_fun2, row_split = row_split, width = unit(10, "cm"), height = unit(12, "cm"), cluster_columns = F, cluster_rows = F, show_column_names = T, show_row_names = T, row_names_gp = gpar(fontsize = 6), column_names_gp = gpar(fontsize = 6),
92
        heatmap_legend_param = list(
93
                title = "Expression", at = c(0, 1), 
94
                labels = c("min", "max")))
95
Heatmap(cellType.cluster.score, name = "Expression", col = col_fun1, width = unit(8, "cm"), height = unit(8, "cm"), cluster_columns = T , cluster_rows = T, show_column_names = T, show_row_names = T, row_names_gp = gpar(fontsize = 8), column_names_gp = gpar(fontsize = 6))
96
Heatmap(cellType.cluster.score.normailzed, col = col_fun2, width = unit(8, "cm"), height = unit(8, "cm"), cluster_columns = T , cluster_rows = F, show_column_names = T, show_row_names = T, row_names_gp = gpar(fontsize = 8), column_names_gp = gpar(fontsize = 6), 
97
        heatmap_legend_param = list(
98
                title = "Expression", at = c(0, 1), 
99
                labels = c("min", "max")))
100
101
a <- data.merge
102
a$seurat_clusters <- factor(a$seurat_clusters, levels = rownames(cluster.score.normailzed))
103
DotPlot(a, cols = c("#1e90ff", "#ff5a36"), features = cell.type.markers$Gene, group.by = "seurat_clusters", dot.scale = 4) + RotatedAxis() + theme(axis.text.x = element_text(size = 6)) + theme(axis.text.y = element_text(size = 6))
104
DotPlot(a, cols = c("#1e90ff", "#ff5a36"), features = cell.type.markers$Gene, group.by = "seurat_clusters", dot.scale = 4) + RotatedAxis() + NoLegend()+ theme(axis.text.x = element_text(size = 8)) + theme(axis.text.y = element_text(size = 8))
105
dev.off()
106
107
#####annotated the cell type
108
#RNA low resolution
109
cluster.label <- data.merge@meta.data$seurat_clusters
110
cluster.label <- gsub("^0$", "NK/NKT cell", cluster.label)
111
cluster.label <- gsub("^1$", "Macrophage", cluster.label)
112
cluster.label <- gsub("^2$", "CD8+ T cell", cluster.label)
113
cluster.label <- gsub("^3$", "CD4+ T cell", cluster.label)
114
cluster.label <- gsub("^4$", "Tumor", cluster.label)
115
cluster.label <- gsub("^5$", "Endothelium (VCAM1-)", cluster.label)
116
cluster.label <- gsub("^6$", "Mesangial cell", cluster.label)
117
cluster.label <- gsub("^7$", "Monocyte", cluster.label) # --- monocyte CD14
118
cluster.label <- gsub("^8$", "Mast cell", cluster.label)
119
cluster.label <- gsub("^9$", "B cell", cluster.label)
120
cluster.label <- gsub("^10$", "B cell", cluster.label)
121
cluster.label <- gsub("^11$", "Monocyte", cluster.label) # --- monocyte CD16
122
cluster.label <- gsub("^12$", "Endothelium (VCAM1+)", cluster.label)
123
cluster.label <- gsub("^13$", "Treg", cluster.label)
124
cluster.label <- gsub("^14$", "Dendritic cell", cluster.label)
125
cluster.label <- gsub("^15$", "Unknown1", cluster.label)
126
cluster.label <- gsub("^16$", "Proliferative CD8+ T cell", cluster.label)
127
cluster.label <- gsub("^17$", "Neutrophil", cluster.label)
128
cluster.label <- gsub("^18$", "Unknown2", cluster.label)
129
data.merge <- AddMetaData(data.merge, cluster.label, col.name = "cellType_low")
130
# Remove cells that cannot be annotated or the cell number less than 100 cells
131
data.merge.pro <- subset(data.merge, subset = seurat_clusters %in% c(0:14, 16,17))
132
data.merge.pro$seurat_clusters <- factor(data.merge.pro$seurat_clusters, levels = c(0:14, 16,17))
133
saveRDS(data.merge.pro, "data.merge.pro.rds")
134
135
pdf("2.Cluster/AnnotateCellType/cellType.pro.pdf")
136
DimPlot(object = data.merge, reduction = 'tsne',label = TRUE, group.by = "cellType_low")+NoLegend()
137
DimPlot(object = data.merge, reduction = 'tsne',label = TRUE, group.by = "cellType_low")
138
DimPlot(object = data.merge, reduction = 'umap',label = TRUE, group.by = "cellType_low")+NoLegend()
139
DimPlot(object = data.merge, reduction = 'umap',label = TRUE, group.by = "cellType_low")
140
dev.off()
141
142
##Plot--- celltype marker plot
143
plot.cellType <- c("CD4+ T cell", "Treg", "CD8+ T cell", "NK/NKT cell", "Proliferative CD8+ T cell", "B cell", "Macrophage", "Monocyte", "Dendritic cell", "Neutrophil", "Mast cell", "Endothelium (VCAM1+)", "Endothelium (VCAM1-)", "Mesangial cell", "Tumor")
144
pdf("2.Cluster/AnnotateCellType/cellType.ratio.pdf", height = 4, width = 7)
145
ratio.plot(seurat.object = data.merge, id.vars1 = "orig.ident", id.vars2 = "cellType_low", angle = 60)
146
dev.off()
147
148
pdf("2.Cluster/AnnotateCellType/cellType.marker.dotplot.pdf", height = 4)
149
data.merge$cellType_low <- factor(data.merge$cellType_low, levels = plot.cellType)
150
DotPlot(data.merge, cols = c("#1e90ff", "#F15F30"), features = cell.type.markers$Gene, group.by = "cellType_low", dot.scale = 3.5)  + NoLegend() + theme(axis.text.y = element_text(size = 4)) + labs(x= "", y = "") + theme(axis.text.x = element_text(size = 6, angle = 90))
151
DotPlot(data.merge, cols = c("#1e90ff", "#F15F30"), features = cell.type.markers$Gene, group.by = "cellType_low", dot.scale = 3.5)  + theme(axis.text.y = element_text(size = 4)) + labs(x= "", y = "") + theme(axis.text.x = element_text(size = 5, angle = 90))
152
dev.off()
153
154
#### Cell type specific gene
155
Idents(data.merge) <- data.merge$cellType_low
156
idents <- as.character(levels(data.merge))
157
cellType.all.markers <- FindAllMarkers(data.merge, 
158
                                       group.by = "cellType_low", 
159
                                       logfc.threshold = 0, 
160
                                       min.pct = 0.1, 
161
                                       test.use = "MAST", 
162
                                       latent.vars = "orig.ident")
163
saveFormat <- lapply(idents, function(x){
164
  index <- which(cellType.all.markers$cluster == x)
165
  DEGs <- cellType.all.markers[index,]
166
  DEGs.up <- DEGs %>% filter(avg_log2FC>0) %>% arrange(desc(avg_log2FC))
167
  DEGs.down <- DEGs %>% filter(avg_log2FC<0) %>% arrange(avg_log2FC)
168
  DEGs <- rbind(DEGs.up, DEGs.down)
169
  return(DEGs)
170
})
171
write.xlsx(saveFormat, file = "2.Cluster/AnnotateCellType/celltype.all.DEGs.xlsx", sheetName = idents, rowNames = F)
172
saveRDS(cellType.all.markers, file = "2.Cluster/AnnotateCellType/cellType.all.DEGs.rds")
173
174
#require logfc.threshold >= 0.25 & p_val_adj < 0.05
175
cellType.sig.DEGs <- cellType.all.markers %>% filter(avg_log2FC >=0.25 & p_val_adj < 0.05) %>% arrange(desc(avg_log2FC))
176
saveFormat <- lapply(idents, function(x){
177
  index <- which(cellType.sig.DEGs$cluster == x)
178
  DEGs <- cellType.sig.DEGs[index,]
179
  DEGs <- DEGs %>% arrange(desc(avg_log2FC))
180
  return(DEGs)
181
})
182
write.xlsx(saveFormat, file = "2.Cluster/AnnotateCellType/cellType.sig.pos.DEGs.xlsx", sheetName = idents, rowNames = F)
183
saveRDS(cellType.sig.DEGs, file = "2.Cluster/AnnotateCellType/cellType.sig.pos.DEGs.rds")
184
top.genes <- cellType.sig.DEGs %>% group_by(cluster) %>% top_n(n = 5, wt = avg_log2FC)
185
pdf("2.Cluster/AnnotateCellType/cellType.topgenes.pdf")
186
DoHeatmap(data.merge, features = unique(top.genes$gene), size = 2) + NoLegend()
187
dev.off()
188
189
Tumor.info <- data.merge@meta.data[which(data.merge@meta.data$cellType_low=="Tumor"),]
190
cell.number <- as.data.frame(table(Tumor.info$orig.ident))
191
pdf("2.Cluster/AnnotateCellType/Tumor.patient.pdf")
192
ggbarplot(cell.number, x="Var1", y="Freq", fill = "Var1", color = "Var1", palette = Palettes$group_pal[1:length(unique(Tumor.info$orig.ident))],
193
          sort.by.groups=FALSE,
194
          label = T, xlab = "", ylab = "Cell Number") + theme(legend.position="none") 
195
dev.off()
196
197
cellType.ratio <- as.data.frame(table(data.merge$cellType_low))
198
cellType.ratio$Type <- rep("Lymphoid cells", nrow(cellType.ratio))
199
idx <- which(cellType.ratio$Var1 %in% c("Macrophage", "Monocyte", "Dendritic cell", "Neutrophil", "Mast cell"))
200
cellType.ratio$Type[idx] <- "Myeloid cells"
201
idx <- which(cellType.ratio$Var1 %in% c("Endothelium (VCAM1+)", "Endothelium (VCAM1-)", "Mesangial cell"))
202
cellType.ratio$Type[idx] <- "Other cells"
203
idx <- which(cellType.ratio$Var1 == "Tumor")
204
cellType.ratio$Type[idx] <- "Tumor cells"
205
group.ratio <- tapply(cellType.ratio$Freq, cellType.ratio$Type, sum)
206
group.ratio <- data.frame(Type = names(group.ratio), Ratio = group.ratio)
207
labs <- paste0(group.ratio$Type, " (", round(group.ratio$Ratio/sum(group.ratio$Ratio), 4)*100, "%)")
208
pdf("2.Cluster/AnnotateCellType/lineage.ratio.pdf")
209
p <- ggpie(group.ratio, "Ratio", label = labs,
210
      fill = "Type", color = "white", lab.pos = "in",
211
      palette = "npgs")
212
print(p)
213
dev.off()
214
215
#### functional enrichment
216
## GSEA software --- recalculate the result
217
a <- data.frame(geneName = Tumor.DEGs$gene, Rank = Tumor.DEGs$avg_log2FC)
218
write.csv(a, file = "2.Cluster/AnnotateCellType/GSEA/Tumor.DEGs.csv")
219
220
#### The proportion of various cell types in each patient
221
library(plotly)
222
pdf("2.Cluster/AnnotateCellType/cellType.patient.ratio.pdf")
223
patients <- unique(data.merge$orig.ident)
224
res <- sapply(patients, function(x){
225
  a <- subset(data.merge, subset = orig.ident==x)
226
  cellType.ratio <- as.data.frame(table(as.character(a$cellType_low)))
227
  group.ratio <- cellType.ratio$Freq/sum(cellType.ratio$Freq)
228
  group.ratio <- data.frame(Type = cellType.ratio$Var1, Ratio = group.ratio)
229
  labs <- paste0(round(group.ratio$Ratio, 4)*100, "%")
230
  p <- ggpie(group.ratio, "Ratio", label = labs,
231
        fill = "Type", color = "white", lab.pos = "out",
232
        palette = "npgs")
233
  print(p)
234
})
235
dev.off()