|
a |
|
b/RNA-seq/AnalysisPipeline/2.AnnotatedCluster.R |
|
|
1 |
#' @description: annotated the cell type |
|
|
2 |
|
|
|
3 |
library(Seurat) |
|
|
4 |
library(harmony) |
|
|
5 |
library(clustree) |
|
|
6 |
library(ggpubr) |
|
|
7 |
library(dplyr) |
|
|
8 |
library(patchwork) |
|
|
9 |
library(ComplexHeatmap) |
|
|
10 |
library(circlize) |
|
|
11 |
library(vegan) |
|
|
12 |
set.seed(101) |
|
|
13 |
library(openxlsx) |
|
|
14 |
library(future) |
|
|
15 |
plan("multiprocess", workers = 10) |
|
|
16 |
options(future.globals.maxSize = 50000 * 1024^2) # set 50G RAM |
|
|
17 |
setwd(dir = "/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scRNA") |
|
|
18 |
source(file = "/home/longzhilin/Analysis_Code/code/ratio.plot.R") |
|
|
19 |
|
|
|
20 |
#### Harmony corrected result |
|
|
21 |
data.merge <- readRDS("data.merge.harmony.PC40.SCT.feature3000.rds") |
|
|
22 |
DefaultAssay(data.merge) <- "RNA" |
|
|
23 |
|
|
|
24 |
pdf("2.Cluster/cluster.pdf") |
|
|
25 |
DimPlot(object = data.merge, reduction = 'tsne',label = TRUE, group.by = "seurat_clusters")+NoLegend() |
|
|
26 |
DimPlot(object = data.merge, reduction = 'umap',label = TRUE, group.by = "seurat_clusters")+NoLegend() |
|
|
27 |
dev.off() |
|
|
28 |
|
|
|
29 |
#### Plot the ratio of each cell type and sample situation |
|
|
30 |
pdf("2.Cluster/cluster.number.ratio.pdf", height = 4, width = 7) |
|
|
31 |
ratio.plot(seurat.object = data.merge, id.vars1 = "orig.ident", id.vars2 = "seurat_clusters") |
|
|
32 |
dev.off() |
|
|
33 |
|
|
|
34 |
expMatrix <- GetAssayData(data.merge, slot = "scale.data") |
|
|
35 |
highVariableGenes <- VariableFeatures(data.merge) |
|
|
36 |
expMatrix.high <- expMatrix[highVariableGenes,] |
|
|
37 |
meanExpCluster <- apply(expMatrix.high, 1, function(x){ |
|
|
38 |
mean.value <- tapply(x, data.merge$seurat_clusters, mean) |
|
|
39 |
return(mean.value) |
|
|
40 |
}) |
|
|
41 |
|
|
|
42 |
corrMatrix <- (1- cor(t(meanExpCluster), method="pearson"))/2 |
|
|
43 |
library(ape) |
|
|
44 |
## dd <- dist(M) |
|
|
45 |
hc <- hclust(as.dist(corrMatrix),method="complete") |
|
|
46 |
pdf("2.Cluster/clustersimilarity.HVG.pdf") |
|
|
47 |
plot.phylo(as.phylo(hc), type = "fan", cex = 0.8,no.margin = FALSE) |
|
|
48 |
dev.off() |
|
|
49 |
|
|
|
50 |
#### Differential expression |
|
|
51 |
Idents(data.merge) <- data.merge$seurat_clusters |
|
|
52 |
cluster.all.markers <- FindAllMarkers(data.merge, only.pos = TRUE, group.by = "seurat_clusters", test.use = "MAST", latent.vars = "orig.ident") |
|
|
53 |
cluster.sig.markers <- cluster.all.markers[which(cluster.all.markers$p_val_adj<0.05),] |
|
|
54 |
saveRDS(cluster.sig.markers, file = "2.Cluster/AnnotateCellType/cluster.sig.markers.rds") |
|
|
55 |
write.table(cluster.sig.markers, file = "2.Cluster/AnnotateCellType/cluster.sig.markers.txt", quote = F, sep = "\t", row.names = F) |
|
|
56 |
cluster.sig.markers.top <- cluster.sig.markers %>% group_by(cluster) %>% top_n(n = 30, wt = avg_log2FC) |
|
|
57 |
write.table(cluster.sig.markers.top, file = "2.Cluster/AnnotateCellType/cluster.sig.markers.top30.txt", col.names = T, row.names = F, sep = "\t", quote = F) |
|
|
58 |
top.genes <- cluster.sig.markers %>% group_by(cluster) %>% top_n(n = 5, wt = avg_log2FC) |
|
|
59 |
pdf("2.Cluster/AnnotateCellType/cluster.topgenes.pdf") |
|
|
60 |
DoHeatmap(data.merge, features = unique(top.genes$gene), size = 2) + NoLegend() |
|
|
61 |
dev.off() |
|
|
62 |
|
|
|
63 |
####Method 1: The expression of the classic marker |
|
|
64 |
cell.type.markers <- read.table(file = "2.Cluster/AnnotateCellType/cellMarker.txt", header = T, stringsAsFactors = F, sep = "\t") |
|
|
65 |
exp.matrix <- GetAssayData(data.merge, slot = "data") |
|
|
66 |
index <- match(cell.type.markers$Gene, rownames(exp.matrix)) |
|
|
67 |
gene.matrix <- exp.matrix[na.omit(index),] |
|
|
68 |
cell.type.markers <- cell.type.markers[which(!is.na(index)),] |
|
|
69 |
|
|
|
70 |
# Calculate the average expression of the cell type of each cluster |
|
|
71 |
cluster.score <- apply(gene.matrix, 1, function(x){ |
|
|
72 |
a <- tapply(x, data.merge$seurat_clusters, mean) |
|
|
73 |
return(a) |
|
|
74 |
}) |
|
|
75 |
cluster.score.normailzed <- decostand(cluster.score, "range", 2) |
|
|
76 |
cellType.cluster.score <- apply(cluster.score, 1, function(x){ |
|
|
77 |
a <- tapply(x, cell.type.markers$CellType, mean) |
|
|
78 |
return(a) |
|
|
79 |
}) |
|
|
80 |
cellType.cluster.score.normailzed <- decostand(cellType.cluster.score, "range", 1) |
|
|
81 |
annotation.colors <- Palettes$stallion2[1:length(unique(cell.type.markers$CellType))] |
|
|
82 |
names(annotation.colors) <- unique(cell.type.markers$CellType) |
|
|
83 |
row.annotations <- rowAnnotation(Type = factor(cell.type.markers$CellType, |
|
|
84 |
levels = unique(cell.type.markers$CellType)), |
|
|
85 |
col = list(Type = annotation.colors)) |
|
|
86 |
pdf("2.Cluster/AnnotateCellType/cluster.signature.expression.pdf") |
|
|
87 |
col_fun1 <- colorRamp2(c(0, 3), c("white", "#ff5a36")) |
|
|
88 |
col_fun2 <- colorRamp2(c(0, 0.5, 1), c("#1e90ff", "white", "#ff5a36")) |
|
|
89 |
|
|
|
90 |
row_split <- factor(cell.type.markers$CellType, levels = unique(cell.type.markers$CellType)) |
|
|
91 |
Heatmap(t(cluster.score.normailzed), col = col_fun2, row_split = row_split, width = unit(10, "cm"), height = unit(12, "cm"), cluster_columns = F, cluster_rows = F, show_column_names = T, show_row_names = T, row_names_gp = gpar(fontsize = 6), column_names_gp = gpar(fontsize = 6), |
|
|
92 |
heatmap_legend_param = list( |
|
|
93 |
title = "Expression", at = c(0, 1), |
|
|
94 |
labels = c("min", "max"))) |
|
|
95 |
Heatmap(cellType.cluster.score, name = "Expression", col = col_fun1, width = unit(8, "cm"), height = unit(8, "cm"), cluster_columns = T , cluster_rows = T, show_column_names = T, show_row_names = T, row_names_gp = gpar(fontsize = 8), column_names_gp = gpar(fontsize = 6)) |
|
|
96 |
Heatmap(cellType.cluster.score.normailzed, col = col_fun2, width = unit(8, "cm"), height = unit(8, "cm"), cluster_columns = T , cluster_rows = F, show_column_names = T, show_row_names = T, row_names_gp = gpar(fontsize = 8), column_names_gp = gpar(fontsize = 6), |
|
|
97 |
heatmap_legend_param = list( |
|
|
98 |
title = "Expression", at = c(0, 1), |
|
|
99 |
labels = c("min", "max"))) |
|
|
100 |
|
|
|
101 |
a <- data.merge |
|
|
102 |
a$seurat_clusters <- factor(a$seurat_clusters, levels = rownames(cluster.score.normailzed)) |
|
|
103 |
DotPlot(a, cols = c("#1e90ff", "#ff5a36"), features = cell.type.markers$Gene, group.by = "seurat_clusters", dot.scale = 4) + RotatedAxis() + theme(axis.text.x = element_text(size = 6)) + theme(axis.text.y = element_text(size = 6)) |
|
|
104 |
DotPlot(a, cols = c("#1e90ff", "#ff5a36"), features = cell.type.markers$Gene, group.by = "seurat_clusters", dot.scale = 4) + RotatedAxis() + NoLegend()+ theme(axis.text.x = element_text(size = 8)) + theme(axis.text.y = element_text(size = 8)) |
|
|
105 |
dev.off() |
|
|
106 |
|
|
|
107 |
#####annotated the cell type |
|
|
108 |
#RNA low resolution |
|
|
109 |
cluster.label <- data.merge@meta.data$seurat_clusters |
|
|
110 |
cluster.label <- gsub("^0$", "NK/NKT cell", cluster.label) |
|
|
111 |
cluster.label <- gsub("^1$", "Macrophage", cluster.label) |
|
|
112 |
cluster.label <- gsub("^2$", "CD8+ T cell", cluster.label) |
|
|
113 |
cluster.label <- gsub("^3$", "CD4+ T cell", cluster.label) |
|
|
114 |
cluster.label <- gsub("^4$", "Tumor", cluster.label) |
|
|
115 |
cluster.label <- gsub("^5$", "Endothelium (VCAM1-)", cluster.label) |
|
|
116 |
cluster.label <- gsub("^6$", "Mesangial cell", cluster.label) |
|
|
117 |
cluster.label <- gsub("^7$", "Monocyte", cluster.label) # --- monocyte CD14 |
|
|
118 |
cluster.label <- gsub("^8$", "Mast cell", cluster.label) |
|
|
119 |
cluster.label <- gsub("^9$", "B cell", cluster.label) |
|
|
120 |
cluster.label <- gsub("^10$", "B cell", cluster.label) |
|
|
121 |
cluster.label <- gsub("^11$", "Monocyte", cluster.label) # --- monocyte CD16 |
|
|
122 |
cluster.label <- gsub("^12$", "Endothelium (VCAM1+)", cluster.label) |
|
|
123 |
cluster.label <- gsub("^13$", "Treg", cluster.label) |
|
|
124 |
cluster.label <- gsub("^14$", "Dendritic cell", cluster.label) |
|
|
125 |
cluster.label <- gsub("^15$", "Unknown1", cluster.label) |
|
|
126 |
cluster.label <- gsub("^16$", "Proliferative CD8+ T cell", cluster.label) |
|
|
127 |
cluster.label <- gsub("^17$", "Neutrophil", cluster.label) |
|
|
128 |
cluster.label <- gsub("^18$", "Unknown2", cluster.label) |
|
|
129 |
data.merge <- AddMetaData(data.merge, cluster.label, col.name = "cellType_low") |
|
|
130 |
# Remove cells that cannot be annotated or the cell number less than 100 cells |
|
|
131 |
data.merge.pro <- subset(data.merge, subset = seurat_clusters %in% c(0:14, 16,17)) |
|
|
132 |
data.merge.pro$seurat_clusters <- factor(data.merge.pro$seurat_clusters, levels = c(0:14, 16,17)) |
|
|
133 |
saveRDS(data.merge.pro, "data.merge.pro.rds") |
|
|
134 |
|
|
|
135 |
pdf("2.Cluster/AnnotateCellType/cellType.pro.pdf") |
|
|
136 |
DimPlot(object = data.merge, reduction = 'tsne',label = TRUE, group.by = "cellType_low")+NoLegend() |
|
|
137 |
DimPlot(object = data.merge, reduction = 'tsne',label = TRUE, group.by = "cellType_low") |
|
|
138 |
DimPlot(object = data.merge, reduction = 'umap',label = TRUE, group.by = "cellType_low")+NoLegend() |
|
|
139 |
DimPlot(object = data.merge, reduction = 'umap',label = TRUE, group.by = "cellType_low") |
|
|
140 |
dev.off() |
|
|
141 |
|
|
|
142 |
##Plot--- celltype marker plot |
|
|
143 |
plot.cellType <- c("CD4+ T cell", "Treg", "CD8+ T cell", "NK/NKT cell", "Proliferative CD8+ T cell", "B cell", "Macrophage", "Monocyte", "Dendritic cell", "Neutrophil", "Mast cell", "Endothelium (VCAM1+)", "Endothelium (VCAM1-)", "Mesangial cell", "Tumor") |
|
|
144 |
pdf("2.Cluster/AnnotateCellType/cellType.ratio.pdf", height = 4, width = 7) |
|
|
145 |
ratio.plot(seurat.object = data.merge, id.vars1 = "orig.ident", id.vars2 = "cellType_low", angle = 60) |
|
|
146 |
dev.off() |
|
|
147 |
|
|
|
148 |
pdf("2.Cluster/AnnotateCellType/cellType.marker.dotplot.pdf", height = 4) |
|
|
149 |
data.merge$cellType_low <- factor(data.merge$cellType_low, levels = plot.cellType) |
|
|
150 |
DotPlot(data.merge, cols = c("#1e90ff", "#F15F30"), features = cell.type.markers$Gene, group.by = "cellType_low", dot.scale = 3.5) + NoLegend() + theme(axis.text.y = element_text(size = 4)) + labs(x= "", y = "") + theme(axis.text.x = element_text(size = 6, angle = 90)) |
|
|
151 |
DotPlot(data.merge, cols = c("#1e90ff", "#F15F30"), features = cell.type.markers$Gene, group.by = "cellType_low", dot.scale = 3.5) + theme(axis.text.y = element_text(size = 4)) + labs(x= "", y = "") + theme(axis.text.x = element_text(size = 5, angle = 90)) |
|
|
152 |
dev.off() |
|
|
153 |
|
|
|
154 |
#### Cell type specific gene |
|
|
155 |
Idents(data.merge) <- data.merge$cellType_low |
|
|
156 |
idents <- as.character(levels(data.merge)) |
|
|
157 |
cellType.all.markers <- FindAllMarkers(data.merge, |
|
|
158 |
group.by = "cellType_low", |
|
|
159 |
logfc.threshold = 0, |
|
|
160 |
min.pct = 0.1, |
|
|
161 |
test.use = "MAST", |
|
|
162 |
latent.vars = "orig.ident") |
|
|
163 |
saveFormat <- lapply(idents, function(x){ |
|
|
164 |
index <- which(cellType.all.markers$cluster == x) |
|
|
165 |
DEGs <- cellType.all.markers[index,] |
|
|
166 |
DEGs.up <- DEGs %>% filter(avg_log2FC>0) %>% arrange(desc(avg_log2FC)) |
|
|
167 |
DEGs.down <- DEGs %>% filter(avg_log2FC<0) %>% arrange(avg_log2FC) |
|
|
168 |
DEGs <- rbind(DEGs.up, DEGs.down) |
|
|
169 |
return(DEGs) |
|
|
170 |
}) |
|
|
171 |
write.xlsx(saveFormat, file = "2.Cluster/AnnotateCellType/celltype.all.DEGs.xlsx", sheetName = idents, rowNames = F) |
|
|
172 |
saveRDS(cellType.all.markers, file = "2.Cluster/AnnotateCellType/cellType.all.DEGs.rds") |
|
|
173 |
|
|
|
174 |
#require logfc.threshold >= 0.25 & p_val_adj < 0.05 |
|
|
175 |
cellType.sig.DEGs <- cellType.all.markers %>% filter(avg_log2FC >=0.25 & p_val_adj < 0.05) %>% arrange(desc(avg_log2FC)) |
|
|
176 |
saveFormat <- lapply(idents, function(x){ |
|
|
177 |
index <- which(cellType.sig.DEGs$cluster == x) |
|
|
178 |
DEGs <- cellType.sig.DEGs[index,] |
|
|
179 |
DEGs <- DEGs %>% arrange(desc(avg_log2FC)) |
|
|
180 |
return(DEGs) |
|
|
181 |
}) |
|
|
182 |
write.xlsx(saveFormat, file = "2.Cluster/AnnotateCellType/cellType.sig.pos.DEGs.xlsx", sheetName = idents, rowNames = F) |
|
|
183 |
saveRDS(cellType.sig.DEGs, file = "2.Cluster/AnnotateCellType/cellType.sig.pos.DEGs.rds") |
|
|
184 |
top.genes <- cellType.sig.DEGs %>% group_by(cluster) %>% top_n(n = 5, wt = avg_log2FC) |
|
|
185 |
pdf("2.Cluster/AnnotateCellType/cellType.topgenes.pdf") |
|
|
186 |
DoHeatmap(data.merge, features = unique(top.genes$gene), size = 2) + NoLegend() |
|
|
187 |
dev.off() |
|
|
188 |
|
|
|
189 |
Tumor.info <- data.merge@meta.data[which(data.merge@meta.data$cellType_low=="Tumor"),] |
|
|
190 |
cell.number <- as.data.frame(table(Tumor.info$orig.ident)) |
|
|
191 |
pdf("2.Cluster/AnnotateCellType/Tumor.patient.pdf") |
|
|
192 |
ggbarplot(cell.number, x="Var1", y="Freq", fill = "Var1", color = "Var1", palette = Palettes$group_pal[1:length(unique(Tumor.info$orig.ident))], |
|
|
193 |
sort.by.groups=FALSE, |
|
|
194 |
label = T, xlab = "", ylab = "Cell Number") + theme(legend.position="none") |
|
|
195 |
dev.off() |
|
|
196 |
|
|
|
197 |
cellType.ratio <- as.data.frame(table(data.merge$cellType_low)) |
|
|
198 |
cellType.ratio$Type <- rep("Lymphoid cells", nrow(cellType.ratio)) |
|
|
199 |
idx <- which(cellType.ratio$Var1 %in% c("Macrophage", "Monocyte", "Dendritic cell", "Neutrophil", "Mast cell")) |
|
|
200 |
cellType.ratio$Type[idx] <- "Myeloid cells" |
|
|
201 |
idx <- which(cellType.ratio$Var1 %in% c("Endothelium (VCAM1+)", "Endothelium (VCAM1-)", "Mesangial cell")) |
|
|
202 |
cellType.ratio$Type[idx] <- "Other cells" |
|
|
203 |
idx <- which(cellType.ratio$Var1 == "Tumor") |
|
|
204 |
cellType.ratio$Type[idx] <- "Tumor cells" |
|
|
205 |
group.ratio <- tapply(cellType.ratio$Freq, cellType.ratio$Type, sum) |
|
|
206 |
group.ratio <- data.frame(Type = names(group.ratio), Ratio = group.ratio) |
|
|
207 |
labs <- paste0(group.ratio$Type, " (", round(group.ratio$Ratio/sum(group.ratio$Ratio), 4)*100, "%)") |
|
|
208 |
pdf("2.Cluster/AnnotateCellType/lineage.ratio.pdf") |
|
|
209 |
p <- ggpie(group.ratio, "Ratio", label = labs, |
|
|
210 |
fill = "Type", color = "white", lab.pos = "in", |
|
|
211 |
palette = "npgs") |
|
|
212 |
print(p) |
|
|
213 |
dev.off() |
|
|
214 |
|
|
|
215 |
#### functional enrichment |
|
|
216 |
## GSEA software --- recalculate the result |
|
|
217 |
a <- data.frame(geneName = Tumor.DEGs$gene, Rank = Tumor.DEGs$avg_log2FC) |
|
|
218 |
write.csv(a, file = "2.Cluster/AnnotateCellType/GSEA/Tumor.DEGs.csv") |
|
|
219 |
|
|
|
220 |
#### The proportion of various cell types in each patient |
|
|
221 |
library(plotly) |
|
|
222 |
pdf("2.Cluster/AnnotateCellType/cellType.patient.ratio.pdf") |
|
|
223 |
patients <- unique(data.merge$orig.ident) |
|
|
224 |
res <- sapply(patients, function(x){ |
|
|
225 |
a <- subset(data.merge, subset = orig.ident==x) |
|
|
226 |
cellType.ratio <- as.data.frame(table(as.character(a$cellType_low))) |
|
|
227 |
group.ratio <- cellType.ratio$Freq/sum(cellType.ratio$Freq) |
|
|
228 |
group.ratio <- data.frame(Type = cellType.ratio$Var1, Ratio = group.ratio) |
|
|
229 |
labs <- paste0(round(group.ratio$Ratio, 4)*100, "%") |
|
|
230 |
p <- ggpie(group.ratio, "Ratio", label = labs, |
|
|
231 |
fill = "Type", color = "white", lab.pos = "out", |
|
|
232 |
palette = "npgs") |
|
|
233 |
print(p) |
|
|
234 |
}) |
|
|
235 |
dev.off() |