|
a |
|
b/RNA-seq/AnalysisPipeline/Endothelial.R |
|
|
1 |
#' @description: Endothelium cells |
|
|
2 |
|
|
|
3 |
library(Seurat) |
|
|
4 |
library(harmony) |
|
|
5 |
library(clustree) |
|
|
6 |
library(ggpubr) |
|
|
7 |
library(dplyr) |
|
|
8 |
library(patchwork) |
|
|
9 |
library(ComplexHeatmap) |
|
|
10 |
library(circlize) |
|
|
11 |
library(vegan) |
|
|
12 |
set.seed(101) |
|
|
13 |
library(openxlsx) |
|
|
14 |
library(future) |
|
|
15 |
plan("multiprocess", workers = 10) |
|
|
16 |
options(future.globals.maxSize = 50000 * 1024^2) # set 50G RAM |
|
|
17 |
setwd(dir = "/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scRNA") |
|
|
18 |
source(file = "/home/longzhilin/Analysis_Code/Visualization/colorPalettes.R") |
|
|
19 |
source(file = "/home/longzhilin/Analysis_Code/code/ratio.plot.R") |
|
|
20 |
|
|
|
21 |
scRNA.data <- readRDS("data.merge.pro.rds") |
|
|
22 |
|
|
|
23 |
###########################1.differential gene################################# |
|
|
24 |
Idents(scRNA.data) <- scRNA.data$cellType_low |
|
|
25 |
idents <- as.character(levels(scRNA.data)) |
|
|
26 |
Endothelium.markers <- FindMarkers(scRNA.data, |
|
|
27 |
ident.1 = "Endothelium (VCAM1+)", |
|
|
28 |
ident.2 = "Endothelium (VCAM1-)", |
|
|
29 |
logfc.threshold = 0, |
|
|
30 |
min.pct = 0.1, |
|
|
31 |
test.use = "MAST", |
|
|
32 |
latent.vars = "orig.ident") |
|
|
33 |
Endothelium.markers <- arrange(Endothelium.markers, desc(avg_log2FC)) |
|
|
34 |
Endothelium.markers$gene <- rownames(Endothelium.markers) |
|
|
35 |
Endothelium.DEGs <- Endothelium.markers %>% filter(abs(avg_log2FC) >= 0.25 & p_val_adj < 0.05) |
|
|
36 |
write.xlsx(Endothelium.markers, file = "2.Cluster/AnnotateCellType/Endothelium.all.DEGs.xlsx", rowNames = F, overwrite = T) |
|
|
37 |
saveRDS(Endothelium.markers, file = "2.Cluster/AnnotateCellType/Endothelium.all.DEGs.rds") |
|
|
38 |
|
|
|
39 |
#### volcano map |
|
|
40 |
source("/home/longzhilin/Analysis_Code/Visualization/Plot.EnhancedVolcano.R") |
|
|
41 |
pdf("2.Cluster/AnnotateCellType/Endothelium.EnhancedVolcano.pdf") |
|
|
42 |
Plot.EnhancedVolcano(Endothelium.DEGs, x = "avg_log2FC", y = "p_val_adj", selected.showType = "Both", select.num = 15, title = "Endothelium (VCAM1+) VS Endothelium (VCAM1-)") |
|
|
43 |
dev.off() |
|
|
44 |
|
|
|
45 |
#### pathway enrichment |
|
|
46 |
source("/home/longzhilin/Analysis_Code/PathwayEnrichment/clusterProfiler.enricher.R") |
|
|
47 |
up.DEGs <- filter(Endothelium.DEGs, avg_log2FC > 1 & p_val_adj < 0.05) |
|
|
48 |
down.DEGs <- filter(Endothelium.DEGs, avg_log2FC< -1 & p_val_adj < 0.05) |
|
|
49 |
DEGs <- list(up = up.DEGs$gene, down = down.DEGs$gene) |
|
|
50 |
pdf("2.Cluster/AnnotateCellType/Endothelium.program.pathway.enrichment.pdf") |
|
|
51 |
res <- lapply(names(DEGs), function(x){ |
|
|
52 |
y <- DEGs[[x]] |
|
|
53 |
res <- cluterProfiler.enricher(gene = y, geneType = "SYMBOL", db.type = "MsigDB", saveDir = paste0(getwd(),"/2.Cluster/AnnotateCellType"), |
|
|
54 |
title = x, qvalueCutoff = 0.05, pvalueCutoff = 0.05) |
|
|
55 |
# gene ratio |
|
|
56 |
res <- res$em.res.genesymbol@result %>% filter(p.adjust<0.05) #fdr adjust |
|
|
57 |
pathway.geneNum <- unlist(strsplit(res$BgRatio, "/"))[seq(1, nrow(res),by=2)] |
|
|
58 |
gene.ratio <- as.numeric(res$Count)/as.numeric(pathway.geneNum) |
|
|
59 |
res$gene.ratio <- gene.ratio |
|
|
60 |
return(res) |
|
|
61 |
}) |
|
|
62 |
dev.off() |
|
|
63 |
|
|
|
64 |
####cluster profiler enrichment result display |
|
|
65 |
library(ggthemes) |
|
|
66 |
#top10 |
|
|
67 |
enrich1 <- read.csv("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scRNA/2.Cluster/AnnotateCellType/up_enricherResult.csv", header = T) |
|
|
68 |
enrich2 <- read.csv("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scRNA/2.Cluster/AnnotateCellType/down_enricherResult.csv", header = T) |
|
|
69 |
enrich.list <- list(up = enrich1, down = enrich2) |
|
|
70 |
enrich <- list(up = enrich1[c(1,3,8,9,16,17,18,21,22,23,25,26,27,28,42,49,50,72), c("ID")], |
|
|
71 |
down = enrich2[c(2:5,8:10,11,12,19,21,22), c("ID")]) |
|
|
72 |
enrich.path <- Reduce(function(x,y) c(x,y), enrich) |
|
|
73 |
enrich.path <- unique(enrich.path) |
|
|
74 |
enrich.path <- enrich.path[-c(3,6,26,28,29,4,13,11,5,20,23,27,30)] |
|
|
75 |
enrich.list.select <- lapply(enrich.list, function(x){ |
|
|
76 |
idx <- which(x$ID %in% enrich.path) |
|
|
77 |
return(x[idx, c("ID", "Count", "pvalue", "p.adjust")]) |
|
|
78 |
}) |
|
|
79 |
enrich.res <- Reduce(function(x,y) rbind(x,y), enrich.list.select) |
|
|
80 |
a <- sapply(enrich.list.select, function(x){nrow(x)}) |
|
|
81 |
enrich.res$Type <- c(rep("Endothelium (VCAM1+) ", as.numeric(a[1])), rep("Endothelium (VCAM1-) ", as.numeric(a[2]))) |
|
|
82 |
enrich.res$p.adjust <- -log10(enrich.res$p.adjust) |
|
|
83 |
|
|
|
84 |
# process gene name |
|
|
85 |
enrich.res$ID <- gsub("_", " ", enrich.res$ID) |
|
|
86 |
# factor ID |
|
|
87 |
idx1 <- na.omit(match(enrich$up, enrich.path)) |
|
|
88 |
idx2 <- na.omit(match(enrich$down, enrich.path)) |
|
|
89 |
enrich.path <- enrich.path[c(idx1, idx2)] |
|
|
90 |
enrich.res$ID <- factor(enrich.res$ID, levels = gsub("_", " ", enrich.path)) |
|
|
91 |
#source(file = "/home/longzhilin/Analysis_Code/Visualization/ggplot.bubble.plot.R") |
|
|
92 |
pdf("2.Cluster/AnnotateCellType/Endothelium.enrichment.bubble.pdf", width = 5, height = 5) |
|
|
93 |
p1 <- ggplot(enrich.res, |
|
|
94 |
aes(x = Type, |
|
|
95 |
y = ID, |
|
|
96 |
size = Count, |
|
|
97 |
fill = p.adjust)) |
|
|
98 |
p2 <- p1 + guides(color=FALSE) + geom_point(shape = 21, alpha = 0.7) + theme_few() + scale_size_continuous(breaks = c(3,5,7,9,11), labels = c(3,5,7,9,11)) + scale_fill_gradientn(colours = c("#fed71b", "#fcc41c", "#f27620", "#e92925"), breaks = c(1.69897, 4, 6, 8), labels = c(0.02, 0.0001, 0.000001, 0.00000001)) |
|
|
99 |
p2 <- p2 + theme(axis.text.x = element_text(angle = 45,vjust = 1,hjust = 1, size = 8), axis.text.y = element_text(size = 8)) + xlab("") + ylab("") |
|
|
100 |
print(p2) |
|
|
101 |
dev.off() |