a b/RNA-seq/AnalysisPipeline/Endothelial.R
1
#' @description: Endothelium cells
2
3
library(Seurat)
4
library(harmony)
5
library(clustree)
6
library(ggpubr)
7
library(dplyr)
8
library(patchwork)
9
library(ComplexHeatmap)
10
library(circlize)
11
library(vegan)
12
set.seed(101)
13
library(openxlsx)
14
library(future)
15
plan("multiprocess", workers = 10) 
16
options(future.globals.maxSize = 50000 * 1024^2) # set 50G RAM
17
setwd(dir = "/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scRNA")
18
source(file = "/home/longzhilin/Analysis_Code/Visualization/colorPalettes.R")
19
source(file = "/home/longzhilin/Analysis_Code/code/ratio.plot.R")
20
21
scRNA.data <- readRDS("data.merge.pro.rds")
22
23
###########################1.differential gene#################################
24
Idents(scRNA.data) <- scRNA.data$cellType_low
25
idents <- as.character(levels(scRNA.data))
26
Endothelium.markers <- FindMarkers(scRNA.data, 
27
                                    ident.1 = "Endothelium (VCAM1+)",
28
                                    ident.2 = "Endothelium (VCAM1-)",
29
                                    logfc.threshold = 0, 
30
                                    min.pct = 0.1, 
31
                                    test.use = "MAST", 
32
                                    latent.vars = "orig.ident")
33
Endothelium.markers <- arrange(Endothelium.markers, desc(avg_log2FC))
34
Endothelium.markers$gene <- rownames(Endothelium.markers)
35
Endothelium.DEGs <- Endothelium.markers %>% filter(abs(avg_log2FC) >= 0.25 & p_val_adj < 0.05)
36
write.xlsx(Endothelium.markers, file = "2.Cluster/AnnotateCellType/Endothelium.all.DEGs.xlsx", rowNames = F, overwrite = T)
37
saveRDS(Endothelium.markers, file = "2.Cluster/AnnotateCellType/Endothelium.all.DEGs.rds")
38
39
#### volcano map
40
source("/home/longzhilin/Analysis_Code/Visualization/Plot.EnhancedVolcano.R")
41
pdf("2.Cluster/AnnotateCellType/Endothelium.EnhancedVolcano.pdf")
42
Plot.EnhancedVolcano(Endothelium.DEGs, x = "avg_log2FC", y = "p_val_adj", selected.showType = "Both", select.num = 15, title = "Endothelium (VCAM1+) VS Endothelium (VCAM1-)")
43
dev.off()
44
45
#### pathway enrichment
46
source("/home/longzhilin/Analysis_Code/PathwayEnrichment/clusterProfiler.enricher.R")
47
up.DEGs <- filter(Endothelium.DEGs, avg_log2FC > 1 & p_val_adj < 0.05)
48
down.DEGs <- filter(Endothelium.DEGs, avg_log2FC< -1 & p_val_adj < 0.05)
49
DEGs <- list(up = up.DEGs$gene, down = down.DEGs$gene)
50
pdf("2.Cluster/AnnotateCellType/Endothelium.program.pathway.enrichment.pdf")
51
res <- lapply(names(DEGs), function(x){
52
    y <- DEGs[[x]]
53
    res <- cluterProfiler.enricher(gene = y, geneType = "SYMBOL", db.type = "MsigDB", saveDir = paste0(getwd(),"/2.Cluster/AnnotateCellType"),
54
                            title = x, qvalueCutoff = 0.05, pvalueCutoff = 0.05)
55
    # gene ratio
56
    res <- res$em.res.genesymbol@result %>% filter(p.adjust<0.05) #fdr adjust
57
    pathway.geneNum <- unlist(strsplit(res$BgRatio, "/"))[seq(1, nrow(res),by=2)]
58
    gene.ratio <- as.numeric(res$Count)/as.numeric(pathway.geneNum)
59
    res$gene.ratio <- gene.ratio
60
    return(res)
61
})
62
dev.off()
63
64
####cluster profiler enrichment result display
65
library(ggthemes)
66
#top10
67
enrich1 <- read.csv("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scRNA/2.Cluster/AnnotateCellType/up_enricherResult.csv", header = T)
68
enrich2 <- read.csv("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scRNA/2.Cluster/AnnotateCellType/down_enricherResult.csv", header = T)
69
enrich.list <- list(up = enrich1, down = enrich2)
70
enrich <- list(up = enrich1[c(1,3,8,9,16,17,18,21,22,23,25,26,27,28,42,49,50,72), c("ID")],
71
               down = enrich2[c(2:5,8:10,11,12,19,21,22), c("ID")])
72
enrich.path <- Reduce(function(x,y) c(x,y), enrich)
73
enrich.path <- unique(enrich.path)
74
enrich.path <- enrich.path[-c(3,6,26,28,29,4,13,11,5,20,23,27,30)]
75
enrich.list.select <- lapply(enrich.list, function(x){
76
  idx <- which(x$ID %in% enrich.path)
77
  return(x[idx, c("ID", "Count", "pvalue", "p.adjust")])
78
})
79
enrich.res <- Reduce(function(x,y) rbind(x,y), enrich.list.select)
80
a <- sapply(enrich.list.select, function(x){nrow(x)})
81
enrich.res$Type <- c(rep("Endothelium (VCAM1+) ", as.numeric(a[1])), rep("Endothelium (VCAM1-) ", as.numeric(a[2])))
82
enrich.res$p.adjust <- -log10(enrich.res$p.adjust)
83
84
# process gene name
85
enrich.res$ID <- gsub("_", " ", enrich.res$ID)
86
# factor ID
87
idx1 <- na.omit(match(enrich$up, enrich.path))
88
idx2 <- na.omit(match(enrich$down, enrich.path))
89
enrich.path <- enrich.path[c(idx1, idx2)]
90
enrich.res$ID <- factor(enrich.res$ID, levels = gsub("_", " ", enrich.path))
91
#source(file = "/home/longzhilin/Analysis_Code/Visualization/ggplot.bubble.plot.R")
92
pdf("2.Cluster/AnnotateCellType/Endothelium.enrichment.bubble.pdf", width = 5, height = 5)
93
p1 <- ggplot(enrich.res, 
94
            aes(x = Type, 
95
                y = ID, 
96
                size = Count, 
97
                fill = p.adjust))
98
p2 <- p1 + guides(color=FALSE) + geom_point(shape = 21, alpha = 0.7) + theme_few() + scale_size_continuous(breaks = c(3,5,7,9,11), labels = c(3,5,7,9,11)) + scale_fill_gradientn(colours = c("#fed71b", "#fcc41c", "#f27620", "#e92925"), breaks = c(1.69897, 4, 6, 8), labels = c(0.02, 0.0001, 0.000001, 0.00000001))
99
p2 <- p2 + theme(axis.text.x = element_text(angle = 45,vjust = 1,hjust = 1, size = 8), axis.text.y = element_text(size = 8)) + xlab("") + ylab("")
100
print(p2)
101
dev.off()