Switch to unified view

a b/ATAC/AnalysisPipeline/4.2.peakAnnotation.genomicLocation.R
1
#' @description: peak annotation and enrichment analysis
2
3
##reference:
4
#https://hbctraining.github.io/Intro-to-ChIPseq/lessons/12_functional_analysis.html
5
#http://bioconductor.org/packages/devel/bioc/vignettes/ChIPseeker/inst/doc/ChIPseeker.html
6
library(Signac)
7
library(Seurat)
8
library(ggpubr)
9
library(GenomicRanges)
10
library(tibble)
11
library(dplyr)
12
set.seed(101) 
13
14
setwd(dir = "/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scATAC")
15
scATAC.data <- readRDS("scATAC.data.pro.rds")
16
DefaultAssay(scATAC.data) <- "Peaks"
17
18
## loading packages
19
library(ChIPseeker)
20
library(TxDb.Hsapiens.UCSC.hg38.knownGene)
21
txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
22
library(clusterProfiler)
23
library(org.Hs.eg.db)
24
25
#####################################All Peak Annotation
26
peakSet <- GetAssayData(scATAC.data, slot = "data") 
27
All.gr <- StringToGRanges(rownames(peakSet))
28
#TSS (transcription start site) region, by default TSS is defined from -3kb to +3kb
29
All.peakAnno <- annotatePeak(All.gr, tssRegion=c(-3000, 3000), TxDb = txdb, annoDb="org.Hs.eg.db")
30
saveRDS(All.peakAnno, file = "4.Peak/peak.annotation.ChIPseeker.rds")
31
peakAnno.info <- All.peakAnno@anno
32
peakAnno.info$annotation <- gsub(" .*", "", peakAnno.info$annotation)
33
peakAnno.info$annotation <- gsub("'", "_UTR", peakAnno.info$annotation)
34
peak.info <- data.frame(peaks = rownames(Motifs(scATAC.data)@data),
35
                        originType = All.peakAnno@anno$annotation,
36
                        peakType = peakAnno.info$annotation, 
37
                        distanceToTSS = peakAnno.info$distanceToTSS,
38
                        SYMBOL = peakAnno.info$SYMBOL)
39
saveRDS(peak.info, file = "4.Peak/peak.annotation.simple.ChIPseeker.rds")
40
promoter <- getPromoters(TxDb=txdb, upstream=3000, downstream=3000)
41
tagMatrix <- getTagMatrix(All.gr, windows=promoter)
42
43
pdf("4.Peak/Annotation/All.peakAnnotation.pdf")
44
plotAnnoPie(All.peakAnno)
45
plotAnnoBar(All.peakAnno)
46
vennpie(All.peakAnno) #some annotation overlap
47
upsetplot(All.peakAnno, vennpie=TRUE)
48
#Visualize distribution of TF-binding loci relative to TSS
49
plotDistToTSS(All.peakAnno, title="Distribution of transcription factor-binding loci\nrelative to TSS")
50
tagHeatmap(tagMatrix, xlim=c(-3000, 3000), color="#4DBBD5")
51
plotAvgProf(tagMatrix, xlim=c(-3000, 3000), xlab="Genomic Region (5'->3')", ylab = "Read Count Frequency")
52
plotAvgProf(tagMatrix, xlim=c(-3000, 3000), conf = 0.95, resample = 1000)
53
dev.off()
54
55
#######################################annotated the cell-type specific peaks
56
cellType.sig.pos.DARs <- readRDS("4.Peak/cellType.sig.pos.DARs.rds")
57
cellType.sig.pos.gr <- StringToGRanges(unique(cellType.sig.pos.DARs$genomicRegion)) # 8797 DAR peaks
58
DAR.Anno <- annotatePeak(cellType.sig.pos.gr, tssRegion=c(-3000, 3000), TxDb = txdb, annoDb="org.Hs.eg.db")
59
60
idents <- unique(cellType.sig.pos.DARs$cellType)
61
cellType.gr.list <- lapply(idents, function(x){
62
  idx <- which(cellType.sig.pos.DARs$cellType == x)
63
  gr <- StringToGRanges(cellType.sig.pos.DARs$genomicRegion[idx])
64
})
65
names(cellType.gr.list) <- idents
66
67
#annotated DAR for each cell type
68
DAR.anno.list <- lapply(cellType.gr.list, function(x){
69
    DAR.cell <- annotatePeak(x, tssRegion=c(-3000, 3000), TxDb = txdb, annoDb="org.Hs.eg.db")
70
    return(DAR.cell)
71
})
72
promoter <- getPromoters(TxDb=txdb, upstream=3000, downstream=3000)
73
tagMatrix <- getTagMatrix(cellType.sig.pos.gr, windows=promoter)
74
75
pdf("4.Peak/Annotation/cellType.DAR.annotation.pdf")
76
tagHeatmap(tagMatrix, xlim=c(-3000, 3000), color="#4DBBD5")
77
plotAvgProf(tagMatrix, xlim=c(-3000, 3000), xlab="Genomic Region (5'->3')", ylab = "Read Count Frequency")
78
plotAvgProf(tagMatrix, xlim=c(-3000, 3000), conf = 0.95, resample = 1000)
79
plotAnnoPie(DAR.Anno)
80
plotAnnoBar(DAR.Anno)
81
vennpie(DAR.Anno) #some annotation overlap
82
upsetplot(DAR.Anno, vennpie=TRUE)
83
84
#Visualize distribution of TF-binding loci relative to TSS
85
plotDistToTSS(DAR.Anno, title="Distribution of transcription factor-binding loci\nrelative to TSS")
86
87
plotAnnoBar(DAR.anno.list)
88
plotDistToTSS(DAR.anno.list)
89
dev.off()
90
91
############################# write DARs to bed files
92
# write all peaks to file to serve as background set:
93
DefaultAssay(scATAC.data) <- "Peaks"
94
peak_ranges <- StringToGRanges(rownames(scATAC.data)) %>% sort
95
write.table(as.data.frame(peak_ranges)[,1:3], file='4.Peak/bed/allPeaks.bed', row.names=F, col.names=F, sep='\t', quote=F)
96
97
########### each cell type
98
All.DARs <- readRDS("4.Peak/cellType.all.DARs.rds") # 224807 region
99
idents <- levels(All.DARs$cellType)
100
res < sapply(idents, function(x){
101
  cat("writing ", x, "\n")
102
  DARs.up <- All.DARs %>% dplyr::filter(cellType == x & avg_log2FC >= 0.25 & p_val_adj < 0.05)
103
  DARs.down <- All.DARs %>% dplyr::filter(cellType == x & avg_log2FC < -0.25 & p_val_adj < 0.05)
104
105
  if(nrow(DARs.up) >= 25){
106
    DARs.up.info <- peakAnno.info.frame[match(DARs.up$genomicRegion, peakAnno.info.frame$id),]
107
    # distal
108
    distal.peak.up <- DARs.up.info[which(DARs.up.info$annotation == "Distal"),]
109
    write.table(distal.peak.up[,1:3], file = paste0('4.Peak/bed/', x,'_up_distalPeaks.bed'), row.names=F, col.names=F, sep='\t', quote=F)
110
    cat("Distal Up: ", nrow(distal.peak.up), "\n")
111
112
    # proximal
113
    proximal.peak.up <- DARs.up.info[which(DARs.up.info$annotation != "Distal"),]
114
    write.table(proximal.peak.up[,1:3], file = paste0('4.Peak/bed/', x,'_up_proximalPeaks.bed'), row.names=F, col.names=F, sep='\t', quote=F)
115
    cat("proximal Up: ", nrow(proximal.peak.up), "\n")
116
  }
117
118
  if(nrow(DARs.down) >= 25){
119
    DARs.down.info <- peakAnno.info.frame[match(DARs.down$genomicRegion, peakAnno.info.frame$id),]
120
    # distal
121
    distal.peak.down <- DARs.down.info[which(DARs.down.info$annotation == "Distal"),]
122
    write.table(distal.peak.down[,1:3], file = paste0('4.Peak/bed/', x,'_down_distalPeaks.bed'), row.names=F, col.names=F, sep='\t', quote=F)
123
    cat("Distal Down: ", nrow(distal.peak.down), "\n")
124
125
    # proximal
126
    proximal.peak.down <- DARs.down.info[which(DARs.down.info$annotation != "Distal"),]
127
    write.table(proximal.peak.down[,1:3], file = paste0('4.Peak/bed/', x,'_down_proximalPeaks.bed'), row.names=F, col.names=F, sep='\t', quote=F)
128
    cat("proximal Down: ", nrow(proximal.peak.down), "\n")
129
  }
130
  return("Finish!")
131
})
132
133
##rGREAT enrichment analysis
134
source(file = "/home/longzhilin/Analysis_Code/PathwayEnrichment/Great.enrich.R")
135
136
idents <- levels(scATAC.data)
137
bg <- read.table("4.Peak/bed/allPeaks.bed", sep = "\t", stringsAsFactors = F)
138
139
pdf("4.Peak/bed/GREAT.enrich.pdf")
140
res <- lapply(idents, function(x){
141
142
  ## up regulate
143
  up.distal.bed <- tryCatch({
144
    read.table(paste0("4.Peak/bed/", x, "_up_distalPeaks.bed"), sep = "\t", stringsAsFactors = F)}, error = function(e){return(NULL)})
145
  if(!is.null(up.distal.bed)){
146
    cat("Analysis: ", x, " up distal region...\n")
147
    up_distalPeaks_enrich <- Great.enrich(gr = up.distal.bed, bg = bg, title = paste0(x, "_up_distalPeaks"), Type = "GO Biological Process")
148
  }else{
149
    up_distalPeaks_enrich <- NULL
150
  }
151
  up.proximal.bed <- tryCatch({
152
    read.table(paste0("4.Peak/bed/", x, "_up_proximalPeaks.bed"), sep = "\t", stringsAsFactors = F)}, error = function(e){return(NULL)})
153
  if(!is.null(up.proximal.bed)){
154
    cat("Analysis: ", x, " up proximal region...\n")  
155
    up_proximalPeaks_enrich <- Great.enrich(gr = up.proximal.bed, bg = bg, title = paste0(x, "_up_proximalPeaks"), Type = "GO Biological Process")
156
  }else{
157
    up_proximalPeaks_enrich <- NULL
158
  }
159
160
  ## down regulate
161
  down.distal.bed <- tryCatch({
162
    read.table(paste0("4.Peak/bed/", x, "_down_distalPeaks.bed"), sep = "\t", stringsAsFactors = F)}, error = function(e){return(NULL)})
163
  if(!is.null(down.distal.bed)){
164
    cat("Analysis: ", x, " down distal region...\n")
165
    down_distalPeaks_enrich <- Great.enrich(gr = down.distal.bed, bg = bg, title = paste0(x, "_down_distalPeaks"), Type = "GO Biological Process")
166
  }else{
167
    down_distalPeaks_enrich <- NULL
168
  }
169
  down.proximal.bed <- tryCatch({
170
    read.table(paste0("4.Peak/bed/", x, "_down_proximalPeaks.bed"), sep = "\t", stringsAsFactors = F)}, error = function(e){return(NULL)})
171
  if(!is.null(down.proximal.bed)){
172
    cat("Analysis: ", x, " down proximal region...\n")  
173
    down_proximalPeaks_enrich <- Great.enrich(gr = down.proximal.bed, bg = bg, title = paste0(x, "_down_proximalPeaks"), Type = "GO Biological Process")
174
  }else{
175
    down_proximalPeaks_enrich <- NULL
176
  }
177
  return(list(up_distalPeaks_enrich = up_distalPeaks_enrich, up_proximalPeaks_enrich = up_proximalPeaks_enrich, 
178
              down_distalPeaks_enrich = down_distalPeaks_enrich, down_proximalPeaks_enrich = down_proximalPeaks_enrich))
179
})
180
dev.off()
181
names(res) <- idents
182
saveRDS(res, file = "4.Peak/bed/GREAT.enrichment.rds")
183
library(openxlsx)
184
write.xlsx(res$Tumor, file = "4.Peak/bed/GREAT.Tumor.xlsx", sheetName = names(res$Tumor), rowNames = F)
185
186
####plot --- tumor.up
187
tumor.up.proximal <- res$Tumor$up_proximalPeaks_enrich
188
tumor.up.distal <- res$Tumor$up_distalPeaks_enrich
189
pdf("4.Peak/bed/GREAT.Tumor.selection.pdf", height = unit(5, "inches"))
190
p <- ggbarplot(tumor.up.proximal, x = "wrap", y = "Hyper_Fold_Enrichment", fill = "grey", color = "grey", width = 0.4, ylab = "Fold Enrichment", xlab = "", orientation = "horiz", sort.val = c("asc"))+theme(legend.position="none") 
191
print(p)
192
p <- ggbarplot(tumor.up.distal, x = "wrap", y = "Hyper_Fold_Enrichment", fill = "grey", color = "grey", width = 0.4, ylab = "Fold Enrichment", xlab = "", orientation = "horiz", sort.val = c("asc"))+theme(legend.position="none") 
193
print(p)
194
dev.off()