a b/ATAC/AnalysisPipeline/5.1.motif.enrichment.R
1
#' @description: Motif enrichment with chromVAR
2
3
library(Signac)
4
library(Seurat)
5
library(JASPAR2020)
6
library(TFBSTools)
7
library(BSgenome.Hsapiens.UCSC.hg38)
8
library(patchwork)
9
set.seed(101)
10
library(ggpubr)
11
library(openxlsx)
12
library(chromVARmotifs)
13
data("human_pwms_v2")
14
library(ComplexHeatmap)
15
library(circlize)
16
library(future)
17
plan("multiprocess", workers = 10)
18
options(future.globals.maxSize = 50000 * 1024^2) #50G
19
20
## We will explore two complementary options for performing motif analysis: 
21
#one by finding overrepresented motifs in a set of differentially accessible peaks, 
22
#one method performing differential motif activity analysis between groups of cells.
23
24
source(file = "/home/longzhilin/Analysis_Code/Plot_colorPaletters.R")
25
setwd("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scATAC")
26
27
scATAC.data <- readRDS("scATAC.data.pro.rds")
28
Idents(scATAC.data) <- scATAC.data$AnnotatedcellType
29
DefaultAssay(scATAC.data) <- "Peaks"
30
31
# Get a list of motif position frequency matrices from the JASPAR database
32
# pfm <- getMatrixSet(
33
#   x = JASPAR2020,
34
#   opts = list(species = 9606, all_versions = FALSE)
35
# )
36
37
# add motif information
38
scATAC.data <- AddMotifs(
39
  object = scATAC.data,
40
  genome = BSgenome.Hsapiens.UCSC.hg38,
41
  pfm = human_pwms_v2
42
)
43
44
# Finding overrepresented motifs
45
GetMotifs <- function(cellType, scATAC.object) {
46
  print(paste0("Finding motifs for: ",cellType))
47
  overrepresented.motifs <- FindMarkers(scATAC.object,
48
                                        ident.1 = cellType,
49
                                        test.use = 'LR',
50
                                        min.pct = 0.05,
51
                                        latent.vars = "nCount_Peaks")
52
  enriched.motifs <- FindMotifs(object = scATAC.object, features = rownames(overrepresented.motifs[overrepresented.motifs$p_val_adj < 0.05, ]))
53
  return(enriched.motifs)
54
}
55
# FindMarkers and write to an xlsx file with default parameters
56
idents <- as.character(levels(Idents(scATAC.data)))
57
cellType.motifs <- lapply(idents, function(x) GetMotifs(x, scATAC.object = scATAC.data))
58
names(cellType.motifs) <- idents
59
write.xlsx(cellType.motifs, file = "5.Motif/motifs.celltype.human_pwms_v2.xlsx", sheetName = idents, rowNames = T)
60
saveRDS(cellType.motifs, file = "5.Motif/cellType.motifs.human_pwms_v2.rds")
61
62
library(ggplot2)
63
library(ggseqlogo)
64
PWMatrixToProbMatrix <- function(x){
65
        if (class(x) != "PWMatrix") stop("x must be a TFBSTools::PWMatrix object")
66
        m <- (exp(as(x, "matrix"))) * TFBSTools::bg(x)/sum(TFBSTools::bg(x))
67
        m <- t(t(m)/colSums(m))
68
        m
69
}
70
71
pdf("5.Motif/cellType.specific.TFs.logo.pdf")
72
cellType.TFs.PPM <- sapply(idents, function(x){
73
    cellType.top.TFs <- head(rownames(cellType.motifs[[x]]))
74
    cellType.top.TFs <- gsub("-", "_", cellType.top.TFs)
75
    index <- match(cellType.top.TFs, names(human_pwms_v2))
76
    PPM.list <- lapply(index, function(y){
77
        PPM <- PWMatrixToProbMatrix(human_pwms_v2[[y]])
78
    })
79
    names(PPM.list) <- as.character(unlist(scATAC.data@assays$Peaks@motifs@motif.names[index]))
80
    p <- ggseqlogo(PPM.list) + ggtitle(x) + theme(plot.title = element_text(hjust = 0.5))
81
    print(p)
82
    return(PPM.list)
83
})
84
dev.off()
85
86
##########################################ChromVAR
87
#We can also compute a per-cell motif activity score by running chromVAR. 
88
#This allows us to visualize motif activities per cell, and also provides an alternative method of identifying differentially-active motifs between cell types.
89
90
# Computing motif activities
91
scATAC.data <- RunChromVAR(
92
  object = scATAC.data,
93
  genome = BSgenome.Hsapiens.UCSC.hg38
94
)
95
DefaultAssay(scATAC.data) <- 'chromvar'
96
97
#cellType.chromvar.activity <- FindAllMarkers(scATAC.data, group.by = "AnnotatedcellType", test.use = 'LR', latent.vars = "nCount_Peaks")
98
99
library(tidyverse)
100
GetChromvarActivities <- function(cellType, scATAC.object, motif.info) {
101
  print(paste0("Finding chromVAR activities for: ",cellType))
102
  differential.activity <- FindMarkers(scATAC.object, 
103
                     ident.1 = cellType,
104
                     test.use = 'LR',
105
                     logfc.threshold = 0, 
106
                     latent.vars = "nCount_Peaks") 
107
  motifs <- gsub("-", "_", rownames(differential.activity))
108
  motifNames <- sapply(motifs, function(x) motif.info@motif.names[[x]])
109
  return(cbind(differential.activity, gene = motifNames))
110
}
111
idents <- as.character(levels(Idents(scATAC.data)))
112
cellType.motifs.chromVAR <- lapply(idents, function(x) GetChromvarActivities(x, scATAC.object = scATAC.data, motif.info = scATAC.data@assays$Peaks@motifs))
113
names(cellType.motifs.chromVAR) <- idents
114
cellType.motifs.chromVAR <- lapply(cellType.motifs.chromVAR, function(x){
115
  up.x <- x %>% filter(avg_log2FC>0) %>% arrange(desc(avg_log2FC))
116
  down.x <- x %>% filter(avg_log2FC<=0) %>% arrange(avg_log2FC)
117
  x <- rbind(up.x, down.x)
118
  return(x)
119
})
120
names(cellType.motifs.chromVAR) <- idents
121
write.xlsx(cellType.motifs.chromVAR, file = "5.Motif/cellType.motifs.chromVAR.human_pwms_v2.xlsx", sheetName = idents, rowNames = T)
122
saveRDS(cellType.motifs.chromVAR, file = "5.Motif/cellType.motifs.chromVAR.human_pwms_v2.rds")
123
124
pdf("5.Motif/cellType.specific.TFs.chromVAR.logo.pdf")
125
cellType.TFs.PPM <- sapply(idents, function(x){
126
    cellType.top.TFs <- head(rownames(cellType.motifs.chromVAR[[x]]))
127
    cellType.top.TFs <- gsub("-", "_", cellType.top.TFs)
128
    index <- match(cellType.top.TFs, names(human_pwms_v2))
129
    PPM.list <- lapply(index, function(y){
130
        PPM <- PWMatrixToProbMatrix(human_pwms_v2[[y]])
131
    })
132
    names(PPM.list) <- as.character(unlist(scATAC.data@assays$Peaks@motifs@motif.names[index]))
133
    p <- ggseqlogo(PPM.list) + ggtitle(x) + theme(plot.title = element_text(hjust = 0.5))
134
    print(p)
135
    return(PPM.list)
136
})
137
dev.off()
138
saveRDS(scATAC.data, "scATAC.data.pro.rds")