|
a |
|
b/ATAC/AnalysisPipeline/5.1.motif.enrichment.R |
|
|
1 |
#' @description: Motif enrichment with chromVAR |
|
|
2 |
|
|
|
3 |
library(Signac) |
|
|
4 |
library(Seurat) |
|
|
5 |
library(JASPAR2020) |
|
|
6 |
library(TFBSTools) |
|
|
7 |
library(BSgenome.Hsapiens.UCSC.hg38) |
|
|
8 |
library(patchwork) |
|
|
9 |
set.seed(101) |
|
|
10 |
library(ggpubr) |
|
|
11 |
library(openxlsx) |
|
|
12 |
library(chromVARmotifs) |
|
|
13 |
data("human_pwms_v2") |
|
|
14 |
library(ComplexHeatmap) |
|
|
15 |
library(circlize) |
|
|
16 |
library(future) |
|
|
17 |
plan("multiprocess", workers = 10) |
|
|
18 |
options(future.globals.maxSize = 50000 * 1024^2) #50G |
|
|
19 |
|
|
|
20 |
## We will explore two complementary options for performing motif analysis: |
|
|
21 |
#one by finding overrepresented motifs in a set of differentially accessible peaks, |
|
|
22 |
#one method performing differential motif activity analysis between groups of cells. |
|
|
23 |
|
|
|
24 |
source(file = "/home/longzhilin/Analysis_Code/Plot_colorPaletters.R") |
|
|
25 |
setwd("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scATAC") |
|
|
26 |
|
|
|
27 |
scATAC.data <- readRDS("scATAC.data.pro.rds") |
|
|
28 |
Idents(scATAC.data) <- scATAC.data$AnnotatedcellType |
|
|
29 |
DefaultAssay(scATAC.data) <- "Peaks" |
|
|
30 |
|
|
|
31 |
# Get a list of motif position frequency matrices from the JASPAR database |
|
|
32 |
# pfm <- getMatrixSet( |
|
|
33 |
# x = JASPAR2020, |
|
|
34 |
# opts = list(species = 9606, all_versions = FALSE) |
|
|
35 |
# ) |
|
|
36 |
|
|
|
37 |
# add motif information |
|
|
38 |
scATAC.data <- AddMotifs( |
|
|
39 |
object = scATAC.data, |
|
|
40 |
genome = BSgenome.Hsapiens.UCSC.hg38, |
|
|
41 |
pfm = human_pwms_v2 |
|
|
42 |
) |
|
|
43 |
|
|
|
44 |
# Finding overrepresented motifs |
|
|
45 |
GetMotifs <- function(cellType, scATAC.object) { |
|
|
46 |
print(paste0("Finding motifs for: ",cellType)) |
|
|
47 |
overrepresented.motifs <- FindMarkers(scATAC.object, |
|
|
48 |
ident.1 = cellType, |
|
|
49 |
test.use = 'LR', |
|
|
50 |
min.pct = 0.05, |
|
|
51 |
latent.vars = "nCount_Peaks") |
|
|
52 |
enriched.motifs <- FindMotifs(object = scATAC.object, features = rownames(overrepresented.motifs[overrepresented.motifs$p_val_adj < 0.05, ])) |
|
|
53 |
return(enriched.motifs) |
|
|
54 |
} |
|
|
55 |
# FindMarkers and write to an xlsx file with default parameters |
|
|
56 |
idents <- as.character(levels(Idents(scATAC.data))) |
|
|
57 |
cellType.motifs <- lapply(idents, function(x) GetMotifs(x, scATAC.object = scATAC.data)) |
|
|
58 |
names(cellType.motifs) <- idents |
|
|
59 |
write.xlsx(cellType.motifs, file = "5.Motif/motifs.celltype.human_pwms_v2.xlsx", sheetName = idents, rowNames = T) |
|
|
60 |
saveRDS(cellType.motifs, file = "5.Motif/cellType.motifs.human_pwms_v2.rds") |
|
|
61 |
|
|
|
62 |
library(ggplot2) |
|
|
63 |
library(ggseqlogo) |
|
|
64 |
PWMatrixToProbMatrix <- function(x){ |
|
|
65 |
if (class(x) != "PWMatrix") stop("x must be a TFBSTools::PWMatrix object") |
|
|
66 |
m <- (exp(as(x, "matrix"))) * TFBSTools::bg(x)/sum(TFBSTools::bg(x)) |
|
|
67 |
m <- t(t(m)/colSums(m)) |
|
|
68 |
m |
|
|
69 |
} |
|
|
70 |
|
|
|
71 |
pdf("5.Motif/cellType.specific.TFs.logo.pdf") |
|
|
72 |
cellType.TFs.PPM <- sapply(idents, function(x){ |
|
|
73 |
cellType.top.TFs <- head(rownames(cellType.motifs[[x]])) |
|
|
74 |
cellType.top.TFs <- gsub("-", "_", cellType.top.TFs) |
|
|
75 |
index <- match(cellType.top.TFs, names(human_pwms_v2)) |
|
|
76 |
PPM.list <- lapply(index, function(y){ |
|
|
77 |
PPM <- PWMatrixToProbMatrix(human_pwms_v2[[y]]) |
|
|
78 |
}) |
|
|
79 |
names(PPM.list) <- as.character(unlist(scATAC.data@assays$Peaks@motifs@motif.names[index])) |
|
|
80 |
p <- ggseqlogo(PPM.list) + ggtitle(x) + theme(plot.title = element_text(hjust = 0.5)) |
|
|
81 |
print(p) |
|
|
82 |
return(PPM.list) |
|
|
83 |
}) |
|
|
84 |
dev.off() |
|
|
85 |
|
|
|
86 |
##########################################ChromVAR |
|
|
87 |
#We can also compute a per-cell motif activity score by running chromVAR. |
|
|
88 |
#This allows us to visualize motif activities per cell, and also provides an alternative method of identifying differentially-active motifs between cell types. |
|
|
89 |
|
|
|
90 |
# Computing motif activities |
|
|
91 |
scATAC.data <- RunChromVAR( |
|
|
92 |
object = scATAC.data, |
|
|
93 |
genome = BSgenome.Hsapiens.UCSC.hg38 |
|
|
94 |
) |
|
|
95 |
DefaultAssay(scATAC.data) <- 'chromvar' |
|
|
96 |
|
|
|
97 |
#cellType.chromvar.activity <- FindAllMarkers(scATAC.data, group.by = "AnnotatedcellType", test.use = 'LR', latent.vars = "nCount_Peaks") |
|
|
98 |
|
|
|
99 |
library(tidyverse) |
|
|
100 |
GetChromvarActivities <- function(cellType, scATAC.object, motif.info) { |
|
|
101 |
print(paste0("Finding chromVAR activities for: ",cellType)) |
|
|
102 |
differential.activity <- FindMarkers(scATAC.object, |
|
|
103 |
ident.1 = cellType, |
|
|
104 |
test.use = 'LR', |
|
|
105 |
logfc.threshold = 0, |
|
|
106 |
latent.vars = "nCount_Peaks") |
|
|
107 |
motifs <- gsub("-", "_", rownames(differential.activity)) |
|
|
108 |
motifNames <- sapply(motifs, function(x) motif.info@motif.names[[x]]) |
|
|
109 |
return(cbind(differential.activity, gene = motifNames)) |
|
|
110 |
} |
|
|
111 |
idents <- as.character(levels(Idents(scATAC.data))) |
|
|
112 |
cellType.motifs.chromVAR <- lapply(idents, function(x) GetChromvarActivities(x, scATAC.object = scATAC.data, motif.info = scATAC.data@assays$Peaks@motifs)) |
|
|
113 |
names(cellType.motifs.chromVAR) <- idents |
|
|
114 |
cellType.motifs.chromVAR <- lapply(cellType.motifs.chromVAR, function(x){ |
|
|
115 |
up.x <- x %>% filter(avg_log2FC>0) %>% arrange(desc(avg_log2FC)) |
|
|
116 |
down.x <- x %>% filter(avg_log2FC<=0) %>% arrange(avg_log2FC) |
|
|
117 |
x <- rbind(up.x, down.x) |
|
|
118 |
return(x) |
|
|
119 |
}) |
|
|
120 |
names(cellType.motifs.chromVAR) <- idents |
|
|
121 |
write.xlsx(cellType.motifs.chromVAR, file = "5.Motif/cellType.motifs.chromVAR.human_pwms_v2.xlsx", sheetName = idents, rowNames = T) |
|
|
122 |
saveRDS(cellType.motifs.chromVAR, file = "5.Motif/cellType.motifs.chromVAR.human_pwms_v2.rds") |
|
|
123 |
|
|
|
124 |
pdf("5.Motif/cellType.specific.TFs.chromVAR.logo.pdf") |
|
|
125 |
cellType.TFs.PPM <- sapply(idents, function(x){ |
|
|
126 |
cellType.top.TFs <- head(rownames(cellType.motifs.chromVAR[[x]])) |
|
|
127 |
cellType.top.TFs <- gsub("-", "_", cellType.top.TFs) |
|
|
128 |
index <- match(cellType.top.TFs, names(human_pwms_v2)) |
|
|
129 |
PPM.list <- lapply(index, function(y){ |
|
|
130 |
PPM <- PWMatrixToProbMatrix(human_pwms_v2[[y]]) |
|
|
131 |
}) |
|
|
132 |
names(PPM.list) <- as.character(unlist(scATAC.data@assays$Peaks@motifs@motif.names[index])) |
|
|
133 |
p <- ggseqlogo(PPM.list) + ggtitle(x) + theme(plot.title = element_text(hjust = 0.5)) |
|
|
134 |
print(p) |
|
|
135 |
return(PPM.list) |
|
|
136 |
}) |
|
|
137 |
dev.off() |
|
|
138 |
saveRDS(scATAC.data, "scATAC.data.pro.rds") |