a b/ATAC/AnalysisPipeline/5.2.motif.analysis.R
1
#' @description: identify the tumor-specific TFs
2
3
library(Signac)
4
library(Seurat)
5
library(BSgenome.Hsapiens.UCSC.hg38)
6
library(patchwork)
7
set.seed(101)
8
library(ggpubr)
9
library(openxlsx)
10
library(ComplexHeatmap)
11
library(circlize)
12
library(tidyverse)
13
library(matrixStats)
14
library(future)
15
plan("multiprocess", workers = 10)
16
options(future.globals.maxSize = 50000 * 1024^2) #50G
17
18
source(file = "/home/longzhilin/Analysis_Code/Visualization/colorPalettes.R")
19
setwd("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scATAC")
20
21
scATAC.data <- readRDS("scATAC.data.pro.rds")
22
motif.info <- data.frame(originName = names(scATAC.data@assays$Peaks@motifs@motif.names), TF = unlist(scATAC.data@assays$Peaks@motifs@motif.names))
23
rownames(motif.info) <- NULL
24
motif.info$originName <- gsub("_", "-", motif.info$originName)
25
26
#####基于chromVAR数据分析
27
cellType.motifs.chromVAR <- readRDS("5.Motif/cellType.motifs.chromVAR.human_pwms_v2.rds")
28
29
scRNA.data <- readRDS("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scRNA/data.merge.pro.rds")
30
###################1.plot motifs deviation score heatmap and number ratioplot
31
top5 <- sapply(cellType.motifs.chromVAR, function(x){
32
    x <- arrange(x, desc(avg_log2FC))
33
    return(x$gene[1:3])
34
})
35
top5 <- unique(as.character(top5))
36
top5 <- c(top5, "EOMES", "TBX10")
37
####Plot --- cellType specific motif heatmap
38
sig.motifs <- cellType.motifs.chromVAR %>% 
39
                                bind_rows %>%
40
                                filter(avg_log2FC > 1 & p_val_adj < 0.05) %>%
41
                                select(gene) %>%
42
                                distinct()
43
idx <- match(sig.motifs$gene, motif.info$TF)
44
sig.motifNames <- motif.info$originName[idx]
45
46
motifs.avgExp <- AverageExpression(scATAC.data, features = sig.motifNames, assays = "chromvar")
47
motifs.avgExp <- motifs.avgExp$chromvar
48
rownames(motifs.avgExp) <- sig.motifs$gene
49
zScore <- function(x){(x - mean(x)) /sd(x)}
50
motifs.avgExp.scale <- apply(motifs.avgExp, 1, zScore) %>% t() # row: TF; column: cell type
51
mark.idx <- match(top5, rownames(motifs.avgExp.scale))
52
pdf("5.Motif/Analysis/chromVAR.cellType.sig.motifs.heatmap.pdf")
53
ha <- rowAnnotation(link = anno_mark(at = mark.idx, labels = top5, link_width = unit(2, "mm"), labels_gp = gpar(fontsize = 5), padding = unit(1, "mm")))
54
Heatmap(motifs.avgExp.scale, name = "Deviation score", 
55
        width = unit(6, "cm"), height = unit(8, "cm"), right_annotation = ha,
56
        row_names_gp = gpar(fontsize = 8), column_names_gp = gpar(fontsize = 8),
57
        show_row_dend = F, show_column_dend = F, show_column_names = T, show_row_names = F, 
58
        heatmap_legend_param = list(labels_gp = gpar(fontsize = 8), by_row = T))
59
Heatmap(motifs.avgExp.scale, name = "Deviation score", 
60
        width = unit(6, "cm"), height = unit(8, "cm"), left_annotation = ha, 
61
        row_names_gp = gpar(fontsize = 8), column_names_gp = gpar(fontsize = 8),
62
        show_row_dend = F, show_column_dend = F, show_column_names = T, show_row_names = F, 
63
        heatmap_legend_param = list(labels_gp = gpar(fontsize = 8), by_row = T))      
64
dev.off()
65
66
####Plot --- top5 cell-type specific TFs
67
cellType.sig.motifs <- lapply(names(cellType.motifs.chromVAR), function(x){
68
    sig <- cellType.motifs.chromVAR[[x]] %>% filter(avg_log2FC > 1 & p_val_adj < 0.05) %>% mutate(celltype = x)
69
    return(sig)
70
})
71
names(cellType.sig.motifs) <- names(cellType.motifs.chromVAR)
72
sig.motifs.num <- cellType.sig.motifs %>% bind_rows()
73
sig.motifs.num <- as.data.frame(table(sig.motifs.num$celltype))
74
pdf("5.Motif/Analysis/chromVAR.cellType.sig.motifs.barplot.pdf")
75
ggbarplot(sig.motifs.num, x="Var1", y="Freq", fill = "Var1", color = "Var1",
76
          sort.by.groups=FALSE, sort.val = "desc", #不按组排序
77
          label = T, xlab = "", ylab = "Cell Number") + theme(legend.position="none") + rotate_x_text(30)
78
dev.off()
79
80
###################2.Screen cell-specific enriched motifs
81
source(file = "/home/longzhilin/Analysis_Code/code/analysis.diff.survival.TCGA.R")
82
DESeq2.normalized_counts <- readRDS("/data/active_data/lzl/RenalTumor-20200713/Data/TCGA/KIRC/Result/DESeq2.normalized_counts.rds")
83
DESeq2.normalized_counts <- log2(DESeq2.normalized_counts+1)
84
DESeq2.result <- readRDS("/data/active_data/lzl/RenalTumor-20200713/Data/TCGA/KIRC/Result/DESeq2.result.rds")
85
clin.data <- readRDS("/data/active_data/lzl/RenalTumor-20200713/Data/TCGA/KIRC/Result/clin.data.rds")
86
87
# extreact the avg_log2FC and FDR value
88
order.TFs <- cellType.motifs.chromVAR[[1]]$gene
89
motifs.FC <- sapply(cellType.motifs.chromVAR, function(x){
90
  index <- match(order.TFs, x$gene)
91
  return(round(x$avg_log2FC[index], 2))
92
})
93
rownames(motifs.FC) <- order.TFs
94
95
motifs.fdr <- sapply(cellType.motifs.chromVAR, function(x){
96
  index <- match(order.TFs, x$gene)
97
  return(x$p_val_adj[index])
98
})
99
rownames(motifs.fdr) <- order.TFs
100
a <- motifs.fdr
101
a[which(a==0)] <- 2
102
motifs.fdr[which(motifs.fdr==0)] <- min(a)*0.001 ###4.940656e-324, Multiply the minimum value by 0.01, instead of 0
103
motifs.fdr <- round(-log10(motifs.fdr), 2)
104
105
#3.deviation score
106
all.motifs.avgExp <- AverageExpression(scATAC.data, features = motif.info$originName, assays = "chromvar")
107
all.motifs.avgExp <- all.motifs.avgExp$chromvar
108
rownames(all.motifs.avgExp) <- motif.info$TF
109
110
################################## screening Strategy
111
#1.deviation score: sd > median(sd) + 4*mad
112
#2.Tumor cells: fdr> 0.0001 & avg_log2FC>=4
113
source(file = "/home/longzhilin/Analysis_Code/DataScience/MAD.R")
114
avg.sd <- rowSds(all.motifs.avgExp)
115
names(avg.sd) <- rownames(all.motifs.avgExp)
116
sd.mad <- DoubleMAD(avg.sd)
117
mad.threshold <- median(avg.sd) + 4*sd.mad[2] #Take the right
118
119
#Only in cell type: FDR < 0.0001 & log2FC>=4; log2FC<1 in other cell types
120
sig.label <- sapply(order.TFs, function(x){
121
     if(avg.sd[x] > mad.threshold){
122
        fdr <- motifs.fdr[x,]
123
        FC <- motifs.FC[x,]
124
        sig.fdr <- which(fdr > -log10(0.0001))
125
        sig.fc <- which(FC >= 4)  
126
        others <- which(FC >= 1)
127
128
        sig.idx <- intersect(sig.fdr, sig.fc)
129
        common.len <- length(sig.idx)
130
131
        if(common.len==1 & length(others)==1){
132
        #if(common.len==1){
133
            return("specific")
134
        }else{
135
            return("common")
136
        }
137
    }else{
138
        return("low variation")
139
    }
140
})
141
cellType.high.specific.TF <- names(which(sig.label=="specific"))
142
heatmapEM.fdr <- motifs.fdr[cellType.high.specific.TF,]
143
heatmapEM.FC <- motifs.FC[cellType.high.specific.TF,]
144
cellType.sd <- avg.sd[cellType.high.specific.TF]
145
write.xlsx(list(FDR = heatmapEM.fdr, FC = heatmapEM.FC), file = "5.Motif/Analysis/cellType.specific.TFs.xlsx", sheetName = c("FDR", "Log2FC"), rowNames = T)
146
147
#### Tumor specific TFs
148
idx <- which(colnames(heatmapEM.FC) == "Tumor")
149
index <- which(heatmapEM.FC[, idx] >= 4)
150
tumor.specific.TFs <- data.frame(Name = rownames(heatmapEM.fdr)[index], FDR = heatmapEM.fdr[index, idx], avg_log2FC = heatmapEM.FC[index, idx], sd = cellType.sd[index]) # 49
151
heatmapEM.fdr.tumor <- heatmapEM.fdr[index,]
152
heatmapEM.FC.tumor <- heatmapEM.FC[index,]
153
saveRDS(tumor.specific.TFs, file = "5.Motif/Analysis/tumor.specific.TFs.rds")
154
155
## Filter the TF result
156
source(file = "/home/longzhilin/Analysis_Code/Visualization/Plot.EnhancedVolcano.R")
157
##plot --- heatmap
158
gene.labels <- c("HNF1A", "HNF1B", "HNF4G", "HNF4A", 
159
                 "HOXC5", "OTP", "ISL1", "VENTX", 
160
                 "HOXB5", "HOXB4", "HOXA4", "HOXB7", "HOXB3", "HOXB2",
161
                 "BARX2", "POU6F2", "JUNB", "JUN", "JUND", "BATF")
162
mark.idx <- match(gene.labels, rownames(heatmapEM.FC.tumor))
163
pdf("5.Motif/Analysis/tumor.specific.TFs.heatmap.pdf")
164
#Plot.EnhancedVolcano(TF.DESeq2, x = "log2FoldChange", y = "padj", select.num = nrow(TF.DESeq2), drawConnectors = F)
165
ha = rowAnnotation(FDR = anno_barplot(heatmapEM.fdr.tumor[,idx], border = F, gp = gpar(fill = "#e5e4e2"), direction = "reverse"))
166
col_fun <- colorRamp2(c(min(heatmapEM.FC.tumor), 0, max(heatmapEM.FC.tumor)), c("blue", "white", "red"))
167
ht2 <- Heatmap(heatmapEM.FC.tumor, left_annotation = ha, col = col_fun, 
168
               show_row_dend = F, show_column_dend = F, 
169
               column_names_gp = gpar(fontsize = 8), row_names_gp = gpar(fontsize = 8), 
170
               name = "avg_log2FC", width = unit(7.5, "cm"), height = unit(10, "cm"), 
171
               show_heatmap_legend = T, 
172
               heatmap_legend_param = list(col_fun = col_fun, title = "avg_log2FC", direction = "horizontal", title_position = "topcenter")) +
173
rowAnnotation(link = anno_mark(at = mark.idx, labels = gene.labels, link_width = unit(3, "mm"), labels_gp = gpar(fontsize = 8), padding = unit(1, "mm")))
174
draw(ht2, heatmap_legend_side = "top")
175
dev.off()
176
177
# survival in TCGA-KIRC data
178
write.xlsx(tumor.specific.TFs, file = "5.Motif/Analysis/tumor.specific.TFs.xlsx", sheetName = c("Tumor specific TFs"), rowNames = T)
179
pdf("5.Motif/Analysis/tumor.specific.TFs.survival.pdf")
180
tumor.TFs.signature.res <- analysis.diff.survival.TCGA(interest.gene = tumor.specific.TFs$Name, diff.gene.pro = DESeq2.result, exp.data.process = DESeq2.normalized_counts, clin.data = clin.data, EnhancedVolcano.plot = F, Box.plot = F, main = "tumor.specific.TFs", meta.signature = T, single.signature = T)
181
dev.off()
182
183
Idents(scRNA.data) <- scRNA.data$cellType_low
184
tumor.TFs.avgExp <- AverageExpression(scRNA.data, features = tumor.specific.TFs$Name, assays = "RNA", slot = "data")
185
tumor.TFs.avgExp <- scale(t(tumor.TFs.avgExp$RNA))
186
187
pdf("5.Motif/Analysis/tumor.TFs.expression.pdf")
188
Heatmap(tumor.TFs.avgExp, 
189
        show_row_dend = F, show_column_dend = F, 
190
        column_names_gp = gpar(fontsize = 8), row_names_gp = gpar(fontsize = 8), 
191
        name = "Expression", height = unit(6, "cm"), 
192
        show_heatmap_legend = T, 
193
        heatmap_legend_param = list(title = "Expression", direction = "horizontal", title_position = "topcenter"))
194
dev.off()