|
a |
|
b/ATAC/AnalysisPipeline/5.2.motif.analysis.R |
|
|
1 |
#' @description: identify the tumor-specific TFs |
|
|
2 |
|
|
|
3 |
library(Signac) |
|
|
4 |
library(Seurat) |
|
|
5 |
library(BSgenome.Hsapiens.UCSC.hg38) |
|
|
6 |
library(patchwork) |
|
|
7 |
set.seed(101) |
|
|
8 |
library(ggpubr) |
|
|
9 |
library(openxlsx) |
|
|
10 |
library(ComplexHeatmap) |
|
|
11 |
library(circlize) |
|
|
12 |
library(tidyverse) |
|
|
13 |
library(matrixStats) |
|
|
14 |
library(future) |
|
|
15 |
plan("multiprocess", workers = 10) |
|
|
16 |
options(future.globals.maxSize = 50000 * 1024^2) #50G |
|
|
17 |
|
|
|
18 |
source(file = "/home/longzhilin/Analysis_Code/Visualization/colorPalettes.R") |
|
|
19 |
setwd("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scATAC") |
|
|
20 |
|
|
|
21 |
scATAC.data <- readRDS("scATAC.data.pro.rds") |
|
|
22 |
motif.info <- data.frame(originName = names(scATAC.data@assays$Peaks@motifs@motif.names), TF = unlist(scATAC.data@assays$Peaks@motifs@motif.names)) |
|
|
23 |
rownames(motif.info) <- NULL |
|
|
24 |
motif.info$originName <- gsub("_", "-", motif.info$originName) |
|
|
25 |
|
|
|
26 |
#####基于chromVAR数据分析 |
|
|
27 |
cellType.motifs.chromVAR <- readRDS("5.Motif/cellType.motifs.chromVAR.human_pwms_v2.rds") |
|
|
28 |
|
|
|
29 |
scRNA.data <- readRDS("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scRNA/data.merge.pro.rds") |
|
|
30 |
###################1.plot motifs deviation score heatmap and number ratioplot |
|
|
31 |
top5 <- sapply(cellType.motifs.chromVAR, function(x){ |
|
|
32 |
x <- arrange(x, desc(avg_log2FC)) |
|
|
33 |
return(x$gene[1:3]) |
|
|
34 |
}) |
|
|
35 |
top5 <- unique(as.character(top5)) |
|
|
36 |
top5 <- c(top5, "EOMES", "TBX10") |
|
|
37 |
####Plot --- cellType specific motif heatmap |
|
|
38 |
sig.motifs <- cellType.motifs.chromVAR %>% |
|
|
39 |
bind_rows %>% |
|
|
40 |
filter(avg_log2FC > 1 & p_val_adj < 0.05) %>% |
|
|
41 |
select(gene) %>% |
|
|
42 |
distinct() |
|
|
43 |
idx <- match(sig.motifs$gene, motif.info$TF) |
|
|
44 |
sig.motifNames <- motif.info$originName[idx] |
|
|
45 |
|
|
|
46 |
motifs.avgExp <- AverageExpression(scATAC.data, features = sig.motifNames, assays = "chromvar") |
|
|
47 |
motifs.avgExp <- motifs.avgExp$chromvar |
|
|
48 |
rownames(motifs.avgExp) <- sig.motifs$gene |
|
|
49 |
zScore <- function(x){(x - mean(x)) /sd(x)} |
|
|
50 |
motifs.avgExp.scale <- apply(motifs.avgExp, 1, zScore) %>% t() # row: TF; column: cell type |
|
|
51 |
mark.idx <- match(top5, rownames(motifs.avgExp.scale)) |
|
|
52 |
pdf("5.Motif/Analysis/chromVAR.cellType.sig.motifs.heatmap.pdf") |
|
|
53 |
ha <- rowAnnotation(link = anno_mark(at = mark.idx, labels = top5, link_width = unit(2, "mm"), labels_gp = gpar(fontsize = 5), padding = unit(1, "mm"))) |
|
|
54 |
Heatmap(motifs.avgExp.scale, name = "Deviation score", |
|
|
55 |
width = unit(6, "cm"), height = unit(8, "cm"), right_annotation = ha, |
|
|
56 |
row_names_gp = gpar(fontsize = 8), column_names_gp = gpar(fontsize = 8), |
|
|
57 |
show_row_dend = F, show_column_dend = F, show_column_names = T, show_row_names = F, |
|
|
58 |
heatmap_legend_param = list(labels_gp = gpar(fontsize = 8), by_row = T)) |
|
|
59 |
Heatmap(motifs.avgExp.scale, name = "Deviation score", |
|
|
60 |
width = unit(6, "cm"), height = unit(8, "cm"), left_annotation = ha, |
|
|
61 |
row_names_gp = gpar(fontsize = 8), column_names_gp = gpar(fontsize = 8), |
|
|
62 |
show_row_dend = F, show_column_dend = F, show_column_names = T, show_row_names = F, |
|
|
63 |
heatmap_legend_param = list(labels_gp = gpar(fontsize = 8), by_row = T)) |
|
|
64 |
dev.off() |
|
|
65 |
|
|
|
66 |
####Plot --- top5 cell-type specific TFs |
|
|
67 |
cellType.sig.motifs <- lapply(names(cellType.motifs.chromVAR), function(x){ |
|
|
68 |
sig <- cellType.motifs.chromVAR[[x]] %>% filter(avg_log2FC > 1 & p_val_adj < 0.05) %>% mutate(celltype = x) |
|
|
69 |
return(sig) |
|
|
70 |
}) |
|
|
71 |
names(cellType.sig.motifs) <- names(cellType.motifs.chromVAR) |
|
|
72 |
sig.motifs.num <- cellType.sig.motifs %>% bind_rows() |
|
|
73 |
sig.motifs.num <- as.data.frame(table(sig.motifs.num$celltype)) |
|
|
74 |
pdf("5.Motif/Analysis/chromVAR.cellType.sig.motifs.barplot.pdf") |
|
|
75 |
ggbarplot(sig.motifs.num, x="Var1", y="Freq", fill = "Var1", color = "Var1", |
|
|
76 |
sort.by.groups=FALSE, sort.val = "desc", #不按组排序 |
|
|
77 |
label = T, xlab = "", ylab = "Cell Number") + theme(legend.position="none") + rotate_x_text(30) |
|
|
78 |
dev.off() |
|
|
79 |
|
|
|
80 |
###################2.Screen cell-specific enriched motifs |
|
|
81 |
source(file = "/home/longzhilin/Analysis_Code/code/analysis.diff.survival.TCGA.R") |
|
|
82 |
DESeq2.normalized_counts <- readRDS("/data/active_data/lzl/RenalTumor-20200713/Data/TCGA/KIRC/Result/DESeq2.normalized_counts.rds") |
|
|
83 |
DESeq2.normalized_counts <- log2(DESeq2.normalized_counts+1) |
|
|
84 |
DESeq2.result <- readRDS("/data/active_data/lzl/RenalTumor-20200713/Data/TCGA/KIRC/Result/DESeq2.result.rds") |
|
|
85 |
clin.data <- readRDS("/data/active_data/lzl/RenalTumor-20200713/Data/TCGA/KIRC/Result/clin.data.rds") |
|
|
86 |
|
|
|
87 |
# extreact the avg_log2FC and FDR value |
|
|
88 |
order.TFs <- cellType.motifs.chromVAR[[1]]$gene |
|
|
89 |
motifs.FC <- sapply(cellType.motifs.chromVAR, function(x){ |
|
|
90 |
index <- match(order.TFs, x$gene) |
|
|
91 |
return(round(x$avg_log2FC[index], 2)) |
|
|
92 |
}) |
|
|
93 |
rownames(motifs.FC) <- order.TFs |
|
|
94 |
|
|
|
95 |
motifs.fdr <- sapply(cellType.motifs.chromVAR, function(x){ |
|
|
96 |
index <- match(order.TFs, x$gene) |
|
|
97 |
return(x$p_val_adj[index]) |
|
|
98 |
}) |
|
|
99 |
rownames(motifs.fdr) <- order.TFs |
|
|
100 |
a <- motifs.fdr |
|
|
101 |
a[which(a==0)] <- 2 |
|
|
102 |
motifs.fdr[which(motifs.fdr==0)] <- min(a)*0.001 ###4.940656e-324, Multiply the minimum value by 0.01, instead of 0 |
|
|
103 |
motifs.fdr <- round(-log10(motifs.fdr), 2) |
|
|
104 |
|
|
|
105 |
#3.deviation score |
|
|
106 |
all.motifs.avgExp <- AverageExpression(scATAC.data, features = motif.info$originName, assays = "chromvar") |
|
|
107 |
all.motifs.avgExp <- all.motifs.avgExp$chromvar |
|
|
108 |
rownames(all.motifs.avgExp) <- motif.info$TF |
|
|
109 |
|
|
|
110 |
################################## screening Strategy |
|
|
111 |
#1.deviation score: sd > median(sd) + 4*mad |
|
|
112 |
#2.Tumor cells: fdr> 0.0001 & avg_log2FC>=4 |
|
|
113 |
source(file = "/home/longzhilin/Analysis_Code/DataScience/MAD.R") |
|
|
114 |
avg.sd <- rowSds(all.motifs.avgExp) |
|
|
115 |
names(avg.sd) <- rownames(all.motifs.avgExp) |
|
|
116 |
sd.mad <- DoubleMAD(avg.sd) |
|
|
117 |
mad.threshold <- median(avg.sd) + 4*sd.mad[2] #Take the right |
|
|
118 |
|
|
|
119 |
#Only in cell type: FDR < 0.0001 & log2FC>=4; log2FC<1 in other cell types |
|
|
120 |
sig.label <- sapply(order.TFs, function(x){ |
|
|
121 |
if(avg.sd[x] > mad.threshold){ |
|
|
122 |
fdr <- motifs.fdr[x,] |
|
|
123 |
FC <- motifs.FC[x,] |
|
|
124 |
sig.fdr <- which(fdr > -log10(0.0001)) |
|
|
125 |
sig.fc <- which(FC >= 4) |
|
|
126 |
others <- which(FC >= 1) |
|
|
127 |
|
|
|
128 |
sig.idx <- intersect(sig.fdr, sig.fc) |
|
|
129 |
common.len <- length(sig.idx) |
|
|
130 |
|
|
|
131 |
if(common.len==1 & length(others)==1){ |
|
|
132 |
#if(common.len==1){ |
|
|
133 |
return("specific") |
|
|
134 |
}else{ |
|
|
135 |
return("common") |
|
|
136 |
} |
|
|
137 |
}else{ |
|
|
138 |
return("low variation") |
|
|
139 |
} |
|
|
140 |
}) |
|
|
141 |
cellType.high.specific.TF <- names(which(sig.label=="specific")) |
|
|
142 |
heatmapEM.fdr <- motifs.fdr[cellType.high.specific.TF,] |
|
|
143 |
heatmapEM.FC <- motifs.FC[cellType.high.specific.TF,] |
|
|
144 |
cellType.sd <- avg.sd[cellType.high.specific.TF] |
|
|
145 |
write.xlsx(list(FDR = heatmapEM.fdr, FC = heatmapEM.FC), file = "5.Motif/Analysis/cellType.specific.TFs.xlsx", sheetName = c("FDR", "Log2FC"), rowNames = T) |
|
|
146 |
|
|
|
147 |
#### Tumor specific TFs |
|
|
148 |
idx <- which(colnames(heatmapEM.FC) == "Tumor") |
|
|
149 |
index <- which(heatmapEM.FC[, idx] >= 4) |
|
|
150 |
tumor.specific.TFs <- data.frame(Name = rownames(heatmapEM.fdr)[index], FDR = heatmapEM.fdr[index, idx], avg_log2FC = heatmapEM.FC[index, idx], sd = cellType.sd[index]) # 49 |
|
|
151 |
heatmapEM.fdr.tumor <- heatmapEM.fdr[index,] |
|
|
152 |
heatmapEM.FC.tumor <- heatmapEM.FC[index,] |
|
|
153 |
saveRDS(tumor.specific.TFs, file = "5.Motif/Analysis/tumor.specific.TFs.rds") |
|
|
154 |
|
|
|
155 |
## Filter the TF result |
|
|
156 |
source(file = "/home/longzhilin/Analysis_Code/Visualization/Plot.EnhancedVolcano.R") |
|
|
157 |
##plot --- heatmap |
|
|
158 |
gene.labels <- c("HNF1A", "HNF1B", "HNF4G", "HNF4A", |
|
|
159 |
"HOXC5", "OTP", "ISL1", "VENTX", |
|
|
160 |
"HOXB5", "HOXB4", "HOXA4", "HOXB7", "HOXB3", "HOXB2", |
|
|
161 |
"BARX2", "POU6F2", "JUNB", "JUN", "JUND", "BATF") |
|
|
162 |
mark.idx <- match(gene.labels, rownames(heatmapEM.FC.tumor)) |
|
|
163 |
pdf("5.Motif/Analysis/tumor.specific.TFs.heatmap.pdf") |
|
|
164 |
#Plot.EnhancedVolcano(TF.DESeq2, x = "log2FoldChange", y = "padj", select.num = nrow(TF.DESeq2), drawConnectors = F) |
|
|
165 |
ha = rowAnnotation(FDR = anno_barplot(heatmapEM.fdr.tumor[,idx], border = F, gp = gpar(fill = "#e5e4e2"), direction = "reverse")) |
|
|
166 |
col_fun <- colorRamp2(c(min(heatmapEM.FC.tumor), 0, max(heatmapEM.FC.tumor)), c("blue", "white", "red")) |
|
|
167 |
ht2 <- Heatmap(heatmapEM.FC.tumor, left_annotation = ha, col = col_fun, |
|
|
168 |
show_row_dend = F, show_column_dend = F, |
|
|
169 |
column_names_gp = gpar(fontsize = 8), row_names_gp = gpar(fontsize = 8), |
|
|
170 |
name = "avg_log2FC", width = unit(7.5, "cm"), height = unit(10, "cm"), |
|
|
171 |
show_heatmap_legend = T, |
|
|
172 |
heatmap_legend_param = list(col_fun = col_fun, title = "avg_log2FC", direction = "horizontal", title_position = "topcenter")) + |
|
|
173 |
rowAnnotation(link = anno_mark(at = mark.idx, labels = gene.labels, link_width = unit(3, "mm"), labels_gp = gpar(fontsize = 8), padding = unit(1, "mm"))) |
|
|
174 |
draw(ht2, heatmap_legend_side = "top") |
|
|
175 |
dev.off() |
|
|
176 |
|
|
|
177 |
# survival in TCGA-KIRC data |
|
|
178 |
write.xlsx(tumor.specific.TFs, file = "5.Motif/Analysis/tumor.specific.TFs.xlsx", sheetName = c("Tumor specific TFs"), rowNames = T) |
|
|
179 |
pdf("5.Motif/Analysis/tumor.specific.TFs.survival.pdf") |
|
|
180 |
tumor.TFs.signature.res <- analysis.diff.survival.TCGA(interest.gene = tumor.specific.TFs$Name, diff.gene.pro = DESeq2.result, exp.data.process = DESeq2.normalized_counts, clin.data = clin.data, EnhancedVolcano.plot = F, Box.plot = F, main = "tumor.specific.TFs", meta.signature = T, single.signature = T) |
|
|
181 |
dev.off() |
|
|
182 |
|
|
|
183 |
Idents(scRNA.data) <- scRNA.data$cellType_low |
|
|
184 |
tumor.TFs.avgExp <- AverageExpression(scRNA.data, features = tumor.specific.TFs$Name, assays = "RNA", slot = "data") |
|
|
185 |
tumor.TFs.avgExp <- scale(t(tumor.TFs.avgExp$RNA)) |
|
|
186 |
|
|
|
187 |
pdf("5.Motif/Analysis/tumor.TFs.expression.pdf") |
|
|
188 |
Heatmap(tumor.TFs.avgExp, |
|
|
189 |
show_row_dend = F, show_column_dend = F, |
|
|
190 |
column_names_gp = gpar(fontsize = 8), row_names_gp = gpar(fontsize = 8), |
|
|
191 |
name = "Expression", height = unit(6, "cm"), |
|
|
192 |
show_heatmap_legend = T, |
|
|
193 |
heatmap_legend_param = list(title = "Expression", direction = "horizontal", title_position = "topcenter")) |
|
|
194 |
dev.off() |