RenalTumor / Git / [40a513] /RNA-seq/AnalysisPipeline/4.2.NMF.cCRE.R

Models:
AlyssaS/
RenalTumor
Downloads: 1
[40a513]: / RNA-seq / AnalysisPipeline / 4.2.NMF.cCRE.R
History
Download this file
306 lines (283 with data), 18.7 kB

#' @description: Analyze the TFs involved in each NMF program
set.seed(101)
library(Seurat)
library(Signac)
library(openxlsx)
library(dplyr)
library(tibble)
library(data.table)
library(ComplexHeatmap)
library(circlize)
library(ggpubr)

setwd("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scRNA/4.NMF/SCT/SD")
meta.Signature <- readRDS("meta.Signature.rds")
cCREs <- readRDS("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scATAC/6.Co-Accessible/Tumor_peak_gene_correlation.rds")

source(file = "/home/longzhilin/Analysis_Code/code/analysis.diff.survival.TCGA.R")
DESeq2.normalized_counts <- readRDS("/data/active_data/lzl/RenalTumor-20200713/Data/TCGA/KIRC/Result/DESeq2.normalized_counts.rds")
DESeq2.normalized_counts <- log2(DESeq2.normalized_counts+1)
DESeq2.result <- readRDS("/data/active_data/lzl/RenalTumor-20200713/Data/TCGA/KIRC/Result/DESeq2.result.rds")
clin.data <- readRDS("/data/active_data/lzl/RenalTumor-20200713/Data/TCGA/KIRC/Result/clin.data.rds")

scATAC.data <- readRDS("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scATAC/scATAC.data.pro.rds")
scRNA.data <- readRDS("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scRNA/data.merge.pro.rds")

####---------------------------------------------------1.Observe the expression of metaSiganture in scATAC data
DefaultAssay(scATAC.data) <- "ACTIVITY"
Tumor.scATAC <- subset(scATAC.data, subset = AnnotatedcellType == "Tumor")
exp.matrix <- GetAssayData(Tumor.scATAC, slot = "scale.data")
meta.genes <- data.frame(genes = unlist(meta.Signature[c(1:length(meta.Signature))]), type = c(rep("Meta-program1", 30), rep("Meta-program2", 30)))
rownames(meta.genes) <- NULL
idx <- match(meta.genes$genes, rownames(exp.matrix))
meta.exp.matrix <- exp.matrix[na.omit(idx),]
meta.genes <- meta.genes[which(!is.na(idx)),]
row_split <- meta.genes$type
column_split <- gsub("_.+", "", colnames(exp.matrix))
pdf("TF_cCREs/MetaSignature.Expression.pdf")
Heatmap(as.matrix(meta.exp.matrix), cluster_rows = T, cluster_columns = T, row_split = row_split, column_split = column_split, width = 12, height = 10, show_column_dend = F, show_row_dend = F, show_column_names = F, show_row_names = F, use_raster = T, row_names_gp = gpar(fontsize = 10), heatmap_legend_param = list(title = "Gene activity"))   
Heatmap(as.matrix(meta.exp.matrix), cluster_rows = T, cluster_columns = T, row_split = row_split, column_split = column_split, width = 12, height = 10, show_column_dend = F, show_row_dend = F, show_column_names = F, show_row_names = T, use_raster = T, row_names_gp = gpar(fontsize = 10), heatmap_legend_param = list(title = "Gene activity"))   
dev.off()

####---------------------------------------------------2.The intersection of metaSignature and DEG, DAR
DEGs <- readRDS("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scRNA/2.Cluster/AnnotateCellType/cellType.all.DEGs.rds")
Tumor.DEGs <- DEGs %>% filter(cluster == "Tumor") %>% filter(abs(avg_log2FC)>0.25 & p_val_adj < 0.05)
NMF.DEGs <- lapply(meta.Signature, function(x){
    a <- sapply(x, function(y){
        idx <- which(Tumor.DEGs$gene == y)
        if(length(idx)>0){
            return(Tumor.DEGs$avg_log2FC[idx])
        }else{
            return(NA)
        }
    })
    return(a)
})

DARs <- readRDS("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scATAC/4.Peak/cellType.all.DARs.rds")
Tumor.DARs <- DARs %>% filter(cellType == "Tumor") %>% filter(abs(avg_log2FC)>0.25 & p_val_adj < 0.05)
NMF.DARs <- lapply(meta.Signature, function(x){
    a <- sapply(x, function(y){
        idx <- which(Tumor.DARs$gene == y)
        if(length(idx)>0){
            return(max(Tumor.DARs$avg_log2FC[idx]))
        }else{
            return(NA)
        }
    })
    return(a)
})
col_fun <- colorRamp2(c(-2, 0, 2), c("blue", "white", "red"))
pdf("TF_cCREs/MetaSignature.DEG.DAR.pdf")
a <- matrix(NMF.DEGs$metaProgram1)
rownames(a) <- names(NMF.DEGs$metaProgram1)
p1 <- Heatmap(a, name = "log2FC", col = col_fun, width = unit(1, "cm"), height = unit(8, "cm"), na_col = "grey", cluster_rows = F, cluster_columns = F,
        show_column_dend = F, show_row_dend = F, show_column_names = F, show_row_names = T, row_names_side = "left",
        row_names_gp = gpar(fontsize = 8))
a <- matrix(NMF.DARs$metaProgram1)
rownames(a) <- names(NMF.DARs$metaProgram1)
p2 <- Heatmap(a, name = "log2FC", col = col_fun, width = unit(1, "cm"), height = unit(8, "cm"), na_col = "grey", cluster_rows = F, cluster_columns = F,
        show_column_dend = F, show_row_dend = F, show_column_names = F, show_row_names = T, row_names_side = "left",
        row_names_gp = gpar(fontsize = 8))
p1+p2
a <- matrix(NMF.DEGs$metaProgram2)
rownames(a) <- names(NMF.DEGs$metaProgram2)
p1 <- Heatmap(a, name = "log2FC", col = col_fun, width = unit(1, "cm"), height = unit(8, "cm"), na_col = "grey", cluster_rows = F, cluster_columns = F,
        show_column_dend = F, show_row_dend = F, show_column_names = F, show_row_names = T, row_names_side = "left",
        row_names_gp = gpar(fontsize = 8))
a <- matrix(NMF.DARs$metaProgram2)
rownames(a) <- names(NMF.DARs$metaProgram2)
p2 <- Heatmap(a, name = "log2FC", col = col_fun, width = unit(1, "cm"), height = unit(8, "cm"), na_col = "grey", cluster_rows = F, cluster_columns = F,
        show_column_dend = F, show_row_dend = F, show_column_names = F, show_row_names = T, row_names_side = "left",
        row_names_gp = gpar(fontsize = 8))
p1+p2
dev.off()

####---------------------------------------------------3. peak in NMF program gene promoter
# cCREs in NMF program
meta.program.cCREs <- lapply(meta.Signature, function(x){
    idx <- match(x, cCREs$peak1_nearestGene)
    return(cCREs[na.omit(idx),])
})
write.xlsx(meta.program.cCREs, file = "TF_cCREs/meta.program.cCREs.xlsx", sheetName = names(meta.program.cCREs), rowNames = F)

# TF target meta-program genes
All.TF.targets <- readRDS("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scATAC/7.TF.analysis/targets/Tumor.All.TF.targets.rds")
TF.metaProgram <- lapply(meta.Signature, function(x){
    idx <- which(All.TF.targets$to %in% x)
    return(All.TF.targets[na.omit(idx),])
})

pdf("TF_cCREs/TF.regulation.Type.pdf", height = 4)
TF.regulate.number <- lapply(names(TF.metaProgram), function(y){
    x <- TF.metaProgram[[y]]
    a <- as.data.frame(table(x[, c("from","type")]), stringsAsFactors = F)
    #取top30
    regulation.num <- tapply(a$Freq, a$from, sum)
    regulation.num <- sort(regulation.num, decreasing = T)
    top30 <- names(regulation.num)[1:30]
    top.data <- a[which(a$from %in% top30),]
    top.data$from <- factor(top.data$from, levels = top30)
    write.xlsx(arrange(top.data, from), file = paste0("TF_cCREs/TF.", y, ".top30.xlsx"), rowNames = F)
    p <- ggbarplot(top.data, x = "from", y = "Freq", fill = "type", color = "type", lab.pos = "in",
                   label = T, xlab = "", ylab = paste0("Target numbers in", y)) + theme(legend.position="top", axis.text.x = element_text(angle = 45, hjust = 1))
    print(p)

    # calculate the regulation number of each TFs
    targetNum <- table(unique(x[, c("from","to")])[,1])
    targetNum <- sort(targetNum, decreasing = T)
    targetNum <- data.frame(TF = names(targetNum), targetNum = as.numeric(targetNum))
    return(targetNum)
})
dev.off()
names(TF.regulate.number) <- names(TF.metaProgram)
# regulation number
pdf("TF_cCREs/TF.regulation.number.pdf" , height = 3)
p <- ggbarplot(TF.regulate.number$metaProgram1[1:30,], x = "TF", y = "targetNum", fill = "grey", color = "grey", lab.pos = "out", sort.val = "desc",
                label = T, xlab = "", ylab = "Target numbers in meta-program1") + theme(legend.position="top", axis.text.x = element_text(angle = 90, hjust = 1))
print(p)
p <- ggbarplot(TF.regulate.number$metaProgram2[1:30,], x = "TF", y = "targetNum", fill = "grey", color = "grey", lab.pos = "out", sort.val = "desc",
                label = T, xlab = "", ylab = "Target numbers in meta-program2") + theme(legend.position="top", axis.text.x = element_text(angle = 90, hjust = 1))
print(p)
dev.off()

### Screen TF-specific regulation of a program
program1 <- TF.regulate.number$metaProgram1[which(TF.regulate.number$metaProgram1$targetNum>=10),]
program2 <- TF.regulate.number$metaProgram2[which(TF.regulate.number$metaProgram2$targetNum>=10),]
spe.TF1 <- setdiff(program1$TF, program2$TF)
spe.TF2 <- setdiff(program2$TF, program1$TF)
spe.TFs <- c(spe.TF1, spe.TF2)
data1 <- TF.regulate.number$metaProgram1[match(spe.TFs, TF.regulate.number$metaProgram1$TF),]
data2 <- TF.regulate.number$metaProgram2[match(spe.TFs, TF.regulate.number$metaProgram2$TF),]
TF.data <- cbind(data1, data2[,2])
colnames(TF.data) <- c("TF", "Meta_program1", "Meta_program2")
TF.data1 <- TF.data %>% filter(Meta_program1>=10 & Meta_program2<5)
TF.data2 <- TF.data %>% filter(Meta_program1<5 & Meta_program2>10)

####### screen the TF associated NMF program
#### 1.combined the top 30 TF in each meta-program
TF.regulations <- Reduce(function(x,y) merge(x, y, by="TF", all.x=TRUE), TF.regulate.number)
colnames(TF.regulations)[2:ncol(TF.regulations)] <- names(TF.metaProgram)
rownames(TF.regulations) <- TF.regulations$TF
top.list <- sapply(TF.regulate.number, function(x){return(x$TF[1:30])})
top.list <- unique(as.character(top.list))
TF.regulation.data <- TF.regulations[top.list,]

#### 2.combined the tumor enriched TF information
Tumor.enriched.TFs <- readRDS("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scATAC/5.Motif/Analysis/tumor.specific.TFs.rds")
chromVAR.TFs <- TF.regulations[Tumor.enriched.TFs$Name,]
chromVAR.TFs <- chromVAR.TFs[which(!is.na(chromVAR.TFs$TF)),]
pdf("TF_cCREs/TF.cCREs.chromVAR.pdf")
col_fun <- colorRamp2(c(0, 15, 30), c("#00B3B6", "white", "#FF2610"))
a <- TF.regulation.data[,2:ncol(TF.regulation.data)]
p <- Heatmap(a, show_row_dend = F, show_column_dend = F, col = col_fun,
             width = unit(2, "cm"), name = "Target number",
             row_names_gp = gpar(fontsize = 8), column_names_gp = gpar(fontsize = 8),
             cell_fun = function(j, i, x, y, width, height, fill) {
                grid.text(a[i, j], x, y, gp = gpar(fontsize = 7))})
print(p)

col_fun <- colorRamp2(c(0, 7, 14), c("#00B3B6", "white", "#FF2610"))
a <- chromVAR.TFs[,2:ncol(chromVAR.TFs)]
p <- Heatmap(a, show_row_dend = F, show_column_dend = F, col = col_fun, cluster_rows = F, cluster_columns = F,
             width = unit(2, "cm"), height = unit(14, "cm"), name = "Target number",
             row_names_gp = gpar(fontsize = 8), column_names_gp = gpar(fontsize = 8),
             cell_fun = function(j, i, x, y, width, height, fill) {
                grid.text(a[i, j], x, y, gp = gpar(fontsize = 7))})
print(p)
dev.off()

#### 3. TF within DEG in tumor
source(file = "/home/longzhilin/Analysis_Code/Visualization/Plot.EnhancedVolcano.R")
scRNA.DEGs <- readRDS("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scRNA/2.Cluster/AnnotateCellType/cellType.all.DEGs.rds")
tumor.DEGs <- scRNA.DEGs %>% dplyr::filter(cluster == "Tumor")
pdf("TF_cCREs/TF.metaProgram.DEGs.pdf")
TF.regulate.DEGs <- lapply(TF.regulate.number, function(x){
    idx <- match(x$TF, tumor.DEGs$gene)
    x$avg_log2FC <- tumor.DEGs$avg_log2FC[idx]
    x$p_val_adj <- tumor.DEGs$p_val_adj[idx]
    return(x) 
})
##Draw a volcano map that regulates more than 5 genes
plot.data <- TF.regulate.DEGs$metaProgram1[TF.regulate.DEGs$metaProgram1$targetNum >=5, ]
plot.data$pointSize <- plot.data$targetNum*0.2 # 384
p <- Plot.EnhancedVolcano(plot.data, x = "avg_log2FC", y = "p_val_adj", id.column = "TF", 
                          FC.cutoff = 0.25, selected.showType = c("FC"), select.num = 10, 
                          drawConnectors = T, pointSize = plot.data$pointSize, title = "Meta-program1 associated with TFs")
p <- p + guides(colour = guide_legend(override.aes = list(size= c(1, 2, 3, 4))))
print(p)

plot.data <- TF.regulate.DEGs$metaProgram2[TF.regulate.DEGs$metaProgram2$targetNum >=5, ]
plot.data$pointSize <- plot.data$targetNum*0.2 # 435
p <- Plot.EnhancedVolcano(plot.data, x = "avg_log2FC", y = "p_val_adj", id.column = "TF", 
                            FC.cutoff = 0.25, selected.showType = c("FC"), select.num = 10, 
                            drawConnectors = T, pointSize = plot.data$pointSize, title = "Meta-program2 associated with TFs")
p <- p + guides(colour = guide_legend(override.aes = list(size= c(1, 2, 3, 4))))
print(p)
write.xlsx(TF.regulate.DEGs, file = "TF_cCREs/TF.regulate.DEGs.xlsx", sheetName = names(TF.regulate.DEGs), rowNames = F)
dev.off()

interest.genes <- c("EGR1", "HSF4", "JUND", "ATF5", "ZBTB7A", "SPI1", "KLF2", "NR0B1", "ZNF263", "SP1")
pdf("TF_cCREs/NMF.TF.survival.pdf")
NMF.TCGA <- analysis.diff.survival.TCGA(interest.gene = interest.genes, diff.gene.pro = DESeq2.result, exp.data.process = DESeq2.normalized_counts, clin.data = clin.data, EnhancedVolcano.plot = F, main = "NMF TFs", Box.plot = F, meta.signature = F, single.signature = T)
dev.off()

####---- plot sankey 
# look TF motif
meta.program1.specific <- setdiff(TF.regulate.number$metaProgram1$TF, TF.regulate.number$metaProgram2$TF)
idx <- match(meta.program1.specific, TF.regulate.number$metaProgram1$TF)
metaProgram1.regulate.number <- TF.regulate.number$metaProgram1[idx,]
meta.program2.specific <- setdiff(TF.regulate.number$metaProgram2$TF, TF.regulate.number$metaProgram1$TF)
idx <- match(meta.program2.specific, TF.regulate.number$metaProgram2$TF)
metaProgram2.regulate.number <- TF.regulate.number$metaProgram2[idx,]
metaProgram1.regulate.number <- filter(metaProgram1.regulate.number, targetNum >= 30*0.1)
metaProgram2.regulate.number <- filter(metaProgram2.regulate.number, targetNum >= 30*0.1)
# plot sankey
program1 <- as.data.frame(table(TF.metaProgram$metaProgram1[,c(1,2)]), stringsAsFactors = F) %>% filter(Freq>0) %>% filter(from %in% meta.program1.specific)
program2 <- as.data.frame(table(TF.metaProgram$metaProgram2[,c(1,2)]), stringsAsFactors = F) %>% filter(Freq>0) %>% filter(from %in% meta.program2.specific)
source("/home/longzhilin/Analysis_Code/Visualization/Plot.Sankey.R")
Plot.Sankey(data = program1, saveDir = file.path(getwd(), "TF_cCREs"), filename = "TF.metaProgram1.specific.sankey")
Plot.Sankey(data = program2, saveDir = file.path(getwd(), "TF_cCREs"), filename = "TF.metaProgram2.specific.sankey")

# top30 TF motifs
program1 <- as.data.frame(table(TF.metaProgram$metaProgram1[,c(1,2)])) %>% filter(Freq>0) %>% filter(from %in% TF.regulate.number$metaProgram1$TF[1:30])
program2 <- as.data.frame(table(TF.metaProgram$metaProgram2[,c(1,2)])) %>% filter(Freq>0) %>% filter(from %in% TF.regulate.number$metaProgram2$TF[1:30])
Plot.Sankey(data = program1, saveDir = file.path(getwd(), "TF_cCREs"), filename = "TF.metaProgram1.top30.sankey")
Plot.Sankey(data = program2, saveDir = file.path(getwd(), "TF_cCREs"), filename = "TF.metaProgram2.top30.sankey")

# OTP, VENTX, ISL1, HOXC5
program1 <- as.data.frame(table(TF.metaProgram$metaProgram1[,c(1,2)])) %>% filter(Freq>0) %>% filter(from %in% c("HOXC5", "ISL1", "VENTX", "OTP"))
program2 <- as.data.frame(table(TF.metaProgram$metaProgram2[,c(1,2)])) %>% filter(Freq>0) %>% filter(from %in% c("HOXC5", "ISL1", "VENTX", "OTP"))
Plot.Sankey(data = program1, saveDir = file.path(getwd(), "TF_cCREs"), filename = "TF.metaProgram1.TF4.sankey", nodeWidth = 15, fontSize = 12, width = 600, height = 300)
Plot.Sankey(data = program2, saveDir = file.path(getwd(), "TF_cCREs"), filename = "TF.metaProgram2.TF4.sankey", nodeWidth = 15, fontSize = 12, width = 600, height = 300)

# cancer specific TFs
program1 <- as.data.frame(table(TF.metaProgram$metaProgram1[,c(1,2)])) %>% filter(Freq>0) %>% filter(from %in% Tumor.enriched.TFs$Name)
program2 <- as.data.frame(table(TF.metaProgram$metaProgram2[,c(1,2)])) %>% filter(Freq>0) %>% filter(from %in% Tumor.enriched.TFs$Name)
Plot.Sankey(data = program1, saveDir = file.path(getwd(), "TF_cCREs"), filename = "TF.metaProgram1.tumorTFs.sankey")
Plot.Sankey(data = program2, saveDir = file.path(getwd(), "TF_cCREs"), filename = "TF.metaProgram2.tumorTFs.sankey")

#### ---- Coverage plot: VEGFA & ALDOB, PCK1
source(file = "/home/longzhilin/Analysis_Code/Plot_colorPaletters.R")
source(file = "/home/longzhilin/Analysis_Code/SingleCell/user.CoveragePlot.R")
df <- readRDS(file = "/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scATAC/6.Co-Accessible/Tumor_peak_gene_correlation.rds")
cCREs.genes <- na.omit(unique(c(df$peak1_nearestGene, df$peak2_nearestGene))) # 5407
interest.genes <- c("VEGFA", "EGR1", "ALDOB", "PCK1", "CXCL14", "CRYAB")
NMF.cCREs <- intersect(interest.genes, cCREs.genes)
NMF.cCREs.info <- apply(df, 1, function(x){
  gene1 <- as.character(na.omit(x[7]))
  gene2 <- as.character(na.omit(x[10]))
  idx1 <- which(gene1 %in% NMF.cCREs)
  idx2 <- which(gene2 %in% NMF.cCREs)
  if(length(idx1)>0 | length(idx2)>0){
      return(t(data.frame(c(x))))
  }else{
    return(data.frame())
  }
})
NMF.cCREs.info <- Reduce(rbind, NMF.cCREs.info)
rownames(NMF.cCREs.info) <- NULL
write.table(NMF.cCREs.info, file = "TF_cCREs/Tumor.NMF.cCREs.info.csv", sep = ",", quote=FALSE, row.names=F)
saveRDS(NMF.cCREs.info, file = "TF_cCREs/Tumor.NMF.cCREs.info.rds")

gene_model <- readRDS("/data/ExtraDisk/sdd/longzhilin/Data/ReferenceFasta/Annotation/gene_model.rds")
#CRYAB coverage plot
interest.genes <- c("VEGFA", "EGR1", "ALDOB", "CXCL14", "CRYAB")
Tumor.cicero.conns <- readRDS("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scATAC/6.Co-Accessible/ccans/tumor.conns.rds")
Tumor.ccans <- readRDS("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scATAC/6.Co-Accessible/ccans/tumor.ccans.rds")
Tumor.ccans$Peak <- gsub("_", "-", Tumor.ccans$Peak)
rownames(Tumor.ccans) <- Tumor.ccans$Peak
DefaultAssay(scATAC.data) <- "Peaks"
pdf("TF_cCREs/Tumor.NMF.coveragePlot.pdf")
res <- user.CoveragePlot(scATAC.object = scATAC.data,
                        interest.genes = interest.genes, 
                        interested.conns = NMF.cCREs.info, 
                        ccans = Tumor.ccans,
                        gene_model = gene_model,
                        coaccess_cutoff = 0.2,
                        cicero = T,
                        idents = "Tumor",
                        heights = c(2,1,1,2))
dev.off()
pdf("TF_cCREs/Tumor.NMF.gene.pdf")
FeaturePlot(scRNA.data, features = interest.genes, cols = c("lightgrey", "red"), reduction = 'tsne')
FeaturePlot(scRNA.data, features = interest.genes, cols = c("lightgrey", "red"), reduction = 'umap') 
VlnPlot(scRNA.data, features = interest.genes, group.by = "cellType_low", ncol = 2, pt.size = 0)
dev.off()