Switch to side-by-side view

--- a
+++ b/RNA-seq/AnalysisPipeline/4.2.NMF.cCRE.R
@@ -0,0 +1,306 @@
+#' @description: Analyze the TFs involved in each NMF program
+set.seed(101)
+library(Seurat)
+library(Signac)
+library(openxlsx)
+library(dplyr)
+library(tibble)
+library(data.table)
+library(ComplexHeatmap)
+library(circlize)
+library(ggpubr)
+
+setwd("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scRNA/4.NMF/SCT/SD")
+meta.Signature <- readRDS("meta.Signature.rds")
+cCREs <- readRDS("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scATAC/6.Co-Accessible/Tumor_peak_gene_correlation.rds")
+
+source(file = "/home/longzhilin/Analysis_Code/code/analysis.diff.survival.TCGA.R")
+DESeq2.normalized_counts <- readRDS("/data/active_data/lzl/RenalTumor-20200713/Data/TCGA/KIRC/Result/DESeq2.normalized_counts.rds")
+DESeq2.normalized_counts <- log2(DESeq2.normalized_counts+1)
+DESeq2.result <- readRDS("/data/active_data/lzl/RenalTumor-20200713/Data/TCGA/KIRC/Result/DESeq2.result.rds")
+clin.data <- readRDS("/data/active_data/lzl/RenalTumor-20200713/Data/TCGA/KIRC/Result/clin.data.rds")
+
+scATAC.data <- readRDS("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scATAC/scATAC.data.pro.rds")
+scRNA.data <- readRDS("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scRNA/data.merge.pro.rds")
+
+####---------------------------------------------------1.Observe the expression of metaSiganture in scATAC data
+DefaultAssay(scATAC.data) <- "ACTIVITY"
+Tumor.scATAC <- subset(scATAC.data, subset = AnnotatedcellType == "Tumor")
+exp.matrix <- GetAssayData(Tumor.scATAC, slot = "scale.data")
+meta.genes <- data.frame(genes = unlist(meta.Signature[c(1:length(meta.Signature))]), type = c(rep("Meta-program1", 30), rep("Meta-program2", 30)))
+rownames(meta.genes) <- NULL
+idx <- match(meta.genes$genes, rownames(exp.matrix))
+meta.exp.matrix <- exp.matrix[na.omit(idx),]
+meta.genes <- meta.genes[which(!is.na(idx)),]
+row_split <- meta.genes$type
+column_split <- gsub("_.+", "", colnames(exp.matrix))
+pdf("TF_cCREs/MetaSignature.Expression.pdf")
+Heatmap(as.matrix(meta.exp.matrix), cluster_rows = T, cluster_columns = T, row_split = row_split, column_split = column_split, width = 12, height = 10, show_column_dend = F, show_row_dend = F, show_column_names = F, show_row_names = F, use_raster = T, row_names_gp = gpar(fontsize = 10), heatmap_legend_param = list(title = "Gene activity"))   
+Heatmap(as.matrix(meta.exp.matrix), cluster_rows = T, cluster_columns = T, row_split = row_split, column_split = column_split, width = 12, height = 10, show_column_dend = F, show_row_dend = F, show_column_names = F, show_row_names = T, use_raster = T, row_names_gp = gpar(fontsize = 10), heatmap_legend_param = list(title = "Gene activity"))   
+dev.off()
+
+####---------------------------------------------------2.The intersection of metaSignature and DEG, DAR
+DEGs <- readRDS("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scRNA/2.Cluster/AnnotateCellType/cellType.all.DEGs.rds")
+Tumor.DEGs <- DEGs %>% filter(cluster == "Tumor") %>% filter(abs(avg_log2FC)>0.25 & p_val_adj < 0.05)
+NMF.DEGs <- lapply(meta.Signature, function(x){
+    a <- sapply(x, function(y){
+        idx <- which(Tumor.DEGs$gene == y)
+        if(length(idx)>0){
+            return(Tumor.DEGs$avg_log2FC[idx])
+        }else{
+            return(NA)
+        }
+    })
+    return(a)
+})
+
+DARs <- readRDS("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scATAC/4.Peak/cellType.all.DARs.rds")
+Tumor.DARs <- DARs %>% filter(cellType == "Tumor") %>% filter(abs(avg_log2FC)>0.25 & p_val_adj < 0.05)
+NMF.DARs <- lapply(meta.Signature, function(x){
+    a <- sapply(x, function(y){
+        idx <- which(Tumor.DARs$gene == y)
+        if(length(idx)>0){
+            return(max(Tumor.DARs$avg_log2FC[idx]))
+        }else{
+            return(NA)
+        }
+    })
+    return(a)
+})
+col_fun <- colorRamp2(c(-2, 0, 2), c("blue", "white", "red"))
+pdf("TF_cCREs/MetaSignature.DEG.DAR.pdf")
+a <- matrix(NMF.DEGs$metaProgram1)
+rownames(a) <- names(NMF.DEGs$metaProgram1)
+p1 <- Heatmap(a, name = "log2FC", col = col_fun, width = unit(1, "cm"), height = unit(8, "cm"), na_col = "grey", cluster_rows = F, cluster_columns = F,
+        show_column_dend = F, show_row_dend = F, show_column_names = F, show_row_names = T, row_names_side = "left",
+        row_names_gp = gpar(fontsize = 8))
+a <- matrix(NMF.DARs$metaProgram1)
+rownames(a) <- names(NMF.DARs$metaProgram1)
+p2 <- Heatmap(a, name = "log2FC", col = col_fun, width = unit(1, "cm"), height = unit(8, "cm"), na_col = "grey", cluster_rows = F, cluster_columns = F,
+        show_column_dend = F, show_row_dend = F, show_column_names = F, show_row_names = T, row_names_side = "left",
+        row_names_gp = gpar(fontsize = 8))
+p1+p2
+a <- matrix(NMF.DEGs$metaProgram2)
+rownames(a) <- names(NMF.DEGs$metaProgram2)
+p1 <- Heatmap(a, name = "log2FC", col = col_fun, width = unit(1, "cm"), height = unit(8, "cm"), na_col = "grey", cluster_rows = F, cluster_columns = F,
+        show_column_dend = F, show_row_dend = F, show_column_names = F, show_row_names = T, row_names_side = "left",
+        row_names_gp = gpar(fontsize = 8))
+a <- matrix(NMF.DARs$metaProgram2)
+rownames(a) <- names(NMF.DARs$metaProgram2)
+p2 <- Heatmap(a, name = "log2FC", col = col_fun, width = unit(1, "cm"), height = unit(8, "cm"), na_col = "grey", cluster_rows = F, cluster_columns = F,
+        show_column_dend = F, show_row_dend = F, show_column_names = F, show_row_names = T, row_names_side = "left",
+        row_names_gp = gpar(fontsize = 8))
+p1+p2
+dev.off()
+
+####---------------------------------------------------3. peak in NMF program gene promoter
+# cCREs in NMF program
+meta.program.cCREs <- lapply(meta.Signature, function(x){
+    idx <- match(x, cCREs$peak1_nearestGene)
+    return(cCREs[na.omit(idx),])
+})
+write.xlsx(meta.program.cCREs, file = "TF_cCREs/meta.program.cCREs.xlsx", sheetName = names(meta.program.cCREs), rowNames = F)
+
+# TF target meta-program genes
+All.TF.targets <- readRDS("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scATAC/7.TF.analysis/targets/Tumor.All.TF.targets.rds")
+TF.metaProgram <- lapply(meta.Signature, function(x){
+    idx <- which(All.TF.targets$to %in% x)
+    return(All.TF.targets[na.omit(idx),])
+})
+
+pdf("TF_cCREs/TF.regulation.Type.pdf", height = 4)
+TF.regulate.number <- lapply(names(TF.metaProgram), function(y){
+    x <- TF.metaProgram[[y]]
+    a <- as.data.frame(table(x[, c("from","type")]), stringsAsFactors = F)
+    #取top30
+    regulation.num <- tapply(a$Freq, a$from, sum)
+    regulation.num <- sort(regulation.num, decreasing = T)
+    top30 <- names(regulation.num)[1:30]
+    top.data <- a[which(a$from %in% top30),]
+    top.data$from <- factor(top.data$from, levels = top30)
+    write.xlsx(arrange(top.data, from), file = paste0("TF_cCREs/TF.", y, ".top30.xlsx"), rowNames = F)
+    p <- ggbarplot(top.data, x = "from", y = "Freq", fill = "type", color = "type", lab.pos = "in",
+                   label = T, xlab = "", ylab = paste0("Target numbers in", y)) + theme(legend.position="top", axis.text.x = element_text(angle = 45, hjust = 1))
+    print(p)
+
+    # calculate the regulation number of each TFs
+    targetNum <- table(unique(x[, c("from","to")])[,1])
+    targetNum <- sort(targetNum, decreasing = T)
+    targetNum <- data.frame(TF = names(targetNum), targetNum = as.numeric(targetNum))
+    return(targetNum)
+})
+dev.off()
+names(TF.regulate.number) <- names(TF.metaProgram)
+# regulation number
+pdf("TF_cCREs/TF.regulation.number.pdf" , height = 3)
+p <- ggbarplot(TF.regulate.number$metaProgram1[1:30,], x = "TF", y = "targetNum", fill = "grey", color = "grey", lab.pos = "out", sort.val = "desc",
+                label = T, xlab = "", ylab = "Target numbers in meta-program1") + theme(legend.position="top", axis.text.x = element_text(angle = 90, hjust = 1))
+print(p)
+p <- ggbarplot(TF.regulate.number$metaProgram2[1:30,], x = "TF", y = "targetNum", fill = "grey", color = "grey", lab.pos = "out", sort.val = "desc",
+                label = T, xlab = "", ylab = "Target numbers in meta-program2") + theme(legend.position="top", axis.text.x = element_text(angle = 90, hjust = 1))
+print(p)
+dev.off()
+
+### Screen TF-specific regulation of a program
+program1 <- TF.regulate.number$metaProgram1[which(TF.regulate.number$metaProgram1$targetNum>=10),]
+program2 <- TF.regulate.number$metaProgram2[which(TF.regulate.number$metaProgram2$targetNum>=10),]
+spe.TF1 <- setdiff(program1$TF, program2$TF)
+spe.TF2 <- setdiff(program2$TF, program1$TF)
+spe.TFs <- c(spe.TF1, spe.TF2)
+data1 <- TF.regulate.number$metaProgram1[match(spe.TFs, TF.regulate.number$metaProgram1$TF),]
+data2 <- TF.regulate.number$metaProgram2[match(spe.TFs, TF.regulate.number$metaProgram2$TF),]
+TF.data <- cbind(data1, data2[,2])
+colnames(TF.data) <- c("TF", "Meta_program1", "Meta_program2")
+TF.data1 <- TF.data %>% filter(Meta_program1>=10 & Meta_program2<5)
+TF.data2 <- TF.data %>% filter(Meta_program1<5 & Meta_program2>10)
+
+####### screen the TF associated NMF program
+#### 1.combined the top 30 TF in each meta-program
+TF.regulations <- Reduce(function(x,y) merge(x, y, by="TF", all.x=TRUE), TF.regulate.number)
+colnames(TF.regulations)[2:ncol(TF.regulations)] <- names(TF.metaProgram)
+rownames(TF.regulations) <- TF.regulations$TF
+top.list <- sapply(TF.regulate.number, function(x){return(x$TF[1:30])})
+top.list <- unique(as.character(top.list))
+TF.regulation.data <- TF.regulations[top.list,]
+
+#### 2.combined the tumor enriched TF information
+Tumor.enriched.TFs <- readRDS("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scATAC/5.Motif/Analysis/tumor.specific.TFs.rds")
+chromVAR.TFs <- TF.regulations[Tumor.enriched.TFs$Name,]
+chromVAR.TFs <- chromVAR.TFs[which(!is.na(chromVAR.TFs$TF)),]
+pdf("TF_cCREs/TF.cCREs.chromVAR.pdf")
+col_fun <- colorRamp2(c(0, 15, 30), c("#00B3B6", "white", "#FF2610"))
+a <- TF.regulation.data[,2:ncol(TF.regulation.data)]
+p <- Heatmap(a, show_row_dend = F, show_column_dend = F, col = col_fun,
+             width = unit(2, "cm"), name = "Target number",
+             row_names_gp = gpar(fontsize = 8), column_names_gp = gpar(fontsize = 8),
+             cell_fun = function(j, i, x, y, width, height, fill) {
+                grid.text(a[i, j], x, y, gp = gpar(fontsize = 7))})
+print(p)
+
+col_fun <- colorRamp2(c(0, 7, 14), c("#00B3B6", "white", "#FF2610"))
+a <- chromVAR.TFs[,2:ncol(chromVAR.TFs)]
+p <- Heatmap(a, show_row_dend = F, show_column_dend = F, col = col_fun, cluster_rows = F, cluster_columns = F,
+             width = unit(2, "cm"), height = unit(14, "cm"), name = "Target number",
+             row_names_gp = gpar(fontsize = 8), column_names_gp = gpar(fontsize = 8),
+             cell_fun = function(j, i, x, y, width, height, fill) {
+                grid.text(a[i, j], x, y, gp = gpar(fontsize = 7))})
+print(p)
+dev.off()
+
+#### 3. TF within DEG in tumor
+source(file = "/home/longzhilin/Analysis_Code/Visualization/Plot.EnhancedVolcano.R")
+scRNA.DEGs <- readRDS("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scRNA/2.Cluster/AnnotateCellType/cellType.all.DEGs.rds")
+tumor.DEGs <- scRNA.DEGs %>% dplyr::filter(cluster == "Tumor")
+pdf("TF_cCREs/TF.metaProgram.DEGs.pdf")
+TF.regulate.DEGs <- lapply(TF.regulate.number, function(x){
+    idx <- match(x$TF, tumor.DEGs$gene)
+    x$avg_log2FC <- tumor.DEGs$avg_log2FC[idx]
+    x$p_val_adj <- tumor.DEGs$p_val_adj[idx]
+    return(x) 
+})
+##Draw a volcano map that regulates more than 5 genes
+plot.data <- TF.regulate.DEGs$metaProgram1[TF.regulate.DEGs$metaProgram1$targetNum >=5, ]
+plot.data$pointSize <- plot.data$targetNum*0.2 # 384
+p <- Plot.EnhancedVolcano(plot.data, x = "avg_log2FC", y = "p_val_adj", id.column = "TF", 
+                          FC.cutoff = 0.25, selected.showType = c("FC"), select.num = 10, 
+                          drawConnectors = T, pointSize = plot.data$pointSize, title = "Meta-program1 associated with TFs")
+p <- p + guides(colour = guide_legend(override.aes = list(size= c(1, 2, 3, 4))))
+print(p)
+
+plot.data <- TF.regulate.DEGs$metaProgram2[TF.regulate.DEGs$metaProgram2$targetNum >=5, ]
+plot.data$pointSize <- plot.data$targetNum*0.2 # 435
+p <- Plot.EnhancedVolcano(plot.data, x = "avg_log2FC", y = "p_val_adj", id.column = "TF", 
+                            FC.cutoff = 0.25, selected.showType = c("FC"), select.num = 10, 
+                            drawConnectors = T, pointSize = plot.data$pointSize, title = "Meta-program2 associated with TFs")
+p <- p + guides(colour = guide_legend(override.aes = list(size= c(1, 2, 3, 4))))
+print(p)
+write.xlsx(TF.regulate.DEGs, file = "TF_cCREs/TF.regulate.DEGs.xlsx", sheetName = names(TF.regulate.DEGs), rowNames = F)
+dev.off()
+
+interest.genes <- c("EGR1", "HSF4", "JUND", "ATF5", "ZBTB7A", "SPI1", "KLF2", "NR0B1", "ZNF263", "SP1")
+pdf("TF_cCREs/NMF.TF.survival.pdf")
+NMF.TCGA <- analysis.diff.survival.TCGA(interest.gene = interest.genes, diff.gene.pro = DESeq2.result, exp.data.process = DESeq2.normalized_counts, clin.data = clin.data, EnhancedVolcano.plot = F, main = "NMF TFs", Box.plot = F, meta.signature = F, single.signature = T)
+dev.off()
+
+####---- plot sankey 
+# look TF motif
+meta.program1.specific <- setdiff(TF.regulate.number$metaProgram1$TF, TF.regulate.number$metaProgram2$TF)
+idx <- match(meta.program1.specific, TF.regulate.number$metaProgram1$TF)
+metaProgram1.regulate.number <- TF.regulate.number$metaProgram1[idx,]
+meta.program2.specific <- setdiff(TF.regulate.number$metaProgram2$TF, TF.regulate.number$metaProgram1$TF)
+idx <- match(meta.program2.specific, TF.regulate.number$metaProgram2$TF)
+metaProgram2.regulate.number <- TF.regulate.number$metaProgram2[idx,]
+metaProgram1.regulate.number <- filter(metaProgram1.regulate.number, targetNum >= 30*0.1)
+metaProgram2.regulate.number <- filter(metaProgram2.regulate.number, targetNum >= 30*0.1)
+# plot sankey
+program1 <- as.data.frame(table(TF.metaProgram$metaProgram1[,c(1,2)]), stringsAsFactors = F) %>% filter(Freq>0) %>% filter(from %in% meta.program1.specific)
+program2 <- as.data.frame(table(TF.metaProgram$metaProgram2[,c(1,2)]), stringsAsFactors = F) %>% filter(Freq>0) %>% filter(from %in% meta.program2.specific)
+source("/home/longzhilin/Analysis_Code/Visualization/Plot.Sankey.R")
+Plot.Sankey(data = program1, saveDir = file.path(getwd(), "TF_cCREs"), filename = "TF.metaProgram1.specific.sankey")
+Plot.Sankey(data = program2, saveDir = file.path(getwd(), "TF_cCREs"), filename = "TF.metaProgram2.specific.sankey")
+
+# top30 TF motifs
+program1 <- as.data.frame(table(TF.metaProgram$metaProgram1[,c(1,2)])) %>% filter(Freq>0) %>% filter(from %in% TF.regulate.number$metaProgram1$TF[1:30])
+program2 <- as.data.frame(table(TF.metaProgram$metaProgram2[,c(1,2)])) %>% filter(Freq>0) %>% filter(from %in% TF.regulate.number$metaProgram2$TF[1:30])
+Plot.Sankey(data = program1, saveDir = file.path(getwd(), "TF_cCREs"), filename = "TF.metaProgram1.top30.sankey")
+Plot.Sankey(data = program2, saveDir = file.path(getwd(), "TF_cCREs"), filename = "TF.metaProgram2.top30.sankey")
+
+# OTP, VENTX, ISL1, HOXC5
+program1 <- as.data.frame(table(TF.metaProgram$metaProgram1[,c(1,2)])) %>% filter(Freq>0) %>% filter(from %in% c("HOXC5", "ISL1", "VENTX", "OTP"))
+program2 <- as.data.frame(table(TF.metaProgram$metaProgram2[,c(1,2)])) %>% filter(Freq>0) %>% filter(from %in% c("HOXC5", "ISL1", "VENTX", "OTP"))
+Plot.Sankey(data = program1, saveDir = file.path(getwd(), "TF_cCREs"), filename = "TF.metaProgram1.TF4.sankey", nodeWidth = 15, fontSize = 12, width = 600, height = 300)
+Plot.Sankey(data = program2, saveDir = file.path(getwd(), "TF_cCREs"), filename = "TF.metaProgram2.TF4.sankey", nodeWidth = 15, fontSize = 12, width = 600, height = 300)
+
+# cancer specific TFs
+program1 <- as.data.frame(table(TF.metaProgram$metaProgram1[,c(1,2)])) %>% filter(Freq>0) %>% filter(from %in% Tumor.enriched.TFs$Name)
+program2 <- as.data.frame(table(TF.metaProgram$metaProgram2[,c(1,2)])) %>% filter(Freq>0) %>% filter(from %in% Tumor.enriched.TFs$Name)
+Plot.Sankey(data = program1, saveDir = file.path(getwd(), "TF_cCREs"), filename = "TF.metaProgram1.tumorTFs.sankey")
+Plot.Sankey(data = program2, saveDir = file.path(getwd(), "TF_cCREs"), filename = "TF.metaProgram2.tumorTFs.sankey")
+
+#### ---- Coverage plot: VEGFA & ALDOB, PCK1
+source(file = "/home/longzhilin/Analysis_Code/Plot_colorPaletters.R")
+source(file = "/home/longzhilin/Analysis_Code/SingleCell/user.CoveragePlot.R")
+df <- readRDS(file = "/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scATAC/6.Co-Accessible/Tumor_peak_gene_correlation.rds")
+cCREs.genes <- na.omit(unique(c(df$peak1_nearestGene, df$peak2_nearestGene))) # 5407
+interest.genes <- c("VEGFA", "EGR1", "ALDOB", "PCK1", "CXCL14", "CRYAB")
+NMF.cCREs <- intersect(interest.genes, cCREs.genes)
+NMF.cCREs.info <- apply(df, 1, function(x){
+  gene1 <- as.character(na.omit(x[7]))
+  gene2 <- as.character(na.omit(x[10]))
+  idx1 <- which(gene1 %in% NMF.cCREs)
+  idx2 <- which(gene2 %in% NMF.cCREs)
+  if(length(idx1)>0 | length(idx2)>0){
+      return(t(data.frame(c(x))))
+  }else{
+    return(data.frame())
+  }
+})
+NMF.cCREs.info <- Reduce(rbind, NMF.cCREs.info)
+rownames(NMF.cCREs.info) <- NULL
+write.table(NMF.cCREs.info, file = "TF_cCREs/Tumor.NMF.cCREs.info.csv", sep = ",", quote=FALSE, row.names=F)
+saveRDS(NMF.cCREs.info, file = "TF_cCREs/Tumor.NMF.cCREs.info.rds")
+
+gene_model <- readRDS("/data/ExtraDisk/sdd/longzhilin/Data/ReferenceFasta/Annotation/gene_model.rds")
+#CRYAB coverage plot
+interest.genes <- c("VEGFA", "EGR1", "ALDOB", "CXCL14", "CRYAB")
+Tumor.cicero.conns <- readRDS("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scATAC/6.Co-Accessible/ccans/tumor.conns.rds")
+Tumor.ccans <- readRDS("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scATAC/6.Co-Accessible/ccans/tumor.ccans.rds")
+Tumor.ccans$Peak <- gsub("_", "-", Tumor.ccans$Peak)
+rownames(Tumor.ccans) <- Tumor.ccans$Peak
+DefaultAssay(scATAC.data) <- "Peaks"
+pdf("TF_cCREs/Tumor.NMF.coveragePlot.pdf")
+res <- user.CoveragePlot(scATAC.object = scATAC.data,
+                        interest.genes = interest.genes, 
+                        interested.conns = NMF.cCREs.info, 
+                        ccans = Tumor.ccans,
+                        gene_model = gene_model,
+                        coaccess_cutoff = 0.2,
+                        cicero = T,
+                        idents = "Tumor",
+                        heights = c(2,1,1,2))
+dev.off()
+pdf("TF_cCREs/Tumor.NMF.gene.pdf")
+FeaturePlot(scRNA.data, features = interest.genes, cols = c("lightgrey", "red"), reduction = 'tsne')
+FeaturePlot(scRNA.data, features = interest.genes, cols = c("lightgrey", "red"), reduction = 'umap') 
+VlnPlot(scRNA.data, features = interest.genes, group.by = "cellType_low", ncol = 2, pt.size = 0)
+dev.off()
\ No newline at end of file