RenalTumor / Git / [40a513] /ATAC/AnalysisPipeline/8.1.Immune.CD8T.TF.target.R

Models:
AlyssaS/
RenalTumor
Downloads: 1
[40a513]: / ATAC / AnalysisPipeline / 8.1.Immune.CD8T.TF.target.R
History
Download this file
537 lines (457 with data), 25.1 kB

#' @description: regulated network in CD8+ T cell

library(Signac)
library(Seurat)
library(harmony)
library(SeuratWrappers)
library(GenomeInfoDb)
library(EnsDb.Hsapiens.v86) #---GRCh38 (hg38)
library(ggplot2)
library(patchwork)
library(ComplexHeatmap)
library(circlize)
library(clustree)
library(vegan)
library(reshape2)
library(openxlsx)
library(tidyverse)
library(data.table)
set.seed(101)
library(future)
plan("multiprocess", workers = 5)
options(future.globals.maxSize = 50000 * 1024^2) #50G
library(GenomicRanges)
library(ggpubr)
library(BSgenome.Hsapiens.UCSC.hg38)
library(chromVARmotifs)
data("human_pwms_v2")
source(file = "/home/longzhilin/Analysis_Code/Plot_colorPaletters.R")
source("/home/longzhilin/Analysis_Code/SingleCell/scATAC.Integrate.multipleSample.R")
source(file = "/home/longzhilin/Analysis_Code/SingleCell/Integrate.scRNA.scATAC.R")
setwd("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scATAC")
source(file = "/home/longzhilin/Analysis_Code/code/ratio.plot.R")

sub.scATAC.harmony <- readRDS("8.Immune/CD8T/sub.scATAC.harmony.pro.rds")
sub.scATAC.harmony$cellType <- sub.scATAC.harmony$cellType3
Idents(sub.scATAC.harmony) <- sub.scATAC.harmony$cellType
cellType.colors <- c("#F8766D", "#00BA38", "#619CFF")
DefaultAssay(sub.scATAC.harmony) <- "Peaks"

sub.scRNA.harmony <- readRDS("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scRNA/5.Immune/CD8T/sub.scRNA.harmony.pro.rds")
sub.scRNA.harmony$cellType <- sub.scRNA.harmony$cellType3
Idents(sub.scRNA.harmony) <- sub.scRNA.harmony$cellType
DefaultAssay(sub.scRNA.harmony) <- "RNA"

####--------------------------------------------------------------------------- 5. TF regulated network
library(ggseqlogo)
interest.genes <- "CD27"
celltype <- "Exhaustion"

# motif information
motif.info <- data.frame(originName = names(sub.scATAC.harmony@assays$Peaks@motifs@motif.names), TF = unlist(sub.scATAC.harmony@assays$Peaks@motifs@motif.names))
rownames(motif.info) <- NULL
# subset by celltype
scATAC.sub <- subset(sub.scATAC.harmony, cellType == celltype) # 376
scRNA.sub <- subset(sub.scRNA.harmony, cellType == celltype)  # 1803

scATAC.sub <- FindTopFeatures(scATAC.sub, min.cutoff=ncol(scATAC.sub)*0.05) # present in what % of cells
scRNA.sub <- FindVariableFeatures(scRNA.sub, nfeatures = 3000)

# use chipseeker annotate each peak in scATAC-seq
peak.info <- readRDS("4.Peak/peak.annotation.simple.ChIPseeker.rds")

PWMatrixToProbMatrix <- function(x){
        if (class(x) != "PWMatrix") stop("x must be a TFBSTools::PWMatrix object")
        m <- (exp(as(x, "matrix"))) * TFBSTools::bg(x)/sum(TFBSTools::bg(x))
        m <- t(t(m)/colSums(m))
        m
}

################################################################################
# Find promoters & genes with accessible TF binding sites
################################################################################
# settings for featureplot
order_values <- TRUE
reduction <- 'umap'

res <- sapply(interest.genes, function(x){
    motif_name <- x
    motif_ID <- motif.info$originName[match(x, motif.info$TF)] # e.g., ENSG00000008196_LINE2_TFAP2B_D_N1

    # get all regions with motif binding site:
    DefaultAssay(scATAC.sub) <- "Peaks"
    motif_accessible <- Motifs(scATAC.sub)@data[, motif_ID]
    motif_accessible <- names(motif_accessible)[motif_accessible > 0]

    # subset this list by top features
    motif_accessible <- motif_accessible[motif_accessible %in% VariableFeatures(scATAC.sub)]

    # which of these peaks are at promoters?
    motif_accessible_promoters <- motif_accessible[motif_accessible %in% peak.info$peaks[which(peak.info$originType == "Promoter (<=1kb)")]]
    # which genes are associated with these promoters?
    motif_target_genes <- peak.info$SYMBOL[match(motif_accessible_promoters, peak.info$peaks)]

    # optional:
    # which of these genes are highly expressed in snRNA-seq?
    motif_target_genes.RNA <- motif_target_genes[motif_target_genes %in% VariableFeatures(scRNA.sub)] %>% as.character %>% unique

    # remove genes that are not in the seurat obj
    motif_target_genes <- as.character(motif_target_genes)
    motif_target_genes <- motif_target_genes[motif_target_genes %in% rownames(scRNA.sub)]

    # output 
    target.genelists <- list(data.frame(motif_target_genes.RNA), data.frame(motif_target_genes))
    write.xlsx(target.genelists, file = paste0("8.Immune/CD8T/TF.analysis/", x, "_targetGenes.xlsx"), sheetName = c("overlap with hvgs in scRNA-seq", "all target genes"))
    
    ################################################################################
    # Compute module score for these target genes
    ################################################################################
    gene_list <- list(motif_target_genes)
    names(gene_list) <- paste0(motif_name, '_targets')

    # add in whole data
    sub.scRNA.harmony <- AddModuleScore(
        sub.scRNA.harmony,
        features=gene_list,
        pool = rownames(sub.scRNA.harmony),
        name=paste0(motif_name, '_targets')
    )

    ################################################################################
    # plot module score feature plot:
    ################################################################################

    # plot promoter target gene module score for this TF:
    p1 <- FeaturePlot(sub.scRNA.harmony, features = paste0(motif_name, '_targets1'), order = order_values, reduction = reduction) +
    scale_color_gradient2(low=scales::muted('blue'), mid='white', high=scales::muted('red')) +
    theme(plot.margin = unit(c(0, 0, 0, 0), "in"))  +
    ggtitle(paste0(motif_name, ' target score'))
    pdf(paste0('8.Immune/CD8T/TF.analysis/', celltype, '_', motif_name, '_targets.pdf'), width=5, height=4, useDingbats=FALSE)
    print(p1)

    # plot chromVAR deviation for this TF:
    DefaultAssay(sub.scATAC.harmony) <- "chromvar"
    p2 <- FeaturePlot(sub.scATAC.harmony, features = gsub("_", "-", motif_ID), order = order_values, reduction = reduction) +
    scale_color_gradient2(
        low=rgb(32, 67, 37, maxColorValue=255), mid='white', high=rgb(58, 22, 72, maxColorValue=255)) +
        theme(plot.margin = unit(c(0, 0, 0, 0), "in")) +
        ggtitle(paste0(motif_name, ' motif'))
    print(p2)

    ################################################################################
    # cluster violin plot for target expression modules
    ################################################################################
    my_comparisons <- as.list(as.data.frame(combn(levels(sub.scRNA.harmony$cellType),2)))
    a <- sub.scRNA.harmony@meta.data[, c(paste0(motif_name, '_targets1'), "cellType")]
    colnames(a) <- c("Signature score", "group")
    p3 <- ggboxplot(a, x = "group", y = "Signature score", title = x, color = "black", fill = "group", add = "jitter", add.params = list(color = "black", size = 0.1)) + 
         xlab("")  + ylab(paste0(motif_name, ' targets')) + rotate_x_text(angle = 45, vjust = 1) + scale_fill_manual(values = cellType.colors) + 
         stat_compare_means(comparisons = my_comparisons, label = "p.signif") + NoLegend()
    print(p3)

    DefaultAssay(sub.scATAC.harmony) <- "chromvar"
    a <- FetchData(sub.scATAC.harmony, vars = gsub("_", "-", motif_ID), slot = "data")
    a$group <- sub.scATAC.harmony$cellType
    my_comparisons <- as.list(as.data.frame(combn(levels(sub.scATAC.harmony$cellType),2)))
    colnames(a) <- c("Signature score", "group")
    p4 <- ggboxplot(a, x = "group", y = "Signature score", title = x, color = "black", fill = "group", add = "jitter", add.params = list(color = "black", size = 0.1)) + 
         xlab("")  + ylab(paste0(motif_name, ' deviation')) + rotate_x_text(angle = 45, vjust = 1) + scale_fill_manual(values = cellType.colors) + 
         stat_compare_means(comparisons = my_comparisons, label = "p.signif") + NoLegend()
    print(p4)

    # motif logo
    PPM <- PWMatrixToProbMatrix(human_pwms_v2[[motif_ID]])
    p5 <- ggseqlogo(PPM) + ggtitle(motif_name) + theme(plot.title = element_text(hjust = 0.5))
    print(p5)    
    dev.off()
})

################################################################################
# co-accessibility
################################################################################
library(cicero)
library(monocle3)
# input Seurat object
CreateCiceroCDS <- function(seurat_atac_obj, assay = "Peaks", reduction_method = c("UMAP", "tSNE")) {
    DefaultAssay(seurat_atac_obj) <- assay
    count_data <- GetAssayData(seurat_atac_obj, slots = "counts")
    summ <- summary(count_data)
    rownames(count_data) <- gsub("-", "_", rownames(count_data))
    summ_frame <- data.frame(peak = rownames(count_data)[summ$i],
                            cell.id = colnames(count_data)[summ$j],
                            count = summ$x)

    # create cell data set object with cicero constructor
    input_cds <- make_atac_cds(summ_frame, binarize = TRUE)
    input_cds <- detect_genes(input_cds)
    input_cds <- input_cds[Matrix::rowSums(exprs(input_cds)) != 0,] 
    input_cds <- estimate_size_factors(input_cds)
    input_cds <- preprocess_cds(input_cds, method="LSI")
    input_cds <- reduce_dimension(input_cds, reduction_method=reduction_method, preprocess_method="LSI")

    if(reduction_method == "UMAP"){
        umap_coords <- reducedDims(input_cds)$UMAP
        cicero_cds <- make_cicero_cds(input_cds, reduced_coordinates=umap_coords)
    }

    if(reduction_method == "tSNE"){
        umap_coords <- reducedDims(input_cds)$tSNE
        cicero_cds <- make_cicero_cds(input_cds, reduced_coordinates=umap_coords)
    }
    return(cicero_cds)
}

FindCiceroConnection <- function(seurat_atac_obj, cds, chrom = NULL) {
    # require default assay is peak assay
    
    # get the chromosome sizes from the Seurat object
    genome <- seqlengths(seurat_atac_obj)

    # run on the whole genome
    levels <- paste0("chr",c(seq(1,22),"X","Y"))
    genome <- genome[levels]

    # convert chromosome sizes to a dataframe
    genome.df <- data.frame("chr" = names(genome), "length" = genome)
    conns <- run_cicero(cds, genomic_coords = genome.df) 
    return(conns)
}

Get_Ccans <- function(seurat_atac_obj, clusterID = NULL, reduction_method = "UMAP") {
  if(!is.null(clusterID)) {
    print(paste0("Subsetting seurat object for: ",clusterID))
    seurat_atac_obj <- subset(seurat_atac_obj, ident = clusterID) # create a subset
  } 
  # convert seurat objects into cicero cell datasets in preparation for detecting cicero connections
  print("Preparing Cicero CDS")
  ciceroCds <- CreateCiceroCDS(seurat_atac_obj, reduction_method = reduction_method)
  
  # generate disease-specific CCANS for all chromsomes of a particular celltype
  print("Finding Cicero connections")
  conns <- FindCiceroConnection(seurat_atac_obj = seurat_atac_obj, cds = ciceroCds)
  
  # Finding cis-Co-accessibility Networks (CCANS)
  CCAN_assigns <- generate_ccans(conns)

  # create a column that identifies which connections belong to a CCAN
  ccan1 <- left_join(conns, CCAN_assigns, by=c("Peak1" = "Peak"), all.x=TRUE)
  colnames(ccan1)[4] <- "CCAN1"
  ccan2 <- left_join(conns, CCAN_assigns, by=c("Peak2" = "Peak"), all.x=TRUE)
  colnames(ccan2)[4] <- "CCAN2"
  df <- cbind(ccan1, CCAN2=ccan2$CCAN2) %>%
    dplyr::mutate(CCAN = ifelse(CCAN1 == CCAN2, CCAN1, 0)) %>%
    dplyr::select(-CCAN1, -CCAN2)
  res <- list(ciceroCds = ciceroCds, conns = conns, CCAN_assigns = CCAN_assigns, df = df)
  return(res)
}

####################################################################
# subset the snATACseq object by disease state within celltype of interest and find ccans
idents <- levels(sub.scATAC.harmony)
# 不分析mast cell，由于细胞数目小于100
list.ccan <- lapply(idents, function(ident) {Get_Ccans(ident, seurat_atac_obj = sub.scATAC.harmony)})

# write to file
names(list.ccan) <- idents
lapply(names(list.ccan), function(x) {
  fwrite(list.ccan[[x]]$df, file = paste0("8.Immune/CD8T/Co-Accessible/ciceroConns.",x,".csv"), row.names = F)
})
saveRDS(list.ccan, file = "8.Immune/CD8T/Co-Accessible/cellType.ccans.rds")

# calculate a global CCAN for all celltypes
ccan <- Get_Ccans(seurat_atac_obj = sub.scATAC.harmony)
fwrite(ccan$df, file = "8.Immune/CD8T/Co-Accessible/ciceroConns.allcells.csv", row.names = F)
saveRDS(ccan, file = "8.Immune/CD8T/Co-Accessible/ccans/All.ccans.rds")

#### 提取感兴趣的ccan
ciceroCds <- list.ccan[["celltype"]]$ciceroCds
conns <- list.ccan[["celltype"]]$conns
ccans <- list.ccan[["celltype"]]$CCAN_assigns
saveRDS(ciceroCds, file = paste0("8.Immune/CD8T/Co-Accessible/", celltype,".ciceroCds.rds"))
saveRDS(conns, file = paste0("8.Immune/CD8T/Co-Accessible/", celltype,".conns.rds"))
saveRDS(ccans, file = paste0("8.Immune/CD8T/Co-Accessible/", celltype,".ccans.rds"))
fwrite(list.ccan[[celltype]]$df, file = paste0("8.Immune/CD8T/Co-Accessible/ciceroConns.", celltype, ".csv"), row.names = F)

################################################################################
# annotated ccans
################################################################################
library(ChIPseeker)
library(TxDb.Hsapiens.UCSC.hg38.knownGene)
# read in cell-type-specific CCAN and filter by coaccess threshold > 0.2
dar_files <- list.files("8.Immune/CD8T/Co-Accessible", recursive = TRUE, pattern = "ciceroConns.*.csv$", full.names = TRUE)
idents <- gsub(".*ciceroConns.", "", dar_files)
idents <- gsub(".csv", "", idents)
list.dar <- lapply(dar_files, function(file_path) {
  fread(file_path) %>%
    dplyr::filter(coaccess > 0.2) %>%
    dplyr::select(Peak1, Peak2)
})
names(list.dar) <- idents

# convert the DAR to GRanges objects to annotate
list.dar.peak1.gr <- lapply(seq(list.dar), function(x) {
  df <- list.dar[[x]]
  gr <- StringToGRanges(df$Peak1, sep = c("_","_"))
  return(gr)
})
names(list.dar.peak1.gr) <- idents

list.dar.peak2.gr <- lapply(seq(list.dar), function(x) {
  df <- list.dar[[x]]
  gr <- StringToGRanges(df$Peak2, sep = c("_","_"))
  return(gr)
})
names(list.dar.peak2.gr) <- idents

# annotate the list of GRanges DAR for each cell type with the peak location
txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
list.peak1.Anno <- lapply(list.dar.peak1.gr, annotatePeak, TxDb = txdb,
                        tssRegion = c(-3000, 3000), annoDb="org.Hs.eg.db", verbose = FALSE)
list.peak1.loc <- lapply(seq(list.peak1.Anno), function(x) {
  loc <- list.peak1.Anno[[x]]@anno$annotation 
  loc <- str_split(loc, pattern = " ", simplify = TRUE)[,1]
  loc <- str_replace(loc, pattern="3'", replacement="3p_UTR")
  loc <- str_replace(loc, pattern="5'", replacement="5p_UTR")
  loc <- str_replace(loc, pattern="Distal", replacement="Intergenic")
  return(loc)
})

list.peak2.Anno <- lapply(list.dar.peak2.gr, annotatePeak, TxDb = txdb,
                          tssRegion = c(-3000, 3000), annoDb="org.Hs.eg.db", verbose = FALSE)
list.peak2.loc <- lapply(seq(list.peak2.Anno), function(x) {
  loc <- list.peak2.Anno[[x]]@anno$annotation 
  loc <- str_split(loc, pattern = " ", simplify = TRUE)[,1]
  loc <- str_replace(loc, pattern="3'", replacement="3p_UTR")
  loc <- str_replace(loc, pattern="5'", replacement="5p_UTR")
  loc <- str_replace(loc, pattern="Distal", replacement="Intergenic")
  return(loc)
})

# create df with peak1 and peak2 locations
list.peaks.loc.df <- lapply(seq(idents), function(x) {
  df <- cbind(list.peak1.loc[[x]], list.peak2.loc[[x]]) %>% as.data.frame()
  colnames(df) <- c("Peak1","Peak2")
  counts <- dplyr::count(df, Peak1, Peak2) %>% as.data.frame()
})
names(list.peaks.loc.df) <- idents
save(list.peak1.Anno, list.peak2.Anno, file = "8.Immune/CD8T/Co-Accessible/ciceroConns.annotation.RData")

################################################################################
# identify cis-regulated elements
################################################################################
####one of the peaks overlap a gene's promoter (1kb)
peak1.anno <- list.peak1.Anno[[celltype]]@anno
peak2.anno <- list.peak2.Anno[[celltype]]@anno
conns <- fread(paste0("8.Immune/CD8T/Co-Accessible/ciceroConns.", celltype, ".csv"))
conns <- conns %>% dplyr::filter(coaccess > 0.2) # 249226 peak-pairs
conns <- conns %>% mutate(peak1_type = peak1.anno$annotation, peak1_distanceToTSS = peak1.anno$distanceToTSS, peak1_nearestGene = peak1.anno$SYMBOL,
                                      peak2_type = peak2.anno$annotation, peak2_distanceToTSS = peak2.anno$distanceToTSS, peak2_nearestGene = peak2.anno$SYMBOL)
bg <- na.omit(unique(c(conns$peak1_nearestGene, conns$peak2_nearestGene))) # 作为background gene：17567
saveRDS(bg, file = paste0("8.Immune/CD8T/Co-Accessible/bg.", celltype,".rds")) # coaccess > 0.2

#筛选出包含promoter的conns
cCREs <- conns %>% dplyr::filter(str_detect(peak1_type, pattern = "Promoter \\(<=1kb\\)"))
cCREs <- cCREs[!is.na(cCREs$peak1_nearestGene),]
conns_list <- group_split(cCREs, peak1_nearestGene) #基于peak1_nearestGene分组，通常按字母顺序排序
names(conns_list) <- sort(unique(cCREs$peak1_nearestGene))

sub.scRNA <- subset(sub.scRNA.harmony, subset = cellType == "Exhausted")
source(file = "/home/longzhilin/Analysis_Code/code/Filter.gene.R")
sub.scRNA <- Filter.gene(sub.scRNA)
cTargetGenes <- names(conns_list)[names(conns_list) %in% rownames(sub.scRNA)] # 11453 genes

# loop over all genes to compute correlation between accessibility & expression
df <- data.frame()
corr_list <- lapply(cTargetGenes, function(x){
  cat("calculated gene:", x, "\n")
  # subset by cur_gene
  cur_conns <- conns_list[[x]]
  cur_conns <- cur_conns[!(cur_conns$Peak2 %in% cur_conns$Peak1),]
  cur_conns <- subset(cur_conns, coaccess >= 0.2)

  # skip this gene if there are no co-accessible connections
  if(nrow(cur_conns) == 0){return(data.frame())}
  return(cur_conns)
})
# combine into one df and remove incomplete entries:
df <- Reduce(rbind, corr_list)
df$peak2_type <- gsub(" .*", "", df$peak2_type)
df$peak2_type <- gsub("'", "_UTR", df$peak2_type)

# distance between peak and target gene
peak1_ranges <- StringToGRanges(gsub("_", "-", df$Peak1))
peak2_ranges <- StringToGRanges(gsub("_", "-", df$Peak2))
df$distance_bp <- abs(start(peak1_ranges) - start(peak2_ranges))
# write df to file:
write.table(df, file = paste0("8.Immune/CD8T/Co-Accessible/", celltype,"_peak_gene_correlation.csv"), sep = ",", quote=FALSE, row.names=F)
saveRDS(df, file = paste0("8.Immune/CD8T/Co-Accessible/", celltype,"_peak_gene_correlation.rds"))

#### defined the enhancer
cCREs <- df
#gene-cCRE links
cCREs <- cCREs %>% subset(distance_bp/1000>=10 & peak2_type != 'Promoter')
cCREs$target_gene <- cCREs$peak1_nearestGene
cCREs$peak_gene <- paste0(cCREs$Peak2, '_', cCREs$peak1_nearestGene)

################################################################################
# Construct TF nets
################################################################################
library(igraph)
library(RColorBrewer)

TFs.info <- motif.info[match(interest.genes, motif.info$TF),]

# for each motif, find genes:
use_variable_genes <- T
motif_list <- list()
edge_df.list <- data.frame()
vertex_df.list <- data.frame()
for(i in 1:nrow(TFs.info)){
  motif_name <- TFs.info[i,2]
  motif_ID <- TFs.info[i,1]

  # get list of promoter and enhancer targets of these TFs
  DefaultAssay(scATAC.sub) <- "Peaks"
  motif_accessible <- Motifs(scATAC.sub)@data[,motif_ID]
  motif_accessible <- names(motif_accessible)[motif_accessible > 0]
  motif_accessible <- motif_accessible[motif_accessible %in% VariableFeatures(scATAC.sub)]  
  motif_accessible_promoters <- motif_accessible[motif_accessible %in% peak.info$peaks[which(peak.info$originType == "Promoter (<=1kb)")]]
  motif_target_genes <- peak.info$SYMBOL[match(motif_accessible_promoters, peak.info$peaks)]
  motif_target_genes <- as.character(motif_target_genes)

  # variable genes only?
  if(use_variable_genes){
    motif_target_genes <- motif_target_genes[motif_target_genes %in% VariableFeatures(scRNA.sub)] %>% as.character %>% unique
  } else{
    motif_target_genes<- motif_target_genes %>% as.character %>% unique
  }

  # enhancer target genes
  motif_accessible_enhancers <- motif_accessible[motif_accessible %in% cCREs$Peak2]
  motif_cCREs <- subset(cCREs, Peak2 %in% motif_accessible_enhancers)
  motif_enhancers_target_genes <- motif_cCREs$target_gene

  # variable genes only?
  if(use_variable_genes){
    motif_enhancers_target_genes <- motif_enhancers_target_genes[motif_enhancers_target_genes %in% VariableFeatures(scRNA.sub)] %>% as.character %>% unique
  }else{
    motif_enhancers_target_genes<- motif_enhancers_target_genes %>% as.character %>% unique
  }

  vertex_df <- data.frame(name = c(motif_name, as.character(unique(c(motif_enhancers_target_genes, motif_target_genes)))))

  # check if there are promoter targets:
  if(length(motif_target_genes) > 0){
    promoter_edge_df <- data.frame(
      from = motif_name,
      to = as.character(motif_target_genes),
      type = 'promoter'
    )
  } else{promoter_edge_df <- data.frame()}

  # check if there are enhancer targets:
  if(length(motif_enhancers_target_genes) > 0){
    enhancer_edge_df <- data.frame(
      from = motif_name,
      to = as.character(motif_enhancers_target_genes),
      type = 'enhancer'
    )
  } else{enhancer_edge_df <- data.frame()}

  #cur_edge_df <- rbind(cur_promoter_edge_df, cur_enhancer_edge_df, cur_repressors_edge_df)
  edge_df <- rbind(promoter_edge_df, enhancer_edge_df)

  edge_df.list <- rbind(edge_df.list, edge_df)
  vertex_df.list <- rbind(vertex_df.list, vertex_df)
}

vertex_df.list <- data.frame(name=na.omit(as.character(unique(vertex_df.list$name))))
vertex_df.list$name <- as.character(vertex_df.list$name)
edge_df.list <- na.omit(edge_df.list)

################################################################################
# visual settings for network
################################################################################
# add DEGs
sub.scRNA.DEGs <- readRDS("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scRNA/5.Immune/CD8T/cellType.DE.pro.rds")
sub.scRNA.DEGs <- filter(sub.scRNA.DEGs, abs(avg_log2FC) > 0.25 & p_val_adj < 0.05)
cellType.DEGs <- sub.scRNA.DEGs[which(sub.scRNA.DEGs$cluster == celltype),]

# color vertices based on Diagnosis DEGs:
up_genes <- cellType.DEGs %>% filter(p_val_adj < 0.05 & avg_log2FC >= 0.5) %>% .$gene
down_genes <- cellType.DEGs %>% filter(p_val_adj < 0.05 & avg_log2FC < -0.5) %>% .$gene

# remove labels if gene is not DE, or not a TF:
de_targets <- as.character(vertex_df.list$name[vertex_df.list$name %in% unique(c(up_genes, down_genes))])
vertex_df.list$label <- ifelse(vertex_df.list$name %in% de_targets, vertex_df.list$name, '')
vertex_df.list$label <- ifelse(vertex_df.list$name %in% as.character(motif.info$TF), vertex_df.list$name, vertex_df.list$label)
#vertex_df.list$label <- ifelse(vertex_df.list$name %in% gwas_genes, vertex_df.list$name, vertex_df.list$label)

# set node color based on control vs AD DEGs:
vertex_df.list$color <- ifelse(vertex_df.list$name %in% as.character(motif.info$TF), '#1E90FF', "#FFFFFF")
vertex_df.list$color <- ifelse(vertex_df.list$name %in% up_genes, "#E87D72",  "#FFFFFF")
vertex_df.list$color <- ifelse(vertex_df.list$name %in% down_genes, '#55BCC2', vertex_df.list$color)
vertex_df.list$color <- ifelse(vertex_df.list$name %in% as.character(motif.info$TF), '#1E90FF',vertex_df.list$color)
#vertex_df.list$color <- ifelse(vertex_df.list$name %in% gwas_genes, 'orange', vertex_df.list$color)

# italics font for genes:
vertex_df.list$font <- ifelse(vertex_df.list$name %in% as.character(motif.info$TF), 2, 1)

# set size to larger if the gene is a TF:
vertex_df.list$size <- ifelse(vertex_df.list$name %in% as.character(motif.info$TF), 10, 2)

other_tfs <- setdiff(motif.info$TF, interest.genes)

#vertex_df.list$size <- ifelse((vertex_df.list$name %in% de_targets | vertex_df.list$name %in% gwas_genes | vertex_df.list$name %in% other_tfs), 5, 2)
vertex_df.list$size <- ifelse((vertex_df.list$name %in% de_targets | vertex_df.list$name %in% other_tfs), 5, 2)
vertex_df.list$size <- ifelse(vertex_df.list$name %in% interest.genes, 10, vertex_df.list$size)

################################################################################
# graph all nodes
################################################################################
enhancer_color <- '#FFC125'
promoter_color <- '#00CED1'

# repressor_color <- 'gray'

g <- igraph::graph_from_data_frame(edge_df.list, directed=TRUE, vertices = vertex_df.list)
l <- layout_with_fr(g)

edge_colors <- ifelse(E(g)$type == 'promoter', promoter_color, enhancer_color)
# edge_colors <- ifelse(E(g)$type == 'repressor', repressor_color, edge_colors)

pdf(paste0('8.Immune/CD8T/TF.analysis/', celltype, '_TF_interaction_graph.pdf'), width=10, height=10, useDingbats=FALSE)
plot(
  g, layout=l,
  vertex.size=vertex_df.list$size,
  edge.color=edge_colors,
  edge.alpha=0.5,
  vertex.color=vertex_df.list$color,
  vertex.label=vertex_df.list$label, vertex.label.family='Helvetica', vertex.label.font=vertex_df.list$font,
  vertex.label.color = 'black',
  vertex.frame.color = "grey",
  vertex.alpha = 0.8,
  edge.arrow.size=0.25
)
dev.off()