a b/ATAC/AnalysisPipeline/6.1.cis-coassessibility.R
1
#' @description: identify the cis-coassessibility peaks
2
3
library(Signac)
4
library(Seurat)
5
library(SeuratWrappers)
6
library(ggplot2)
7
library(patchwork)
8
library(cicero)
9
library(monocle3)
10
set.seed(101)
11
library(ggpubr)
12
library(openxlsx)
13
library(ComplexHeatmap)
14
library(circlize)
15
library(tidyverse)
16
library(data.table)
17
library(future)
18
plan("multiprocess", workers = 10)
19
options(future.globals.maxSize = 50000 * 1024^2) #50G
20
21
setwd("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scATAC")
22
23
scATAC.data <- readRDS("scATAC.data.pro.rds")
24
Idents(scATAC.data) <- scATAC.data$AnnotatedcellType
25
DefaultAssay(scATAC.data) <- "Peaks"
26
27
# input Seurat object
28
CreateCiceroCDS <- function(seurat_atac_obj, assay = "Peaks", reduction_method = c("UMAP", "tSNE")) {
29
    DefaultAssay(seurat_atac_obj) <- assay
30
    count_data <- GetAssayData(seurat_atac_obj, slots = "counts")
31
    summ <- summary(count_data)
32
    rownames(count_data) <- gsub("-", "_", rownames(count_data))
33
    summ_frame <- data.frame(peak = rownames(count_data)[summ$i],
34
                            cell.id = colnames(count_data)[summ$j],
35
                            count = summ$x)
36
37
    # create cell data set object with cicero constructor
38
    input_cds <- make_atac_cds(summ_frame, binarize = TRUE)
39
    input_cds <- detect_genes(input_cds)
40
    input_cds <- input_cds[Matrix::rowSums(exprs(input_cds)) != 0,] 
41
    input_cds <- estimate_size_factors(input_cds)
42
    input_cds <- preprocess_cds(input_cds, method="LSI")
43
    input_cds <- reduce_dimension(input_cds, reduction_method=reduction_method, preprocess_method="LSI")
44
45
    if(reduction_method == "UMAP"){
46
        umap_coords <- reducedDims(input_cds)$UMAP
47
        cicero_cds <- make_cicero_cds(input_cds, reduced_coordinates=umap_coords)
48
    }
49
50
    if(reduction_method == "tSNE"){
51
        umap_coords <- reducedDims(input_cds)$tSNE
52
        cicero_cds <- make_cicero_cds(input_cds, reduced_coordinates=umap_coords)
53
    }
54
    return(cicero_cds)
55
}
56
57
FindCiceroConnection <- function(seurat_atac_obj, cds, chrom = NULL) {
58
    # require default assay is peak assay
59
    
60
    # get the chromosome sizes from the Seurat object
61
    genome <- seqlengths(seurat_atac_obj)
62
63
    # run on the whole genome
64
    levels <- paste0("chr",c(seq(1,22),"X","Y"))
65
    genome <- genome[levels]
66
67
    # convert chromosome sizes to a dataframe
68
    genome.df <- data.frame("chr" = names(genome), "length" = genome)
69
    conns <- run_cicero(cds, genomic_coords = genome.df) 
70
    return(conns)
71
}
72
73
Get_Ccans <- function(seurat_atac_obj, clusterID = NULL, reduction_method = "UMAP") {
74
  if(!is.null(clusterID)) {
75
    print(paste0("Subsetting seurat object for: ",clusterID))
76
    seurat_atac_obj <- subset(seurat_atac_obj, ident = clusterID) # create a subset
77
  } 
78
  # convert seurat objects into cicero cell datasets in preparation for detecting cicero connections
79
  print("Preparing Cicero CDS")
80
  ciceroCds <- CreateCiceroCDS(seurat_atac_obj, reduction_method = reduction_method)
81
  
82
  # generate disease-specific CCANS for all chromsomes of a particular celltype
83
  print("Finding Cicero connections")
84
  conns <- FindCiceroConnection(seurat_atac_obj = seurat_atac_obj, cds = ciceroCds)
85
  
86
  # Finding cis-Co-accessibility Networks (CCANS)
87
  CCAN_assigns <- generate_ccans(conns)
88
89
  # create a column that identifies which connections belong to a CCAN
90
  ccan1 <- left_join(conns, CCAN_assigns, by=c("Peak1" = "Peak"), all.x=TRUE)
91
  colnames(ccan1)[4] <- "CCAN1"
92
  ccan2 <- left_join(conns, CCAN_assigns, by=c("Peak2" = "Peak"), all.x=TRUE)
93
  colnames(ccan2)[4] <- "CCAN2"
94
  df <- cbind(ccan1, CCAN2=ccan2$CCAN2) %>%
95
    dplyr::mutate(CCAN = ifelse(CCAN1 == CCAN2, CCAN1, 0)) %>%
96
    dplyr::select(-CCAN1, -CCAN2)
97
  res <- list(ciceroCds = ciceroCds, conns = conns, CCAN_assigns = CCAN_assigns, df = df)
98
  return(res)
99
}
100
101
####################################################################
102
# subset the snATACseq object by disease state within celltype of interest and find ccans
103
idents <- levels(scATAC.data)
104
# remove mast cell due to the cell number less than 100
105
idents <- idents[-12]
106
list.ccan <- lapply(idents, function(ident) {Get_Ccans(ident, seurat_atac_obj = scATAC.data)})
107
108
# write to file
109
names(list.ccan) <- idents
110
lapply(names(list.ccan), function(x) {
111
  fwrite(list.ccan[[x]]$df, file = paste0("6.Co-Accessible/ccans/ciceroConns.",x,".csv"), row.names = F)
112
})
113
saveRDS(list.ccan, file = "6.Co-Accessible/ccans/cellType.ccans.rds")
114
115
# calculate a global CCAN for all celltypes
116
ccan <- Get_Ccans(seurat_atac_obj = scATAC.data)
117
fwrite(ccan$df, file = "6.Co-Accessible/ccans/ciceroConns.allcells.csv", row.names = F)
118
saveRDS(ccan, file = "6.Co-Accessible/ccans/All.ccans.rds")
119
120
#### extract the tumor-ccan
121
tumor.ciceroCds <- list.ccan$Tumor$ciceroCds
122
tumor.conns <- list.ccan$Tumor$conns
123
tumor.ccans <- list.ccan$Tumor$CCAN_assigns
124
saveRDS(tumor.ciceroCds, file = "6.Co-Accessible/ccans/tumor.ciceroCds.rds")
125
saveRDS(tumor.conns, file = "6.Co-Accessible/ccans/tumor.conns.rds")