Switch to side-by-side view

--- a
+++ b/ATAC/AnalysisPipeline/6.1.cis-coassessibility.R
@@ -0,0 +1,125 @@
+#' @description: identify the cis-coassessibility peaks
+
+library(Signac)
+library(Seurat)
+library(SeuratWrappers)
+library(ggplot2)
+library(patchwork)
+library(cicero)
+library(monocle3)
+set.seed(101)
+library(ggpubr)
+library(openxlsx)
+library(ComplexHeatmap)
+library(circlize)
+library(tidyverse)
+library(data.table)
+library(future)
+plan("multiprocess", workers = 10)
+options(future.globals.maxSize = 50000 * 1024^2) #50G
+
+setwd("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scATAC")
+
+scATAC.data <- readRDS("scATAC.data.pro.rds")
+Idents(scATAC.data) <- scATAC.data$AnnotatedcellType
+DefaultAssay(scATAC.data) <- "Peaks"
+
+# input Seurat object
+CreateCiceroCDS <- function(seurat_atac_obj, assay = "Peaks", reduction_method = c("UMAP", "tSNE")) {
+    DefaultAssay(seurat_atac_obj) <- assay
+    count_data <- GetAssayData(seurat_atac_obj, slots = "counts")
+    summ <- summary(count_data)
+    rownames(count_data) <- gsub("-", "_", rownames(count_data))
+    summ_frame <- data.frame(peak = rownames(count_data)[summ$i],
+                            cell.id = colnames(count_data)[summ$j],
+                            count = summ$x)
+
+    # create cell data set object with cicero constructor
+    input_cds <- make_atac_cds(summ_frame, binarize = TRUE)
+    input_cds <- detect_genes(input_cds)
+    input_cds <- input_cds[Matrix::rowSums(exprs(input_cds)) != 0,] 
+    input_cds <- estimate_size_factors(input_cds)
+    input_cds <- preprocess_cds(input_cds, method="LSI")
+    input_cds <- reduce_dimension(input_cds, reduction_method=reduction_method, preprocess_method="LSI")
+
+    if(reduction_method == "UMAP"){
+        umap_coords <- reducedDims(input_cds)$UMAP
+        cicero_cds <- make_cicero_cds(input_cds, reduced_coordinates=umap_coords)
+    }
+
+    if(reduction_method == "tSNE"){
+        umap_coords <- reducedDims(input_cds)$tSNE
+        cicero_cds <- make_cicero_cds(input_cds, reduced_coordinates=umap_coords)
+    }
+    return(cicero_cds)
+}
+
+FindCiceroConnection <- function(seurat_atac_obj, cds, chrom = NULL) {
+    # require default assay is peak assay
+    
+    # get the chromosome sizes from the Seurat object
+    genome <- seqlengths(seurat_atac_obj)
+
+    # run on the whole genome
+    levels <- paste0("chr",c(seq(1,22),"X","Y"))
+    genome <- genome[levels]
+
+    # convert chromosome sizes to a dataframe
+    genome.df <- data.frame("chr" = names(genome), "length" = genome)
+    conns <- run_cicero(cds, genomic_coords = genome.df) 
+    return(conns)
+}
+
+Get_Ccans <- function(seurat_atac_obj, clusterID = NULL, reduction_method = "UMAP") {
+  if(!is.null(clusterID)) {
+    print(paste0("Subsetting seurat object for: ",clusterID))
+    seurat_atac_obj <- subset(seurat_atac_obj, ident = clusterID) # create a subset
+  } 
+  # convert seurat objects into cicero cell datasets in preparation for detecting cicero connections
+  print("Preparing Cicero CDS")
+  ciceroCds <- CreateCiceroCDS(seurat_atac_obj, reduction_method = reduction_method)
+  
+  # generate disease-specific CCANS for all chromsomes of a particular celltype
+  print("Finding Cicero connections")
+  conns <- FindCiceroConnection(seurat_atac_obj = seurat_atac_obj, cds = ciceroCds)
+  
+  # Finding cis-Co-accessibility Networks (CCANS)
+  CCAN_assigns <- generate_ccans(conns)
+
+  # create a column that identifies which connections belong to a CCAN
+  ccan1 <- left_join(conns, CCAN_assigns, by=c("Peak1" = "Peak"), all.x=TRUE)
+  colnames(ccan1)[4] <- "CCAN1"
+  ccan2 <- left_join(conns, CCAN_assigns, by=c("Peak2" = "Peak"), all.x=TRUE)
+  colnames(ccan2)[4] <- "CCAN2"
+  df <- cbind(ccan1, CCAN2=ccan2$CCAN2) %>%
+    dplyr::mutate(CCAN = ifelse(CCAN1 == CCAN2, CCAN1, 0)) %>%
+    dplyr::select(-CCAN1, -CCAN2)
+  res <- list(ciceroCds = ciceroCds, conns = conns, CCAN_assigns = CCAN_assigns, df = df)
+  return(res)
+}
+
+####################################################################
+# subset the snATACseq object by disease state within celltype of interest and find ccans
+idents <- levels(scATAC.data)
+# remove mast cell due to the cell number less than 100
+idents <- idents[-12]
+list.ccan <- lapply(idents, function(ident) {Get_Ccans(ident, seurat_atac_obj = scATAC.data)})
+
+# write to file
+names(list.ccan) <- idents
+lapply(names(list.ccan), function(x) {
+  fwrite(list.ccan[[x]]$df, file = paste0("6.Co-Accessible/ccans/ciceroConns.",x,".csv"), row.names = F)
+})
+saveRDS(list.ccan, file = "6.Co-Accessible/ccans/cellType.ccans.rds")
+
+# calculate a global CCAN for all celltypes
+ccan <- Get_Ccans(seurat_atac_obj = scATAC.data)
+fwrite(ccan$df, file = "6.Co-Accessible/ccans/ciceroConns.allcells.csv", row.names = F)
+saveRDS(ccan, file = "6.Co-Accessible/ccans/All.ccans.rds")
+
+#### extract the tumor-ccan
+tumor.ciceroCds <- list.ccan$Tumor$ciceroCds
+tumor.conns <- list.ccan$Tumor$conns
+tumor.ccans <- list.ccan$Tumor$CCAN_assigns
+saveRDS(tumor.ciceroCds, file = "6.Co-Accessible/ccans/tumor.ciceroCds.rds")
+saveRDS(tumor.conns, file = "6.Co-Accessible/ccans/tumor.conns.rds")
\ No newline at end of file