[40a513]: / ATAC / AnalysisPipeline / 6.1.cis-coassessibility.R

Download this file

125 lines (106 with data), 5.0 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#' @description: identify the cis-coassessibility peaks
library(Signac)
library(Seurat)
library(SeuratWrappers)
library(ggplot2)
library(patchwork)
library(cicero)
library(monocle3)
set.seed(101)
library(ggpubr)
library(openxlsx)
library(ComplexHeatmap)
library(circlize)
library(tidyverse)
library(data.table)
library(future)
plan("multiprocess", workers = 10)
options(future.globals.maxSize = 50000 * 1024^2) #50G
setwd("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scATAC")
scATAC.data <- readRDS("scATAC.data.pro.rds")
Idents(scATAC.data) <- scATAC.data$AnnotatedcellType
DefaultAssay(scATAC.data) <- "Peaks"
# input Seurat object
CreateCiceroCDS <- function(seurat_atac_obj, assay = "Peaks", reduction_method = c("UMAP", "tSNE")) {
DefaultAssay(seurat_atac_obj) <- assay
count_data <- GetAssayData(seurat_atac_obj, slots = "counts")
summ <- summary(count_data)
rownames(count_data) <- gsub("-", "_", rownames(count_data))
summ_frame <- data.frame(peak = rownames(count_data)[summ$i],
cell.id = colnames(count_data)[summ$j],
count = summ$x)
# create cell data set object with cicero constructor
input_cds <- make_atac_cds(summ_frame, binarize = TRUE)
input_cds <- detect_genes(input_cds)
input_cds <- input_cds[Matrix::rowSums(exprs(input_cds)) != 0,]
input_cds <- estimate_size_factors(input_cds)
input_cds <- preprocess_cds(input_cds, method="LSI")
input_cds <- reduce_dimension(input_cds, reduction_method=reduction_method, preprocess_method="LSI")
if(reduction_method == "UMAP"){
umap_coords <- reducedDims(input_cds)$UMAP
cicero_cds <- make_cicero_cds(input_cds, reduced_coordinates=umap_coords)
}
if(reduction_method == "tSNE"){
umap_coords <- reducedDims(input_cds)$tSNE
cicero_cds <- make_cicero_cds(input_cds, reduced_coordinates=umap_coords)
}
return(cicero_cds)
}
FindCiceroConnection <- function(seurat_atac_obj, cds, chrom = NULL) {
# require default assay is peak assay
# get the chromosome sizes from the Seurat object
genome <- seqlengths(seurat_atac_obj)
# run on the whole genome
levels <- paste0("chr",c(seq(1,22),"X","Y"))
genome <- genome[levels]
# convert chromosome sizes to a dataframe
genome.df <- data.frame("chr" = names(genome), "length" = genome)
conns <- run_cicero(cds, genomic_coords = genome.df)
return(conns)
}
Get_Ccans <- function(seurat_atac_obj, clusterID = NULL, reduction_method = "UMAP") {
if(!is.null(clusterID)) {
print(paste0("Subsetting seurat object for: ",clusterID))
seurat_atac_obj <- subset(seurat_atac_obj, ident = clusterID) # create a subset
}
# convert seurat objects into cicero cell datasets in preparation for detecting cicero connections
print("Preparing Cicero CDS")
ciceroCds <- CreateCiceroCDS(seurat_atac_obj, reduction_method = reduction_method)
# generate disease-specific CCANS for all chromsomes of a particular celltype
print("Finding Cicero connections")
conns <- FindCiceroConnection(seurat_atac_obj = seurat_atac_obj, cds = ciceroCds)
# Finding cis-Co-accessibility Networks (CCANS)
CCAN_assigns <- generate_ccans(conns)
# create a column that identifies which connections belong to a CCAN
ccan1 <- left_join(conns, CCAN_assigns, by=c("Peak1" = "Peak"), all.x=TRUE)
colnames(ccan1)[4] <- "CCAN1"
ccan2 <- left_join(conns, CCAN_assigns, by=c("Peak2" = "Peak"), all.x=TRUE)
colnames(ccan2)[4] <- "CCAN2"
df <- cbind(ccan1, CCAN2=ccan2$CCAN2) %>%
dplyr::mutate(CCAN = ifelse(CCAN1 == CCAN2, CCAN1, 0)) %>%
dplyr::select(-CCAN1, -CCAN2)
res <- list(ciceroCds = ciceroCds, conns = conns, CCAN_assigns = CCAN_assigns, df = df)
return(res)
}
####################################################################
# subset the snATACseq object by disease state within celltype of interest and find ccans
idents <- levels(scATAC.data)
# remove mast cell due to the cell number less than 100
idents <- idents[-12]
list.ccan <- lapply(idents, function(ident) {Get_Ccans(ident, seurat_atac_obj = scATAC.data)})
# write to file
names(list.ccan) <- idents
lapply(names(list.ccan), function(x) {
fwrite(list.ccan[[x]]$df, file = paste0("6.Co-Accessible/ccans/ciceroConns.",x,".csv"), row.names = F)
})
saveRDS(list.ccan, file = "6.Co-Accessible/ccans/cellType.ccans.rds")
# calculate a global CCAN for all celltypes
ccan <- Get_Ccans(seurat_atac_obj = scATAC.data)
fwrite(ccan$df, file = "6.Co-Accessible/ccans/ciceroConns.allcells.csv", row.names = F)
saveRDS(ccan, file = "6.Co-Accessible/ccans/All.ccans.rds")
#### extract the tumor-ccan
tumor.ciceroCds <- list.ccan$Tumor$ciceroCds
tumor.conns <- list.ccan$Tumor$conns
tumor.ccans <- list.ccan$Tumor$CCAN_assigns
saveRDS(tumor.ciceroCds, file = "6.Co-Accessible/ccans/tumor.ciceroCds.rds")
saveRDS(tumor.conns, file = "6.Co-Accessible/ccans/tumor.conns.rds")