a b/ATAC/AnalysisPipeline/2.cluster.R
1
#' @description: cluster and dimension reduction
2
3
library(Signac)
4
library(Seurat)
5
library(harmony)
6
library(SeuratWrappers)
7
library(GenomeInfoDb)
8
library(EnsDb.Hsapiens.v86) #---GRCh38 (hg38)
9
library(ggplot2)
10
library(patchwork)
11
library(clustree)
12
set.seed(101)
13
library(future)
14
plan("multiprocess", workers = 4)
15
options(future.globals.maxSize = 50000 * 1024^2) #50G
16
library(GenomicRanges)
17
library(ggpubr)
18
source("/home/longzhilin/Analysis_Code/SingleCell/scATAC.Integrate.multipleSample.R")
19
setwd("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scATAC")
20
21
scATAC.merge.pro <- readRDS("scATAC.merge.pro.rds")
22
23
#Normalization: Signac performs term frequency-inverse document frequency (TF-IDF) normalization. 
24
#This is a two-step normalization procedure, that both normalizes across cells to correct for differences in cellular sequencing depth, and across peaks to give higher values to more rare peaks.
25
scATAC.merge.pro <- RunTFIDF(scATAC.merge.pro)
26
27
#Feature selection:though we note that we see very similar results when using only a subset of features (try setting min.cutoff to ‘q75’ to use the top 25% all peaks)
28
scATAC.merge.pro <- FindTopFeatures(scATAC.merge.pro, min.cutoff = 'q1')
29
30
saveRDS(scATAC.merge.pro, file = "scATAC.merge.pro.rds")
31
32
#####################1.observe batch effect########################
33
#Dimension reduction: We next run singular value decomposition (SVD) on the TD-IDF matrix, using the features (peaks) selected above. 
34
scATAC.merge.pro <- RunSVD(scATAC.merge.pro)
35
scATAC.merge.pro <- RunUMAP(scATAC.merge.pro, dims = 2:30, reduction = 'lsi')
36
scATAC.merge.pro <- RunTSNE(scATAC.merge.pro, dims = 2:30, reduction = 'lsi')
37
pdf("2.Cluster/observe.patient.effect.pdf")
38
ElbowPlot(object = scATAC.merge.pro, ndims = 30, reduction = "lsi")
39
DepthCor(scATAC.merge.pro)
40
DimPlot(scATAC.merge.pro, group.by = 'dataset', pt.size = 0.1, reduction = 'umap')
41
DimPlot(scATAC.merge.pro, group.by = 'dataset', pt.size = 0.1, reduction = 'tsne')
42
dev.off()
43
44
#####################2.correct batch effect########################
45
DefaultAssay(scATAC.merge.pro) <- "ATAC"
46
47
pdf("2.Cluster/Harmony.integration.PC15.pdf")
48
Harmony.scATAC.PC15 <- Harmony.integration.reduceDimension(scATAC.object = scATAC.merge.pro, set.resolutions = seq(0.2, 1.2, by = 0.1), groups = "dataset", assay = "ATAC", PC = 15, npcs = 30)
49
dev.off()
50
saveRDS(Harmony.scATAC.PC15, file = "Harmony.scATAC.PC15.rds")
51
#####################3.gene activity########################
52
#PC 15
53
gene.activities <- GeneActivity(Harmony.scATAC.PC15)
54
# add the gene activity matrix to the Seurat object as a new assay and normalize it
55
Harmony.scATAC.PC15[['ACTIVITY']] <- CreateAssayObject(counts = gene.activities)
56
Harmony.scATAC.PC15 <- NormalizeData(
57
  object = Harmony.scATAC.PC15,
58
  assay = 'ACTIVITY',
59
  normalization.method = 'LogNormalize',
60
  scale.factor = median(Harmony.scATAC.PC15$nCount_ACTIVITY)
61
)
62
saveRDS(Harmony.scATAC.PC15, file = "Harmony.scATAC.PC15.rds")