Switch to unified view

a b/RNA-seq/AnalysisPipeline/3.CNV.estimate.R
1
#' @description: Speculate the copy number change of cancer cells
2
3
####Method 1: inferCNV
4
#2021.4.14
5
###Create GRCh38 gene location file --- Stored on 231 server
6
#Reference: https://github.com/broadinstitute/inferCNV/wiki/instructions-create-genome-position-file
7
cd /data/activate_data/longzhilin/RenalTumor-20200713/inferCNV_result
8
#Homo_sapiens.GRCh38.100.gtf
9
python /home/longzhilin/softwares/infercnv-master/scripts/gtf_to_position_file.py --attribute_name gene_name /home/longzhilin/softwares/ReferenceFasta/Annotation/Homo_sapiens.GRCh38.100.gtf GRCh38_gen_pos.txt
10
#'@note Note that you need to modify the GRCh38_gen_pos.txt file
11
#'1. Add "chr" to the second column to mark the chromosome
12
#'2. Refer to the gencode_v19_gene_pos.txt provided by it to remove genes other than chr1-22, X, Y, MT
13
#'3. It should be noted that the order of chromosomes may be reversed. In gencode_v19_gene_pos.txt it is chr1-22,MT,X,Y, but it seems that the order of chromosomes is not mandatory, so no additional processing is currently performed.
14
#R code
15
genecode.modify <- read.table("/data/activate_data/longzhilin/RenalTumor-20200713/inferCNV_result/GRCh38_gen_pos_modify.txt", header = F, stringsAsFactors = F, sep = "\t") #60591
16
chrom.order <- c(paste0("chr",1:22), "chrM", "chrX", "chrY")
17
index <- sapply(chrom.order, function(x){
18
    index <- which(genecode.modify[,2] == x)
19
    return(index)
20
})
21
index <- unlist(index)
22
genecode.modify.order <- genecode.modify[index,]
23
write.table(genecode.modify.order, "/data/activate_data/longzhilin/RenalTumor-20200713/inferCNV_result/GRCh38_gen_pos_modify_order.txt", row.name = F, col.names = F, quote = F, sep = "\t ")
24
25
###The first model: cancer cells
26
##Predict the CNV of potential cancer cells based on the annotated normal cells
27
library(Seurat)
28
data.merge <- readRDS(file = "/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scRNA/data.merge.pro.rds")
29
30
##Extract NK/NKT cell and CD8+ T cell as reference cells
31
index <- which(data.merge@meta.data$cellType_low %in% c("CD8+ T cell", "NK/NKT cell"))
32
Normal.cell <- rownames(data.merge@meta.data)[index] # 9949 cells
33
Normal.cellType <- as.character(data.merge@meta.data$cellType[index])
34
35
##Need to guess the cell population of the copy number
36
setwd("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scRNA/3.CNV")
37
possible.cancer.cell <- rownames(data.merge@meta.data)[which(data.merge@meta.data$seurat_clusters %in% c("4"))] # 3564 cells
38
DefaultAssay(data.merge) <- "RNA"
39
cancer.count <- GetAssayData(data.merge, slot = "counts")
40
counts_matrix <- cancer.count[, c(Normal.cell, possible.cancer.cell)] #26118 13513
41
cell.id <- gsub("-", "\\.", colnames(counts_matrix))
42
colnames(counts_matrix) <- cell.id
43
write.table(counts_matrix, file = "countmatrix.txt", sep = "\t", row.names = T, col.names = T, quote = F)
44
45
##Building cell type annotation file
46
index <- match(possible.cancer.cell, rownames(data.merge@meta.data))
47
cellLabel1 <- as.character(data.merge$orig.ident[index])
48
cellLabel1 <- paste0("Sample_", cellLabel1)
49
annoFile <- data.frame(cell.id = cell.id, celltype = c(Normal.cellType, cellLabel1))
50
write.table(annoFile, file = "annoFile.txt", col.names = F, row.names = F, quote = F, sep = "\t")
51
52
###231 Server
53
setwd("/data/activate_data/longzhilin/RenalTumor-20200713/inferCNV_result/RNA_20210803")
54
library(infercnv) #Version:1.6
55
infercnv_obj = CreateInfercnvObject(raw_counts_matrix= "/data/activate_data/longzhilin/RenalTumor-20200713/inferCNV_result/RNA_20210803/countmatrix.txt",
56
                                    annotations_file= "/data/activate_data/longzhilin/RenalTumor-20200713/inferCNV_result/RNA_20210803/annoFile.txt",
57
                                    delim="\t",
58
                                    gene_order_file="/data/activate_data/longzhilin/RenalTumor-20200713/inferCNV_result/GRCh38_gen_pos_modify_order.txt",
59
                                    ref_group_names=c("CD8+ T cell", "NK/NKT cell"))
60
infercnv_obj = infercnv::run(infercnv_obj,
61
                             cutoff=0.1,# cutoff=1 works well for Smart-seq2, and cutoff=0.1 works well for 10x Genomics
62
                             out_dir= "/data/activate_data/longzhilin/RenalTumor-20200713/inferCNV_result/RNA_20210803",
63
                             cluster_by_groups=T,
64
                             denoise=TRUE,
65
                             HMM=TRUE,
66
                             num_threads = 48)