|
a |
|
b/RNA-seq/AnalysisPipeline/3.CNV.estimate.R |
|
|
1 |
#' @description: Speculate the copy number change of cancer cells |
|
|
2 |
|
|
|
3 |
####Method 1: inferCNV |
|
|
4 |
#2021.4.14 |
|
|
5 |
###Create GRCh38 gene location file --- Stored on 231 server |
|
|
6 |
#Reference: https://github.com/broadinstitute/inferCNV/wiki/instructions-create-genome-position-file |
|
|
7 |
cd /data/activate_data/longzhilin/RenalTumor-20200713/inferCNV_result |
|
|
8 |
#Homo_sapiens.GRCh38.100.gtf |
|
|
9 |
python /home/longzhilin/softwares/infercnv-master/scripts/gtf_to_position_file.py --attribute_name gene_name /home/longzhilin/softwares/ReferenceFasta/Annotation/Homo_sapiens.GRCh38.100.gtf GRCh38_gen_pos.txt |
|
|
10 |
#'@note Note that you need to modify the GRCh38_gen_pos.txt file |
|
|
11 |
#'1. Add "chr" to the second column to mark the chromosome |
|
|
12 |
#'2. Refer to the gencode_v19_gene_pos.txt provided by it to remove genes other than chr1-22, X, Y, MT |
|
|
13 |
#'3. It should be noted that the order of chromosomes may be reversed. In gencode_v19_gene_pos.txt it is chr1-22,MT,X,Y, but it seems that the order of chromosomes is not mandatory, so no additional processing is currently performed. |
|
|
14 |
#R code |
|
|
15 |
genecode.modify <- read.table("/data/activate_data/longzhilin/RenalTumor-20200713/inferCNV_result/GRCh38_gen_pos_modify.txt", header = F, stringsAsFactors = F, sep = "\t") #60591 |
|
|
16 |
chrom.order <- c(paste0("chr",1:22), "chrM", "chrX", "chrY") |
|
|
17 |
index <- sapply(chrom.order, function(x){ |
|
|
18 |
index <- which(genecode.modify[,2] == x) |
|
|
19 |
return(index) |
|
|
20 |
}) |
|
|
21 |
index <- unlist(index) |
|
|
22 |
genecode.modify.order <- genecode.modify[index,] |
|
|
23 |
write.table(genecode.modify.order, "/data/activate_data/longzhilin/RenalTumor-20200713/inferCNV_result/GRCh38_gen_pos_modify_order.txt", row.name = F, col.names = F, quote = F, sep = "\t ") |
|
|
24 |
|
|
|
25 |
###The first model: cancer cells |
|
|
26 |
##Predict the CNV of potential cancer cells based on the annotated normal cells |
|
|
27 |
library(Seurat) |
|
|
28 |
data.merge <- readRDS(file = "/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scRNA/data.merge.pro.rds") |
|
|
29 |
|
|
|
30 |
##Extract NK/NKT cell and CD8+ T cell as reference cells |
|
|
31 |
index <- which(data.merge@meta.data$cellType_low %in% c("CD8+ T cell", "NK/NKT cell")) |
|
|
32 |
Normal.cell <- rownames(data.merge@meta.data)[index] # 9949 cells |
|
|
33 |
Normal.cellType <- as.character(data.merge@meta.data$cellType[index]) |
|
|
34 |
|
|
|
35 |
##Need to guess the cell population of the copy number |
|
|
36 |
setwd("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scRNA/3.CNV") |
|
|
37 |
possible.cancer.cell <- rownames(data.merge@meta.data)[which(data.merge@meta.data$seurat_clusters %in% c("4"))] # 3564 cells |
|
|
38 |
DefaultAssay(data.merge) <- "RNA" |
|
|
39 |
cancer.count <- GetAssayData(data.merge, slot = "counts") |
|
|
40 |
counts_matrix <- cancer.count[, c(Normal.cell, possible.cancer.cell)] #26118 13513 |
|
|
41 |
cell.id <- gsub("-", "\\.", colnames(counts_matrix)) |
|
|
42 |
colnames(counts_matrix) <- cell.id |
|
|
43 |
write.table(counts_matrix, file = "countmatrix.txt", sep = "\t", row.names = T, col.names = T, quote = F) |
|
|
44 |
|
|
|
45 |
##Building cell type annotation file |
|
|
46 |
index <- match(possible.cancer.cell, rownames(data.merge@meta.data)) |
|
|
47 |
cellLabel1 <- as.character(data.merge$orig.ident[index]) |
|
|
48 |
cellLabel1 <- paste0("Sample_", cellLabel1) |
|
|
49 |
annoFile <- data.frame(cell.id = cell.id, celltype = c(Normal.cellType, cellLabel1)) |
|
|
50 |
write.table(annoFile, file = "annoFile.txt", col.names = F, row.names = F, quote = F, sep = "\t") |
|
|
51 |
|
|
|
52 |
###231 Server |
|
|
53 |
setwd("/data/activate_data/longzhilin/RenalTumor-20200713/inferCNV_result/RNA_20210803") |
|
|
54 |
library(infercnv) #Version:1.6 |
|
|
55 |
infercnv_obj = CreateInfercnvObject(raw_counts_matrix= "/data/activate_data/longzhilin/RenalTumor-20200713/inferCNV_result/RNA_20210803/countmatrix.txt", |
|
|
56 |
annotations_file= "/data/activate_data/longzhilin/RenalTumor-20200713/inferCNV_result/RNA_20210803/annoFile.txt", |
|
|
57 |
delim="\t", |
|
|
58 |
gene_order_file="/data/activate_data/longzhilin/RenalTumor-20200713/inferCNV_result/GRCh38_gen_pos_modify_order.txt", |
|
|
59 |
ref_group_names=c("CD8+ T cell", "NK/NKT cell")) |
|
|
60 |
infercnv_obj = infercnv::run(infercnv_obj, |
|
|
61 |
cutoff=0.1,# cutoff=1 works well for Smart-seq2, and cutoff=0.1 works well for 10x Genomics |
|
|
62 |
out_dir= "/data/activate_data/longzhilin/RenalTumor-20200713/inferCNV_result/RNA_20210803", |
|
|
63 |
cluster_by_groups=T, |
|
|
64 |
denoise=TRUE, |
|
|
65 |
HMM=TRUE, |
|
|
66 |
num_threads = 48) |