--- a +++ b/RNA-seq/AnalysisPipeline/4.1.NMF.R @@ -0,0 +1,475 @@ +#' @description: NMF analysis for tumor cell + +seed.data <- 101 +set.seed(seed.data) +library(NMF) #0.23.0 +library(ggpubr) +library(Seurat) +library(ComplexHeatmap) +library(circlize) +library(corrplot) +library(cowplot) +library(dendsort) +library(openxlsx) +setwd(dir = "/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scRNA/4.NMF/SCT") + +#### data preparation +library(future) +plan("multiprocess", workers = 5) +options(future.globals.maxSize = 50000 * 1024^2) # set 50G RAM +# Since the similarity of the cells needs to be evaluated in the overall scale expression, the overall scale needs to be performed +data.merge <- readRDS("/data/active_data/lzl/RenalTumor-20200713/DataAnalysis-20210803/scRNA/data.merge.pro.rds") +DefaultAssay(data.merge) <- "RNA" +CancerCell <- subset(data.merge, subset = cellType_low == "Tumor") +source(file = "/data/active_data/lzl/RenalTumor-20200713/code/Filter.gene.R") +CancerCell <- Filter.gene(CancerCell) #Remove genes less than 3 cells expressed +# scale.data: do.scale = T, do.centre = T +CancerCell <- SCTransform(CancerCell, vars.to.regress = c("nCount_RNA", "percent.mt"), return.only.var.genes = F, verbose = FALSE) +saveRDS(CancerCell, file = "CancerCell.rds") + +stallion = c("1"="#EF7F48","2"="#D69100","3"="#C69900","4"="#83AD00","5"="#00BE6B", "6"="#00C0BA","7"="#00B9E1","8"="#00B3F1","19"="#E6C2DC", "10"="#8794FF", "11"="#B086FF","12"="#E46DF6","13"="#F365E6","14"="#D24B27","15"="#3BBCA8", "16"="#6E4B9E","17"="#0C727C", "18"="#7E1416","9"="#00ABFD","20"="#3D3D3D") +## NMF, only analyze the cancer cell cluster, calculate in a single sample +## NMF requires that the input data has been standardized +DefaultAssay(data.merge) <- "RNA" +T1.cancer.data <- subset(data.merge, subset = cellType_low == "Tumor" & orig.ident == "T1") # 606 cells +T1.cancer.data <- SCTransform(T1.cancer.data, vars.to.regress = c("nCount_RNA", "percent.mt"), return.only.var.genes = F, verbose = FALSE) + +T2.cancer.data <- subset(data.merge, subset = cellType_low == "Tumor" & orig.ident == "T2") # 2382 cells +T2.cancer.data <- SCTransform(T2.cancer.data, vars.to.regress = c("nCount_RNA", "percent.mt"), return.only.var.genes = F, verbose = FALSE) + +T3.cancer.data <- subset(data.merge, subset = cellType_low == "Tumor" & orig.ident == "T3") # 320 cells +T3.cancer.data <- SCTransform(T3.cancer.data, vars.to.regress = c("nCount_RNA", "percent.mt"), return.only.var.genes = F, verbose = FALSE) + +T4.cancer.data <- subset(data.merge, subset = cellType_low == "Tumor" & orig.ident == "T4") # 256 cells +T4.cancer.data <- SCTransform(T4.cancer.data, vars.to.regress = c("nCount_RNA", "percent.mt"), return.only.var.genes = F, verbose = FALSE) + +saveRDS(T1.cancer.data, file = "T1.cancer.data.rds") +saveRDS(T2.cancer.data, file = "T2.cancer.data.rds") +saveRDS(T3.cancer.data, file = "T3.cancer.data.rds") +saveRDS(T4.cancer.data, file = "T4.cancer.data.rds") + +#######################################################1.NMF calculate################################################### +T1.cancer.data <- readRDS("T1.cancer.data.rds") # 14896*606 +T2.cancer.data <- readRDS("T2.cancer.data.rds") # 17153*2382 +T3.cancer.data <- readRDS("T3.cancer.data.rds") # 14685*320 +T4.cancer.data <- readRDS("T4.cancer.data.rds") # 12468*256 +# Convert the negative value in the scale.data data to a value of 0, and it is required to have expression in at least 10 cells +Filter.gene <- function(data, min.cell = 10){ + exp.num <- apply(data, 1, function(x){ + return(length(which(x>0))) + }) + data <- data[which(exp.num>=min.cell),] + return(data) +} +T1.cancer.scale.data <- GetAssayData(T1.cancer.data, slot = "scale.data") +T1.cancer.scale.data[which(T1.cancer.scale.data < 0)] <- 0 +T1.cancer.scale.data <- Filter.gene(T1.cancer.scale.data) # 14634*606 +T2.cancer.scale.data <- GetAssayData(T2.cancer.data, slot = "scale.data") +T2.cancer.scale.data[which(T2.cancer.scale.data < 0)] <- 0 +T2.cancer.scale.data <- Filter.gene(T2.cancer.scale.data) # 16831*2382 +T3.cancer.scale.data <- GetAssayData(T3.cancer.data, slot = "scale.data") +T3.cancer.scale.data[which(T3.cancer.scale.data < 0)] <- 0 +T3.cancer.scale.data <- Filter.gene(T3.cancer.scale.data) # 14390*320 +T4.cancer.scale.data <- GetAssayData(T4.cancer.data, slot = "scale.data") +T4.cancer.scale.data[which(T4.cancer.scale.data < 0)] <- 0 +T4.cancer.scale.data <- Filter.gene(T4.cancer.scale.data) # 12042*256 + +####stardard vatiation analysis +T1.sd <- apply(T1.cancer.scale.data, 1, sd) +T2.sd <- apply(T2.cancer.scale.data, 1, sd) +T3.sd <- apply(T3.cancer.scale.data, 1, sd) +T4.sd <- apply(T4.cancer.scale.data, 1, sd) + +sd.data <- data.frame(Sample = c(rep("T1", length(T1.sd)), rep("T2", length(T2.sd)),rep("T3", length(T3.sd)), rep("T4", length(T4.sd))), + sd = c(T1.sd, T2.sd, T3.sd, T4.sd)) +pdf("SD/sample.sd.distribution.pdf") +ggdensity(sd.data, x="sd", rug = TRUE, color = "Sample", fill = "Sample", palette = "npg") + geom_vline(xintercept = 0.5) +dev.off() +T1.cancer.scale.data.pro <- T1.cancer.scale.data[names(T1.sd)[which(T1.sd >= 0.5)],] +T2.cancer.scale.data.pro <- T2.cancer.scale.data[names(T2.sd)[which(T2.sd >= 0.5)],] +T3.cancer.scale.data.pro <- T3.cancer.scale.data[names(T3.sd)[which(T3.sd >= 0.5)],] +T4.cancer.scale.data.pro <- T4.cancer.scale.data[names(T4.sd)[which(T4.sd >= 0.5)],] + +#### Determine the best rank +T1.nmf <- nmf(T1.cancer.scale.data.pro, 2:6, seed=seed.data, nrun = 30, .options = "vP48") +T2.nmf <- nmf(T2.cancer.scale.data.pro, 2:6, seed=seed.data, nrun = 30, .options = "vP48") +T3.nmf <- nmf(T3.cancer.scale.data.pro, 2:6, seed=seed.data, nrun = 30, .options = "vP48") +T4.nmf <- nmf(T4.cancer.scale.data.pro, 2:6, seed=seed.data, nrun = 30, .options = "vP48") +saveRDS(T1.nmf, "T1.nmf.rds") +saveRDS(T2.nmf, "T2.nmf.rds") +saveRDS(T3.nmf, "T3.nmf.rds") +saveRDS(T4.nmf, "T4.nmf.rds") +pdf("patient.optiaml.rank.pdf") +plot(T1.nmf) +plot(2:6,T1.nmf$measures$cophenetic, type="b", col="purple") +plot(T2.nmf) +plot(2:6,T2.nmf$measures$cophenetic, type="b", col="purple") +plot(T3.nmf) +plot(2:6,T3.nmf$measures$cophenetic, type="b", col="purple") +plot(T4.nmf) +plot(2:6,T4.nmf$measures$cophenetic, type="b", col="purple") +dev.off() + +# function: Determine the best rank +optimal.rank <- function(nmf){ + coph <- nmf$measures$cophenetic + coph_diff <- NULL + for (i in 2:length(coph)){ + coph_diff <- c(coph_diff, coph[i-1]-coph[i]) + } + k.best <- which.max(coph_diff)+1 + return(k.best) +} +optimal.rank(T1.nmf) +optimal.rank(T2.nmf) +optimal.rank(T3.nmf) +optimal.rank(T4.nmf) + +#optimal rank +T1.nmf.optimal <- nmf(T1.cancer.scale.data.pro, optimal.rank(T1.nmf), nrun = 30, seed=seed.data, .options = "vP48") +T2.nmf.optimal <- nmf(T2.cancer.scale.data.pro, optimal.rank(T2.nmf), nrun = 30, seed=seed.data, .options = "vP48") +T3.nmf.optimal <- nmf(T3.cancer.scale.data.pro, optimal.rank(T3.nmf), nrun = 30, seed=seed.data, .options = "vP48") +T4.nmf.optimal <- nmf(T4.cancer.scale.data.pro, optimal.rank(T4.nmf), nrun = 30, seed=seed.data, .options = "vP48") + +pdf("nmf.map.pdf") +coefmap(T1.nmf.optimal) +basismap(T1.nmf.optimal) +consensusmap(T1.nmf.optimal) +coefmap(T2.nmf.optimal) +basismap(T2.nmf.optimal) +consensusmap(T2.nmf.optimal) +coefmap(T3.nmf.optimal) +basismap(T3.nmf.optimal) +consensusmap(T3.nmf.optimal) +coefmap(T4.nmf.optimal) +basismap(T4.nmf.optimal) +consensusmap(T4.nmf.optimal) +dev.off() + +saveRDS(T1.nmf.optimal, "T1.nmf.optimal.rds") +saveRDS(T2.nmf.optimal, "T2.nmf.optimal.rds") +saveRDS(T3.nmf.optimal, "T3.nmf.optimal.rds") +saveRDS(T4.nmf.optimal, "T4.nmf.optimal.rds") + +#######################################################2.NMF anlaysis################################################### +#### load data +T1.cancer.data <- readRDS("T1.cancer.data.rds") +T2.cancer.data <- readRDS("T2.cancer.data.rds") +T3.cancer.data <- readRDS("T3.cancer.data.rds") +T4.cancer.data <- readRDS("T4.cancer.data.rds") +CancerCell <- readRDS("CancerCell.rds") + +setwd(paste0(getwd(), "/SD")) +T1.nmf.optimal <- readRDS("T1.nmf.optimal.rds") +T2.nmf.optimal <- readRDS("T2.nmf.optimal.rds") +T3.nmf.optimal <- readRDS("T3.nmf.optimal.rds") +T4.nmf.optimal <- readRDS("T4.nmf.optimal.rds") + +# Calculate the correlation between each sample and each program +T1.w <- basis(T1.nmf.optimal) +T1.H <- coef(T1.nmf.optimal) +T2.w <- basis(T2.nmf.optimal) +T2.H <- coef(T2.nmf.optimal) +T3.w <- basis(T3.nmf.optimal) +T3.H <- coef(T3.nmf.optimal) +T4.w <- basis(T4.nmf.optimal) +T4.H <- coef(T4.nmf.optimal) + +T1.basis.score <- as.numeric(T1.w) +T2.basis.score <- as.numeric(T2.w) +T3.basis.score <- as.numeric(T3.w) +T4.basis.score <- as.numeric(T4.w) +basis.score <- data.frame(patient = c(rep("T1", length(T1.basis.score)), + rep("T2", length(T2.basis.score)), + rep("T3", length(T3.basis.score)), + rep("T4", length(T4.basis.score))), + basis.score = c(T1.basis.score, T2.basis.score, T3.basis.score, T4.basis.score)) +pdf("basis.score.pdf") +ggdensity(basis.score, x = "basis.score", add = "mean", rug = TRUE, color = "patient", fill = "patient") +dev.off() + +#####################################################2.identify program/meta signature################################################### +#############2.1 program signature +####Extract the signature gene corresponding to each patient program +T1.program.orders <- extractFeatures(T1.nmf.optimal, 30) +T1.program.signature <- lapply(T1.program.orders, function(x){ + return(rownames(T1.w)[x]) +}) +T2.program.orders <- extractFeatures(T2.nmf.optimal, 30) +T2.program.signature <- lapply(T2.program.orders, function(x){ + return(rownames(T2.w)[x]) +}) +T3.program.orders <- extractFeatures(T3.nmf.optimal, 30) +T3.program.signature <- lapply(T3.program.orders, function(x){ + return(rownames(T3.w)[x]) +}) +T4.program.orders <- extractFeatures(T4.nmf.optimal, 30) +T4.program.signature <- lapply(T4.program.orders, function(x){ + return(rownames(T4.w)[x]) +}) + +colnames(T1.w) <- paste0("T1_Program", 1:ncol(T1.w)) +colnames(T2.w) <- paste0("T2_Program", 1:ncol(T2.w)) +colnames(T3.w) <- paste0("T3_Program", 1:ncol(T3.w)) +colnames(T4.w) <- paste0("T4_Program", 1:ncol(T4.w)) +NMF.score <- list(T1 = T1.w, T2 = T2.w, T3 = T3.w, T4 = T4.w) +saveRDS(NMF.score, file = "NMF.score.rds") + +T1.data.scale <- GetAssayData(T1.cancer.data, slot = "scale.data") +T2.data.scale <- GetAssayData(T2.cancer.data, slot = "scale.data") +T3.data.scale <- GetAssayData(T3.cancer.data, slot = "scale.data") +T4.data.scale <- GetAssayData(T4.cancer.data, slot = "scale.data") +pdf("program.signatures.heatmap.eachPatient.pdf") +col.list <- list(Phase = c("S" = "#3BBCA8", "G1" = "#6E4B9E", "G2M" = "#F59899")) +row.list <- list(Type = c("Program1"="#208A42", "Program2"="#7700FF","Program3"="#89288F", "Program4"="#e0598b","Program5"="#F47D2B")) +anno.info <- data.frame(Type = rep(paste0("Program", 1:length(T1.program.orders)), sapply(T1.program.orders, function(x){length(x)}))) +row.anno <- HeatmapAnnotation(Type = anno.info$Type, which = "row", col = row.list) +top.anno <- HeatmapAnnotation(Phase = T1.cancer.data$Phase, + col = col.list) +p <- Heatmap(T1.data.scale[unlist(T1.program.signature),], name = "Expression", cluster_rows = F, top_annotation = top.anno, left_annotation = row.anno, show_row_dend = F, show_column_dend = F, show_row_names = F, show_column_names = F) +print(p) + +anno.info <- data.frame(Type = rep(paste0("Program", 1:length(T2.program.orders)), sapply(T2.program.orders, function(x){length(x)}))) +row.anno <- HeatmapAnnotation(Type = anno.info$Type, which = "row", col = row.list) +top.anno <- HeatmapAnnotation(Phase = T2.cancer.data$Phase, + col = col.list) +p <- Heatmap(T2.data.scale[unlist(T2.program.signature),], name = "Expression", cluster_rows = F, top_annotation = top.anno, left_annotation = row.anno, show_row_dend = F, show_column_dend = F, show_row_names = F, show_column_names = F) +print(p) + +anno.info <- data.frame(Type = rep(paste0("Program", 1:length(T3.program.orders)), sapply(T3.program.orders, function(x){length(x)}))) +row.anno <- HeatmapAnnotation(Type = anno.info$Type, which = "row", col = row.list) +top.anno <- HeatmapAnnotation(Phase = T3.cancer.data$Phase, + col = col.list) +p <- Heatmap(T3.data.scale[unlist(T3.program.signature),], name = "Expression", cluster_rows = F, top_annotation = top.anno, left_annotation = row.anno, show_row_dend = F, show_column_dend = F, show_row_names = F, show_column_names = F) +print(p) + +anno.info <- data.frame(Type = rep(paste0("Program", 1:length(T4.program.orders)), sapply(T4.program.orders, function(x){length(x)}))) +row.anno <- HeatmapAnnotation(Type = anno.info$Type, which = "row", col = row.list) +top.anno <- HeatmapAnnotation(Phase = T4.cancer.data$Phase, + col = col.list) +p <- Heatmap(T4.data.scale[unlist(T4.program.signature),], name = "Expression", cluster_rows = F, top_annotation = top.anno, left_annotation = row.anno, show_row_dend = F, show_column_dend = F, show_row_names = F, show_column_names = F) +print(p) +dev.off() + +#####################################Calculate the correlation of each program +names(T1.program.signature) <- paste0("T1_Program", 1:length(T1.program.signature)) +names(T2.program.signature) <- paste0("T2_Program", 1:length(T2.program.signature)) +names(T3.program.signature) <- paste0("T3_Program", 1:length(T3.program.signature)) +names(T4.program.signature) <- paste0("T4_Program", 1:length(T4.program.signature)) +All.program.signature <- c(T1.program.signature, T2.program.signature, T3.program.signature, T4.program.signature) +saveRDS(All.program.signature, file = "All.program.signature.rds") +#out put list +a <- as.data.frame(All.program.signature) +write.csv(a, file = "All.program.signature.csv", row.names = F) + +####Calculate the program score in the overall scale.data data and then perform correlation comparison analysis +CancerCell.four.matrix <- GetAssayData(CancerCell, slot = "scale.data") + +##1. Calculate the program score of each sample across samples +CancerCell <- AddModuleScore(CancerCell, features = All.program.signature, name = paste0(names(All.program.signature), "_")) +program.score <- CancerCell@meta.data[, grep("Program", colnames(CancerCell@meta.data))] +colnames(program.score) <- gsub("_\\d+", "", colnames(program.score)) +saveRDS(program.score, file = "program.score.rds") + +##2. Calculation of each program score of a single sample +T1.cancer.data <- AddModuleScore(T1.cancer.data, features = All.program.signature, name = paste0(names(All.program.signature), "_")) +T1.program.score <- T1.cancer.data@meta.data[, grep("Program", colnames(T1.cancer.data@meta.data))] +colnames(T1.program.score) <- gsub("_\\d+", "", colnames(T1.program.score)) +T2.cancer.data <- AddModuleScore(T2.cancer.data, features = All.program.signature, name = paste0(names(All.program.signature), "_")) +T2.program.score <- T2.cancer.data@meta.data[, grep("Program", colnames(T2.cancer.data@meta.data))] +colnames(T2.program.score) <- gsub("_\\d+", "", colnames(T2.program.score)) +T3.cancer.data <- AddModuleScore(T3.cancer.data, features = All.program.signature, name = paste0(names(All.program.signature), "_")) +T3.program.score <- T3.cancer.data@meta.data[, grep("Program", colnames(T3.cancer.data@meta.data))] +colnames(T3.program.score) <- gsub("_\\d+", "", colnames(T3.program.score)) +T4.cancer.data <- AddModuleScore(T4.cancer.data, features = All.program.signature, name = paste0(names(All.program.signature), "_")) +T4.program.score <- T4.cancer.data@meta.data[, grep("Program", colnames(T4.cancer.data@meta.data))] +colnames(T4.program.score) <- gsub("_\\d+", "", colnames(T4.program.score)) +single.programScore <- list(T1 = T1.program.score, T2 = T2.program.score, T3 = T3.program.score, T4 = T4.program.score) +saveRDS(single.programScore, file = "single.programScore.rds") + +# program score correlation analysis +pdf("program.correlation.pdf") +corrM <- cor(program.score) +corrplot(corrM, order = "hclust", col=colorRampPalette(c("blue","white","#EA4335"))(100), hclust.method = "ward.D", title = "All sample", addCoef.col = "black", number.cex = 0.75, number.font = 3, tl.col = "black", tl.cex = 0.8) +T1.corrM <- cor(T1.program.score) +corrplot(T1.corrM, order = "hclust", col=colorRampPalette(c("blue","white","#EA4335"))(100), hclust.method = "ward.D", title = "T1", addCoef.col = "black", number.cex = 0.75, number.font = 3, tl.col = "black", tl.cex = 0.8) +T2.corrM <- cor(T2.program.score) +corrplot(T2.corrM, order = "hclust", col=colorRampPalette(c("blue","white","#EA4335"))(100), hclust.method = "ward.D", title = "T2", addCoef.col = "black", number.cex = 0.75, number.font = 3, tl.col = "black", tl.cex = 0.8) +T3.corrM <- cor(T3.program.score) +corrplot(T3.corrM, order = "hclust", col=colorRampPalette(c("blue","white","#EA4335"))(100), hclust.method = "ward.D", title = "T3", addCoef.col = "black", number.cex = 0.75, number.font = 3, tl.col = "black", tl.cex = 0.8) +T4.corrM <- cor(T4.program.score) +corrplot(T4.corrM, order = "hclust", col=colorRampPalette(c("blue","white","#EA4335"))(100), hclust.method = "ward.D", title = "T4", addCoef.col = "black", number.cex = 0.75, number.font = 3, tl.col = "black", tl.cex = 0.8) +mean.corrM <- (T1.corrM+T2.corrM+T3.corrM+T4.corrM)/4 +corrplot(mean.corrM, order = "hclust", col=colorRampPalette(c("blue","white","#EA4335"))(100), hclust.method = "ward.D", title = "Mean correlation", addCoef.col = "black", number.cex = 0.75, number.font = 3, tl.col = "black", tl.cex = 0.8) +dev.off() + +#### Functional enrichment analysis based on these programs +source("/home/longzhilin/Analysis_Code/PathwayEnrichment/clusterProfiler.enricher.R") +pdf("program.pathway.enrichment.pdf") +res <- sapply(names(All.program.signature), function(x){ + y <- All.program.signature[[x]] + res <- cluterProfiler.enricher(gene = y, geneType = "SYMBOL", db.type = "MsigDB", saveDir = paste0(getwd(),"/cluterProfiler_MsigDB_Program"), + title = x, qvalueCutoff = 0.05, pvalueCutoff = 0.05) + return(res) +}) +dev.off() + +############2.2 Calculate the similarity between cells +####Cell similarity +#First realize clustering through ComplexHeatmap +#Hierarchical clustering, with one minus Pearson correlation as the distance metric and Ward’s linkage +#The first way: first cluster a single sample in the overall scale matrix, obtain the sort position, and then combine +cluster.order <- sapply(c("T1_", "T2_", "T3_", "T4_"), function(x){ + index <- grep(x, colnames(CancerCell.four.matrix)) + patient.matrix <- CancerCell.four.matrix[,index] + corrMatrix <- (1- cor(patient.matrix, method="pearson")) + hc <- hclust(as.dist(corrMatrix), method="ward.D") + row_dend <- dendsort(hc) + return(colnames(patient.matrix)[row_dend$order]) +}) +cluster.order <- unlist(cluster.order) +index <- match(cluster.order, colnames(CancerCell.four.matrix)) +CancerCell.four.matrix <- CancerCell.four.matrix[,index] +saveRDS(CancerCell.four.matrix, file = "CancerCell.four.matrix.rds") +cell.similarity <- cor(CancerCell.four.matrix, method="pearson") +saveRDS(cell.similarity, file = "cell.similarity.rds") +diag(cell.similarity) <- 0 +diag(cell.similarity) <- max(cell.similarity) +pdf("cell.similarity.pdf") +row_split <- gsub("_\\d+", "", names(cluster.order)) +column_split <- gsub("_\\d+", "", names(cluster.order)) +Heatmap(cell.similarity, cluster_rows = F, border = TRUE, row_split = row_split, column_split = column_split, row_gap = unit(0, "mm"), column_gap = unit(0, "mm"), width = unit(12, "cm"), height = unit(12, "cm"), cluster_columns = F, show_column_names = F, show_row_names = F, use_raster = T, heatmap_legend_param = list(title = "Correlation coeficient")) +dev.off() + +#############2.3 Observe the similarity of each program score based on the order of cell similarity +pdf("programScore.hierarchical.clustering.pdf") +## all patient +a <- t(program.score) +index <- match(cluster.order, colnames(a)) +a <- a[,index] +p1 <- Heatmap(a, cluster_rows = T, border = TRUE, row_split = 4, clustering_distance_rows = "pearson", clustering_method_rows = "ward.D", column_split = column_split, row_gap = unit(0, "mm"), column_gap = unit(0, "mm"), cluster_columns = F, show_column_names = F, show_row_names = F, use_raster = T, width = unit(8, "cm"), height = unit(6, "cm"), row_names_gp = gpar(fontsize = 10), heatmap_legend_param = list(title = "Expression score")) + +program.matrix <- matrix(0, ncol = 4, nrow = nrow(a)) +colnames(program.matrix) <- c("T1", "T2", "T3", "T4") +rownames(program.matrix) <- rownames(a) +for(i in 1:nrow(a)){ + x <- rownames(a)[i] + x <- gsub("_.*", "", x) + program.matrix[i, x] <- 1 +} +colors = structure(c("white", "#6387C5"), names = c("0", "1")) +p2 <- Heatmap(program.matrix, col = colors, cluster_rows = F, rect_gp = gpar(col = "white", lwd = 2), border = TRUE, row_gap = unit(0, "mm"), column_gap = unit(0, "mm"), cluster_columns = F, show_column_names = T, show_row_names = F, width = unit(1.5, "cm"), row_names_gp = gpar(fontsize = 10)) +plot.list <- p1+p2 +draw(plot.list, heatmap_height = unit(6, "cm")) + +p1 <- Heatmap(a, cluster_rows = T, border = TRUE, row_split = 4, clustering_distance_rows = "pearson", clustering_method_rows = "ward.D", column_split = column_split, row_gap = unit(0, "mm"), column_gap = unit(0, "mm"), cluster_columns = F, show_column_names = F, show_row_names = T, use_raster = T, width = unit(8, "cm"), height = unit(6, "cm"), row_names_gp = gpar(fontsize = 10), heatmap_legend_param = list(title = "Expression score")) +p2 <- Heatmap(program.matrix, col = colors, cluster_rows = F, rect_gp = gpar(col = "white", lwd = 2), border = TRUE, row_gap = unit(0, "mm"), column_gap = unit(0, "mm"), cluster_columns = F, show_column_names = T, show_row_names = T, width = unit(1.5, "cm"), row_names_gp = gpar(fontsize = 10)) +plot.list <- p1+p2 +draw(plot.list, heatmap_height = unit(6, "cm")) + +## single patient +singleScore.cluster <- function(program.score, cluster.order, row_split){ + a <- t(program.score) + index <- match(cluster.order, colnames(a)) + a <- a[,na.omit(index)] + p1 <- Heatmap(a, cluster_rows = T, border = TRUE, row_split = row_split, clustering_distance_rows = "pearson", clustering_method_rows = "ward.D", cluster_column = F, row_gap = unit(0, "mm"), column_gap = unit(0, "mm"), cluster_columns = F, show_column_names = F, show_row_names = T, use_raster = T, width = unit(8, "cm"), height = unit(6, "cm"), row_names_gp = gpar(fontsize = 10), heatmap_legend_param = list(title = "Expression score")) + print(p1) +} +res <- singleScore.cluster(program.score = T1.program.score, cluster.order = cluster.order, row_split = 5) +res <- singleScore.cluster(program.score = T2.program.score, cluster.order = cluster.order, row_split = 5) +res <- singleScore.cluster(program.score = T3.program.score, cluster.order = cluster.order, row_split = 5) +res <- singleScore.cluster(program.score = T4.program.score, cluster.order = cluster.order, row_split = 5) +Heatmap(mean.corrM, cluster_rows = T, cluster_columns = T, border = TRUE, row_split = 5, column_split = 5, row_gap = unit(0, "mm"), column_gap = unit(0, "mm"), show_column_names = T, show_row_names = T, use_raster = T, width = unit(6, "cm"), height = unit(6, "cm"), row_names_gp = gpar(fontsize = 10), column_names_gp = gpar(fontsize = 10), heatmap_legend_param = list(title = "Mean correlation")) +Heatmap(mean.corrM, cluster_rows = T, cluster_columns = T, clustering_distance_rows = "pearson", clustering_method_rows = "ward.D", clustering_distance_columns = "pearson", clustering_method_columns = "ward.D", border = TRUE, row_split = 5, column_split = 5, row_gap = unit(0, "mm"), column_gap = unit(0, "mm"), show_column_names = T, show_row_names = T, use_raster = T, width = unit(6, "cm"), height = unit(6, "cm"), row_names_gp = gpar(fontsize = 10), column_names_gp = gpar(fontsize = 10), heatmap_legend_param = list(title = "Mean correlation")) +dev.off() + +###############2.4 Build metaProgram +#ref:Resolving medulloblastoma cellular architecture by single-cell genomics. 2019 Nature +All.program.signature <- readRDS("All.program.signature.rds") +metaProgram1 <- c("T2_Program2", "T1_Program1", "T3_Program1", "T4_Program1") +metaProgram2 <- c("T4_Program2", "T2_Program1", "T3_Program3") +#metaProgram3 <- c("T2_Program3", "T3_Program2") + +#' @note function --- metaProgram signature gene +#average NMF score top30 gene as the signature +getMetaSignature <- function(NMF.score, program.set){ + # first step:extract NMF score with correlated program + patient.info <- gsub("_.*", "", program.set) + program.nmf.score <- lapply(1:length(patient.info), function(x){ + nmf.score <- NMF.score[[patient.info[x]]] # focus patient + nmf.score <- nmf.score[, program.set[x]] # focus program + nmf.score <- data.frame(gene = names(nmf.score), score = as.numeric(nmf.score)) + return(nmf.score) + }) + program.nmf.score.merge <- Reduce(function(x,y) merge(x, y, by="gene", all.x=TRUE, all.y=TRUE), program.nmf.score) + colnames(program.nmf.score.merge) <- c("gene", program.set) + + # second step: calculate average score + avg.score <- apply(program.nmf.score.merge, 1, function(x){ + return(sum(na.omit(as.numeric(x[-1])))/length(program.set)) + }) + program.nmf.score.merge$avgScore <- avg.score + program.nmf.score.merge <- program.nmf.score.merge[order(program.nmf.score.merge$avgScore, decreasing = T), ] + return(program.nmf.score.merge$gene[1:30]) +} +meta.signature1 <- getMetaSignature(program.set = metaProgram1, NMF.score = NMF.score) +meta.signature2 <- getMetaSignature(program.set = metaProgram2, NMF.score = NMF.score) +meta.Signature <- list(metaProgram1 = meta.signature1, + metaProgram2 = meta.signature2) +a <- as.data.frame(meta.Signature) +write.csv(a, file = "meta.Signature.csv", row.names = F) +saveRDS(meta.Signature, file = "meta.Signature.rds") + +##Functional enrichment analysis +source("/home/longzhilin/Analysis_Code/PathwayEnrichment/clusterProfiler.enricher.R") +pdf("MetaProgram.pathway.enrichment.pdf") +res <- sapply(1:length(meta.Signature), function(x){ + y <- meta.Signature[[x]] + res <- cluterProfiler.enricher(gene = y, geneType = "SYMBOL", db.type = "MsigDB", saveDir = paste0(getwd(), "/cluterProfiler_MsigDB_MetaProgram"), + title = x, qvalueCutoff = 0.05, pvalueCutoff = 0.05, showCategory = 10) + return(res) +}) +dev.off() + +Expression.matrix <- CancerCell.four.matrix +meta.genes <- unlist(meta.Signature[c(1:length(meta.Signature))]) #meta gene signature +exp.matrix <- Expression.matrix[meta.genes,] +row_split <- rep(names(meta.Signature)[c(1:length(meta.Signature))],each=30) +column_split <- gsub("_.+", "", colnames(CancerCell.four.matrix)) +pdf("MetaSignature.Expression.pdf") +Heatmap(exp.matrix, cluster_rows = T, cluster_columns = T, row_split = row_split, column_split = column_split, width = 12, height = 10, show_column_dend = F, show_row_dend = F, show_column_names = F, show_row_names = F, use_raster = T, row_names_gp = gpar(fontsize = 10), heatmap_legend_param = list(title = "Relative expression")) +Heatmap(exp.matrix, cluster_rows = T, cluster_columns = T, row_split = row_split, column_split = column_split, width = 12, height = 10, show_column_dend = F, show_row_dend = F, show_column_names = F, show_row_names = T, use_raster = T, row_names_gp = gpar(fontsize = 10), heatmap_legend_param = list(title = "Relative expression")) +dev.off() + +##Analyze the correlation between the two meatPrograms +#Based on the original strategy for programScore, --- mainly consider this, +CancerCell <- AddModuleScore(CancerCell, features = meta.Signature, name =paste0(names(meta.Signature), "_")) +metaSignature.score <- CancerCell@meta.data[, grep("metaProgram", colnames(CancerCell@meta.data))] +colnames(metaSignature.score) <- gsub("_\\d+", "", colnames(metaSignature.score)) +metaSignature.score$orig.ident <- CancerCell@meta.data$orig.ident +saveRDS(metaSignature.score, file = "metaSignature.score.rds") +saveRDS(CancerCell, file = "CancerCell.pro.rds") + +cell.group <- apply(metaSignature.score[,1:ncol(metaSignature.score)], 1, function(x){ + idx <- which(x == max(x)) + return(idx) +}) +cell.group <- gsub(1, "metaProgram1", cell.group) +cell.group <- gsub(2, "metaProgram2", cell.group) +CancerCell$program.group <- cell.group +pdf("program.group.pdf") +DimPlot(CancerCell, group.by = "program.group") +ggscatter(metaSignature.score, x = "metaProgram1", y = "metaProgram2", add = "reg.line", cor.coef = T) +ggscatter(metaSignature.score, x = "metaProgram1", y = "metaProgram2", color = "orig.ident", add = "reg.line", cor.coef = T) +dev.off() + +#############2.8 TCGA survival +source(file = "/home/longzhilin/Analysis_Code/code/analysis.diff.survival.TCGA.R") +DESeq2.normalized_counts <- readRDS("/data/active_data/lzl/RenalTumor-20200713/Data/TCGA/KIRC/Result/DESeq2.normalized_counts.rds") +DESeq2.normalized_counts <- log2(DESeq2.normalized_counts+1) +DESeq2.result <- readRDS("/data/active_data/lzl/RenalTumor-20200713/Data/TCGA/KIRC/Result/DESeq2.result.rds") +clin.data <- readRDS("/data/active_data/lzl/RenalTumor-20200713/Data/TCGA/KIRC/Result/clin.data.rds") +pdf("meta.signature.survival.pdf") +Metabolic.TCGA <- analysis.diff.survival.TCGA(interest.gene = meta.Signature$metaProgram1, diff.gene.pro = DESeq2.result, exp.data.process = DESeq2.normalized_counts, clin.data = clin.data, EnhancedVolcano.plot = F, main = "Meta program1", Box.plot = F, meta.signature = T, single.signature = F) +Immune.TCGA <- analysis.diff.survival.TCGA(interest.gene = meta.Signature$metaProgram2, diff.gene.pro = DESeq2.result, exp.data.process = DESeq2.normalized_counts, clin.data = clin.data, EnhancedVolcano.plot = F, main = "Meta program2", Box.plot = F, meta.signature = T, single.signature = F) +dev.off()