--- a +++ b/Fig7_Univariate_Coxph_survival.R @@ -0,0 +1,913 @@ +GIT_HOME="/research/users/ppolonen/git_home/ImmunogenomicLandscape-BloodCancers/" +source(file.path(GIT_HOME, "common_scripts/visualisation/plotting_functions.R")) +source(file.path(GIT_HOME, "common_scripts/statistics/functions_statistics.R")) +source(file.path(GIT_HOME, "common_scripts/statistics/statistics_wrappers.R")) +source(file.path(GIT_HOME, "common_scripts/featurematrix/compute.pairwise.R")) +source(file.path(GIT_HOME, "common_scripts/featurematrix/functions_generate_fm.R")) + +library(RColorBrewer) +library(survival) +library(data.table) +library(ggplot2) +library(ComplexHeatmap) +library(survMisc) +library(survminer) +library(plyr) + +setwd("/research/groups/sysgen/PROJECTS/HEMAP_IMMUNOLOGY/petri_work/HEMAP_IMMUNOLOGY/Published_data_figures") + +#******************************************** Hemap ********************************************* + +annot = get(load("Hemap_immunology_Annotations.Rdata")) +annot=annot[!is.na(annot$OS_Time),] + +# survival time and status +TIME=annot$OS_Time +STATUS=as.numeric(annot$OS_Status) +TIME2=annot$PFS_Time +STATUS2=as.numeric(annot$PFS_Status) + +#************************************** Process gene expression data ************************************ +profile=data.matrix(get(load("mixtureM_profile.Rdata"))) +profile[profile==-1] = 0 +profile2=profile[,colnames(profile)%in%annot$GSM.identifier..sample.] +data=t(get(load("data9544_with_gene_symbols.RData"))) +data=data[,colnames(data)%in%annot$GSM.identifier..sample.] + +# take only high expressed into account, for CGA score +profile2[data<5]=0 + +#******************************************************************************************************** + + +#********************************** Make necessary gene lists *************************************** + +# other lists from each analysis +cga = read.delim("t.antigen_df.txt", stringsAsFactors=F, header=T)[,1] +co.stim=fread("costim_ligands_final.txt", data.table = F) + +hlagenes=c("B2M", "HLA-A", "HLA-B", "HLA-C", "HLA-DMA", "HLA-DMB", "HLA-DPA1", "HLA-DPB1", "HLA-DRA", "HLA-DRB1", "CIITA") +immunoscore=c("HLAI", "HLAII", "CytolyticScore", "Freq.CGA") + +# for AML: +MDS=get(load("MDS_genesets.Rdata")) + +# significant per disease +microenv=get(load("Hemap_cytolytic_correlated_genes_TableS2_onlysignif.Rdata")) + +#**************************************************************************************************** +# data for survival +df=data.frame("time"=annot$OS_Time, "status"=annot$OS_Status, "HLAI"=annot$HLAIScore, "HLAII"=annot$HLAIIScore, "CytolyticScore"=annot$CytolyticScore, "Freq.CGA"=colSums(profile2[rownames(profile2)%in%cga,]), stringsAsFactors = F) + +#********************************* just AML ********************************* +filterv = annot$subclasses%in%"AML"&annot$CELLS_SORTED==0 +logicalv=get.logical(annovector = list(annot$GSE.identifier..experiment.), filterv = filterv) +names(logicalv)=unique(paste(names(logicalv), annot$subclasses[filterv])) + +filterv = annot$subclasses%in%"AML"&annot$CELLS_SORTED==0 +logicalv2=get.logical(annovector = list(annot$subclasses), filterv = filterv) +names(logicalv2)="Hemap_AML" +logicalv=append(logicalv2, logicalv) +logicalv=logicalv[lapply(logicalv, sum)>2] + +# HLA, cytolytic +DATA=data.frame("HLAI"=scale(annot$HLAIScore), "HLAII"=scale(annot$HLAIIScore), "CytolyticScore"=scale(annot$CytolyticScore)) +aml_res=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F)) +aml_res_multivar=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = F, pretty=F)) + +DATA_clin=data.frame("Age"=annot$AGE, "Gender"=annot$GENDER) +aml_res_clin=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA_clin,TIME,STATUS, univariate = T, pretty=F)) +aml_res_multivar=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA_clin,TIME,STATUS, univariate = F, pretty=F)) + +DATA_hla=data.frame(t(data[hlagenes,])) +aml_res_HLA=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA_hla,TIME,STATUS, univariate = T, pretty=F)) + +# costim +DATA=data.frame(scale(t(data[rownames(data)%in%c(co.stim[,1]),]))) +aml_res_costim=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F)) + +# MDS: +DATA=data.frame(scale(t(data[rownames(data)%in%c(MDS$MDS_signature_all_filt),]))) +aml_res_MDS=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F)) + +# microenvironment +ME=microenv[microenv$disease%in%"AML",] +DATA=data.frame(scale(t(data[rownames(data)%in%ME$gene,]))) +aml_res_microenvironment=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F)) +aml_res_microenvironment[aml_res_microenvironment$Adj.P<0.1,] + +# subtype: +load("Hemap_AML_subtypes.Rdata") +coordinates.subtype=coordinates.subtype[!is.na(coordinates.subtype$subtype),] + +samples=lapply(unique(coordinates.subtype$subtype), function(type)coordinates.subtype$ID[coordinates.subtype$subtype%in%type]) +DATA=data.frame(do.call(cbind, lapply(samples, function(id)annot$GSM.identifier..sample.%in%id))*1) +colnames(DATA)=gsub("-", ".", unique(coordinates.subtype$subtype)) + +aml_res_subtype=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F)) + +# make table S6, adjusted p-value set here to correct for number of comparisons in total: +tableS7=rbind(aml_res, aml_res_subtype, aml_res_costim, aml_res_MDS, aml_res_microenvironment, aml_res_clin) +tableS7=tableS7[tableS7$Name%in%c("Hemap_AML"),] +tableS7$Adj.P=p.adjust(tableS7$P, method="BH") + +tableS7[,2]=prettyNum(tableS7[,2]) +tableS7[,3]=prettyNum(tableS7[,3]) +tableS7[,4]=prettyNum(tableS7[,4]) +tableS7[,5]=prettyNum(tableS7[,5]) +tableS7[,6]=prettyNum(tableS7[,6]) +tableS7[,8]=prettyNum(tableS7[,8]) + +# annotate these genes, needed later: +tableS7[,1]=gsub("\\.", "-", tableS7[,1]) + +genelist_signif=data.frame(tableS7[,1], type="Clinical", stringsAsFactors = F) +genelist_signif$type[genelist_signif[,1]%in%c("HLAI", "HLAII", "CytolyticScore", "Freq.CGA")]="ImmunoScores" +genelist_signif$type[genelist_signif[,1]%in%unique(coordinates.subtype$subtype)]="Subtype" +genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"Stromal/cancer gene (Rho > 0)",1]]="Stromal/cancer gene (Rho > 0)" +genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"Stromal/cancer gene (Rho < 0)",1]]="Stromal/cancer gene (Rho < 0)" +genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"CTL/NK gene",1]]="CTL/NK gene" +genelist_signif$type[genelist_signif[,1]%in%aml_res_MDS[,1]]="MDS-signature gene" +genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Inhibitory", co.stim$`Immune checkpoint function`),1]]="Inhibitory ligand" +genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Stimulatory", co.stim$`Immune checkpoint function`),1]]="Stimulatory ligand" +tableS7$Type=genelist_signif$type +tableS7=tableS7[order(tableS7$Type),] + +data.table::fwrite(tableS7[,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_Hemap_AML.tsv", sep="\t") +data.table::fwrite(tableS7[tableS7$Adj.P<0.2,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_Hemap_AML_signif.tsv", sep="\t") + +# save cohort and survival +gexp=data.frame(scale(t(data))) +immunoscore=data.frame("HLAI"=scale(annot$HLAIScore), "HLAII"=scale(annot$HLAIIScore), "CytolyticScore"=scale(annot$CytolyticScore)) + +samples=lapply(unique(coordinates.subtype$subtype), function(type)coordinates.subtype$ID[coordinates.subtype$subtype%in%type]) +subtypes=do.call(cbind, lapply(samples, function(id)annot$GSM.identifier..sample.%in%id))*1 +colnames(subtypes)=gsub("-", ".", unique(coordinates.subtype$subtype)) +samp=data.frame(subtypes, "Age"=annot$AGE, "Gender"=annot$GENDER) + +save(list = c("gexp","immunoscore", "TIME", "STATUS", "logicalv", "samp"), file="Hemap_AML_survival_data.Rdata") + +#************************************* just MM ************************************* +filterv = annot$subclasses%in%"Cancer_Myeloma"&!is.na(annot$MM_ISS) +logicalv=get.logical(annovector = list(annot$GSE.identifier..experiment.), filterv = filterv) +names(logicalv)=unique(paste(names(logicalv), annot$subclasses[filterv])) + +# just MM +filterv = annot$subclasses%in%"Cancer_Myeloma"&!is.na(annot$MM_ISS) +logicalv2=get.logical(annovector = list(annot$subclasses), filterv = filterv) +names(logicalv2)="MM_all" +logicalv=append(logicalv2, logicalv) +names(logicalv)=c("Hemap_MM", "GSE19784_Hemap_MM", "GSE16716,GSE24080_Hemap_MM") + +data.test=data.frame("time"=TIME[logicalv[[1]]], "status"=STATUS[logicalv[[1]]]) + +ggsurvplot(survfit(Surv(time, status) ~ 1, data = data.test), + xlab = "months", + ylab = "Overall survival probability") + + +DATA=data.frame("HLAI"=scale(annot$HLAIScore), "HLAII"=scale(annot$HLAIIScore), "ISS3"=(annot$MM_ISS==3)*1, "ISS1"=(annot$MM_ISS==1)*1, "Freq.CGA"=as.numeric(df$Freq.CGA),"Age"=annot$AGE, "Gender"=annot$GENDER, check.names = F) + +mm_res=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F)) +# mm_res_multivar=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA[,3:5],TIME,STATUS, univariate = F, pretty=F)) + +# costim +DATA=data.frame(scale(t(data[rownames(data)%in%c(co.stim[,1]),]))) +mm_res_costim=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F)) + +DATA=data.frame(scale(t(data[rownames(data)%in%cga,]))) +mm_res_antigen=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F)) + +# subtype: +load("Hemap_MM_subtypes.Rdata") + +samples=lapply(unique(coordinates.subtype$subtype), function(type)coordinates.subtype$ID[coordinates.subtype$subtype%in%type]) +DATA=data.frame(do.call(cbind, lapply(samples, function(id)annot$GSM.identifier..sample.%in%id))*1) +colnames(DATA)=gsub("-", "_", unique(coordinates.subtype$subtype)) + +mm_res_subtype=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F)) + +# make table S6, adjusted p-value set here to correct for number of comparisons in total: +tableS7=rbind(mm_res, mm_res_costim, mm_res_antigen, mm_res_subtype) +tableS7=tableS7[tableS7$Name%in%c("GSE16716,GSE24080_Hemap_MM"),] +tableS7$Adj.P=p.adjust(tableS7$P, method="BH") + + +tableS7[,2]=prettyNum(tableS7[,2]) +tableS7[,3]=prettyNum(tableS7[,3]) +tableS7[,4]=prettyNum(tableS7[,4]) +tableS7[,5]=prettyNum(tableS7[,5]) +tableS7[,6]=prettyNum(tableS7[,6]) +tableS7[,8]=prettyNum(tableS7[,8]) + +# annotate these genes, needed later: +genelist_signif=data.frame(tableS7[,1], type="Clinical", stringsAsFactors = F) +genelist_signif$type[genelist_signif[,1]%in%c("HLAI", "HLAII", "CytolyticScore", "Freq.CGA")]="ImmunoScores" +genelist_signif$type[genelist_signif[,1]%in%gsub("-", "_", unique(coordinates.subtype$subtype))]="Subtype" +genelist_signif$type[genelist_signif[,1]%in%cga]="CGA" +genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Inhibitory", co.stim$`Immune checkpoint function`),1]]="Inhibitory ligand" +genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Stimulatory", co.stim$`Immune checkpoint function`),1]]="Stimulatory ligand" +tableS7$Type=genelist_signif$type +tableS7=tableS7[order(tableS7$Type),] +tableS7[,1]=gsub("\\.", "-", tableS7[,1]) + +data.table::fwrite(tableS7[,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_Hemap_MM_GSE24080.tsv", sep="\t") +data.table::fwrite(tableS7[tableS7$Adj.P<0.2,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_Hemap_MM_GSE24080_signif.tsv", sep="\t") + + +# make table S6, adjusted p-value set here to correct for number of comparisons in total: +tableS7=rbind(mm_res, mm_res_costim, mm_res_antigen) +tableS7=tableS7[tableS7$Name%in%c("GSE19784_Hemap_MM"),] +tableS7$Adj.P=p.adjust(tableS7$P, method="BH") + +tableS7[,2]=prettyNum(tableS7[,2]) +tableS7[,3]=prettyNum(tableS7[,3]) +tableS7[,4]=prettyNum(tableS7[,4]) +tableS7[,5]=prettyNum(tableS7[,5]) +tableS7[,6]=prettyNum(tableS7[,6]) +tableS7[,8]=prettyNum(tableS7[,8]) + +# annotate these genes, needed later: +genelist_signif=data.frame(tableS7[,1], type="Clinical", stringsAsFactors = F) +genelist_signif$type[genelist_signif[,1]%in%c("HLAI", "HLAII", "CytolyticScore", "Freq.CGA")]="ImmunoScores" +genelist_signif$type[genelist_signif[,1]%in%gsub("-", "_", unique(coordinates.subtype$subtype))]="Subtype" +genelist_signif$type[genelist_signif[,1]%in%cga]="CGA" +genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Inhibitory", co.stim$`Immune checkpoint function`),1]]="Inhibitory ligand" +genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Stimulatory", co.stim$`Immune checkpoint function`),1]]="Stimulatory ligand" +tableS7$Type=genelist_signif$type +tableS7=tableS7[order(tableS7$Type),] +tableS7[,1]=gsub("\\.", "-", tableS7[,1]) + +data.table::fwrite(tableS7[,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_Hemap_MM_GSE19784.tsv", sep="\t") +data.table::fwrite(tableS7[tableS7$Adj.P<0.2,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_Hemap_MM_GSE19784_signif.tsv", sep="\t") + +# save cohort and survival +gexp=data.frame(scale(t(data))) +immunoscore=data.frame("HLAI"=scale(annot$HLAIScore), "HLAII"=scale(annot$HLAIIScore), "Freq.CGA"=as.numeric(df$Freq.CGA), check.names = F) + +samples=lapply(unique(coordinates.subtype$subtype), function(type)coordinates.subtype$ID[coordinates.subtype$subtype%in%type]) +subtype=do.call(cbind, lapply(samples, function(id)annot$GSM.identifier..sample.%in%id))*1 +colnames(subtype)=c(gsub("-", ".", unique(coordinates.subtype$subtype)), colnames(subtype)) +samp=data.frame(subtype, "Age"=annot$AGE, "Gender"=annot$GENDER, "ISS3"=(annot$MM_ISS==3)*1, "ISS1"=(annot$MM_ISS==1)*1) + +save(list = c("gexp","immunoscore", "TIME", "STATUS", "logicalv", "samp"), file="Hemap_MM_survival_data.Rdata") + +#***************************** just DLBCL ************************************ +# remove genes not in chapyu dataset: +gexp=get(load("/research/groups/sysgen/PROJECTS/HEMAP_IMMUNOLOGY/data/GSE98588/GSE98588_symbol_rma_normalized.Rdata")) + +ME=microenv[microenv$disease%in%"DLBCL"µenv$category=="Stromal/cancer gene (Rho > 0)"µenv$Rho>0.4,] +microenv_dlbcl=ME[,1] +microenv_dlbcl=microenv_dlbcl[microenv_dlbcl%in%rownames(gexp)] + +filterv = annot$subclasses%in%"BCL_DLBCL"&!is.na(annot$dlbcl_ipi) +logicalv=get.logical(annovector = list(annot$GSE.identifier..experiment.), filterv = filterv) +names(logicalv)=unique(paste(names(logicalv), annot$subclasses[filterv])) + +filterv = annot$subclasses%in%"BCL_DLBCL" +logicalv2=get.logical(annovector = list(annot$subclasses), filterv = filterv) +names(logicalv2)="BCL_DLBCL_all" + +logicalv=append(logicalv2, logicalv) +logicalv=logicalv[lapply(logicalv, sum)>2] + +RCHOP=list(annot$Chemotherapy_RCHOP==1&!is.na(STATUS)&!TIME==0) +CHOP=list(annot$Chemotherapy_CHOP==1&!is.na(STATUS)&!TIME==0) +names(RCHOP)="Hemap_DLBCL_RCHOP" +names(CHOP)="Hemap_DLBCL_CHOP" + +logicalv <- append(logicalv, append(RCHOP, CHOP)) + +# HLA, cytolytic +DATA=data.frame("HLAI"=scale(annot$HLAIScore), "HLAII"=scale(annot$HLAIIScore), "CytolyticScore"=scale(annot$CytolyticScore), "IPI_0to1"=(annot$dlbcl_ipi%in%c(0,1))*1, "IPI_4to5"=(annot$dlbcl_ipi%in%c(4,5))*1, "Freq.CGA"=as.numeric(df$Freq.CGA), "ABC"=(grepl("ABC", annot$tbLY))*1, "GCB"=(grepl("GCB", annot$tbLY))*1,check.names = F) + +dlbcl_res=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F)) +# dlbcl_res_multivar=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA[,4:6],TIME,STATUS, univariate = F, pretty=F)) + +# costim +DATA=data.frame(scale(t(data[rownames(data)%in%c(co.stim[,1]),]))) +dlbcl_res_costim=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F)) + +# microenv +DATA=data.frame(scale(t(data[rownames(data)%in%c(microenv_dlbcl),]))) +dlbcl_res_microenv=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F)) + +DATA=data.frame(scale(t(data[rownames(data)%in%cga,]))) +dlbcl_res_antigen=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F)) + +# make table S6, adjusted p-value set here to correct for number of comparisons in total: +tableS7=rbind(dlbcl_res, dlbcl_res_costim, dlbcl_res_microenv, dlbcl_res_antigen) +tableS7=tableS7[tableS7$Name%in%c("Hemap_DLBCL_RCHOP"),] +tableS7$Adj.P=p.adjust(tableS7$P, method="BH") + +tableS7[,2]=prettyNum(tableS7[,2]) +tableS7[,3]=prettyNum(tableS7[,3]) +tableS7[,4]=prettyNum(tableS7[,4]) +tableS7[,5]=prettyNum(tableS7[,5]) +tableS7[,6]=prettyNum(tableS7[,6]) +tableS7[,8]=prettyNum(tableS7[,8]) + +# annotate these genes, needed later: +genelist_signif=data.frame(tableS7[,1], type="Clinical", stringsAsFactors = F) +genelist_signif$type[genelist_signif[,1]%in%c("HLAI", "HLAII", "CytolyticScore", "Freq.CGA")]="ImmunoScores" +genelist_signif$type[genelist_signif[,1]%in%cga]="CGA" +genelist_signif$type[tableS7$Feature%in%c("ABC", "GCB")]="Subtype" +genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"Stromal/cancer gene (Rho > 0)",1]]="Stromal/cancer gene (Rho > 0)" +genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"Stromal/cancer gene (Rho < 0)",1]]="Stromal/cancer gene (Rho < 0)" +genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"CTL/NK gene",1]]="CTL/NK gene" +genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Inhibitory", co.stim$`Immune checkpoint function`),1]]="Inhibitory ligand" +genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Stimulatory", co.stim$`Immune checkpoint function`),1]]="Stimulatory ligand" + +tableS7$Type=genelist_signif$type +tableS7=tableS7[order(tableS7$Type),] +tableS7[,1]=gsub("\\.", "-", tableS7[,1]) + +data.table::fwrite(tableS7[,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_Hemap_DLBCL_RCHOP.tsv", sep="\t") +data.table::fwrite(tableS7[tableS7$Adj.P<0.2,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_Hemap_DLBCL_RCHOP_signif.tsv", sep="\t") + + +# make table S6, adjusted p-value set here to correct for number of comparisons in total: +tableS7=rbind(dlbcl_res, dlbcl_res_costim, dlbcl_res_microenv, dlbcl_res_antigen) +tableS7=tableS7[tableS7$Name%in%c("Hemap_DLBCL_CHOP"),] +tableS7$Adj.P=p.adjust(tableS7$P, method="BH") + +tableS7[,2]=prettyNum(tableS7[,2]) +tableS7[,3]=prettyNum(tableS7[,3]) +tableS7[,4]=prettyNum(tableS7[,4]) +tableS7[,5]=prettyNum(tableS7[,5]) +tableS7[,6]=prettyNum(tableS7[,6]) +tableS7[,8]=prettyNum(tableS7[,8]) + +# annotate these genes, needed later: +genelist_signif=data.frame(tableS7[,1], type="Clinical", stringsAsFactors = F) +genelist_signif$type[genelist_signif[,1]%in%c("HLAI", "HLAII", "CytolyticScore", "Freq.CGA")]="ImmunoScores" +genelist_signif$type[genelist_signif[,1]%in%cga]="CGA" +genelist_signif$type[tableS7$Feature%in%c("ABC", "GCB")]="Subtype" +genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"Stromal/cancer gene (Rho > 0)",1]]="Stromal/cancer gene (Rho > 0)" +genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"Stromal/cancer gene (Rho < 0)",1]]="Stromal/cancer gene (Rho < 0)" +genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"CTL/NK gene",1]]="CTL/NK gene" +genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Inhibitory", co.stim$`Immune checkpoint function`),1]]="Inhibitory ligand" +genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Stimulatory", co.stim$`Immune checkpoint function`),1]]="Stimulatory ligand" + +tableS7$Type=genelist_signif$type +tableS7=tableS7[order(tableS7$Type),] +tableS7[,1]=gsub("\\.", "-", tableS7[,1]) + +data.table::fwrite(tableS7[,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_Hemap_DLBCL_CHOP.tsv", sep="\t") +data.table::fwrite(tableS7[tableS7$Adj.P<0.2,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_Hemap_DLBCL_CHOP_signif.tsv", sep="\t") + +# save cohort and survival +gexp=data.frame(scale(t(data))) +immunoscore=data.frame("HLAI"=scale(annot$HLAIScore), "HLAII"=scale(annot$HLAIIScore), "Freq.CGA"=as.numeric(df$Freq.CGA), check.names = F) + +samp=data.frame("Age"=annot$AGE, "Gender"=annot$GENDER, "IPI_0to1"=(annot$dlbcl_ipi%in%c(0,1))*1, "IPI_4to5"=(annot$dlbcl_ipi%in%c(4,5))*1,"ABC"=(grepl("ABC", annot$tbLY))*1, "GCB"=(grepl("GCB", annot$tbLY))*1) + +save(list = c("gexp","immunoscore","samp", "TIME", "STATUS", "logicalv"), file="Hemap_DLBCL_survival_data.Rdata") + + +#******************************************** TCGA AML ***************************************************** +fm_org=get(load("TCGA_AML_FM_DUFVA.Rdata")) +fm=fm_org[,!is.na(fm_org["N:SAMP:CytolyticScore",])] + +risks=c("N:CLIN:Age:::::", + "C:CLIN:acute_myeloid_leukemia_calgb_cytogenetics_risk_category:::::" , + "C:CLIN:FISH_test_component:::::", + "B:GNAB:NPM1:chr5:170814708:170837888:+:y_n_somatic", + "B:GNAB:FLT3:chr13:28577411:28674729:-:y_n_somatic", + "B:GNAB:CEBPA:chr19:33790840:33793430:-:y_n_somatic", + "B:GNAB:TP53:chr17:7565097:7590863:-:y_n_somatic") + +df=t(fm[rownames(fm)%in%risks,]) +colnames(df)=do.call(rbind, strsplit(colnames(df), ":"))[,3] + +data=data.matrix(fm[grepl("GEXP", rownames(fm)),]) +rownames(data)=make.unique(do.call(rbind, strsplit(rownames(data), ":"))[,3]) + + +OS=as.numeric(fm["N:CLIN:OS.months..3.31.12:::::",]) +TIME=OS +STATUS=as.numeric(fm["C:CLIN:vital_status_TCGA_paper:::::",]=="DECEASED") +PFS=as.numeric(fm["N:CLIN:EFS.months....4.30.13:::::",]) + +DATA=data.frame("HLAI"=scale(as.numeric(fm["N:SAMP:HLAIScore",])), "HLAII"=scale(as.numeric(fm["N:SAMP:HLAIIScore",])), "CytolyticScore"=scale(as.numeric(fm["N:SAMP:CytolyticScore",]))) + +logicalv=list("TCGA_AML"=rep(T, dim(DATA)[1])) + +TCGA_AML_res=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F)) +# TCGA_AML_multivar=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = F, pretty=F)) + +# clinical +DATA_clin=data.frame("Age"=scale(as.numeric(fm["N:CLIN:Age:::::",])), "Blast.percentage"=scale(as.numeric(fm["N:CLIN:X.BM.Blast:::::",])), "Gender"=as.character(fm["C:CLIN:Sex:::::",])) +TCGA_AML_clin=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA_clin,TIME,STATUS, univariate = T, pretty=F)) + +# costim +DATA=data.frame(scale(t(data[rownames(data)%in%c(co.stim[,1]),]))) +TCGA_AML_costim=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F)) + +DATA_hla=data.frame(scale(t(data[rownames(data)%in%hlagenes,]))) +TCGA_AML_HLA=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA_hla,TIME,STATUS, univariate = T, pretty=F)) + +# MDS +DATA=data.frame(scale(t(data[rownames(data)%in%c(MDS$MDS_signature_all_filt),]))) +TCGA_AML_MDS=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F)) + +# mikroenvironment +ME=microenv[microenv$disease%in%"AML",] +DATA=data.frame(scale(t(data[rownames(data)%in%ME$gene,]))) +TCGA_res_microenvironment=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F)) +TCGA_res_microenvironment[TCGA_res_microenvironment$Adj.P<0.1,] + +# subtype: +load("TCGA_AML_subtypes.Rdata") + +DATA_subtypes=data.frame(do.call(cbind, get.logical(list(coordinates.subtype$subtype)))*1) + +TCGA_aml_res_subtype=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA_subtypes,TIME,STATUS, univariate = T, pretty=F)) + +# make table S6, adjusted p-value set here to correct for number of comparisons in total: +tableS7=rbind(TCGA_AML_res, TCGA_AML_costim, TCGA_AML_MDS, TCGA_res_microenvironment, TCGA_aml_res_subtype, TCGA_AML_clin) +tableS7$Adj.P=p.adjust(tableS7$P, method="BH") + +tableS7[,2]=prettyNum(tableS7[,2]) +tableS7[,3]=prettyNum(tableS7[,3]) +tableS7[,4]=prettyNum(tableS7[,4]) +tableS7[,5]=prettyNum(tableS7[,5]) +tableS7[,6]=prettyNum(tableS7[,6]) +tableS7[,8]=prettyNum(tableS7[,8]) + +# annotate these genes, needed later: +tableS7[,1]=gsub("\\.", "-", tableS7[,1]) + +genelist_signif=data.frame(tableS7[,1], type="Clinical", stringsAsFactors = F) +genelist_signif$type[genelist_signif[,1]%in%c("HLAI", "HLAII", "CytolyticScore", "Freq.CGA")]="ImmunoScores" +genelist_signif$type[genelist_signif[,1]%in%unique(coordinates.subtype$subtype)]="Subtype" +genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"Stromal/cancer gene (Rho > 0)",1]]="Stromal/cancer gene (Rho > 0)" +genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"Stromal/cancer gene (Rho < 0)",1]]="Stromal/cancer gene (Rho < 0)" +genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"CTL/NK gene",1]]="CTL/NK gene" +genelist_signif$type[genelist_signif[,1]%in%aml_res_MDS[,1]]="MDS-signature gene" +genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Inhibitory", co.stim$`Immune checkpoint function`),1]]="Inhibitory ligand" +genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Stimulatory", co.stim$`Immune checkpoint function`),1]]="Stimulatory ligand" + +tableS7$Type=genelist_signif$type +tableS7=tableS7[order(tableS7$Type),] + +data.table::fwrite(tableS7[,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_TCGA_AML.tsv", sep="\t") +data.table::fwrite(tableS7[tableS7$Adj.P<0.2,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_TCGA_AML_signif.tsv", sep="\t") + +# save cohort and survival +gexp=data.frame(scale(t(data))) +immunoscore=data.frame("HLAI"=scale(as.numeric(fm["N:SAMP:HLAIScore",])), "HLAII"=scale(as.numeric(fm["N:SAMP:HLAIIScore",])), "CytolyticScore"=scale(as.numeric(fm["N:SAMP:CytolyticScore",]))) + +samp=cbind(DATA_clin, DATA_subtypes) + +save(list = c("gexp","immunoscore", "samp", "TIME", "STATUS", "logicalv"), file="TCGA_AML_survival_data.Rdata") + +#********************************************************* Compass MM ********************************************************* +fm=get(load("MM_COMPASS_FM.Rdata")) +annot=get(load("MM_COMPASS_ANNOT.Rdata")) + +annot=annot[match(colnames(fm), rownames(annot)),] + +data=fm[grepl("N:GEXP:", rownames(fm)),] +rownames(data)=gsub("N:GEXP:", "", rownames(data)) + +data.mut=fm[grepl("B:GNAB:", rownames(fm)),] +rownames(data.mut)=gsub("B:GNAB:", "", rownames(data.mut)) +data.mut.filt=data.mut[!rowSums(data.mut, na.rm = T)<10,] + +TIME=as.numeric(fm["N:CLIN:OS",]) +STATUS=as.numeric(fm["B:CLIN:STATUS",]) + +STATUS[TIME>1825&!is.na(STATUS)&STATUS==1]=0 # change to 4year survival, 5 year sharp drop +TIME[TIME>1825]=1825 + +# compute antigen scores +t.df = read.delim("t.antigen_df.txt", stringsAsFactors=F, header=T) +t.df=t.df[order(t.df[,3]),] +genelist=t.df[,1] + +data.test=data.frame("time"=TIME[logicalv[[1]]]*0.0328767, "status"=STATUS[logicalv[[1]]]) + +ggsurvplot(survfit(Surv(time, status) ~ 1, data = data.test), + xlab = "months", + ylab = "Overall survival probability") + +expressed_testis_num=as.numeric(fm["N:SAMP:nCGA",]) + +l.regulon.gene=regulon.feats(fm, co.stim[,1]) + +hlagenes=c("B2M", "HLA-A", "HLA-B", "HLA-C", "HLA-DMA", "HLA-DMB", "HLA-DPA1", "HLA-DPB1", "HLA-DRA", "HLA-DRB1", "CIITA") + +DATAcostim=scale(data.frame(t(gexp[rownames(gexp)%in%co.stim[,1],]))) +DATAhla=scale(data.frame(t(gexp[rownames(gexp)%in%hlagenes,]))) +DATAcga=scale(data.frame(t(gexp[rownames(gexp)%in%t.df[,1],]))) + +DATA=data.frame("HLAI"=scale(as.numeric(fm["N:SAMP:HLAIScore",])), "HLAII"=scale(as.numeric(fm["N:SAMP:HLAIIScore",])), "ISS1"=as.numeric(fm["B:CLIN:R_ISS_1",]),"ISS2"=as.numeric(fm["B:CLIN:R_ISS_2",]),"ISS3"=as.numeric(fm["B:CLIN:R_ISS_3",]), "Freq.CGA"=as.numeric(expressed_testis_num), stringsAsFactors = F) + +logicalv=list(!is.na(expressed_testis_num)) + +names(logicalv)="CoMMPass" + +# plot overall survival +r=lapply(seq(logicalv), function(i){ + data.test=data.frame("time"=TIME[logicalv[[i]]], "status"=STATUS[logicalv[[i]]]) + + ggsurvplot(survfit(Surv(time, status) ~ 1, data = data.test), + xlab = "months", + ylab = "Overall survival probability",title=(names(logicalv)[i]) + ) +}) + +names(r)=names(logicalv) + +pdf("CoMMpass_MM_cohorts.pdf", width =5, height = ceiling(length(r)/2)*2.75) +plots.together=arrange_ggsurvplots(r, print = TRUE, ncol = 2, nrow = ceiling(length(r)/2)) +dev.off() + +commpass_res=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F)) +commpass_res_multivar=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = F, pretty=F)) + +# costim, no mut +DATA=data.frame(scale(t(data[rownames(data)%in%c(co.stim[,1]),]))) +commpass_costim=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F)) + +# mutations +mut=t(data.mut.filt[rownames(data.mut.filt)%in%c(t.df[,1]),]) +colnames(mut)=paste0("MUT:", colnames(mut)) +DATA=data.frame(scale(t(data[rownames(data)%in%c(t.df[,1]),])), mut) + +commpass_antig=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F)) + +# subtype: +load("CoMMpass_MM_subtypes.Rdata") +coordinates.subtype=coordinates.subtype[match(colnames(fm), coordinates.subtype$ID),] + +coordinates.subtype$subtype[coordinates.subtype$cluster=="CGA_Prolif"&!is.na(coordinates.subtype$cluster)]="CGA_Prolif" +DATA_subtypes=data.frame(do.call(cbind, get.logical(list(coordinates.subtype$subtype)))*1) +commpass_subtype=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA_subtypes,TIME,STATUS, univariate = T, pretty=F)) + +# make table S6, adjusted p-value set here to correct for number of comparisons in total: +tableS7=rbind(commpass_res, commpass_costim, commpass_antig, commpass_subtype) +tableS7$Adj.P=p.adjust(tableS7$P, method="BH") + +# annotate these genes, needed later: +tableS7[,2]=prettyNum(tableS7[,2]) +tableS7[,3]=prettyNum(tableS7[,3]) +tableS7[,4]=prettyNum(tableS7[,4]) +tableS7[,5]=prettyNum(tableS7[,5]) +tableS7[,6]=prettyNum(tableS7[,6]) +tableS7[,8]=prettyNum(tableS7[,8]) + +# annotate these genes, needed later: +genelist_signif=data.frame(tableS7[,1], type="Clinical", stringsAsFactors = F) +genelist_signif$type[genelist_signif[,1]%in%c("HLAI", "HLAII", "CytolyticScore", "Freq.CGA")]="ImmunoScores" +genelist_signif$type[genelist_signif[,1]%in%cga]="CGA" +genelist_signif$type[genelist_signif[,1]%in%commpass_subtype$Feature]="Subtype" +genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Inhibitory", co.stim$`Immune checkpoint function`),1]]="Inhibitory ligand" +genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Stimulatory", co.stim$`Immune checkpoint function`),1]]="Stimulatory ligand" + +tableS7$Type=genelist_signif$type +tableS7=tableS7[order(tableS7$Type),] +tableS7[,1]=gsub("\\.", "-", tableS7[,1]) + +data.table::fwrite(tableS7[,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_CoMMpass.tsv", sep="\t") +data.table::fwrite(tableS7[tableS7$Adj.P<0.2,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_CoMMpass_signif.tsv", sep="\t") + +# save cohort and survival +gexp=data.frame(scale(t(data))) +immunoscore=data.frame("HLAI"=scale(as.numeric(fm["N:SAMP:HLAIScore",])), "HLAII"=scale(as.numeric(fm["N:SAMP:HLAIIScore",])), "Freq.CGA"=as.numeric(expressed_testis_num), stringsAsFactors = F) + +samp=cbind(data.frame("ISS1"=as.numeric(fm["B:CLIN:R_ISS_1",]),"ISS2"=as.numeric(fm["B:CLIN:R_ISS_2",]),"ISS3"=as.numeric(fm["B:CLIN:R_ISS_3",]), stringsAsFactors = F), DATA_subtypes) + +save(list = c("gexp","immunoscore", "samp", "TIME", "STATUS", "logicalv"), file="CoMMpass_survival_data.Rdata") + +#********************************************************* Chapyu DLBCL ********************************************************* + +fm=get(load("GSE98588_fm.Rdata")) +annot=get(load("GSE98588_annot.Rdata")) + +TIME=as.numeric(fm["N:CLIN:OS",]) +PFS=as.numeric(fm["N:CLIN:PFS",]) +STATUS=as.numeric(fm["B:CLIN:OS_STAT",]) +STATUS2=as.numeric(fm["B:CLIN:PFS_STAT",]) + +# compute antigen scores +genelist=t.df[,1] + +data=get(load("GSE98588_symbol_rma_normalized.Rdata")) +colnames(data)=gsub("_DLBCL", "", colnames(data)) +gexp=data + +load("GSE98588_DLBCL_mixtureM_profile.Rdata") +profile[profile==-1] = 0 +profile[data.matrix(data)<5]=0 + +expressed_testis_num=as.numeric(colSums(profile[rownames(profile)%in%genelist,])) + +cnv_annot=fread("41591_2018_16_MOESM8_ESM_CNV_ANNOT.txt", data.table=F) + +l.regulon.gene=regulon.feats(fm, c(co.stim[,1], hlagenes), cnv_annot) + +ME=microenv[microenv$disease%in%"DLBCL"µenv$category=="Stromal/cancer gene (Rho > 0)"µenv$Rho>0.4,] + +# all costim-cytolytic-CGA correlated mutations: +costim_feats=data.matrix(fm[rownames(fm)%in%unlist(l.regulon.gene),]) +rownames(costim_feats)=sapply(rownames(costim_feats), function(n){ + if(!grepl("CNVR", n))return(n) + a=names(l.regulon.gene)[sapply(l.regulon.gene, function(a)any(a%in%n))] + if(length(a))paste0(n,"@", paste(a, collapse=",")) +}) + +DATAmut=data.frame(t(costim_feats[grepl("GNAB|CNVR", rownames(costim_feats)),])) + +DATA=data.frame("HLAI"=scale(as.numeric(fm["N:SAMP:HLAIScore",])), "HLAII"=scale(as.numeric(fm["N:SAMP:HLAIIScore",])), "CytolyticScore"=scale(as.numeric(fm["N:SAMP:CytolyticScore",])), "Freq.CGA"=as.numeric(expressed_testis_num), stringsAsFactors = F) + +logicalv=list(rep(T, dim(DATA)[1])) +names(logicalv)="GSE98588_DLBCL" + +GSE98588_DLBCL_res=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F)) +# GSE98588_DLBCL_multivar=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA[,4:6],TIME,STATUS, univariate = F, pretty=F)) + +# Clinical +DATA_clin=data.frame("IPI_0to1"=annot$IPI%in%c(0,1)*1,"IPI_4to5"=annot$IPI%in%c(4,5)*1, "ABC"=(annot$COO_byGEP=="ABC")*1 ,"GCB"=(annot$COO_byGEP=="GCB")*1, stringsAsFactors = F) +GSE98588_DLBCL_clin=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA_clin,TIME,STATUS, univariate = T, pretty=F)) + +# immuno-editing: +GSE98588_DLBCL_immunoediting=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATAmut,TIME,STATUS, univariate = T, pretty=F)) +GSE98588_DLBCL_immunoediting$Feature=gsub("B.GNAB.", "MUT:", GSE98588_DLBCL_immunoediting$Feature) +GSE98588_DLBCL_immunoediting$Feature=gsub("HLA.", "HLA-", GSE98588_DLBCL_immunoediting$Feature) +GSE98588_DLBCL_immunoediting$Feature=gsub("B.CNVR.", "", GSE98588_DLBCL_immunoediting$Feature) + +# costim +DATA=data.frame(scale(t(data[rownames(data)%in%c(co.stim[,1]),])), DATAmut, "costim.mut"=(rowSums(DATAmut))) +GSE98588_DLBCL_costim=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F)) + +# monocyte +DATA=data.frame(scale(t(data[rownames(data)%in%c(ME[,1]),]))) +GSE98588_DLBCL_microenvironment=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F)) + +DATA=data.frame(scale(t(data[rownames(data)%in%c(t.df[,1]),]))) +GSE98588_DLBCL_antigen=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F)) + +# make table S6, adjusted p-value set here to correct for number of comparisons in total: +tableS7=rbind(GSE98588_DLBCL_res,GSE98588_DLBCL_clin, GSE98588_DLBCL_costim, GSE98588_DLBCL_microenvironment, GSE98588_DLBCL_antigen, GSE98588_DLBCL_immunoediting) +tableS7$Adj.P=p.adjust(tableS7$P, method="BH") + + +tableS7[,2]=prettyNum(tableS7[,2]) +tableS7[,3]=prettyNum(tableS7[,3]) +tableS7[,4]=prettyNum(tableS7[,4]) +tableS7[,5]=prettyNum(tableS7[,5]) +tableS7[,6]=prettyNum(tableS7[,6]) +tableS7[,8]=prettyNum(tableS7[,8]) + +# annotate these genes, needed later: +genelist_signif=data.frame(tableS7[,1], type="Clinical", stringsAsFactors = F) +genelist_signif$type[genelist_signif[,1]%in%c("HLAI", "HLAII", "CytolyticScore", "Freq.CGA")]="ImmunoScores" +genelist_signif$type[genelist_signif[,1]%in%colnames(DATAmut)]="Immune Editing mutation" +genelist_signif$type[genelist_signif[,1]%in%cga]="CGA" +genelist_signif$type[tableS7$Feature%in%c("ABC", "GCB")]="Subtype" +genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"Stromal/cancer gene (Rho > 0)",1]]="Stromal/cancer gene (Rho > 0)" +genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"Stromal/cancer gene (Rho < 0)",1]]="Stromal/cancer gene (Rho < 0)" +genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"CTL/NK gene",1]]="CTL/NK gene" +genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Inhibitory", co.stim$`Immune checkpoint function`),1]]="Inhibitory ligand" +genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Stimulatory", co.stim$`Immune checkpoint function`),1]]="Stimulatory ligand" + +tableS7$Type=genelist_signif$type +tableS7=tableS7[order(tableS7$Type),] +tableS7[,1]=gsub("\\.", "-", tableS7[,1]) + +data.table::fwrite(tableS7[,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_GSE98588_DLBCL.tsv", sep="\t") +data.table::fwrite(tableS7[tableS7$Adj.P<0.2,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_GSE98588_DLBCL_signif.tsv", sep="\t") + +# save cohort and survival +gexp=data.frame(scale(t(data))) +immunoscore=data.frame("HLAI"=scale(as.numeric(fm["N:SAMP:HLAIScore",])), "HLAII"=scale(as.numeric(fm["N:SAMP:HLAIIScore",])), "CytolyticScore"=scale(as.numeric(fm["N:SAMP:CytolyticScore",])), "Freq.CGA"=as.numeric(expressed_testis_num), stringsAsFactors = F) + +samp=DATA_clin + +save(list = c("gexp","immunoscore", "samp", "TIME", "STATUS", "logicalv"), file="GSE98588_DLBCL_survival_data.Rdata") + +#********************************************************* Reddy DLBCL ********************************************************* +fm=get(load("REDDY_DLBCL_fm.Rdata")) +annot=get(load("REDDY_DLBCL_annot.Rdata")) + +data=data.matrix(fm[grepl("GEXP", rownames(fm)),]) +rownames(data)=gsub("N:GEXP:", "", rownames(data)) + +TIME=as.numeric(annot$Overall.Survival.years) +STATUS=(as.numeric(annot$Censored)==0)*1 # 0 indicates no censoring, meaning that the death was observed, whereas a 1 indicates that the patient was alive + +ME=microenv[microenv$disease%in%"DLBCL"µenv$category=="Stromal/cancer gene (Rho > 0)"µenv$Rho>0.4,] + +l.regulon.gene=regulon.feats(fm, c(co.stim[,1], hlagenes)) + +# all costim-cytolytic-CGA correlated mutations: +costim_feats=data.matrix(fm[rownames(fm)%in%unlist(l.regulon.gene),]) +rownames(costim_feats)=sapply(rownames(costim_feats), function(n){ + if(!grepl("CNVR", n))return(n) + a=names(l.regulon.gene)[sapply(l.regulon.gene, function(a)any(a%in%n))] + # if(length(a))paste0(n,"@", paste(a, collapse=",")) +}) + +DATAmut=data.frame(t(costim_feats[grepl("GNAB|CNVR", rownames(costim_feats)),])) + +DATA=data.frame("HLAII"=scale(as.numeric(fm["N:SAMP:HLAIIScore",])), "CytolyticScore"=scale(as.numeric(fm["N:SAMP:CytolyticScore",])), stringsAsFactors = F) + +logicalv=list("Reddy_DLBCL"=!is.na(data[1,]), "Reddy_DLBCL_ABC"=!is.na(data[1,])&annot$ABC.GCB..RNAseq.=="ABC", "Reddy_DLBCL_GCB"=!is.na(data[1,])&annot$ABC.GCB..RNAseq.=="GCB") + +Reddy_DLBCL_res=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F)) +Reddy_DLBCL_multivar=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = F, pretty=F)) + +# Clinical +DATA_clin=data.frame("IPI_0to1"=annot$IPI%in%c(0,1)*1,"IPI_4to5"=annot$IPI%in%c(4,5)*1, "ABC"=(annot$ABC.GCB..RNAseq.=="ABC")*1 ,"GCB"=(annot$ABC.GCB..RNAseq.=="GCB")*1, stringsAsFactors = F) +Reddy_DLBCL_clin=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA_clin,TIME,STATUS, univariate = T, pretty=F)) + +# immuno-editing: +Reddy_DLBCL_immunoediting=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATAmut,TIME,STATUS, univariate = T, pretty=F)) +Reddy_DLBCL_immunoediting$Feature=gsub("B.GNAB.", "MUT:", Reddy_DLBCL_immunoediting$Feature) +Reddy_DLBCL_immunoediting$Feature=gsub("HLA.", "HLA-", Reddy_DLBCL_immunoediting$Feature) +Reddy_DLBCL_immunoediting$Feature=gsub("N.CNVR.", "", Reddy_DLBCL_immunoediting$Feature) + +# costim +DATA=data.frame(scale(t(data[rownames(data)%in%c(co.stim[,1]),]))) +Reddy_DLBCL_costim=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F)) + +# microenvironment +DATA=data.frame(scale(t(data[rownames(data)%in%c(ME[,1]),]))) +Reddy_DLBCL_microenvironment=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F)) + +# make table S6, adjusted p-value set here to correct for number of comparisons in total: +tableS7=rbind(Reddy_DLBCL_res,Reddy_DLBCL_clin[!is.na(Reddy_DLBCL_clin$`exp(coef)`),], Reddy_DLBCL_immunoediting, Reddy_DLBCL_costim, Reddy_DLBCL_microenvironment) +tableS7=tableS7[tableS7$Name%in%"Reddy_DLBCL",] +tableS7$Adj.P=p.adjust(tableS7$P, method="BH") + + +tableS7[,2]=prettyNum(tableS7[,2]) +tableS7[,3]=prettyNum(tableS7[,3]) +tableS7[,4]=prettyNum(tableS7[,4]) +tableS7[,5]=prettyNum(tableS7[,5]) +tableS7[,6]=prettyNum(tableS7[,6]) +tableS7[,8]=prettyNum(tableS7[,8]) + +# annotate these genes, needed later: +genelist_signif=data.frame(tableS7[,1], type="Clinical", stringsAsFactors = F) +genelist_signif$type[genelist_signif[,1]%in%c("HLAI", "HLAII", "CytolyticScore", "Freq.CGA")]="ImmunoScores" +genelist_signif$type[genelist_signif[,1]%in%Reddy_DLBCL_immunoediting$Feature]="Immune Editing mutation" +genelist_signif$type[genelist_signif[,1]%in%cga]="CGA" +genelist_signif$type[tableS7$Feature%in%c("ABC", "GCB")]="Subtype" +genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"Stromal/cancer gene (Rho > 0)",1]]="Stromal/cancer gene (Rho > 0)" +genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"Stromal/cancer gene (Rho < 0)",1]]="Stromal/cancer gene (Rho < 0)" +genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"CTL/NK gene",1]]="CTL/NK gene" +genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Inhibitory", co.stim$`Immune checkpoint function`),1]]="Inhibitory ligand" +genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Stimulatory", co.stim$`Immune checkpoint function`),1]]="Stimulatory ligand" + +tableS7$Type=genelist_signif$type +tableS7=tableS7[order(tableS7$Type),] +tableS7[,1]=gsub("\\.", "-", tableS7[,1]) + +data.table::fwrite(tableS7[,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_Reddy_DLBCL.tsv", sep="\t") +data.table::fwrite(tableS7[tableS7$Adj.P<0.2,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_Reddy_DLBCL_signif.tsv", sep="\t") + +# save cohort and survival +gexp=data.frame(scale(t(data))) +immunoscore=data.frame("HLAII"=scale(as.numeric(fm["N:SAMP:HLAIIScore",])), "CytolyticScore"=scale(as.numeric(fm["N:SAMP:CytolyticScore",])), stringsAsFactors = F) + +samp=DATA_clin + +save(list = c("gexp","immunoscore", "samp", "TIME", "STATUS", "logicalv"), file="Reddy_DLBCL_survival_data.Rdata") + + +#************************************ beatAML: +load("BeatAML_fm.Rdata") +annot=get(load("BeatAML_fm_annot.Rdata")) + +gexp=fm[grepl("GEXP", rownames(fm)),] +rownames(gexp)=gsub("N:GEXP:", "", rownames(gexp)) + +annot$vitalStatus[annot$vitalStatus=="Unknown"]=NA +TIME=as.numeric(annot$overallSurvival) +STATUS=as.numeric(annot$vitalStatus=="Dead") + +STATUS[TIME>1825&!is.na(STATUS)&STATUS==1]=0 # change to 5year survival +TIME[TIME>1825]=1825 +TIME[TIME==0]=0.1 + +filtv=annot$specimenType%in%"Bone Marrow Aspirate"&!(is.na(TIME)|is.na(STATUS)|is.na(annot$TCGA_coord))&annot$causeOfDeath%in%c("Alive", "Dead-Disease", "Dead-Unknown") +filtv=annot$specimenType%in%"Bone Marrow Aspirate"&!(is.na(annot$TCGA_coord))&annot$causeOfDeath%in%c("Alive", "Dead-Disease", "Dead-Unknown") +filtv2=annot$specimenType%in%"Bone Marrow Aspirate"&!(is.na(annot$TCGA_coord))&annot$TCGA_coord%in%c("CMP-like","MDS-like","Monocyte-like")&annot$causeOfDeath%in%c("Alive", "Dead-Disease", "Dead-Unknown") +filtv3=annot$specimenType%in%"Bone Marrow Aspirate"&!(is.na(annot$TCGA_coord))&annot$TCGA_coord%in%c("MDS-like")&annot$causeOfDeath%in%c("Alive", "Dead-Disease", "Dead-Unknown") +filtv4=annot$specimenType%in%"Bone Marrow Aspirate"&!(is.na(annot$TCGA_coord))&annot$TCGA_coord%in%c("CMP-like")&annot$causeOfDeath%in%c("Alive", "Dead-Disease", "Dead-Unknown") +filtv5=annot$specimenType%in%"Bone Marrow Aspirate"&!(is.na(annot$TCGA_coord))&annot$TCGA_coord%in%c("Monocyte-like")&annot$causeOfDeath%in%c("Alive", "Dead-Disease", "Dead-Unknown") + +logicalv=list("BeatAML"=!(is.na(TIME)|is.na(STATUS)|is.na(annot$TCGA_coord))&annot$causeOfDeath%in%c("Alive", "Dead-Disease", "Dead-Unknown"), "beatAML_BMonly"=filtv, "normal_karyotype"=filtv2, "MDS-like"=filtv3, "CMP-like"=filtv4, "Monocyte-like"=filtv5) + +# plot overall survival +r=lapply(seq(logicalv), function(i){ + ggsurvplot(survfit(Surv(time, status) ~ 1, data = data.frame("time"=TIME[logicalv[[i]]], "status"=STATUS[logicalv[[i]]])), + xlab = "months", + ylab = "Overall survival probability",title=(names(logicalv)[i]) + ) +}) + +names(r)=names(logicalv) + +pdf("BeatAML_cohorts.pdf", width =5, height = ceiling(length(r)/2)*2.75) +plots.together=arrange_ggsurvplots(r, print = TRUE, ncol = 2, nrow = ceiling(length(r)/2)) +dev.off() + +data.test=data.frame("time"=TIME[logicalv[[1]]]*0.0328767, "status"=STATUS[logicalv[[1]]]) + +ggsurvplot(survfit(Surv(time, status) ~ 1, data = data.test), + xlab = "months", + ylab = "Overall survival probability") + +# HLA, cytolytic +DATA=data.frame(scale(t(fm[grepl("N:SAMP:.*.Score", rownames(fm)),])), check.names = F) +colnames(DATA)=c("HLAI", "HLAII", "CytolyticScore") +beatAML=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F)) + +# Clinical: +DATA_clin=data.frame(t(fm[grepl("ELN2017", rownames(fm)),]),"Age"=as.numeric(fm[grepl("ageAtDiagnosis", rownames(fm)),,drop=F]), t(fm[grepl("BM_Transplant", rownames(fm)),,drop=F]),t(fm[grepl("B:CLIN:priorMDS_TRUE", rownames(fm)),,drop=F]), t(fm[grepl("in_PB|in_BM|B:CLIN:is|finalFusion_Complex", rownames(fm)),,drop=F])) +DATA_clin$Age[is.na(DATA_clin$Age)]=median(DATA_clin$Age, na.rm = T) #one observation --> set to median + +colnames(DATA_clin)=gsub("..CLIN.", "", colnames(DATA_clin)) +beatAML_clin=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA_clin,TIME,STATUS, univariate = T, pretty=F)) + +# stroma +ME=microenv[microenv$disease=="AML",] +DATA=data.frame(t(gexp[rownames(gexp)%in%ME$gene,])) +beatAML_microenv=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F)) +beatAML_microenv[beatAML_microenv$P<0.01,] + +# MDS +DATA=data.frame(t(gexp[rownames(gexp)%in%MDS$MDS_signature_all_filt,])) +beatAML_MDS=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F)) +beatAML_MDS[beatAML_MDS$P<0.05,] + +# costim +DATA=data.frame(t(gexp[rownames(gexp)%in%co.stim[,1],])) +beatAML_costim=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA,TIME,STATUS, univariate = T, pretty=F)) +beatAML_costim[beatAML_costim$P<0.05,] + +# subtype: +load("BeatAML_subtypes.Rdata") +coordinates.subtype$subtype[coordinates.subtype$subtype%in%"Progenitor-like"]="CMP-like" +coordinates.subtype=coordinates.subtype[match(colnames(fm), coordinates.subtype$ID),] + +DATA_subtypes=data.frame(do.call(cbind, get.logical(list(coordinates.subtype$subtype)))*1) + +beataml_res_subtype=do.call(rbind, lapply(seq(logicalv), fun.get.cox, logicalv, DATA_subtypes,TIME,STATUS, univariate = T, pretty=F)) + +fun.kapplanMeier(TIME[logicalv[[1]]], STATUS[logicalv[[1]]],GROUPS=coordinates.subtype$subtype[logicalv[[1]]], MONTHS=T, PVAL=1, INDIVIDUAL_GROUPS=F,LWD = 1, NAME = "Prognostic Index - validation") + +# make table S6, adjusted p-value set here to correct for number of comparisons in total: +tableS7=rbind(beatAML, beataml_res_subtype, beatAML_costim, beatAML_MDS, beatAML_microenv) +tableS7=tableS7[tableS7$Name%in%"BeatAML",] +tableS7$Adj.P=p.adjust(tableS7$P, method="BH") + +tableS7[,2]=prettyNum(tableS7[,2]) +tableS7[,3]=prettyNum(tableS7[,3]) +tableS7[,4]=prettyNum(tableS7[,4]) +tableS7[,5]=prettyNum(tableS7[,5]) +tableS7[,6]=prettyNum(tableS7[,6]) +tableS7[,8]=prettyNum(tableS7[,8]) + +# annotate these genes, needed later: +genelist_signif=data.frame(tableS7[,1], type="", stringsAsFactors = F) +genelist_signif$type[genelist_signif[,1]%in%c("HLAI", "HLAII", "CytolyticScore", "Freq.CGA")]="ImmunoScores" +genelist_signif$type[genelist_signif[,1]%in%beataml_res_subtype$Feature]="Subtype" +genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"Stromal/cancer gene (Rho > 0)",1]]="Stromal/cancer gene (Rho > 0)" +genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"Stromal/cancer gene (Rho < 0)",1]]="Stromal/cancer gene (Rho < 0)" +genelist_signif$type[genelist_signif[,1]%in%ME[ME$category%in%"CTL/NK gene",1]]="CTL/NK gene" +genelist_signif$type[genelist_signif[,1]%in%MDS$MDS_signature_all_filt]="MDS-signature gene" +genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Inhibitory", co.stim$`Immune checkpoint function`),1]]="Inhibitory ligand" +genelist_signif$type[genelist_signif[,1]%in%co.stim[grepl("Stimulatory", co.stim$`Immune checkpoint function`),1]]="Stimulatory ligand" + +tableS7$Type=genelist_signif$type +tableS7=tableS7[order(tableS7$Type),] +tableS7[,1]=gsub("\\.", "-", tableS7[,1]) + +data.table::fwrite(tableS7[,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_beatAML.tsv", sep="\t") +data.table::fwrite(tableS7[tableS7$Adj.P<0.2,c(1,11,10,2,3,4,5,6,8,9)], "tableS7_beatAML_signif.tsv", sep="\t") + +# save cohort and survival +gexp=data.frame(scale(t(gexp))) +immunoscore=data.frame(scale(t(fm[grepl("N:SAMP:.*.Score", rownames(fm)),])), check.names = F) +colnames(immunoscore)=c("HLAI", "HLAII", "CytolyticScore") + +samp=cbind(DATA_clin, DATA_subtypes) + +save(list = c("gexp","immunoscore", "samp", "TIME", "STATUS", "logicalv"), file="BeatAML_survival_data.Rdata") + +#******************************** make excel table +files=list.files(pattern = "tableS7") +files=files[!grepl("_CHOP", files)] + +univariate.results=lapply(files[grepl("signif", files)], fread, data.table=F) +names(univariate.results)=gsub("tableS7_|_signif.tsv", "", files[grepl("signif", files)]) +univariate.results=univariate.results[c(grep("DLBCL", names(univariate.results)), grep("MM", names(univariate.results)), grep("AML", names(univariate.results)))] + +require(openxlsx) + +write.xlsx(univariate.results, file = "TableS7_cox.xlsx", )