--- a +++ b/Fig7_univariate_survival_forestplot.R @@ -0,0 +1,147 @@ +GIT_HOME="/research/users/ppolonen/git_home/ImmunogenomicLandscape-BloodCancers/" +source(file.path(GIT_HOME, "common_scripts/visualisation/plotting_functions.R")) +source(file.path(GIT_HOME, "common_scripts/statistics/functions_statistics.R")) +source(file.path(GIT_HOME, "common_scripts/statistics/statistics_wrappers.R")) +source(file.path(GIT_HOME, "common_scripts/featurematrix/compute.pairwise.R")) +source(file.path(GIT_HOME, "common_scripts/featurematrix/functions_generate_fm.R")) + +library(RColorBrewer) +library(survival) +library(data.table) +library(ggplot2) +library(ComplexHeatmap) + +setwd("/research/groups/sysgen/PROJECTS/HEMAP_IMMUNOLOGY/petri_work/HEMAP_IMMUNOLOGY/Published_data_figures") + +files=list.files(pattern = "tableS7") +files=files[!grepl("_CHOP", files)] # CHOP treated DLBCL not shown, as it has very different survival to RCHOP + +univariate.results=lapply(files[grepl("signif", files)], fread, data.table=F) + +univariate.results.all=lapply(files[!grepl("signif", files)], fread, data.table=F) + +names(univariate.results)=files[grepl("signif", files)] +names(univariate.results.all)=files[!grepl("signif", files)] + +AML=do.call(rbind, univariate.results[grep("AML", names(univariate.results))]) +AML$Name=gsub("Cancer_Myeloma", "Hemap_MM", AML$Name) +AML$Name=gsub("_RCHOP", "", AML$Name) + +DLBCL=do.call(rbind, univariate.results[grep("DLBCL", names(univariate.results))]) +DLBCL$Name=gsub("Cancer_Myeloma", "Hemap_MM", DLBCL$Name) +DLBCL$Name=gsub("_RCHOP", "", DLBCL$Name) + +MM=do.call(rbind, univariate.results[grep("MM", names(univariate.results))]) +MM$Name=gsub("Cancer_Myeloma", "Hemap_MM", MM$Name) +MM$Name=gsub("_RCHOP", "", MM$Name) + +# all +pancan=do.call(rbind, univariate.results.all) +pancan$Name=gsub("Cancer_Myeloma", "Hemap_MM", pancan$Name) +pancan$Name=gsub("_RCHOP", "", pancan$Name) + +fun_forestplot=function(data, NAME="data", BOX=0.1,cex=2, colorv="black"){ + library(forestplot) + + txt.df=data.frame("Feature"=data$Feature, "Cohort"=gsub("_", " ", data$Name), "FDR"=signif(data$Adj.P, 2), stringsAsFactors = F) + txt.df$Feature[duplicated(txt.df$Feature)]="" + txt.df=rbind(c("Feature", "Cohort", "FDR"), txt.df) + + coef.df=data.frame("HR"=data$`exp(coef)`, "lower .95"=data$`lower .95`, "upper .95"=data$`upper .95`) + coef.df=rbind(c(NA, NA, NA), coef.df) + + xticks=seq(ifelse(min(data$`lower .95`)<0.5, 0, 0.5), min(c(5, max(data$`upper .95`))), by = 0.1) + + attr(xticks, "labels") = xticks%in%seq(-4, 4, by=0.5) + + forestplot(txt.df,coef.df, new_page = T, zero = c(0.98, 1.02), + clip =c(-1, 2), is.summary = c(T, rep(F, length(data$Name))), + xticks=xticks, + boxsize=BOX, + xlab="HR", + col=fpColors(box=colorv), + txt_gp = fpTxtGp(label = list(gpar(fontfamily = "Helvetica", cex=cex*1.25), + gpar(fontfamily = "Helvetica", col = "black", cex=cex)), + summary = gpar(fontfamily = "Helvetica", cex=cex*1.5), + ticks = gpar(fontfamily = "Helvetica", cex=cex), + xlab = gpar(fontfamily = "Helvetica", cex = cex*1.5))) + +} +cat=c("Subtype","ImmunoScores","Inhibitory ligand", "Stimulatory ligand", "Stromal/cancer gene (Rho > 0)", "Stromal/cancer gene (Rho < 0)","CTL/NK gene","Clinical", "CGA") +cols=data.frame("name"=c("Subtype","ImmunoScores","Inhibitory ligand", "Stimulatory ligand", "Stromal/cancer gene (Rho > 0)", "Stromal/cancer gene (Rho < 0)","CTL/NK gene","Clinical", "CGA", "MDS-signature gene"), + "color"=c("#acb839","#5e2883","#1f78b4","#b2df8a","#377eb8","grey50","#e41a1c","brown", "#d7a85b", "indianred"), stringsAsFactors = F) + +count.signif=table(AML$Feature) +signif.feats=names(count.signif)[count.signif>1] +AML.filt=AML[AML$Feature%in%signif.feats,] +AML.filt=AML.filt[order(match(AML.filt$Type, cat), AML.filt$Feature, AML.filt$Adj.P),] + +count.signif=table(MM$Feature) +signif.feats=names(count.signif)[count.signif>1] +MM.filt=MM[MM$Feature%in%signif.feats,] + +count.signif=table(DLBCL$Feature) +signif.feats=names(count.signif)[count.signif>1] +DLBCL.filt=DLBCL[DLBCL$Feature%in%signif.feats,] + +Fig7_AML=AML[AML$Feature%in%c("MDS-like", "CLEC2B", "Monocyte-like","C10orf54"),] +Fig7_MM=MM[MM$Feature%in%c("Freq-CGA", "MAGEA1", "MAGEB2"),] +Fig7_DLBCL=DLBCL[DLBCL$Feature%in%c("ABC","C1QA","C1QB","C1QC","CCL18", "CD163", "LYZ"),] +Fig7_pancan=pancan[pancan$Feature%in%c("CD274", "C10orf54", "CD58", "CD84"),] +Fig7_pancan=Fig7_pancan[Fig7_pancan$Adj.P<0.2,] + +all=rbind(Fig7_pancan) + +all=all[order(match(all$Feature, c("CD274", "C10orf54", "CD58", "CD84", "HLAII")), -all$`exp(coef)`),] + +col=cols[match(unique(all$Type), cols$name),2] +names(col)=unique(all$Type) + +pdf("FigS7B_univariate_pancan_forestPlot.pdf", height = 4, width = 5) +fun_forestplot(all, "AML, MM, DLBCL", BOX=0.5, cex=0.5, colorv ="black") +dev.off() + +# show even if not significant: +Fig7_AML=pancan[pancan$Feature%in%c("MDS-like", "CLEC2B", "Monocyte-like","C10orf54")&grepl("AML", pancan$Name),] +Fig7_MM=pancan[pancan$Feature%in%c("Freq-CGA", "MAGEA1", "MAGEB2")&grepl("MM", pancan$Name),] +Fig7_DLBCL=pancan[pancan$Feature%in%c("C1QA","C1QB","C1QC", "CCL18","CD163", "LYZ")&grepl("DLBCL", pancan$Name),] + +all=rbind(Fig7_AML, Fig7_MM, Fig7_DLBCL) + +all=all[order(match(all$Feature, c(c("MDS-like", "CLEC2B", "Monocyte-like","C10orf54"), c("Freq-CGA", "MAGEA1", "MAGEB2"), c("C1QA","C1QB","C1QC", "CCL18", "CD163", "LYZ"))), -all$`exp(coef)`),] + +col=cols[match(unique(all$Type), cols$name),2] +names(col)=unique(all$Type) + +pdf("Fig7A_univariate_forestPlot.pdf", height = 5, width = 5) +fun_forestplot(all, "AML, MM, DLBCL", BOX=0.5, cex=0.5, colorv ="black") +dev.off() + +# draw KM curve from a few examples: +load("Hemap_AML_survival_data.Rdata") + +pdf("Fig7B_KM_VISTA.pdf", width = 2, height = 2.5) +fun.kapplanMeier(TIME = TIME[logicalv[[1]]], STATUS = STATUS[logicalv[[1]]], CONTINUOUS = as.numeric(gexp[logicalv[[1]],"C10orf54"]), conf.int = F, MONTHS=F, PVAL=1,LWD = 0.5, CONTINUOUS_SUMMARY = "75th_25th_percentile", INDIVIDUAL_GROUPS=F, NAME = "VISTA") +dev.off() + +# draw KM curve from a few examples: +load("CoMMpass_survival_data.Rdata") + +pdf("FigS7A_CGA_CoMMpass.pdf", width = 2, height = 2.5) +fun.kapplanMeier(TIME = TIME[logicalv[[1]]], STATUS = STATUS[logicalv[[1]]], GROUPS = as.numeric(immunoscore[logicalv[[1]],"Freq.CGA"]>=7), conf.int = F, MONTHS=T, PVAL=1,LWD = 0.5, CONTINUOUS_SUMMARY = "75th_25th_percentile", INDIVIDUAL_GROUPS=F, NAME = "MAGEA1") +dev.off() + +# draw KM curve from a few examples: +load("Hemap_MM_survival_data.Rdata") + +pdf("FigS7A_CGA_Hemap.pdf", width = 2, height = 2.5) +fun.kapplanMeier(TIME = TIME[logicalv[[1]]], STATUS = STATUS[logicalv[[1]]], GROUPS = as.numeric(immunoscore[logicalv[[1]],"Freq.CGA"]>=7), conf.int = F, MONTHS=T, PVAL=1,LWD = 0.5, CONTINUOUS_SUMMARY = "75th_25th_percentile", INDIVIDUAL_GROUPS=F, NAME = "MAGEA1") +dev.off() + +# draw KM curve from a few examples: +load("Hemap_DLBCL_survival_data.Rdata") + +pdf("Fig7D_KM_CD163_LYZ.pdf", width = 2, height = 2.5) +fun.kapplanMeier(TIME = TIME[logicalv[[5]]], STATUS = STATUS[logicalv[[5]]], CONTINUOUS = as.numeric(gexp[logicalv[[5]],"CD163"]), conf.int = F, MONTHS=F, PVAL=1,LWD = 0.5, CONTINUOUS_SUMMARY = "75th_25th_percentile", INDIVIDUAL_GROUPS=F, NAME = "CD163") +fun.kapplanMeier(TIME = TIME[logicalv[[5]]], STATUS = STATUS[logicalv[[5]]], CONTINUOUS = as.numeric(gexp[logicalv[[5]],"LYZ"]), conf.int = F, MONTHS=F, PVAL=1,LWD = 0.5, CONTINUOUS_SUMMARY = "75th_25th_percentile", INDIVIDUAL_GROUPS=F, NAME = "LYZ") +dev.off() \ No newline at end of file