--- a
+++ b/FigS6G_CGA_GSEA_hemapMM.R
@@ -0,0 +1,168 @@
+GIT_HOME="/research/users/ppolonen/git_home/ImmunogenomicLandscape-BloodCancers/"
+source(file.path(GIT_HOME, "common_scripts/visualisation/plotting_functions.R"))
+source(file.path(GIT_HOME, "common_scripts/statistics/functions_statistics.R"))
+source(file.path(GIT_HOME, "common_scripts/pathway_analysis/functions.GSEA.R"))
+
+library(GSVA)
+library(parallel)
+library(ComplexHeatmap)
+
+setwd("/research/groups/sysgen/PROJECTS/HEMAP_IMMUNOLOGY/petri_work/HEMAP_IMMUNOLOGY/Published_data_figures")
+
+t.df = read.delim("t.antigen_df.txt", stringsAsFactors=F, header=T)
+t.df=t.df[order(t.df[,3]),]
+
+annot = get(load("Hemap_immunology_Annotations.Rdata"))
+
+# gexp data
+data=t(get(load("data9544_with_gene_symbols.RData")))
+data=data[,colnames(data)%in%annot$GSM.identifier..sample.]
+  
+# data:
+genelist=t.df[t.df[,3]%in%"Cancer_Myeloma",1]
+
+# take MM
+GSE=c("GSE16716,GSE24080")
+# GSE=c("GSE19784")
+
+gexp=data[,annot$GSE.identifier..experiment.%in%GSE]
+annot2=annot[annot$GSE.identifier..experiment.%in%GSE,]
+
+# GSE="All_samples"
+# gexp=data[,annot$colorClass=="MM"]
+# annot2=annot[annot$colorClass=="MM",]
+
+
+# rank patients by number of testis antigens expressed
+# profiles
+profile=get(load("mixtureM_profile.Rdata"))
+profile[profile==-1] = 0
+profile2=profile[,colnames(profile)%in%annot2$GSM.identifier..sample.]
+
+# take only high expressed into account
+profile2[data.matrix(gexp)<5]=0
+
+expressed_testis_num=colSums(profile2[rownames(profile2)%in%unique(t.df$gene),])
+feat_class=expressed_testis_num
+feat_class[expressed_testis_num==0]="0_Antigens"
+feat_class[expressed_testis_num>=1&expressed_testis_num<=4]="1to4_Antigens"
+feat_class[expressed_testis_num>=5&expressed_testis_num<=6]="5to6_Antigens"
+feat_class[expressed_testis_num>=7]="over7_Antigens"
+
+GENESETS="Combined_pathway_signatures_2017_filtered_robust.gmt"
+WD=file.path(getwd(), "GSEA")
+OUTDIR=file.path(getwd(), "GSEA")
+
+# # run GSEA:
+# command=run.GSEA(data=gexp, cls.vector = as.numeric(expressed_testis_num), datatype = "N", GENESETS = GENESETS, dataname = "MM", clsname = "Testis_antigen_groups", WD=WD, OUTDIR=OUTDIR)
+# try(system(command))
+# 
+# command=run.GSEA(data=gexp, cls.vector = as.numeric(annot2$HLAIScore), datatype = "N", GENESETS = GENESETS, dataname = paste0("MM_", GSE[1]), clsname = "HLAI", WD=WD, OUTDIR=OUTDIR)
+# try(system(command))
+# 
+# command=run.GSEA(data=gexp, cls.vector = as.numeric(annot2$HLAIIScore), datatype = "N", GENESETS = GENESETS, dataname = paste0("MM_", GSE[1]), clsname = "HLAII", WD=WD, OUTDIR=OUTDIR)
+# try(system(command))
+
+a=read.delim(file.path(WD, "MM_Testis_antigen_groups_continuous_phenotype.Gsea.1549532122926/gsea_report_for_feat_neg_1549532122926.xls"), stringsAsFactors = F)
+a=read.delim(file.path(WD, "MM_Testis_antigen_groups_continuous_phenotype.Gsea.1549532122926/gsea_report_for_feat_pos_1549532122926.xls"), stringsAsFactors = F)
+
+# # GSE19784
+# a=read.delim("/research/groups/sysgen/PROJECTS/HEMAP_IMMUNOLOGY/petri_work/HEMAP_IMMUNOLOGY/MM_Testis_antigen_groups_continuous_phenotype.Gsea.1549542936287/gsea_report_for_feat_pos_1549542936287.xls", stringsAsFactors = F)
+# b=read.delim("/research/groups/sysgen/PROJECTS/HEMAP_IMMUNOLOGY/petri_work/HEMAP_IMMUNOLOGY/MM_Testis_antigen_groups_continuous_phenotype.Gsea.1549542936287/gsea_report_for_feat_neg_1549542936287.xls", stringsAsFactors = F)
+ 
+c=rbind(a,b)
+
+# get GSVA visualization for the pathways
+# Geneset list
+Onc.pathways=read.delim(GENESETS, stringsAsFactors = FALSE, header=F, col.names = paste("V",1:max(count.fields(GENESETS, sep = '\t'), na.rm = T)), fill = TRUE)
+
+# Make list
+listA=mclapply(1:length(Onc.pathways[,1]), function(i){A=as.character(Onc.pathways[i,3:length(Onc.pathways),])
+B=A[!A==""&!A=="NA"]}, mc.cores=6)
+
+names(listA) <- Onc.pathways[,1]
+
+viz_scores=gsva(expr = data.matrix(gexp), gset.idx.list = listA, parallel.sz=8, tau=0.25)
+
+# make a complex heatmap
+significant=c(c[c$FWER.p.val<0.001,1])[1:10]
+dat_plot=viz_scores[match(significant, toupper(rownames(viz_scores))),]
+
+annof=data.frame("HLAI"=annot2$HLAIScore, "HLAII"=annot2$HLAIIScore)
+
+df_anno=data.frame("cytogeneticAbnormalities"=as.double(annot2$MM_CYTOGENETIC_ABNORMALITIES),"age"=annot2$AGE, "iss"=as.character(annot2$MM_ISS), "iss1"=annot2$MM_ISS==1,"iss2"=annot2$MM_ISS==2,"iss3"=annot2$MM_ISS==3, "OS"=annot2$OS_Time, "PFS"=annot2$PFS_Time, "number.T.Antigenes"=as.numeric(expressed_testis_num), "antigen.group"=feat_class)
+
+expressed_testis_num2=expressed_testis_num[order(expressed_testis_num)]
+
+df_anno=df_anno[match(names(expressed_testis_num2), rownames(df_anno)),]
+dat_plot=dat_plot[,match(names(expressed_testis_num2), colnames(dat_plot))]
+annof=annof[match(names(expressed_testis_num2), colnames(gexp)),]
+feat_class=feat_class[match(names(expressed_testis_num2), names(feat_class))]
+gexp=gexp[,match(names(expressed_testis_num2), colnames(gexp))]
+
+ha = HeatmapAnnotation(df=df_anno,NUM_TESTIS = anno_barplot(expressed_testis_num2, axis = T, gp = gpar(fill = "indianred")), HLA = anno_barplot(annof$HLAI, axis = T, gp = gpar(fill = "indianred")), HLAII = anno_barplot(annof$HLAII, axis = T, gp = gpar(fill = "darkgoldenrod"),ylim = c(5,10)))
+ha2 = HeatmapAnnotation(df=data.frame(as.character(feat_class)), height = unit(20, "mm"))
+
+pl=data.frame(df_anno$antigen.group, annof$HLAI)
+
+logicalVectors=lapply(unique(feat_class), function(cl)feat_class%in%cl)
+names(logicalVectors)=unique(feat_class)
+
+rownames(dat_plot)=gsub("-.*.|_IN_CANCER_HOMO_SAPIENS-WIKIPW", "", rownames(dat_plot))
+
+pdf("FigS6G_CGA_GSVA_GSE16716_GSE24080.pdf", height = 8, width = 8)
+Heatmap(dat_plot, bottom_annotation = ha,use_raster = F, top_annotation=HeatmapAnnotation("# CGA"=anno_barplot(data.frame(expressed_testis_num2), height = unit(10, "mm"))), cluster_columns = F, cluster_rows = F, row_names_side = "right", column_names_gp = gpar(fontsize = 8), row_names_gp = gpar(fontsize = 6), name="GSVA\nScore", col=colorRamp2(c(-0.5,0,0.5), c("grey50", "white", "red")), show_column_names = F, width = unit(40, "mm"), height = unit(1.25*20, "mm"))
+Heatmap(t(annof$HLAII), cluster_columns = F, cluster_rows = F, row_names_side = "right", column_names_gp = gpar(fontsize = 8), row_names_gp = gpar(fontsize = 6), name="HLAII\nScore", show_column_names = F, width = unit(40, "mm"), height = unit(1.25*1, "mm"))
+
+a=t(scale(annof$HLAII))
+a[a>2]=2
+a[a<(-2)]=-2
+
+Heatmap(a, cluster_columns = F, cluster_rows = F, row_names_side = "right", column_names_gp = gpar(fontsize = 8), row_names_gp = gpar(fontsize = 6), name="HLAII\nScore", show_column_names = F, width = unit(40, "mm"), height = unit(1.25*dim(a)[1], "mm"))
+dev.off()
+
+
+# verify HLA association
+
+GSE=c("GSE16716,GSE24080")
+# GSE=c("GSE19784")
+# GSE=c("GSE15695,GSE21349", "GSE16716,GSE24080", "GSE17306", "GSE19784", "GSE2134")
+
+annot2=annot[annot$GSE.identifier..experiment.%in%GSE,]
+
+# gexp data
+gexp=data[,colnames(data)%in%annot2$GSM.identifier..sample.]
+
+t.df=t.df[order(t.df[,3]),]
+
+# data:
+genelist=t.df[t.df[,3]%in%"Cancer_Myeloma",1]
+genelist=t.df[,1]
+
+# rank patients by number of testis antigens expressed
+# profiles
+profile[profile==-1] = 0
+profile2=profile[,colnames(profile)%in%annot2$GSM.identifier..sample.]
+
+# take only high expressed into account
+profile2[data.matrix(gexp)<5]=0
+
+expressed_testis_num=colSums(profile2[rownames(profile2)%in%unique(t.df$gene),])
+feat_class=expressed_testis_num
+feat_class[expressed_testis_num==0]="0 CGA"
+feat_class[expressed_testis_num>=1&expressed_testis_num<=4]="1-4 CGA"
+feat_class[expressed_testis_num>=5&expressed_testis_num<=6]="5-6 CGA"
+feat_class[expressed_testis_num>=7]=">7 CGA"
+
+logicalVectors=lapply(unique(feat_class), function(cl)feat_class%in%cl)
+names(logicalVectors)=unique(feat_class)
+
+annof=data.frame("HLAI"=annot2$HLAIScore, "HLAII"=annot2$HLAIIScore)
+
+genelist=c("HLAI", "HLAII")
+p.all=lapply(genelist, plot.boxplot, logicalVectors = logicalVectors, data = t(annof), order = T,spread = F)
+
+TestGeneWilcox("HLAII", data = t(annof), logicalVectors = logicalVectors, logicalVector_normals = logicalVectors, ALTERNATIVE = "less")
+TestGeneWilcox("HLAII", data = t(annof), logicalVectors = logicalVectors, ALTERNATIVE = "less")
+
+ggsave(plot = p.all[[2]], filename = "FigureS6G_HLAII_MM_hemap.pdf", width = unit(3.25, "cm"), height = unit(3, "cm"))