[8e0848]: / FigS6G_CGA_GSEA_hemapMM.R

Download this file

169 lines (119 with data), 8.2 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
GIT_HOME="/research/users/ppolonen/git_home/ImmunogenomicLandscape-BloodCancers/"
source(file.path(GIT_HOME, "common_scripts/visualisation/plotting_functions.R"))
source(file.path(GIT_HOME, "common_scripts/statistics/functions_statistics.R"))
source(file.path(GIT_HOME, "common_scripts/pathway_analysis/functions.GSEA.R"))
library(GSVA)
library(parallel)
library(ComplexHeatmap)
setwd("/research/groups/sysgen/PROJECTS/HEMAP_IMMUNOLOGY/petri_work/HEMAP_IMMUNOLOGY/Published_data_figures")
t.df = read.delim("t.antigen_df.txt", stringsAsFactors=F, header=T)
t.df=t.df[order(t.df[,3]),]
annot = get(load("Hemap_immunology_Annotations.Rdata"))
# gexp data
data=t(get(load("data9544_with_gene_symbols.RData")))
data=data[,colnames(data)%in%annot$GSM.identifier..sample.]
# data:
genelist=t.df[t.df[,3]%in%"Cancer_Myeloma",1]
# take MM
GSE=c("GSE16716,GSE24080")
# GSE=c("GSE19784")
gexp=data[,annot$GSE.identifier..experiment.%in%GSE]
annot2=annot[annot$GSE.identifier..experiment.%in%GSE,]
# GSE="All_samples"
# gexp=data[,annot$colorClass=="MM"]
# annot2=annot[annot$colorClass=="MM",]
# rank patients by number of testis antigens expressed
# profiles
profile=get(load("mixtureM_profile.Rdata"))
profile[profile==-1] = 0
profile2=profile[,colnames(profile)%in%annot2$GSM.identifier..sample.]
# take only high expressed into account
profile2[data.matrix(gexp)<5]=0
expressed_testis_num=colSums(profile2[rownames(profile2)%in%unique(t.df$gene),])
feat_class=expressed_testis_num
feat_class[expressed_testis_num==0]="0_Antigens"
feat_class[expressed_testis_num>=1&expressed_testis_num<=4]="1to4_Antigens"
feat_class[expressed_testis_num>=5&expressed_testis_num<=6]="5to6_Antigens"
feat_class[expressed_testis_num>=7]="over7_Antigens"
GENESETS="Combined_pathway_signatures_2017_filtered_robust.gmt"
WD=file.path(getwd(), "GSEA")
OUTDIR=file.path(getwd(), "GSEA")
# # run GSEA:
# command=run.GSEA(data=gexp, cls.vector = as.numeric(expressed_testis_num), datatype = "N", GENESETS = GENESETS, dataname = "MM", clsname = "Testis_antigen_groups", WD=WD, OUTDIR=OUTDIR)
# try(system(command))
#
# command=run.GSEA(data=gexp, cls.vector = as.numeric(annot2$HLAIScore), datatype = "N", GENESETS = GENESETS, dataname = paste0("MM_", GSE[1]), clsname = "HLAI", WD=WD, OUTDIR=OUTDIR)
# try(system(command))
#
# command=run.GSEA(data=gexp, cls.vector = as.numeric(annot2$HLAIIScore), datatype = "N", GENESETS = GENESETS, dataname = paste0("MM_", GSE[1]), clsname = "HLAII", WD=WD, OUTDIR=OUTDIR)
# try(system(command))
a=read.delim(file.path(WD, "MM_Testis_antigen_groups_continuous_phenotype.Gsea.1549532122926/gsea_report_for_feat_neg_1549532122926.xls"), stringsAsFactors = F)
a=read.delim(file.path(WD, "MM_Testis_antigen_groups_continuous_phenotype.Gsea.1549532122926/gsea_report_for_feat_pos_1549532122926.xls"), stringsAsFactors = F)
# # GSE19784
# a=read.delim("/research/groups/sysgen/PROJECTS/HEMAP_IMMUNOLOGY/petri_work/HEMAP_IMMUNOLOGY/MM_Testis_antigen_groups_continuous_phenotype.Gsea.1549542936287/gsea_report_for_feat_pos_1549542936287.xls", stringsAsFactors = F)
# b=read.delim("/research/groups/sysgen/PROJECTS/HEMAP_IMMUNOLOGY/petri_work/HEMAP_IMMUNOLOGY/MM_Testis_antigen_groups_continuous_phenotype.Gsea.1549542936287/gsea_report_for_feat_neg_1549542936287.xls", stringsAsFactors = F)
c=rbind(a,b)
# get GSVA visualization for the pathways
# Geneset list
Onc.pathways=read.delim(GENESETS, stringsAsFactors = FALSE, header=F, col.names = paste("V",1:max(count.fields(GENESETS, sep = '\t'), na.rm = T)), fill = TRUE)
# Make list
listA=mclapply(1:length(Onc.pathways[,1]), function(i){A=as.character(Onc.pathways[i,3:length(Onc.pathways),])
B=A[!A==""&!A=="NA"]}, mc.cores=6)
names(listA) <- Onc.pathways[,1]
viz_scores=gsva(expr = data.matrix(gexp), gset.idx.list = listA, parallel.sz=8, tau=0.25)
# make a complex heatmap
significant=c(c[c$FWER.p.val<0.001,1])[1:10]
dat_plot=viz_scores[match(significant, toupper(rownames(viz_scores))),]
annof=data.frame("HLAI"=annot2$HLAIScore, "HLAII"=annot2$HLAIIScore)
df_anno=data.frame("cytogeneticAbnormalities"=as.double(annot2$MM_CYTOGENETIC_ABNORMALITIES),"age"=annot2$AGE, "iss"=as.character(annot2$MM_ISS), "iss1"=annot2$MM_ISS==1,"iss2"=annot2$MM_ISS==2,"iss3"=annot2$MM_ISS==3, "OS"=annot2$OS_Time, "PFS"=annot2$PFS_Time, "number.T.Antigenes"=as.numeric(expressed_testis_num), "antigen.group"=feat_class)
expressed_testis_num2=expressed_testis_num[order(expressed_testis_num)]
df_anno=df_anno[match(names(expressed_testis_num2), rownames(df_anno)),]
dat_plot=dat_plot[,match(names(expressed_testis_num2), colnames(dat_plot))]
annof=annof[match(names(expressed_testis_num2), colnames(gexp)),]
feat_class=feat_class[match(names(expressed_testis_num2), names(feat_class))]
gexp=gexp[,match(names(expressed_testis_num2), colnames(gexp))]
ha = HeatmapAnnotation(df=df_anno,NUM_TESTIS = anno_barplot(expressed_testis_num2, axis = T, gp = gpar(fill = "indianred")), HLA = anno_barplot(annof$HLAI, axis = T, gp = gpar(fill = "indianred")), HLAII = anno_barplot(annof$HLAII, axis = T, gp = gpar(fill = "darkgoldenrod"),ylim = c(5,10)))
ha2 = HeatmapAnnotation(df=data.frame(as.character(feat_class)), height = unit(20, "mm"))
pl=data.frame(df_anno$antigen.group, annof$HLAI)
logicalVectors=lapply(unique(feat_class), function(cl)feat_class%in%cl)
names(logicalVectors)=unique(feat_class)
rownames(dat_plot)=gsub("-.*.|_IN_CANCER_HOMO_SAPIENS-WIKIPW", "", rownames(dat_plot))
pdf("FigS6G_CGA_GSVA_GSE16716_GSE24080.pdf", height = 8, width = 8)
Heatmap(dat_plot, bottom_annotation = ha,use_raster = F, top_annotation=HeatmapAnnotation("# CGA"=anno_barplot(data.frame(expressed_testis_num2), height = unit(10, "mm"))), cluster_columns = F, cluster_rows = F, row_names_side = "right", column_names_gp = gpar(fontsize = 8), row_names_gp = gpar(fontsize = 6), name="GSVA\nScore", col=colorRamp2(c(-0.5,0,0.5), c("grey50", "white", "red")), show_column_names = F, width = unit(40, "mm"), height = unit(1.25*20, "mm"))
Heatmap(t(annof$HLAII), cluster_columns = F, cluster_rows = F, row_names_side = "right", column_names_gp = gpar(fontsize = 8), row_names_gp = gpar(fontsize = 6), name="HLAII\nScore", show_column_names = F, width = unit(40, "mm"), height = unit(1.25*1, "mm"))
a=t(scale(annof$HLAII))
a[a>2]=2
a[a<(-2)]=-2
Heatmap(a, cluster_columns = F, cluster_rows = F, row_names_side = "right", column_names_gp = gpar(fontsize = 8), row_names_gp = gpar(fontsize = 6), name="HLAII\nScore", show_column_names = F, width = unit(40, "mm"), height = unit(1.25*dim(a)[1], "mm"))
dev.off()
# verify HLA association
GSE=c("GSE16716,GSE24080")
# GSE=c("GSE19784")
# GSE=c("GSE15695,GSE21349", "GSE16716,GSE24080", "GSE17306", "GSE19784", "GSE2134")
annot2=annot[annot$GSE.identifier..experiment.%in%GSE,]
# gexp data
gexp=data[,colnames(data)%in%annot2$GSM.identifier..sample.]
t.df=t.df[order(t.df[,3]),]
# data:
genelist=t.df[t.df[,3]%in%"Cancer_Myeloma",1]
genelist=t.df[,1]
# rank patients by number of testis antigens expressed
# profiles
profile[profile==-1] = 0
profile2=profile[,colnames(profile)%in%annot2$GSM.identifier..sample.]
# take only high expressed into account
profile2[data.matrix(gexp)<5]=0
expressed_testis_num=colSums(profile2[rownames(profile2)%in%unique(t.df$gene),])
feat_class=expressed_testis_num
feat_class[expressed_testis_num==0]="0 CGA"
feat_class[expressed_testis_num>=1&expressed_testis_num<=4]="1-4 CGA"
feat_class[expressed_testis_num>=5&expressed_testis_num<=6]="5-6 CGA"
feat_class[expressed_testis_num>=7]=">7 CGA"
logicalVectors=lapply(unique(feat_class), function(cl)feat_class%in%cl)
names(logicalVectors)=unique(feat_class)
annof=data.frame("HLAI"=annot2$HLAIScore, "HLAII"=annot2$HLAIIScore)
genelist=c("HLAI", "HLAII")
p.all=lapply(genelist, plot.boxplot, logicalVectors = logicalVectors, data = t(annof), order = T,spread = F)
TestGeneWilcox("HLAII", data = t(annof), logicalVectors = logicalVectors, logicalVector_normals = logicalVectors, ALTERNATIVE = "less")
TestGeneWilcox("HLAII", data = t(annof), logicalVectors = logicalVectors, ALTERNATIVE = "less")
ggsave(plot = p.all[[2]], filename = "FigureS6G_HLAII_MM_hemap.pdf", width = unit(3.25, "cm"), height = unit(3, "cm"))