Diff of /FigS6G_CGA_GSEA_hemapMM.R [000000] .. [8e0848]

Switch to unified view

a b/FigS6G_CGA_GSEA_hemapMM.R
1
GIT_HOME="/research/users/ppolonen/git_home/ImmunogenomicLandscape-BloodCancers/"
2
source(file.path(GIT_HOME, "common_scripts/visualisation/plotting_functions.R"))
3
source(file.path(GIT_HOME, "common_scripts/statistics/functions_statistics.R"))
4
source(file.path(GIT_HOME, "common_scripts/pathway_analysis/functions.GSEA.R"))
5
6
library(GSVA)
7
library(parallel)
8
library(ComplexHeatmap)
9
10
setwd("/research/groups/sysgen/PROJECTS/HEMAP_IMMUNOLOGY/petri_work/HEMAP_IMMUNOLOGY/Published_data_figures")
11
12
t.df = read.delim("t.antigen_df.txt", stringsAsFactors=F, header=T)
13
t.df=t.df[order(t.df[,3]),]
14
15
annot = get(load("Hemap_immunology_Annotations.Rdata"))
16
17
# gexp data
18
data=t(get(load("data9544_with_gene_symbols.RData")))
19
data=data[,colnames(data)%in%annot$GSM.identifier..sample.]
20
  
21
# data:
22
genelist=t.df[t.df[,3]%in%"Cancer_Myeloma",1]
23
24
# take MM
25
GSE=c("GSE16716,GSE24080")
26
# GSE=c("GSE19784")
27
28
gexp=data[,annot$GSE.identifier..experiment.%in%GSE]
29
annot2=annot[annot$GSE.identifier..experiment.%in%GSE,]
30
31
# GSE="All_samples"
32
# gexp=data[,annot$colorClass=="MM"]
33
# annot2=annot[annot$colorClass=="MM",]
34
35
36
# rank patients by number of testis antigens expressed
37
# profiles
38
profile=get(load("mixtureM_profile.Rdata"))
39
profile[profile==-1] = 0
40
profile2=profile[,colnames(profile)%in%annot2$GSM.identifier..sample.]
41
42
# take only high expressed into account
43
profile2[data.matrix(gexp)<5]=0
44
45
expressed_testis_num=colSums(profile2[rownames(profile2)%in%unique(t.df$gene),])
46
feat_class=expressed_testis_num
47
feat_class[expressed_testis_num==0]="0_Antigens"
48
feat_class[expressed_testis_num>=1&expressed_testis_num<=4]="1to4_Antigens"
49
feat_class[expressed_testis_num>=5&expressed_testis_num<=6]="5to6_Antigens"
50
feat_class[expressed_testis_num>=7]="over7_Antigens"
51
52
GENESETS="Combined_pathway_signatures_2017_filtered_robust.gmt"
53
WD=file.path(getwd(), "GSEA")
54
OUTDIR=file.path(getwd(), "GSEA")
55
56
# # run GSEA:
57
# command=run.GSEA(data=gexp, cls.vector = as.numeric(expressed_testis_num), datatype = "N", GENESETS = GENESETS, dataname = "MM", clsname = "Testis_antigen_groups", WD=WD, OUTDIR=OUTDIR)
58
# try(system(command))
59
# 
60
# command=run.GSEA(data=gexp, cls.vector = as.numeric(annot2$HLAIScore), datatype = "N", GENESETS = GENESETS, dataname = paste0("MM_", GSE[1]), clsname = "HLAI", WD=WD, OUTDIR=OUTDIR)
61
# try(system(command))
62
# 
63
# command=run.GSEA(data=gexp, cls.vector = as.numeric(annot2$HLAIIScore), datatype = "N", GENESETS = GENESETS, dataname = paste0("MM_", GSE[1]), clsname = "HLAII", WD=WD, OUTDIR=OUTDIR)
64
# try(system(command))
65
66
a=read.delim(file.path(WD, "MM_Testis_antigen_groups_continuous_phenotype.Gsea.1549532122926/gsea_report_for_feat_neg_1549532122926.xls"), stringsAsFactors = F)
67
a=read.delim(file.path(WD, "MM_Testis_antigen_groups_continuous_phenotype.Gsea.1549532122926/gsea_report_for_feat_pos_1549532122926.xls"), stringsAsFactors = F)
68
69
# # GSE19784
70
# a=read.delim("/research/groups/sysgen/PROJECTS/HEMAP_IMMUNOLOGY/petri_work/HEMAP_IMMUNOLOGY/MM_Testis_antigen_groups_continuous_phenotype.Gsea.1549542936287/gsea_report_for_feat_pos_1549542936287.xls", stringsAsFactors = F)
71
# b=read.delim("/research/groups/sysgen/PROJECTS/HEMAP_IMMUNOLOGY/petri_work/HEMAP_IMMUNOLOGY/MM_Testis_antigen_groups_continuous_phenotype.Gsea.1549542936287/gsea_report_for_feat_neg_1549542936287.xls", stringsAsFactors = F)
72
 
73
c=rbind(a,b)
74
75
# get GSVA visualization for the pathways
76
# Geneset list
77
Onc.pathways=read.delim(GENESETS, stringsAsFactors = FALSE, header=F, col.names = paste("V",1:max(count.fields(GENESETS, sep = '\t'), na.rm = T)), fill = TRUE)
78
79
# Make list
80
listA=mclapply(1:length(Onc.pathways[,1]), function(i){A=as.character(Onc.pathways[i,3:length(Onc.pathways),])
81
B=A[!A==""&!A=="NA"]}, mc.cores=6)
82
83
names(listA) <- Onc.pathways[,1]
84
85
viz_scores=gsva(expr = data.matrix(gexp), gset.idx.list = listA, parallel.sz=8, tau=0.25)
86
87
# make a complex heatmap
88
significant=c(c[c$FWER.p.val<0.001,1])[1:10]
89
dat_plot=viz_scores[match(significant, toupper(rownames(viz_scores))),]
90
91
annof=data.frame("HLAI"=annot2$HLAIScore, "HLAII"=annot2$HLAIIScore)
92
93
df_anno=data.frame("cytogeneticAbnormalities"=as.double(annot2$MM_CYTOGENETIC_ABNORMALITIES),"age"=annot2$AGE, "iss"=as.character(annot2$MM_ISS), "iss1"=annot2$MM_ISS==1,"iss2"=annot2$MM_ISS==2,"iss3"=annot2$MM_ISS==3, "OS"=annot2$OS_Time, "PFS"=annot2$PFS_Time, "number.T.Antigenes"=as.numeric(expressed_testis_num), "antigen.group"=feat_class)
94
95
expressed_testis_num2=expressed_testis_num[order(expressed_testis_num)]
96
97
df_anno=df_anno[match(names(expressed_testis_num2), rownames(df_anno)),]
98
dat_plot=dat_plot[,match(names(expressed_testis_num2), colnames(dat_plot))]
99
annof=annof[match(names(expressed_testis_num2), colnames(gexp)),]
100
feat_class=feat_class[match(names(expressed_testis_num2), names(feat_class))]
101
gexp=gexp[,match(names(expressed_testis_num2), colnames(gexp))]
102
103
ha = HeatmapAnnotation(df=df_anno,NUM_TESTIS = anno_barplot(expressed_testis_num2, axis = T, gp = gpar(fill = "indianred")), HLA = anno_barplot(annof$HLAI, axis = T, gp = gpar(fill = "indianred")), HLAII = anno_barplot(annof$HLAII, axis = T, gp = gpar(fill = "darkgoldenrod"),ylim = c(5,10)))
104
ha2 = HeatmapAnnotation(df=data.frame(as.character(feat_class)), height = unit(20, "mm"))
105
106
pl=data.frame(df_anno$antigen.group, annof$HLAI)
107
108
logicalVectors=lapply(unique(feat_class), function(cl)feat_class%in%cl)
109
names(logicalVectors)=unique(feat_class)
110
111
rownames(dat_plot)=gsub("-.*.|_IN_CANCER_HOMO_SAPIENS-WIKIPW", "", rownames(dat_plot))
112
113
pdf("FigS6G_CGA_GSVA_GSE16716_GSE24080.pdf", height = 8, width = 8)
114
Heatmap(dat_plot, bottom_annotation = ha,use_raster = F, top_annotation=HeatmapAnnotation("# CGA"=anno_barplot(data.frame(expressed_testis_num2), height = unit(10, "mm"))), cluster_columns = F, cluster_rows = F, row_names_side = "right", column_names_gp = gpar(fontsize = 8), row_names_gp = gpar(fontsize = 6), name="GSVA\nScore", col=colorRamp2(c(-0.5,0,0.5), c("grey50", "white", "red")), show_column_names = F, width = unit(40, "mm"), height = unit(1.25*20, "mm"))
115
Heatmap(t(annof$HLAII), cluster_columns = F, cluster_rows = F, row_names_side = "right", column_names_gp = gpar(fontsize = 8), row_names_gp = gpar(fontsize = 6), name="HLAII\nScore", show_column_names = F, width = unit(40, "mm"), height = unit(1.25*1, "mm"))
116
117
a=t(scale(annof$HLAII))
118
a[a>2]=2
119
a[a<(-2)]=-2
120
121
Heatmap(a, cluster_columns = F, cluster_rows = F, row_names_side = "right", column_names_gp = gpar(fontsize = 8), row_names_gp = gpar(fontsize = 6), name="HLAII\nScore", show_column_names = F, width = unit(40, "mm"), height = unit(1.25*dim(a)[1], "mm"))
122
dev.off()
123
124
125
# verify HLA association
126
127
GSE=c("GSE16716,GSE24080")
128
# GSE=c("GSE19784")
129
# GSE=c("GSE15695,GSE21349", "GSE16716,GSE24080", "GSE17306", "GSE19784", "GSE2134")
130
131
annot2=annot[annot$GSE.identifier..experiment.%in%GSE,]
132
133
# gexp data
134
gexp=data[,colnames(data)%in%annot2$GSM.identifier..sample.]
135
136
t.df=t.df[order(t.df[,3]),]
137
138
# data:
139
genelist=t.df[t.df[,3]%in%"Cancer_Myeloma",1]
140
genelist=t.df[,1]
141
142
# rank patients by number of testis antigens expressed
143
# profiles
144
profile[profile==-1] = 0
145
profile2=profile[,colnames(profile)%in%annot2$GSM.identifier..sample.]
146
147
# take only high expressed into account
148
profile2[data.matrix(gexp)<5]=0
149
150
expressed_testis_num=colSums(profile2[rownames(profile2)%in%unique(t.df$gene),])
151
feat_class=expressed_testis_num
152
feat_class[expressed_testis_num==0]="0 CGA"
153
feat_class[expressed_testis_num>=1&expressed_testis_num<=4]="1-4 CGA"
154
feat_class[expressed_testis_num>=5&expressed_testis_num<=6]="5-6 CGA"
155
feat_class[expressed_testis_num>=7]=">7 CGA"
156
157
logicalVectors=lapply(unique(feat_class), function(cl)feat_class%in%cl)
158
names(logicalVectors)=unique(feat_class)
159
160
annof=data.frame("HLAI"=annot2$HLAIScore, "HLAII"=annot2$HLAIIScore)
161
162
genelist=c("HLAI", "HLAII")
163
p.all=lapply(genelist, plot.boxplot, logicalVectors = logicalVectors, data = t(annof), order = T,spread = F)
164
165
TestGeneWilcox("HLAII", data = t(annof), logicalVectors = logicalVectors, logicalVector_normals = logicalVectors, ALTERNATIVE = "less")
166
TestGeneWilcox("HLAII", data = t(annof), logicalVectors = logicalVectors, ALTERNATIVE = "less")
167
168
ggsave(plot = p.all[[2]], filename = "FigureS6G_HLAII_MM_hemap.pdf", width = unit(3.25, "cm"), height = unit(3, "cm"))