a b/Fig4D_TCGA_AML_complexheatmap_CIITA.R
1
GIT_HOME="/research/users/ppolonen/git_home/ImmunogenomicLandscape-BloodCancers/"
2
source(file.path(GIT_HOME, "common_scripts/featurematrix/functions_generate_fm.R"))
3
source(file.path(GIT_HOME, "common_scripts/visualisation/plotting_functions.R"))
4
source(file.path(GIT_HOME, "common_scripts/statistics/functions_statistics.R"))
5
6
library(data.table)
7
library(ggplot2)
8
library(reshape2)
9
library(RColorBrewer)
10
library(ggrepel)
11
library(ComplexHeatmap)
12
library(circlize)
13
14
setwd("/research/groups/sysgen/PROJECTS/HEMAP_IMMUNOLOGY/petri_work/HEMAP_IMMUNOLOGY/Published_data_figures")
15
16
fm=get(load("TCGA_AML_FM_DUFVA.Rdata"))
17
18
gexp=t(scale(t(data.matrix(fm[grepl("CIITA|HLA-D|Score", rownames(fm))&grepl("GEXP|N:SAMP:.*.Score", rownames(fm)),]))))
19
gexp=gexp[!grepl("HLA-DO", rownames(gexp)),]
20
21
parsemeth=read.delim("data_meth_parse.txt", header = F, stringsAsFactors = F)
22
parsemeth=parsemeth[grep("CIITA|HLA-D|CD274|CD40|RAET1E|PDCD1LG2|TCGA-", parsemeth[,3]),]
23
24
meth_annot=cbind(parsemeth[-1,1], parsemeth[-1,3], parsemeth[-1,4], parsemeth[-1,5])
25
meth=data.matrix(parsemeth[2:dim(parsemeth)[1],grep("\\.", parsemeth[4,])])
26
rownames(meth)=paste(meth_annot[,2],meth_annot[,1], sep="_")
27
colnames(meth)=substr(parsemeth[1,grep("\\.", parsemeth[4,])], 1, 15)
28
29
meth=meth[!rowSums(is.na(meth))>100,]
30
gexp=gexp[,colnames(gexp)%in%colnames(meth)]
31
fm=fm[,colnames(fm)%in%colnames(meth)]
32
33
meth=meth[,match(colnames(gexp), colnames(meth))]
34
colnames(meth)=colnames(gexp)
35
36
meth=meth[!grepl("HLA-DO|LG", rownames(meth)),]
37
38
meth=meth[order(rownames(meth)),]
39
40
ciita_gexp=data.matrix(fm[grepl("CIITA", rownames(fm))&grepl("GEXP", rownames(fm)),!is.na(gexp[1,])])
41
HLAII_gexp=data.matrix(fm[grepl("HLAIIScore", rownames(fm))&grepl("N:SAMP", rownames(fm)),!is.na(gexp[1,])])
42
43
dat=data.frame("CIITA"=as.numeric(ciita_gexp),"HLAIIscore"=as.numeric(HLAII_gexp))
44
dat$grp=factor("group")
45
myColors <- c("#E41A1C")
46
47
names(myColors) <- levels(dat$grp)
48
colScale <- scale_colour_manual(name = "grp",values = myColors)
49
50
main="CIITA - HLAII score"
51
52
p=ggplot(dat, aes(CIITA, HLAIIscore, colour = grp)) +
53
  geom_point(size=1) + colScale +
54
  theme_classic(base_size = 12) +
55
  scale_y_continuous(breaks = pretty(dat$HLAIIscore, n = 6))+
56
  theme(legend.position = "bottom", legend.direction = "horizontal",
57
        axis.line = element_line(size=1, colour = "black"),
58
        panel.grid.major = element_line(colour = "#d3d3d3"),
59
        panel.grid.minor = element_blank(),
60
        panel.border = element_blank(), panel.background = element_blank(),
61
        plot.title = element_text(size = 10, face = "bold"),
62
        text=element_text()
63
  )+
64
  theme(axis.text.x = element_text(colour="grey20",size=10,face="plain"),
65
        axis.text.y = element_text(colour="grey20",size=10,face="plain"),  
66
        axis.title.x = element_text(colour="grey20",size=10,face="plain"),
67
        axis.title.y = element_text(colour="grey20",size=10,face="plain"))  
68
69
ggsave(p, filename = "CIITA_HLAII_correlation.pdf", width = 2 , height = 2.3)
70
71
# get CEBPA annotations:
72
mut=read.delim("CEBPA_mutations.txt", header = T, stringsAsFactors = F)
73
cebpa=table(substr(mut$Tumor_Sample_Barcode, 1, 15))
74
cebpa_sil=colnames(gexp)%in%names(cebpa)[cebpa==1]
75
cebpa_dm=colnames(gexp)%in%names(cebpa)[cebpa>1]
76
77
# make complex heatmap:
78
# make logical vectors with fab and genetic subtype
79
is.na.s=as.logical(as.numeric(fm[rownames(fm)%in%"B:SAMP:I(Complex_Cytogenetics|GENETICS):::::",]))
80
81
complex=as.logical(as.numeric(fm[rownames(fm)%in%"B:SAMP:I(Complex_Cytogenetics|GENETICS):::::",!(is.na(is.na.s))]))
82
normal=as.logical(as.numeric(fm[rownames(fm)%in%"B:SAMP:I(Normal_Karyotype|GENETICS):::::",!(is.na(is.na.s))]))
83
mll=as.logical(as.numeric(colSums(data.matrix(fm[rownames(fm)%in%c("B:SAMP:I(MLL_translocation,_poor_risk|GENETICS):::::", "B:SAMP:I(MLL_translocation,_t(9;11)|GENETICS):::::"),!(is.na(is.na.s))]))>0))
84
runx=as.logical(as.numeric(fm[rownames(fm)%in%"B:SAMP:I(RUNX1-RUNX1T1|GENETICS):::::",!(is.na(is.na.s))]))
85
cbfb=as.logical(as.numeric(fm[rownames(fm)%in%"B:SAMP:I(CBFB-MYH11|GENETICS):::::",!(is.na(is.na.s))]))
86
pml=as.logical(as.numeric(fm[rownames(fm)%in%"B:SAMP:I(PML-RARA|GENETICS):::::",!(is.na(is.na.s))]))
87
88
# mut
89
FLT3=as.logical(as.numeric(fm[rownames(fm)%in%"B:GNAB:FLT3:chr13:28577411:28674729:-:y_n_somatic",!(is.na(is.na.s))]))
90
cebpa_sil=cebpa_sil[!(is.na(is.na.s))]
91
cebpa_dm=cebpa_dm[!(is.na(is.na.s))]
92
RUNX=as.logical(as.numeric(fm[rownames(fm)%in%"B:GNAB:RUNX1:chr21:36160098:37357047:-:y_n_somatic",!(is.na(is.na.s))]))
93
TP53=as.logical(as.numeric(fm[rownames(fm)%in%"B:GNAB:TP53:chr17:7565097:7590863:-:y_n_somatic",!(is.na(is.na.s))]))
94
NPM1=as.logical(as.numeric(fm[rownames(fm)%in%"B:GNAB:NPM1:chr5:170814708:170837888:+:y_n_somatic",!(is.na(is.na.s))]))
95
IDH1=as.logical(as.numeric(fm[rownames(fm)%in%"B:GNAB:IDH1:chr2:209100953:209119806:-:y_n_somatic",!(is.na(is.na.s))]))
96
IDH2=as.logical(as.numeric(fm[rownames(fm)%in%"B:GNAB:IDH2:chr15:90627212:90645708:-:y_n_somatic",!(is.na(is.na.s))]))
97
TET=as.logical(as.numeric(fm[rownames(fm)%in%"B:GNAB:TET1:chr10:70320117:70454239:+:y_n_somatic",!(is.na(is.na.s))]))
98
DNMT3A=as.logical(as.numeric(fm[rownames(fm)%in%"B:GNAB:DNMT3A:chr2:25455845:25565459:-:y_n_somatic",!(is.na(is.na.s))]))
99
FLT3=as.logical(as.numeric(fm[rownames(fm)%in%"B:GNAB:FLT3:chr13:28577411:28674729:-:y_n_somatic" ,!(is.na(is.na.s))]))
100
del5q7q=as.logical(as.numeric(fm[rownames(fm)%in%"B:CLIN:I(-5_or_del(5q)|FISH_test_component):::::" ,!(is.na(is.na.s))]))|as.logical(as.numeric(fm[rownames(fm)%in%"B:CLIN:I(-7_or_del(7q)|FISH_test_component):::::" ,!(is.na(is.na.s))]))
101
NRAS=as.logical(as.numeric(fm[rownames(fm)%in%"B:GNAB:NRAS:chr1:115247085:115259515:-:y_n_somatic" ,!(is.na(is.na.s))]))
102
KRAS=as.logical(as.numeric(fm[rownames(fm)%in%"B:GNAB:KRAS:chr12:25358180:25403863:-:y_n_somatic" ,!(is.na(is.na.s))]))
103
104
d.l=list(pml,cebpa_sil,cebpa_dm,IDH1,IDH2, TET, DNMT3A, mll,cbfb, runx, del5q7q, NPM1,NRAS,KRAS, FLT3, RUNX, TP53, complex, normal)
105
names(d.l)=c("PML_RARA","CEBPA","CEBPA_DM", "IDH1", "IDH2", "TET3", "DNMT3A", "MLL", "CBFB_MYH11", "RUNX1_RUNX1T1", "-5/7q", "NPM1", "NRAS", "KRAS", "FLT3", "RUNX1","TP53", "Complex_Karyotype", "Normal_Karyotype")
106
clin=do.call(cbind, d.l)*1
107
108
fab=t(fm[rownames(fm)%in%"C:CLIN:leukemia_french_american_british_morphology_code:::::",!(is.na(is.na.s)),drop=F])
109
110
fab=data.frame(fab, stringsAsFactors = F)
111
colnames(fab)=c("FAB")
112
113
# sort everything based on CIITA exp:
114
clin=clin[order(gexp[1,!(is.na(is.na.s))], decreasing = T),]
115
fab=fab[order(gexp[1,!(is.na(is.na.s))], decreasing = T),,drop=F]
116
meth=meth[,order(gexp[1,!(is.na(is.na.s))], decreasing = T)]
117
gexp=gexp[,order(gexp[1,!(is.na(is.na.s))], decreasing = T)]
118
119
ha2 = HeatmapAnnotation(df = data.frame(clin))
120
ha = HeatmapAnnotation(df = data.frame(fab[match(colnames(gexp), rownames(fab)),]))
121
122
gexp[gexp>2]=2
123
gexp[gexp<(-2)]=-2
124
125
rownames(gexp)=do.call(rbind, strsplit(rownames(gexp), ":"))[,3]
126
127
pdf("Figure4D_TCGA_AML_ComplexHeatmap_CIITA_gexp.pdf", height = 15, width = 30)
128
Heatmap(data.matrix(gexp[grep("CIITA|HLA-D", rownames(gexp)),]), bottom_annotation = ha, name = "CIITA", cluster_columns = F, cluster_rows = F, show_column_names = F)
129
dev.off()
130
131
corres=cor(t(meth), gexp[1,], use = "complete.obs")
132
133
res=p.adjust(apply(meth, 1, cor.test.p, y = gexp[1,], method="pearson"), method="bonferroni")
134
135
filt=names(res)[res<0.0001]
136
137
meth2=meth[match(filt, rownames(meth)),]
138
meth_annot2=meth_annot[match(filt, paste(meth_annot[,2],meth_annot[,1], sep="_")),]
139
140
meth2=meth2[order(meth_annot2[,2]),]
141
meth_annot2=meth_annot2[order(meth_annot2[,2]),]
142
143
# take only highest anti-correlated from significant:
144
probes=sapply(unique(meth_annot2[,2]), function(g){
145
  a=rownames(meth2[meth_annot2[,2]%in%g,])
146
  names(which.min(corres[rownames(corres)%in%a,]))
147
}
148
)
149
150
meth3=t(scale(t(meth2[rownames(meth2)%in%probes,])))
151
rownames(meth3)=unique(meth_annot2[,2])
152
153
pdf("Figure4D_TCGA_AML_ComplexHeatmap_CIITA_meth.pdf", height = 15, width = 30)
154
Heatmap(meth3, name = "CIITA", bottom_annotation = ha, cluster_columns = F, cluster_rows = F, col = colorRamp2(c(-0.5, 0, 1), c("brown","grey", "orange")), show_column_names = F)
155
dev.off()
156
157
# order based on mutual exclusivity and HLA groups
158
# fab[fab[,1]%in%"Not_Classified",1]=NA
159
# m=cbind((gexp[17,]<(-1))*1, clin,(fab[,1]%in%c("M3"))*1, (fab[,1]%in%c("M0_Undifferentiated", "M1", "M2"))*1, (fab[,1]%in%c("M4", "M5"))*1)
160
# colnames(m)[1]="HLAIIlow"
161
# colnames(m)[21]="FAB_M3"
162
# colnames(m)[22]="FAB_M0_M1_M2"
163
# colnames(m)[23]="FAB_M4_M5"
164
# 
165
# m=m[do.call(order, -as.data.frame(m)),,drop=F]
166
# sample_ord=rownames(m)
167
# 
168
# # order samples
169
# m=t(m[match(sample_ord, rownames(m)),])
170
# gexp2=gexp[,match(sample_ord, colnames(gexp))]
171
# meth4=meth3[,match(sample_ord, colnames(meth3))]
172
# 
173
# pdf("TCGA_AML_ComplexHeatmap_gexp.pdf", height = 15, width = 30)
174
# Heatmap(data.matrix(gexp2), top_annotation = HeatmapAnnotation(df = data.frame(fab[match(sample_ord, rownames(fab)),])), name = "CIITA", cluster_columns = F, cluster_rows = F, show_column_names = F)
175
# dev.off()
176
# 
177
# pdf("TCGA_AML_ComplexHeatmap_meth.pdf", height = 15, width = 30)
178
# Heatmap(meth4, name = "CIITA", cluster_columns = F, cluster_rows = F, col = colorRamp2(c(-0.5, 0, 1), c("brown","grey", "orange")), show_column_names = F)
179
# dev.off()
180
#
181
# source("/research/users/ppolonen/git_home/common_scripts/visualisation/oncoprint_memo.R")
182
# pdf("TCGA_AML_ComplexHeatmap_genetics_HLA.pdf", height = 15, width = 15)
183
# oncoPrint(m, sort=F)
184
# dev.off()