|
a |
|
b/Fig4D_TCGA_AML_complexheatmap_CIITA.R |
|
|
1 |
GIT_HOME="/research/users/ppolonen/git_home/ImmunogenomicLandscape-BloodCancers/" |
|
|
2 |
source(file.path(GIT_HOME, "common_scripts/featurematrix/functions_generate_fm.R")) |
|
|
3 |
source(file.path(GIT_HOME, "common_scripts/visualisation/plotting_functions.R")) |
|
|
4 |
source(file.path(GIT_HOME, "common_scripts/statistics/functions_statistics.R")) |
|
|
5 |
|
|
|
6 |
library(data.table) |
|
|
7 |
library(ggplot2) |
|
|
8 |
library(reshape2) |
|
|
9 |
library(RColorBrewer) |
|
|
10 |
library(ggrepel) |
|
|
11 |
library(ComplexHeatmap) |
|
|
12 |
library(circlize) |
|
|
13 |
|
|
|
14 |
setwd("/research/groups/sysgen/PROJECTS/HEMAP_IMMUNOLOGY/petri_work/HEMAP_IMMUNOLOGY/Published_data_figures") |
|
|
15 |
|
|
|
16 |
fm=get(load("TCGA_AML_FM_DUFVA.Rdata")) |
|
|
17 |
|
|
|
18 |
gexp=t(scale(t(data.matrix(fm[grepl("CIITA|HLA-D|Score", rownames(fm))&grepl("GEXP|N:SAMP:.*.Score", rownames(fm)),])))) |
|
|
19 |
gexp=gexp[!grepl("HLA-DO", rownames(gexp)),] |
|
|
20 |
|
|
|
21 |
parsemeth=read.delim("data_meth_parse.txt", header = F, stringsAsFactors = F) |
|
|
22 |
parsemeth=parsemeth[grep("CIITA|HLA-D|CD274|CD40|RAET1E|PDCD1LG2|TCGA-", parsemeth[,3]),] |
|
|
23 |
|
|
|
24 |
meth_annot=cbind(parsemeth[-1,1], parsemeth[-1,3], parsemeth[-1,4], parsemeth[-1,5]) |
|
|
25 |
meth=data.matrix(parsemeth[2:dim(parsemeth)[1],grep("\\.", parsemeth[4,])]) |
|
|
26 |
rownames(meth)=paste(meth_annot[,2],meth_annot[,1], sep="_") |
|
|
27 |
colnames(meth)=substr(parsemeth[1,grep("\\.", parsemeth[4,])], 1, 15) |
|
|
28 |
|
|
|
29 |
meth=meth[!rowSums(is.na(meth))>100,] |
|
|
30 |
gexp=gexp[,colnames(gexp)%in%colnames(meth)] |
|
|
31 |
fm=fm[,colnames(fm)%in%colnames(meth)] |
|
|
32 |
|
|
|
33 |
meth=meth[,match(colnames(gexp), colnames(meth))] |
|
|
34 |
colnames(meth)=colnames(gexp) |
|
|
35 |
|
|
|
36 |
meth=meth[!grepl("HLA-DO|LG", rownames(meth)),] |
|
|
37 |
|
|
|
38 |
meth=meth[order(rownames(meth)),] |
|
|
39 |
|
|
|
40 |
ciita_gexp=data.matrix(fm[grepl("CIITA", rownames(fm))&grepl("GEXP", rownames(fm)),!is.na(gexp[1,])]) |
|
|
41 |
HLAII_gexp=data.matrix(fm[grepl("HLAIIScore", rownames(fm))&grepl("N:SAMP", rownames(fm)),!is.na(gexp[1,])]) |
|
|
42 |
|
|
|
43 |
dat=data.frame("CIITA"=as.numeric(ciita_gexp),"HLAIIscore"=as.numeric(HLAII_gexp)) |
|
|
44 |
dat$grp=factor("group") |
|
|
45 |
myColors <- c("#E41A1C") |
|
|
46 |
|
|
|
47 |
names(myColors) <- levels(dat$grp) |
|
|
48 |
colScale <- scale_colour_manual(name = "grp",values = myColors) |
|
|
49 |
|
|
|
50 |
main="CIITA - HLAII score" |
|
|
51 |
|
|
|
52 |
p=ggplot(dat, aes(CIITA, HLAIIscore, colour = grp)) + |
|
|
53 |
geom_point(size=1) + colScale + |
|
|
54 |
theme_classic(base_size = 12) + |
|
|
55 |
scale_y_continuous(breaks = pretty(dat$HLAIIscore, n = 6))+ |
|
|
56 |
theme(legend.position = "bottom", legend.direction = "horizontal", |
|
|
57 |
axis.line = element_line(size=1, colour = "black"), |
|
|
58 |
panel.grid.major = element_line(colour = "#d3d3d3"), |
|
|
59 |
panel.grid.minor = element_blank(), |
|
|
60 |
panel.border = element_blank(), panel.background = element_blank(), |
|
|
61 |
plot.title = element_text(size = 10, face = "bold"), |
|
|
62 |
text=element_text() |
|
|
63 |
)+ |
|
|
64 |
theme(axis.text.x = element_text(colour="grey20",size=10,face="plain"), |
|
|
65 |
axis.text.y = element_text(colour="grey20",size=10,face="plain"), |
|
|
66 |
axis.title.x = element_text(colour="grey20",size=10,face="plain"), |
|
|
67 |
axis.title.y = element_text(colour="grey20",size=10,face="plain")) |
|
|
68 |
|
|
|
69 |
ggsave(p, filename = "CIITA_HLAII_correlation.pdf", width = 2 , height = 2.3) |
|
|
70 |
|
|
|
71 |
# get CEBPA annotations: |
|
|
72 |
mut=read.delim("CEBPA_mutations.txt", header = T, stringsAsFactors = F) |
|
|
73 |
cebpa=table(substr(mut$Tumor_Sample_Barcode, 1, 15)) |
|
|
74 |
cebpa_sil=colnames(gexp)%in%names(cebpa)[cebpa==1] |
|
|
75 |
cebpa_dm=colnames(gexp)%in%names(cebpa)[cebpa>1] |
|
|
76 |
|
|
|
77 |
# make complex heatmap: |
|
|
78 |
# make logical vectors with fab and genetic subtype |
|
|
79 |
is.na.s=as.logical(as.numeric(fm[rownames(fm)%in%"B:SAMP:I(Complex_Cytogenetics|GENETICS):::::",])) |
|
|
80 |
|
|
|
81 |
complex=as.logical(as.numeric(fm[rownames(fm)%in%"B:SAMP:I(Complex_Cytogenetics|GENETICS):::::",!(is.na(is.na.s))])) |
|
|
82 |
normal=as.logical(as.numeric(fm[rownames(fm)%in%"B:SAMP:I(Normal_Karyotype|GENETICS):::::",!(is.na(is.na.s))])) |
|
|
83 |
mll=as.logical(as.numeric(colSums(data.matrix(fm[rownames(fm)%in%c("B:SAMP:I(MLL_translocation,_poor_risk|GENETICS):::::", "B:SAMP:I(MLL_translocation,_t(9;11)|GENETICS):::::"),!(is.na(is.na.s))]))>0)) |
|
|
84 |
runx=as.logical(as.numeric(fm[rownames(fm)%in%"B:SAMP:I(RUNX1-RUNX1T1|GENETICS):::::",!(is.na(is.na.s))])) |
|
|
85 |
cbfb=as.logical(as.numeric(fm[rownames(fm)%in%"B:SAMP:I(CBFB-MYH11|GENETICS):::::",!(is.na(is.na.s))])) |
|
|
86 |
pml=as.logical(as.numeric(fm[rownames(fm)%in%"B:SAMP:I(PML-RARA|GENETICS):::::",!(is.na(is.na.s))])) |
|
|
87 |
|
|
|
88 |
# mut |
|
|
89 |
FLT3=as.logical(as.numeric(fm[rownames(fm)%in%"B:GNAB:FLT3:chr13:28577411:28674729:-:y_n_somatic",!(is.na(is.na.s))])) |
|
|
90 |
cebpa_sil=cebpa_sil[!(is.na(is.na.s))] |
|
|
91 |
cebpa_dm=cebpa_dm[!(is.na(is.na.s))] |
|
|
92 |
RUNX=as.logical(as.numeric(fm[rownames(fm)%in%"B:GNAB:RUNX1:chr21:36160098:37357047:-:y_n_somatic",!(is.na(is.na.s))])) |
|
|
93 |
TP53=as.logical(as.numeric(fm[rownames(fm)%in%"B:GNAB:TP53:chr17:7565097:7590863:-:y_n_somatic",!(is.na(is.na.s))])) |
|
|
94 |
NPM1=as.logical(as.numeric(fm[rownames(fm)%in%"B:GNAB:NPM1:chr5:170814708:170837888:+:y_n_somatic",!(is.na(is.na.s))])) |
|
|
95 |
IDH1=as.logical(as.numeric(fm[rownames(fm)%in%"B:GNAB:IDH1:chr2:209100953:209119806:-:y_n_somatic",!(is.na(is.na.s))])) |
|
|
96 |
IDH2=as.logical(as.numeric(fm[rownames(fm)%in%"B:GNAB:IDH2:chr15:90627212:90645708:-:y_n_somatic",!(is.na(is.na.s))])) |
|
|
97 |
TET=as.logical(as.numeric(fm[rownames(fm)%in%"B:GNAB:TET1:chr10:70320117:70454239:+:y_n_somatic",!(is.na(is.na.s))])) |
|
|
98 |
DNMT3A=as.logical(as.numeric(fm[rownames(fm)%in%"B:GNAB:DNMT3A:chr2:25455845:25565459:-:y_n_somatic",!(is.na(is.na.s))])) |
|
|
99 |
FLT3=as.logical(as.numeric(fm[rownames(fm)%in%"B:GNAB:FLT3:chr13:28577411:28674729:-:y_n_somatic" ,!(is.na(is.na.s))])) |
|
|
100 |
del5q7q=as.logical(as.numeric(fm[rownames(fm)%in%"B:CLIN:I(-5_or_del(5q)|FISH_test_component):::::" ,!(is.na(is.na.s))]))|as.logical(as.numeric(fm[rownames(fm)%in%"B:CLIN:I(-7_or_del(7q)|FISH_test_component):::::" ,!(is.na(is.na.s))])) |
|
|
101 |
NRAS=as.logical(as.numeric(fm[rownames(fm)%in%"B:GNAB:NRAS:chr1:115247085:115259515:-:y_n_somatic" ,!(is.na(is.na.s))])) |
|
|
102 |
KRAS=as.logical(as.numeric(fm[rownames(fm)%in%"B:GNAB:KRAS:chr12:25358180:25403863:-:y_n_somatic" ,!(is.na(is.na.s))])) |
|
|
103 |
|
|
|
104 |
d.l=list(pml,cebpa_sil,cebpa_dm,IDH1,IDH2, TET, DNMT3A, mll,cbfb, runx, del5q7q, NPM1,NRAS,KRAS, FLT3, RUNX, TP53, complex, normal) |
|
|
105 |
names(d.l)=c("PML_RARA","CEBPA","CEBPA_DM", "IDH1", "IDH2", "TET3", "DNMT3A", "MLL", "CBFB_MYH11", "RUNX1_RUNX1T1", "-5/7q", "NPM1", "NRAS", "KRAS", "FLT3", "RUNX1","TP53", "Complex_Karyotype", "Normal_Karyotype") |
|
|
106 |
clin=do.call(cbind, d.l)*1 |
|
|
107 |
|
|
|
108 |
fab=t(fm[rownames(fm)%in%"C:CLIN:leukemia_french_american_british_morphology_code:::::",!(is.na(is.na.s)),drop=F]) |
|
|
109 |
|
|
|
110 |
fab=data.frame(fab, stringsAsFactors = F) |
|
|
111 |
colnames(fab)=c("FAB") |
|
|
112 |
|
|
|
113 |
# sort everything based on CIITA exp: |
|
|
114 |
clin=clin[order(gexp[1,!(is.na(is.na.s))], decreasing = T),] |
|
|
115 |
fab=fab[order(gexp[1,!(is.na(is.na.s))], decreasing = T),,drop=F] |
|
|
116 |
meth=meth[,order(gexp[1,!(is.na(is.na.s))], decreasing = T)] |
|
|
117 |
gexp=gexp[,order(gexp[1,!(is.na(is.na.s))], decreasing = T)] |
|
|
118 |
|
|
|
119 |
ha2 = HeatmapAnnotation(df = data.frame(clin)) |
|
|
120 |
ha = HeatmapAnnotation(df = data.frame(fab[match(colnames(gexp), rownames(fab)),])) |
|
|
121 |
|
|
|
122 |
gexp[gexp>2]=2 |
|
|
123 |
gexp[gexp<(-2)]=-2 |
|
|
124 |
|
|
|
125 |
rownames(gexp)=do.call(rbind, strsplit(rownames(gexp), ":"))[,3] |
|
|
126 |
|
|
|
127 |
pdf("Figure4D_TCGA_AML_ComplexHeatmap_CIITA_gexp.pdf", height = 15, width = 30) |
|
|
128 |
Heatmap(data.matrix(gexp[grep("CIITA|HLA-D", rownames(gexp)),]), bottom_annotation = ha, name = "CIITA", cluster_columns = F, cluster_rows = F, show_column_names = F) |
|
|
129 |
dev.off() |
|
|
130 |
|
|
|
131 |
corres=cor(t(meth), gexp[1,], use = "complete.obs") |
|
|
132 |
|
|
|
133 |
res=p.adjust(apply(meth, 1, cor.test.p, y = gexp[1,], method="pearson"), method="bonferroni") |
|
|
134 |
|
|
|
135 |
filt=names(res)[res<0.0001] |
|
|
136 |
|
|
|
137 |
meth2=meth[match(filt, rownames(meth)),] |
|
|
138 |
meth_annot2=meth_annot[match(filt, paste(meth_annot[,2],meth_annot[,1], sep="_")),] |
|
|
139 |
|
|
|
140 |
meth2=meth2[order(meth_annot2[,2]),] |
|
|
141 |
meth_annot2=meth_annot2[order(meth_annot2[,2]),] |
|
|
142 |
|
|
|
143 |
# take only highest anti-correlated from significant: |
|
|
144 |
probes=sapply(unique(meth_annot2[,2]), function(g){ |
|
|
145 |
a=rownames(meth2[meth_annot2[,2]%in%g,]) |
|
|
146 |
names(which.min(corres[rownames(corres)%in%a,])) |
|
|
147 |
} |
|
|
148 |
) |
|
|
149 |
|
|
|
150 |
meth3=t(scale(t(meth2[rownames(meth2)%in%probes,]))) |
|
|
151 |
rownames(meth3)=unique(meth_annot2[,2]) |
|
|
152 |
|
|
|
153 |
pdf("Figure4D_TCGA_AML_ComplexHeatmap_CIITA_meth.pdf", height = 15, width = 30) |
|
|
154 |
Heatmap(meth3, name = "CIITA", bottom_annotation = ha, cluster_columns = F, cluster_rows = F, col = colorRamp2(c(-0.5, 0, 1), c("brown","grey", "orange")), show_column_names = F) |
|
|
155 |
dev.off() |
|
|
156 |
|
|
|
157 |
# order based on mutual exclusivity and HLA groups |
|
|
158 |
# fab[fab[,1]%in%"Not_Classified",1]=NA |
|
|
159 |
# m=cbind((gexp[17,]<(-1))*1, clin,(fab[,1]%in%c("M3"))*1, (fab[,1]%in%c("M0_Undifferentiated", "M1", "M2"))*1, (fab[,1]%in%c("M4", "M5"))*1) |
|
|
160 |
# colnames(m)[1]="HLAIIlow" |
|
|
161 |
# colnames(m)[21]="FAB_M3" |
|
|
162 |
# colnames(m)[22]="FAB_M0_M1_M2" |
|
|
163 |
# colnames(m)[23]="FAB_M4_M5" |
|
|
164 |
# |
|
|
165 |
# m=m[do.call(order, -as.data.frame(m)),,drop=F] |
|
|
166 |
# sample_ord=rownames(m) |
|
|
167 |
# |
|
|
168 |
# # order samples |
|
|
169 |
# m=t(m[match(sample_ord, rownames(m)),]) |
|
|
170 |
# gexp2=gexp[,match(sample_ord, colnames(gexp))] |
|
|
171 |
# meth4=meth3[,match(sample_ord, colnames(meth3))] |
|
|
172 |
# |
|
|
173 |
# pdf("TCGA_AML_ComplexHeatmap_gexp.pdf", height = 15, width = 30) |
|
|
174 |
# Heatmap(data.matrix(gexp2), top_annotation = HeatmapAnnotation(df = data.frame(fab[match(sample_ord, rownames(fab)),])), name = "CIITA", cluster_columns = F, cluster_rows = F, show_column_names = F) |
|
|
175 |
# dev.off() |
|
|
176 |
# |
|
|
177 |
# pdf("TCGA_AML_ComplexHeatmap_meth.pdf", height = 15, width = 30) |
|
|
178 |
# Heatmap(meth4, name = "CIITA", cluster_columns = F, cluster_rows = F, col = colorRamp2(c(-0.5, 0, 1), c("brown","grey", "orange")), show_column_names = F) |
|
|
179 |
# dev.off() |
|
|
180 |
# |
|
|
181 |
# source("/research/users/ppolonen/git_home/common_scripts/visualisation/oncoprint_memo.R") |
|
|
182 |
# pdf("TCGA_AML_ComplexHeatmap_genetics_HLA.pdf", height = 15, width = 15) |
|
|
183 |
# oncoPrint(m, sort=F) |
|
|
184 |
# dev.off() |