a b/Fig6D_CCLE_CGA_methylation.R
1
GIT_HOME="/research/users/ppolonen/git_home/"
2
source(file.path(GIT_HOME, "common_scripts/featurematrix/functions_generate_fm.R"))
3
source(file.path(GIT_HOME, "common_scripts/visualisation/plotting_functions.R"))
4
5
library(limma)
6
library(edgeR)
7
library(parallel)
8
library(gridExtra)
9
library(ComplexHeatmap)
10
11
setwd("/research/groups/sysgen/PROJECTS/HEMAP_IMMUNOLOGY/petri_work/HEMAP_IMMUNOLOGY/Published_data_figures")
12
13
# all CGAss:
14
t.df = read.delim("t.antigen_df.txt", stringsAsFactors=F, header=T)
15
16
# CCLE gexp data:
17
ccle=get(load("CCLE_RNASEQ_SYMBOL_COUNTS.Rdata"))
18
dge=DGEList(ccle)
19
# keep.exprs <- filterByExpr(dge) # these remove some of the CGA that are rarely expressed
20
# dge <- dge[keep.exprs,, keep.lib.sizes=FALSE]
21
dge <- calcNormFactors(dge, method="TMM")
22
ccle=voom(dge, plot=T)$E
23
24
# CCLE RPKM
25
ccle2=get(load("CCLE_RNASEQ_SYMBOL_RPKM.Rdata"))
26
27
# CCLE meth data:
28
ccle.meth=get(load("CCLE_combined_methylation.Rdata"))
29
30
# CCLE annot:
31
annot=read.delim("CCLE_sample_info_file_2012-10-18.txt", header=T, stringsAsFactors = F)
32
33
find=intersect(intersect(colnames(ccle.meth), intersect(annot$CCLE.name, colnames(ccle))), colnames(ccle2))
34
35
ccle.meth=ccle.meth[,match(find, colnames(ccle.meth))]
36
ccle=ccle[,match(find, colnames(ccle))]
37
annot=annot[match(find, annot$CCLE.name),]
38
ccle2=ccle2[,match(find ,colnames(ccle2))]
39
40
# compute HLA-scores:
41
dat_a3=2^ccle[rownames(ccle)%in%c("HLA-DMA",
42
                                  "HLA-DMB",
43
                                  "HLA-DPB1",
44
                                  "HLA-DRA",
45
                                  "HLA-DRB1"),]+1
46
47
gm3=log2(t(apply(dat_a3, 2, gm_mean)))
48
rownames(gm3)="HLAII_SCORE"
49
50
dat2=rbind(ccle, gm3)
51
52
annotv=colnames(d)[colnames(d)%in%colnames(ccle.meth)[ccle.meth["CIITA@chr16_10971271",]>0.2]]
53
54
55
annot$Hist.Subtype_hem[annot$Hist.Subtype1%in%"plasma_cell_myeloma"]="MM"
56
annot$Hist.Subtype_hem[annot$Hist.Subtype1%in%"mantle_cell_lymphoma"]="MCL"
57
annot$Hist.Subtype_hem[annot$Hist.Subtype1%in%"diffuse_large_B_cell_lymphoma"]="DLBCL"
58
annot$Hist.Subtype_hem[annot$Hist.Subtype1%in%"chronic_lymphocytic_leukaemia-small_lymphocytic_lymphoma"]="CLL"
59
annot$Hist.Subtype_hem[annot$Hist.Subtype1%in%"blast_phase_chronic_myeloid_leukaemia"]="CML"
60
annot$Hist.Subtype_hem[annot$Hist.Subtype1%in%"anaplastic_large_cell_lymphoma"]="ALCL"
61
annot$Hist.Subtype_hem[annot$Hist.Subtype1%in%"adult_T_cell_lymphoma-leukaemia"]="TCL"
62
annot$Hist.Subtype_hem[annot$Hist.Subtype1%in%"acute_myeloid_leukaemia"]="AML"
63
annot$Hist.Subtype_hem[annot$Hist.Subtype1%in%"acute_lymphoblastic_T_cell_leukaemia"]="T-ALL"
64
annot$Hist.Subtype_hem[annot$Hist.Subtype1%in%"acute_lymphoblastic_B_cell_leukaemia"]="pre-B-ALL"
65
annot$Hist.Subtype_hem[annot$Hist.Subtype1%in%"Hodgkin_lymphoma"]="CHL"
66
annot$Hist.Subtype_hem[annot$Hist.Subtype1%in%"Burkitt_lymphoma"]="BL"
67
annot$Hist.Subtype_hem[annot$Hist.Subtype1%in%c("B_cell_lymphoma_unspecified")]="BCL, unspecified"
68
annot$Hist.Subtype_hem[annot$Hist.Subtype1%in%c("peripheral_T_cell_lymphoma_unspecified")]="TCL"
69
70
d=dat2
71
groups=factor(annot$Hist.Subtype_hem[grep("lymphoma|leukaemia|myeloma", annot$Hist.Subtype1)])
72
73
logicalVectors=lapply(unique(annot$Hist.Subtype_hem), function(g){annot$Hist.Subtype_hem%in%g})
74
names(logicalVectors)=unique(annot$Hist.Subtype_hem)
75
logicalVectors=logicalVectors[!is.na(names(logicalVectors))]
76
77
logicalVectors=logicalVectors[-6] # remove unspesified
78
79
a=names(sort(table(t.df[,3]), decreasing = T))
80
81
t.df=do.call(rbind, lapply(a, function(n){
82
  b=t.df[t.df[,3]%in%n,]
83
  b=b[order(b[,2], decreasing = T),]
84
}))
85
86
# data:
87
genelist=unique(t.df[,1])
88
89
ccle_antg=ccle[match(genelist,rownames(ccle)),]
90
antigens_expresssed=colSums(ccle2[match(genelist,rownames(ccle2)),]>0.5, na.rm = T)
91
92
disease=annot$Hist.Subtype_hem
93
94
go.thr=gsub("@.*", "", rownames(ccle.meth))
95
ccle.meth=ccle.meth[go.thr%in%genelist,]
96
go.thr=go.thr[go.thr%in%genelist]
97
  
98
# load("/research/groups/sysgen/PROJECTS/HEMAP_IMMUNOLOGY/data/CCLE_METHYLATION/probelists.Rdata")
99
100
highest_anticor=unlist(lapply(unique(go.thr), function(g){
101
  
102
  if(!g%in%go.thr)return(NULL)
103
  if(!g%in%rownames(ccle))return(NULL)
104
  
105
  m=ccle.meth[go.thr%in%g,grep("lymphoma|leukaemia|myeloma", annot$Hist.Subtype1)]
106
  cc=as.numeric(ccle[rownames(ccle)%in%g,grep("lymphoma|leukaemia|myeloma", annot$Hist.Subtype1)])
107
  
108
  if(all(rowSums(is.na(m))>140))return(NULL)
109
  
110
  # remove probes that are not found in most datasets:
111
  m=m[!rowSums(!is.na(m))<20,]
112
  
113
  if(dim(m)[1]==0)return(NULL)
114
  
115
  res=cor(cc,t(m), use="pairwise.complete.obs", method = "spearman")
116
  
117
  # rm poor correlations:
118
  res=res[res<(-0.2)]
119
  
120
  if(!length(res))return(NULL)
121
  
122
  anticor=rownames(m[which.min(res),])
123
}))
124
125
126
# data frame
127
df_anno=data.frame("Disease"=disease, "Number_Antigens_expressed"=as.integer(antigens_expresssed))
128
129
# sort based on antigens expressed and disease
130
sample_order=order(match(annot$Hist.Subtype_hem, c("T-ALL", "pre-B-ALL", "AML", "CML", "CLL","MM","BL","CHL","DLBCL","MCL","BCL, unspecified","TCL","ALCL")), antigens_expresssed)
131
132
df_anno=df_anno[sample_order,]
133
134
ccle_antg=ccle_antg[,sample_order[!is.na(df_anno$Disease)]]
135
df_anno=df_anno[!is.na(df_anno$Disease),]
136
137
ccle_antg_meth=ccle.meth[,match(colnames(ccle_antg), colnames(ccle.meth))]
138
139
cname=colnames(ccle_antg)
140
colnames(ccle_antg)=gsub("_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE","", colnames(ccle_antg))
141
colnames(ccle_antg_meth)=gsub("_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE","", colnames(ccle_antg_meth))
142
143
ccle_antg_meth=ccle_antg_meth[rownames(ccle_antg_meth)%in%highest_anticor,]
144
ccle_antg_meth=ccle_antg_meth[match(genelist, gsub("@.*", "", rownames(ccle_antg_meth))),]
145
146
ccle_antg_meth2=ccle_antg_meth
147
ccle_antg_meth2[ccle_antg_meth2<=0.2]=0 # this works best, tested
148
ccle_antg_meth2[ccle_antg_meth2>0.2]=1
149
150
ccle_antg_meth2=ccle_antg_meth2[!rowSums(is.na(ccle_antg_meth2))>100,]
151
152
sum_hypometh=colSums(ccle_antg_meth2==0, na.rm = T)
153
154
cor(df_anno$Number_Antigens_expressed, sum_hypometh, use="complete.obs")
155
156
df_anno=data.frame(df_anno, "Number_Antigens_methylated"=sum_hypometh)
157
158
ha = HeatmapAnnotation(df=df_anno[,1,drop=F], Number.Antigens = anno_barplot(df_anno$Number_Antigens_expressed, axis = T, gp = gpar(fill = "indianred")), Number.Me.Antigens = anno_barplot(sum_hypometh, axis = T, gp = gpar(fill = "darkgoldenrod")), height = unit(4, "inch"))
159
160
ccle_antg=t(scale(t(ccle_antg)))
161
ccle_antg[ccle_antg<(-4)]=-4
162
ccle_antg[ccle_antg>(4)]=4
163
164
Heatmap(ccle_antg_meth, top_annotation = ha, name = "TestisAntigens", cluster_columns = F, cluster_rows = F)
165
166
pdf("Fig6D_CCLE_ComplexHeatmap_testis_antigens.pdf", height = 15, width = 30)
167
Heatmap(ccle_antg, top_annotation = ha, name = "TestisAntigens", cluster_columns = F, cluster_rows = F, col=colorRamp2(c(-4,-2, 0,2, 4), c("#063061","#579ec9", "white", "#d45d4a", "#67001e")))
168
dev.off()