|
a |
|
b/Fig6D_CCLE_CGA_methylation.R |
|
|
1 |
GIT_HOME="/research/users/ppolonen/git_home/" |
|
|
2 |
source(file.path(GIT_HOME, "common_scripts/featurematrix/functions_generate_fm.R")) |
|
|
3 |
source(file.path(GIT_HOME, "common_scripts/visualisation/plotting_functions.R")) |
|
|
4 |
|
|
|
5 |
library(limma) |
|
|
6 |
library(edgeR) |
|
|
7 |
library(parallel) |
|
|
8 |
library(gridExtra) |
|
|
9 |
library(ComplexHeatmap) |
|
|
10 |
|
|
|
11 |
setwd("/research/groups/sysgen/PROJECTS/HEMAP_IMMUNOLOGY/petri_work/HEMAP_IMMUNOLOGY/Published_data_figures") |
|
|
12 |
|
|
|
13 |
# all CGAss: |
|
|
14 |
t.df = read.delim("t.antigen_df.txt", stringsAsFactors=F, header=T) |
|
|
15 |
|
|
|
16 |
# CCLE gexp data: |
|
|
17 |
ccle=get(load("CCLE_RNASEQ_SYMBOL_COUNTS.Rdata")) |
|
|
18 |
dge=DGEList(ccle) |
|
|
19 |
# keep.exprs <- filterByExpr(dge) # these remove some of the CGA that are rarely expressed |
|
|
20 |
# dge <- dge[keep.exprs,, keep.lib.sizes=FALSE] |
|
|
21 |
dge <- calcNormFactors(dge, method="TMM") |
|
|
22 |
ccle=voom(dge, plot=T)$E |
|
|
23 |
|
|
|
24 |
# CCLE RPKM |
|
|
25 |
ccle2=get(load("CCLE_RNASEQ_SYMBOL_RPKM.Rdata")) |
|
|
26 |
|
|
|
27 |
# CCLE meth data: |
|
|
28 |
ccle.meth=get(load("CCLE_combined_methylation.Rdata")) |
|
|
29 |
|
|
|
30 |
# CCLE annot: |
|
|
31 |
annot=read.delim("CCLE_sample_info_file_2012-10-18.txt", header=T, stringsAsFactors = F) |
|
|
32 |
|
|
|
33 |
find=intersect(intersect(colnames(ccle.meth), intersect(annot$CCLE.name, colnames(ccle))), colnames(ccle2)) |
|
|
34 |
|
|
|
35 |
ccle.meth=ccle.meth[,match(find, colnames(ccle.meth))] |
|
|
36 |
ccle=ccle[,match(find, colnames(ccle))] |
|
|
37 |
annot=annot[match(find, annot$CCLE.name),] |
|
|
38 |
ccle2=ccle2[,match(find ,colnames(ccle2))] |
|
|
39 |
|
|
|
40 |
# compute HLA-scores: |
|
|
41 |
dat_a3=2^ccle[rownames(ccle)%in%c("HLA-DMA", |
|
|
42 |
"HLA-DMB", |
|
|
43 |
"HLA-DPB1", |
|
|
44 |
"HLA-DRA", |
|
|
45 |
"HLA-DRB1"),]+1 |
|
|
46 |
|
|
|
47 |
gm3=log2(t(apply(dat_a3, 2, gm_mean))) |
|
|
48 |
rownames(gm3)="HLAII_SCORE" |
|
|
49 |
|
|
|
50 |
dat2=rbind(ccle, gm3) |
|
|
51 |
|
|
|
52 |
annotv=colnames(d)[colnames(d)%in%colnames(ccle.meth)[ccle.meth["CIITA@chr16_10971271",]>0.2]] |
|
|
53 |
|
|
|
54 |
|
|
|
55 |
annot$Hist.Subtype_hem[annot$Hist.Subtype1%in%"plasma_cell_myeloma"]="MM" |
|
|
56 |
annot$Hist.Subtype_hem[annot$Hist.Subtype1%in%"mantle_cell_lymphoma"]="MCL" |
|
|
57 |
annot$Hist.Subtype_hem[annot$Hist.Subtype1%in%"diffuse_large_B_cell_lymphoma"]="DLBCL" |
|
|
58 |
annot$Hist.Subtype_hem[annot$Hist.Subtype1%in%"chronic_lymphocytic_leukaemia-small_lymphocytic_lymphoma"]="CLL" |
|
|
59 |
annot$Hist.Subtype_hem[annot$Hist.Subtype1%in%"blast_phase_chronic_myeloid_leukaemia"]="CML" |
|
|
60 |
annot$Hist.Subtype_hem[annot$Hist.Subtype1%in%"anaplastic_large_cell_lymphoma"]="ALCL" |
|
|
61 |
annot$Hist.Subtype_hem[annot$Hist.Subtype1%in%"adult_T_cell_lymphoma-leukaemia"]="TCL" |
|
|
62 |
annot$Hist.Subtype_hem[annot$Hist.Subtype1%in%"acute_myeloid_leukaemia"]="AML" |
|
|
63 |
annot$Hist.Subtype_hem[annot$Hist.Subtype1%in%"acute_lymphoblastic_T_cell_leukaemia"]="T-ALL" |
|
|
64 |
annot$Hist.Subtype_hem[annot$Hist.Subtype1%in%"acute_lymphoblastic_B_cell_leukaemia"]="pre-B-ALL" |
|
|
65 |
annot$Hist.Subtype_hem[annot$Hist.Subtype1%in%"Hodgkin_lymphoma"]="CHL" |
|
|
66 |
annot$Hist.Subtype_hem[annot$Hist.Subtype1%in%"Burkitt_lymphoma"]="BL" |
|
|
67 |
annot$Hist.Subtype_hem[annot$Hist.Subtype1%in%c("B_cell_lymphoma_unspecified")]="BCL, unspecified" |
|
|
68 |
annot$Hist.Subtype_hem[annot$Hist.Subtype1%in%c("peripheral_T_cell_lymphoma_unspecified")]="TCL" |
|
|
69 |
|
|
|
70 |
d=dat2 |
|
|
71 |
groups=factor(annot$Hist.Subtype_hem[grep("lymphoma|leukaemia|myeloma", annot$Hist.Subtype1)]) |
|
|
72 |
|
|
|
73 |
logicalVectors=lapply(unique(annot$Hist.Subtype_hem), function(g){annot$Hist.Subtype_hem%in%g}) |
|
|
74 |
names(logicalVectors)=unique(annot$Hist.Subtype_hem) |
|
|
75 |
logicalVectors=logicalVectors[!is.na(names(logicalVectors))] |
|
|
76 |
|
|
|
77 |
logicalVectors=logicalVectors[-6] # remove unspesified |
|
|
78 |
|
|
|
79 |
a=names(sort(table(t.df[,3]), decreasing = T)) |
|
|
80 |
|
|
|
81 |
t.df=do.call(rbind, lapply(a, function(n){ |
|
|
82 |
b=t.df[t.df[,3]%in%n,] |
|
|
83 |
b=b[order(b[,2], decreasing = T),] |
|
|
84 |
})) |
|
|
85 |
|
|
|
86 |
# data: |
|
|
87 |
genelist=unique(t.df[,1]) |
|
|
88 |
|
|
|
89 |
ccle_antg=ccle[match(genelist,rownames(ccle)),] |
|
|
90 |
antigens_expresssed=colSums(ccle2[match(genelist,rownames(ccle2)),]>0.5, na.rm = T) |
|
|
91 |
|
|
|
92 |
disease=annot$Hist.Subtype_hem |
|
|
93 |
|
|
|
94 |
go.thr=gsub("@.*", "", rownames(ccle.meth)) |
|
|
95 |
ccle.meth=ccle.meth[go.thr%in%genelist,] |
|
|
96 |
go.thr=go.thr[go.thr%in%genelist] |
|
|
97 |
|
|
|
98 |
# load("/research/groups/sysgen/PROJECTS/HEMAP_IMMUNOLOGY/data/CCLE_METHYLATION/probelists.Rdata") |
|
|
99 |
|
|
|
100 |
highest_anticor=unlist(lapply(unique(go.thr), function(g){ |
|
|
101 |
|
|
|
102 |
if(!g%in%go.thr)return(NULL) |
|
|
103 |
if(!g%in%rownames(ccle))return(NULL) |
|
|
104 |
|
|
|
105 |
m=ccle.meth[go.thr%in%g,grep("lymphoma|leukaemia|myeloma", annot$Hist.Subtype1)] |
|
|
106 |
cc=as.numeric(ccle[rownames(ccle)%in%g,grep("lymphoma|leukaemia|myeloma", annot$Hist.Subtype1)]) |
|
|
107 |
|
|
|
108 |
if(all(rowSums(is.na(m))>140))return(NULL) |
|
|
109 |
|
|
|
110 |
# remove probes that are not found in most datasets: |
|
|
111 |
m=m[!rowSums(!is.na(m))<20,] |
|
|
112 |
|
|
|
113 |
if(dim(m)[1]==0)return(NULL) |
|
|
114 |
|
|
|
115 |
res=cor(cc,t(m), use="pairwise.complete.obs", method = "spearman") |
|
|
116 |
|
|
|
117 |
# rm poor correlations: |
|
|
118 |
res=res[res<(-0.2)] |
|
|
119 |
|
|
|
120 |
if(!length(res))return(NULL) |
|
|
121 |
|
|
|
122 |
anticor=rownames(m[which.min(res),]) |
|
|
123 |
})) |
|
|
124 |
|
|
|
125 |
|
|
|
126 |
# data frame |
|
|
127 |
df_anno=data.frame("Disease"=disease, "Number_Antigens_expressed"=as.integer(antigens_expresssed)) |
|
|
128 |
|
|
|
129 |
# sort based on antigens expressed and disease |
|
|
130 |
sample_order=order(match(annot$Hist.Subtype_hem, c("T-ALL", "pre-B-ALL", "AML", "CML", "CLL","MM","BL","CHL","DLBCL","MCL","BCL, unspecified","TCL","ALCL")), antigens_expresssed) |
|
|
131 |
|
|
|
132 |
df_anno=df_anno[sample_order,] |
|
|
133 |
|
|
|
134 |
ccle_antg=ccle_antg[,sample_order[!is.na(df_anno$Disease)]] |
|
|
135 |
df_anno=df_anno[!is.na(df_anno$Disease),] |
|
|
136 |
|
|
|
137 |
ccle_antg_meth=ccle.meth[,match(colnames(ccle_antg), colnames(ccle.meth))] |
|
|
138 |
|
|
|
139 |
cname=colnames(ccle_antg) |
|
|
140 |
colnames(ccle_antg)=gsub("_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE","", colnames(ccle_antg)) |
|
|
141 |
colnames(ccle_antg_meth)=gsub("_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE","", colnames(ccle_antg_meth)) |
|
|
142 |
|
|
|
143 |
ccle_antg_meth=ccle_antg_meth[rownames(ccle_antg_meth)%in%highest_anticor,] |
|
|
144 |
ccle_antg_meth=ccle_antg_meth[match(genelist, gsub("@.*", "", rownames(ccle_antg_meth))),] |
|
|
145 |
|
|
|
146 |
ccle_antg_meth2=ccle_antg_meth |
|
|
147 |
ccle_antg_meth2[ccle_antg_meth2<=0.2]=0 # this works best, tested |
|
|
148 |
ccle_antg_meth2[ccle_antg_meth2>0.2]=1 |
|
|
149 |
|
|
|
150 |
ccle_antg_meth2=ccle_antg_meth2[!rowSums(is.na(ccle_antg_meth2))>100,] |
|
|
151 |
|
|
|
152 |
sum_hypometh=colSums(ccle_antg_meth2==0, na.rm = T) |
|
|
153 |
|
|
|
154 |
cor(df_anno$Number_Antigens_expressed, sum_hypometh, use="complete.obs") |
|
|
155 |
|
|
|
156 |
df_anno=data.frame(df_anno, "Number_Antigens_methylated"=sum_hypometh) |
|
|
157 |
|
|
|
158 |
ha = HeatmapAnnotation(df=df_anno[,1,drop=F], Number.Antigens = anno_barplot(df_anno$Number_Antigens_expressed, axis = T, gp = gpar(fill = "indianred")), Number.Me.Antigens = anno_barplot(sum_hypometh, axis = T, gp = gpar(fill = "darkgoldenrod")), height = unit(4, "inch")) |
|
|
159 |
|
|
|
160 |
ccle_antg=t(scale(t(ccle_antg))) |
|
|
161 |
ccle_antg[ccle_antg<(-4)]=-4 |
|
|
162 |
ccle_antg[ccle_antg>(4)]=4 |
|
|
163 |
|
|
|
164 |
Heatmap(ccle_antg_meth, top_annotation = ha, name = "TestisAntigens", cluster_columns = F, cluster_rows = F) |
|
|
165 |
|
|
|
166 |
pdf("Fig6D_CCLE_ComplexHeatmap_testis_antigens.pdf", height = 15, width = 30) |
|
|
167 |
Heatmap(ccle_antg, top_annotation = ha, name = "TestisAntigens", cluster_columns = F, cluster_rows = F, col=colorRamp2(c(-4,-2, 0,2, 4), c("#063061","#579ec9", "white", "#d45d4a", "#67001e"))) |
|
|
168 |
dev.off() |