Diff of /Fig6D_CCLE_CGA_heatmap.R [000000] .. [8e0848]

Switch to unified view

a b/Fig6D_CCLE_CGA_heatmap.R
1
2
# Plot heatmap of cancer-germline antuigen expression and methylation in CCLE hematologic cell line data (Figure 6D)
3
4
library(CePa)
5
library(biomaRt)
6
library(ggplot2)
7
library(cowplot)
8
library(reshape2)
9
library(matrixStats)
10
library(data.table)
11
library(ggrepel)
12
library(dplyr)
13
library(gridExtra)
14
library(ggpubr)
15
library(ComplexHeatmap)
16
library(circlize)
17
library(RColorBrewer)
18
19
# load data
20
meth <- fread("CCLE_RRBS_TSS_1kb_20180614.txt", data.table = F)
21
meth_cpg <- fread("CCLE_RRBS_TSS_CpG_clusters_20180614.txt", data.table = F)
22
rpkm <- read.gct(file = "CCLE_DepMap_18q3_RNAseq_RPKM_20180718.gct") # read in CCLE RNA-seq rpkm table
23
annot <- fread(file = "DepMap-2018q3-celllines.csv", data.table = F)
24
annot2 <- fread(file = "CCLE_sample_info_file_2012-10-18_modified.csv", data.table = F, check.names = T) # read in CCLE annotation file (2012 publication) with lineage data added by Olli
25
26
# clean rpkm column names
27
colnames(rpkm) <- gsub("..ACH.*", "", colnames(rpkm))
28
29
# subset rpkm to methylation data
30
samples_rkpm_meth <- colnames(rpkm)[colnames(rpkm) %in% colnames(meth_cpg)]
31
32
# subset annotations to data
33
annot <- annot[annot$CCLE_Name%in%samples_rkpm_meth,]
34
35
# select hematopoietic cell lines from annot
36
annot_hem <- annot[grepl("HAEMATOPOIETIC", annot$CCLE_Name),]
37
38
39
colnames(meth) <- gsub("_name|TSS_|cluster_", "", colnames(meth_cpg))
40
colnames(meth_cpg) <- gsub("_name|TSS_|cluster_", "", colnames(meth_cpg))
41
42
# rpkm data frame with hematopoietic cell lines
43
rpkm_hem <- rpkm[,as.character(annot_hem$CCLE_Name)]
44
45
# combine TSS and CpG methylation data
46
colnames(meth[,c("id", "gene", as.character(annot_hem$CCLE_Name))])==colnames(meth_cpg[,c("id", "gene", as.character(annot_hem$CCLE_Name))]) # check that column names match before joining
47
meth_hem <- rbind(meth[,c("id", "gene", as.character(annot_hem$CCLE_Name))], meth_cpg[,c("id", "gene", as.character(annot_hem$CCLE_Name))])
48
meth_hem[,!colnames(meth_hem)%in%c("id", "gene")] <- sapply(meth_hem[,!colnames(meth_hem)%in%c("id", "gene")], as.numeric) # make numeric
49
50
51
# get gene symbols
52
ensembl_hs_mart <- useMart(biomart="ensembl", dataset="hsapiens_gene_ensembl")
53
ensembl_df <- getBM(attributes=c("ensembl_gene_id", "ensembl_gene_id_version",
54
                                 "hgnc_symbol", "description", "chromosome_name", "start_position"),
55
                    mart=ensembl_hs_mart)
56
gene_annot <- ensembl_df[match(gsub("\\..*", "", rownames(rpkm)), ensembl_df$ensembl_gene_id),] # match biomart data using Ensembl gene symbols
57
rownames(gene_annot) <- rownames(rpkm)
58
59
# get cga genes:
60
genelist <- as.character(read.table("cga.txt")[,1])
61
62
gene_annot_cga <- gene_annot[gene_annot$hgnc_symbol%in%gsub("C11orf85", "MAJIN", genelist),]
63
64
rpkm_hem_cga <- rpkm_hem
65
rownames(rpkm_hem_cga) <- gsub("\\..*", "", rownames(rpkm))
66
rpkm_hem_cga <- rpkm_hem_cga[gene_annot_cga$ensembl_gene_id,]
67
rownames(rpkm_hem_cga) <- gsub("MAJIN", "C11orf85", gene_annot_cga$hgnc_symbol)
68
rpkm_hem_cga_log2 <- log2(rpkm_hem_cga + 0.25)
69
70
meth_hem_cga <- as.data.frame(meth_hem[meth_hem$gene%in%gene_annot_cga$hgnc_symbol,])
71
rownames(meth_hem_cga) <- gsub("MAJIN", "C11orf85", meth_hem_cga$id)
72
meth_hem_cga$id <- NULL
73
meth_hem_cga$gene <- NULL
74
75
hem_cga <- t(rbind(rpkm_hem_cga_log2, meth_hem_cga))
76
hem_cga <- merge(hem_cga, annot_hem, by.x = "row.names", by.y = "CCLE_Name")
77
hem_cga <- merge(hem_cga, annot2, by.x = "Row.names", by.y = "CCLE.name")
78
hem_cga <- hem_cga[grepl("lymphoma|leukaemia|myeloma", hem_cga$Hist.Subtype1),]
79
colnames(hem_cga)[1] <- "CCLE_Name"
80
81
# add shortened annotations
82
grep("lymphoma|leukaemia|myeloma", hem_cga$Hist.Subtype1, value=T)
83
hem_cga$Hist.Subtype_hem[hem_cga$Hist.Subtype1%in%"plasma_cell_myeloma"]="MM"
84
hem_cga$Hist.Subtype_hem[hem_cga$Hist.Subtype1%in%"mantle_cell_lymphoma"]="MCL"
85
hem_cga$Hist.Subtype_hem[hem_cga$Hist.Subtype1%in%"diffuse_large_B_cell_lymphoma"]="DLBCL"
86
hem_cga$Hist.Subtype_hem[hem_cga$Hist.Subtype1%in%"chronic_lymphocytic_leukaemia-small_lymphocytic_lymphoma"]="CLL"
87
hem_cga$Hist.Subtype_hem[hem_cga$Hist.Subtype1%in%"blast_phase_chronic_myeloid_leukaemia"]="CML"
88
hem_cga$Hist.Subtype_hem[hem_cga$Hist.Subtype1%in%"anaplastic_large_cell_lymphoma"]="ALCL"
89
hem_cga$Hist.Subtype_hem[hem_cga$Hist.Subtype1%in%"adult_T_cell_lymphoma-leukaemia"]="TCL other"
90
hem_cga$Hist.Subtype_hem[hem_cga$Hist.Subtype1%in%"acute_myeloid_leukaemia"]="AML"
91
hem_cga$Hist.Subtype_hem[hem_cga$Hist.Subtype1%in%"acute_lymphoblastic_T_cell_leukaemia"]="T-ALL"
92
hem_cga$Hist.Subtype_hem[hem_cga$Hist.Subtype1%in%"acute_lymphoblastic_B_cell_leukaemia"]="pre-B-ALL"
93
hem_cga$Hist.Subtype_hem[hem_cga$Hist.Subtype1%in%"Hodgkin_lymphoma"]="CHL"
94
hem_cga$Hist.Subtype_hem[hem_cga$Hist.Subtype1%in%"Burkitt_lymphoma"]="BL"
95
hem_cga$Hist.Subtype_hem[hem_cga$Hist.Subtype1%in%c("B_cell_lymphoma_unspecified")]="BCL other"
96
hem_cga$Hist.Subtype_hem[hem_cga$Hist.Subtype1%in%c("peripheral_T_cell_lymphoma_unspecified")]="TCL other"
97
98
# add lineage annotations
99
hem_cga$Lineage <- ifelse(grepl("ALCL|T-ALL|TCL", hem_cga$Hist.Subtype_hem), "T/NK cell", 
100
                          ifelse(grepl("AML|CML", hem_cga$Hist.Subtype_hem), "Myeloid", "B cell"))
101
102
103
# add methylation means to data frame for plotting
104
meth_mean <- function(gene){
105
  df <- data.frame(gene = rowMeans(data.frame(hem_cga[,colnames(hem_cga)[grepl(paste0(gene, "_"), colnames(hem_cga))]]), na.rm = T))
106
  return(df)
107
}
108
109
cga_meth_mean <- do.call(cbind, lapply(gsub("MAJIN", "C11orf85", genelist), meth_mean))
110
colnames(cga_meth_mean) <- paste0(gsub("MAJIN", "C11orf85", genelist), "_meth_mean")
111
112
hem_cga_meth_mean <- cbind(hem_cga, cga_meth_mean)
113
hem_cga_meth_mean <- hem_cga_meth_mean[order(hem_cga_meth_mean$Hist.Subtype_hem),]
114
115
# order cancers for plotting
116
hem_cga_meth_mean$Hist.Subtype_hem <- factor(hem_cga_meth_mean$Hist.Subtype_hem, levels = c("T-ALL", "pre-B-ALL", "AML", "CML", "CLL", "MM", "BL", "CHL", "DLBCL", "MCL", "BCL other", "ALCL", "TCL other"))
117
118
# data prepared
119
120
## --------------------------------------------------------------------------------------
121
122
123
## Test correlations for all methylation values averaged per gene
124
125
# function to correlate gexp to averaged same gene methylation features
126
corr_mean <- function(gene) {
127
  cor.result <- cor.test(hem_cga[,gene],
128
                         as.numeric( # numeric vector for cor.test
129
                           rowMeans( # average methylation values
130
                             data.frame( # data frame for rowMeans
131
                               hem_cga[,colnames(hem_cga)[grepl(paste0(gene, "_"), colnames(hem_cga))]]), na.rm = T)), method = "spearman") # grep columns with underscore to get methylation values only
132
  p <- cor.result$p.value
133
  r <- cor.result$estimate
134
  return(data.frame(gexp = gene, meth = gene, r = r, p = p))
135
}
136
137
# select genes that have methylation data
138
genes = genelist[genelist%in%gsub("_.*", "", colnames(hem_cga)[grepl("_", colnames(hem_cga))])]
139
140
# apply function on gene list
141
result_mean <- do.call(rbind, lapply(genes, corr_mean))
142
result_mean$q <- p.adjust(result_mean$p, method = "fdr")
143
144
# check results sorted by siginificance
145
result_mean %>%
146
  arrange(q)
147
148
# write sorted mean methylation correlations
149
result_mean_sorted <- result_mean %>%
150
  arrange(r)
151
152
# add significance codes to mean gexp to meth correlation for each gene
153
mean_corr <- result_mean %>%
154
  mutate(signifCode = ifelse(q<0.0001, "****",
155
                             ifelse(q<0.001, "***", 
156
                                    ifelse(q<0.01, "**",
157
                                           ifelse(q<0.05, "*", "")))))
158
159
160
## -------------------------------------------------------------------------------
161
162
# Heatmap 
163
164
mat <- data.matrix(rpkm_hem_cga_log2[,hem_cga_meth_mean$CCLE_Name])
165
mat <- t(apply(mat, 1, scale))
166
colnames(mat) <- gsub("_.*", "", colnames(rpkm_hem_cga_log2[,hem_cga_meth_mean$CCLE_Name]))
167
168
mat <- mat[gene_annot_cga$hgnc_symbol,]
169
170
mat 
171
172
# colors
173
cols <- read.table("colors_hemap_immunology.tsv", header = TRUE, sep = "\t", comment.char = " ")
174
cols <- cols %>% mutate(combinedcolor = ifelse(subtypecolor!="", as.character(subtypecolor), as.character(color)))
175
cols$sample <- gsub("^BCL", "BCL other", gsub("TCL", "TCL other", cols$sample))
176
177
# data frames for heatmap annotations
178
exprs_num <- colSums(rpkm_hem_cga[,hem_cga_meth_mean$CCLE_Name]>0.5)
179
180
meth_mat <- hem_cga_meth_mean[,grep("mean", colnames(hem_cga_meth_mean))]
181
colnames(meth_mat) <- gsub("_meth_mean", "", colnames(meth_mat))
182
meth_mat <- t(meth_mat)
183
colnames(meth_mat) <- gsub("_.*", "", hem_cga_meth_mean$CCLE_Name)
184
meth_mat <- meth_mat[,match(colnames(mat), colnames(meth_mat))]
185
meth_mat[meth_mat>0.5] = 1
186
meth_mat[meth_mat<0.5] = 0
187
meth_num <- colSums(meth_mat==0, na.rm = T)
188
189
disease_order <- data.frame(disease = unique(hem_cga_meth_mean$Hist.Subtype_hem), rank = c(13, 3, 11, 7, 8, 5, 4, 9, 10, 6, 2, 1, 12))
190
disease_order$disease <- as.character(disease_order$disease)
191
hem_cga_meth_mean <- merge(hem_cga_meth_mean, disease_order, by.x = "Hist.Subtype_hem", by.y = "disease")
192
193
sample_order <- order(hem_cga_meth_mean$rank, exprs_num)
194
exprs_num <- exprs_num[sample_order]
195
meth_num <- meth_num[sample_order]
196
annot_df <- data.frame(subtype = hem_cga_meth_mean$Hist.Subtype_hem[sample_order])
197
198
# heatmap annotations
199
200
ha1 = HeatmapAnnotation(exprs = anno_barplot(exprs_num, 
201
                                             bar_width = 0.75, 
202
                                             border = FALSE, 
203
                                             axis = TRUE,
204
                                             axis_param = list(gp = gpar(fontsize = 5, lwd = 0.5)),
205
                                             gp = gpar(col = NA, fill = "grey50")),
206
                        meth = anno_barplot(meth_num, 
207
                                            bar_width = 0.75, 
208
                                            border = FALSE, 
209
                                            axis = TRUE,
210
                                            axis_param = list(gp = gpar(fontsize = 5, lwd = 0.5)),
211
                                            gp = gpar(col = NA, fill = "#a52a2a")),
212
                        df = annot_df, 
213
                        col = list(subtype = structure(as.character(cols$combinedcolor[cols$sample %in% hem_cga_meth_mean$Hist.Subtype_hem]), 
214
                                                       names = as.character(cols$sample[cols$sample %in% hem_cga_meth_mean$Hist.Subtype_hem]))[as.character(disease_order$disease[order(disease_order$rank)])]
215
                        ),
216
                        annotation_legend_param = list(subtype = list(title = "Cancer type", title_gp = gpar(fontsize = 7, fontface = "bold"), 
217
                                                                      labels_gp = gpar(fontsize = 7), grid_height = unit(3, "mm"), grid_width = unit(3, "mm"))
218
                        ),
219
                        show_annotation_name = F,
220
                        height = unit(2, "cm"))
221
222
meth_corr_df <- data.frame(corr = mean_corr$r[match(rownames(mat), mean_corr$gexp)])
223
colnames(meth_corr_df) <- "Correlation to methylation"
224
225
mat <- mat[order(meth_corr_df$`Correlation to methylation`),sample_order]
226
227
meth_corr_df <- data.frame(corr = mean_corr$r[match(rownames(mat), mean_corr$gexp)])
228
colnames(meth_corr_df) <- "meth_corr"
229
230
ha2 = HeatmapAnnotation(df = meth_corr_df,
231
                        which = "row",
232
                        col = list(meth_corr = colorRamp2(c(-1, -0.5, -0.375, -0.25, -0.125, 0, 0.125, 0.25, 0.375, 0.5, 1), rev(brewer.pal(11, "PuOr")))),
233
                        annotation_legend_param = list(meth_corr = list(title = "Correlation to methylation", title_gp = gpar(fontsize = 7, fontface = "bold"), 
234
                                                                        labels_gp = gpar(fontsize = 7)), width = unit(0.1, "cm"), grid_height = unit(2, "mm"), grid_width = unit(2, "mm"), legend_direction = "horizontal", legend_width = unit(2, "cm"), title_position = "topcenter"))
235
236
237
# print heatmap
238
pdf(file = "Figure6D_CCLE_CGA_heatmap.pdf", height = 4, width = 5)
239
draw(
240
       ha2 +
241
       Heatmap(mat,
242
               name = "Expression Z-score", 
243
               col = colorRamp2(seq(-3, 3, length.out = 11), rev(brewer.pal(11, "RdBu"))),
244
               row_names_side = "right", 
245
               row_names_gp = gpar(fontsize = 6),
246
               show_column_names = FALSE,
247
               cluster_rows = FALSE,
248
               cluster_columns = FALSE,
249
               top_annotation = ha1, 
250
               heatmap_legend_param = list(title_gp = gpar(fontsize = 7, fontface = "bold"),
251
                                           labels_gp = gpar(fontsize = 7),
252
                                           legend_direction = "horizontal",
253
                                           legend_width = unit(2, "cm"), title_position = "topcenter",
254
                                           grid_height = unit(2, "mm"),
255
                                           grid_width = unit(2, "mm")
256
               )
257
       ),
258
     auto_adjust = F,
259
     padding = unit(c(2, 15, 2, 2), "mm"), heatmap_legend_side = "bottom")
260
decorate_annotation("exprs", {
261
  grid.text("# CGAs expressed", unit(0, "npc") + unit(1, "mm"), 0.5, 
262
            default.units = "npc", just = c("left", "bottom"), gp = gpar(fontsize = 7))
263
})
264
decorate_annotation("meth", {
265
  grid.text("# CGAs hypomethylated", unit(0, "npc") + unit(1, "mm"), 0.5, 
266
            default.units = "npc", just = c("left", "bottom"), gp = gpar(fontsize = 7))
267
})
268
dev.off()
269