|
a |
|
b/Fig6D_CCLE_CGA_heatmap.R |
|
|
1 |
|
|
|
2 |
# Plot heatmap of cancer-germline antuigen expression and methylation in CCLE hematologic cell line data (Figure 6D) |
|
|
3 |
|
|
|
4 |
library(CePa) |
|
|
5 |
library(biomaRt) |
|
|
6 |
library(ggplot2) |
|
|
7 |
library(cowplot) |
|
|
8 |
library(reshape2) |
|
|
9 |
library(matrixStats) |
|
|
10 |
library(data.table) |
|
|
11 |
library(ggrepel) |
|
|
12 |
library(dplyr) |
|
|
13 |
library(gridExtra) |
|
|
14 |
library(ggpubr) |
|
|
15 |
library(ComplexHeatmap) |
|
|
16 |
library(circlize) |
|
|
17 |
library(RColorBrewer) |
|
|
18 |
|
|
|
19 |
# load data |
|
|
20 |
meth <- fread("CCLE_RRBS_TSS_1kb_20180614.txt", data.table = F) |
|
|
21 |
meth_cpg <- fread("CCLE_RRBS_TSS_CpG_clusters_20180614.txt", data.table = F) |
|
|
22 |
rpkm <- read.gct(file = "CCLE_DepMap_18q3_RNAseq_RPKM_20180718.gct") # read in CCLE RNA-seq rpkm table |
|
|
23 |
annot <- fread(file = "DepMap-2018q3-celllines.csv", data.table = F) |
|
|
24 |
annot2 <- fread(file = "CCLE_sample_info_file_2012-10-18_modified.csv", data.table = F, check.names = T) # read in CCLE annotation file (2012 publication) with lineage data added by Olli |
|
|
25 |
|
|
|
26 |
# clean rpkm column names |
|
|
27 |
colnames(rpkm) <- gsub("..ACH.*", "", colnames(rpkm)) |
|
|
28 |
|
|
|
29 |
# subset rpkm to methylation data |
|
|
30 |
samples_rkpm_meth <- colnames(rpkm)[colnames(rpkm) %in% colnames(meth_cpg)] |
|
|
31 |
|
|
|
32 |
# subset annotations to data |
|
|
33 |
annot <- annot[annot$CCLE_Name%in%samples_rkpm_meth,] |
|
|
34 |
|
|
|
35 |
# select hematopoietic cell lines from annot |
|
|
36 |
annot_hem <- annot[grepl("HAEMATOPOIETIC", annot$CCLE_Name),] |
|
|
37 |
|
|
|
38 |
|
|
|
39 |
colnames(meth) <- gsub("_name|TSS_|cluster_", "", colnames(meth_cpg)) |
|
|
40 |
colnames(meth_cpg) <- gsub("_name|TSS_|cluster_", "", colnames(meth_cpg)) |
|
|
41 |
|
|
|
42 |
# rpkm data frame with hematopoietic cell lines |
|
|
43 |
rpkm_hem <- rpkm[,as.character(annot_hem$CCLE_Name)] |
|
|
44 |
|
|
|
45 |
# combine TSS and CpG methylation data |
|
|
46 |
colnames(meth[,c("id", "gene", as.character(annot_hem$CCLE_Name))])==colnames(meth_cpg[,c("id", "gene", as.character(annot_hem$CCLE_Name))]) # check that column names match before joining |
|
|
47 |
meth_hem <- rbind(meth[,c("id", "gene", as.character(annot_hem$CCLE_Name))], meth_cpg[,c("id", "gene", as.character(annot_hem$CCLE_Name))]) |
|
|
48 |
meth_hem[,!colnames(meth_hem)%in%c("id", "gene")] <- sapply(meth_hem[,!colnames(meth_hem)%in%c("id", "gene")], as.numeric) # make numeric |
|
|
49 |
|
|
|
50 |
|
|
|
51 |
# get gene symbols |
|
|
52 |
ensembl_hs_mart <- useMart(biomart="ensembl", dataset="hsapiens_gene_ensembl") |
|
|
53 |
ensembl_df <- getBM(attributes=c("ensembl_gene_id", "ensembl_gene_id_version", |
|
|
54 |
"hgnc_symbol", "description", "chromosome_name", "start_position"), |
|
|
55 |
mart=ensembl_hs_mart) |
|
|
56 |
gene_annot <- ensembl_df[match(gsub("\\..*", "", rownames(rpkm)), ensembl_df$ensembl_gene_id),] # match biomart data using Ensembl gene symbols |
|
|
57 |
rownames(gene_annot) <- rownames(rpkm) |
|
|
58 |
|
|
|
59 |
# get cga genes: |
|
|
60 |
genelist <- as.character(read.table("cga.txt")[,1]) |
|
|
61 |
|
|
|
62 |
gene_annot_cga <- gene_annot[gene_annot$hgnc_symbol%in%gsub("C11orf85", "MAJIN", genelist),] |
|
|
63 |
|
|
|
64 |
rpkm_hem_cga <- rpkm_hem |
|
|
65 |
rownames(rpkm_hem_cga) <- gsub("\\..*", "", rownames(rpkm)) |
|
|
66 |
rpkm_hem_cga <- rpkm_hem_cga[gene_annot_cga$ensembl_gene_id,] |
|
|
67 |
rownames(rpkm_hem_cga) <- gsub("MAJIN", "C11orf85", gene_annot_cga$hgnc_symbol) |
|
|
68 |
rpkm_hem_cga_log2 <- log2(rpkm_hem_cga + 0.25) |
|
|
69 |
|
|
|
70 |
meth_hem_cga <- as.data.frame(meth_hem[meth_hem$gene%in%gene_annot_cga$hgnc_symbol,]) |
|
|
71 |
rownames(meth_hem_cga) <- gsub("MAJIN", "C11orf85", meth_hem_cga$id) |
|
|
72 |
meth_hem_cga$id <- NULL |
|
|
73 |
meth_hem_cga$gene <- NULL |
|
|
74 |
|
|
|
75 |
hem_cga <- t(rbind(rpkm_hem_cga_log2, meth_hem_cga)) |
|
|
76 |
hem_cga <- merge(hem_cga, annot_hem, by.x = "row.names", by.y = "CCLE_Name") |
|
|
77 |
hem_cga <- merge(hem_cga, annot2, by.x = "Row.names", by.y = "CCLE.name") |
|
|
78 |
hem_cga <- hem_cga[grepl("lymphoma|leukaemia|myeloma", hem_cga$Hist.Subtype1),] |
|
|
79 |
colnames(hem_cga)[1] <- "CCLE_Name" |
|
|
80 |
|
|
|
81 |
# add shortened annotations |
|
|
82 |
grep("lymphoma|leukaemia|myeloma", hem_cga$Hist.Subtype1, value=T) |
|
|
83 |
hem_cga$Hist.Subtype_hem[hem_cga$Hist.Subtype1%in%"plasma_cell_myeloma"]="MM" |
|
|
84 |
hem_cga$Hist.Subtype_hem[hem_cga$Hist.Subtype1%in%"mantle_cell_lymphoma"]="MCL" |
|
|
85 |
hem_cga$Hist.Subtype_hem[hem_cga$Hist.Subtype1%in%"diffuse_large_B_cell_lymphoma"]="DLBCL" |
|
|
86 |
hem_cga$Hist.Subtype_hem[hem_cga$Hist.Subtype1%in%"chronic_lymphocytic_leukaemia-small_lymphocytic_lymphoma"]="CLL" |
|
|
87 |
hem_cga$Hist.Subtype_hem[hem_cga$Hist.Subtype1%in%"blast_phase_chronic_myeloid_leukaemia"]="CML" |
|
|
88 |
hem_cga$Hist.Subtype_hem[hem_cga$Hist.Subtype1%in%"anaplastic_large_cell_lymphoma"]="ALCL" |
|
|
89 |
hem_cga$Hist.Subtype_hem[hem_cga$Hist.Subtype1%in%"adult_T_cell_lymphoma-leukaemia"]="TCL other" |
|
|
90 |
hem_cga$Hist.Subtype_hem[hem_cga$Hist.Subtype1%in%"acute_myeloid_leukaemia"]="AML" |
|
|
91 |
hem_cga$Hist.Subtype_hem[hem_cga$Hist.Subtype1%in%"acute_lymphoblastic_T_cell_leukaemia"]="T-ALL" |
|
|
92 |
hem_cga$Hist.Subtype_hem[hem_cga$Hist.Subtype1%in%"acute_lymphoblastic_B_cell_leukaemia"]="pre-B-ALL" |
|
|
93 |
hem_cga$Hist.Subtype_hem[hem_cga$Hist.Subtype1%in%"Hodgkin_lymphoma"]="CHL" |
|
|
94 |
hem_cga$Hist.Subtype_hem[hem_cga$Hist.Subtype1%in%"Burkitt_lymphoma"]="BL" |
|
|
95 |
hem_cga$Hist.Subtype_hem[hem_cga$Hist.Subtype1%in%c("B_cell_lymphoma_unspecified")]="BCL other" |
|
|
96 |
hem_cga$Hist.Subtype_hem[hem_cga$Hist.Subtype1%in%c("peripheral_T_cell_lymphoma_unspecified")]="TCL other" |
|
|
97 |
|
|
|
98 |
# add lineage annotations |
|
|
99 |
hem_cga$Lineage <- ifelse(grepl("ALCL|T-ALL|TCL", hem_cga$Hist.Subtype_hem), "T/NK cell", |
|
|
100 |
ifelse(grepl("AML|CML", hem_cga$Hist.Subtype_hem), "Myeloid", "B cell")) |
|
|
101 |
|
|
|
102 |
|
|
|
103 |
# add methylation means to data frame for plotting |
|
|
104 |
meth_mean <- function(gene){ |
|
|
105 |
df <- data.frame(gene = rowMeans(data.frame(hem_cga[,colnames(hem_cga)[grepl(paste0(gene, "_"), colnames(hem_cga))]]), na.rm = T)) |
|
|
106 |
return(df) |
|
|
107 |
} |
|
|
108 |
|
|
|
109 |
cga_meth_mean <- do.call(cbind, lapply(gsub("MAJIN", "C11orf85", genelist), meth_mean)) |
|
|
110 |
colnames(cga_meth_mean) <- paste0(gsub("MAJIN", "C11orf85", genelist), "_meth_mean") |
|
|
111 |
|
|
|
112 |
hem_cga_meth_mean <- cbind(hem_cga, cga_meth_mean) |
|
|
113 |
hem_cga_meth_mean <- hem_cga_meth_mean[order(hem_cga_meth_mean$Hist.Subtype_hem),] |
|
|
114 |
|
|
|
115 |
# order cancers for plotting |
|
|
116 |
hem_cga_meth_mean$Hist.Subtype_hem <- factor(hem_cga_meth_mean$Hist.Subtype_hem, levels = c("T-ALL", "pre-B-ALL", "AML", "CML", "CLL", "MM", "BL", "CHL", "DLBCL", "MCL", "BCL other", "ALCL", "TCL other")) |
|
|
117 |
|
|
|
118 |
# data prepared |
|
|
119 |
|
|
|
120 |
## -------------------------------------------------------------------------------------- |
|
|
121 |
|
|
|
122 |
|
|
|
123 |
## Test correlations for all methylation values averaged per gene |
|
|
124 |
|
|
|
125 |
# function to correlate gexp to averaged same gene methylation features |
|
|
126 |
corr_mean <- function(gene) { |
|
|
127 |
cor.result <- cor.test(hem_cga[,gene], |
|
|
128 |
as.numeric( # numeric vector for cor.test |
|
|
129 |
rowMeans( # average methylation values |
|
|
130 |
data.frame( # data frame for rowMeans |
|
|
131 |
hem_cga[,colnames(hem_cga)[grepl(paste0(gene, "_"), colnames(hem_cga))]]), na.rm = T)), method = "spearman") # grep columns with underscore to get methylation values only |
|
|
132 |
p <- cor.result$p.value |
|
|
133 |
r <- cor.result$estimate |
|
|
134 |
return(data.frame(gexp = gene, meth = gene, r = r, p = p)) |
|
|
135 |
} |
|
|
136 |
|
|
|
137 |
# select genes that have methylation data |
|
|
138 |
genes = genelist[genelist%in%gsub("_.*", "", colnames(hem_cga)[grepl("_", colnames(hem_cga))])] |
|
|
139 |
|
|
|
140 |
# apply function on gene list |
|
|
141 |
result_mean <- do.call(rbind, lapply(genes, corr_mean)) |
|
|
142 |
result_mean$q <- p.adjust(result_mean$p, method = "fdr") |
|
|
143 |
|
|
|
144 |
# check results sorted by siginificance |
|
|
145 |
result_mean %>% |
|
|
146 |
arrange(q) |
|
|
147 |
|
|
|
148 |
# write sorted mean methylation correlations |
|
|
149 |
result_mean_sorted <- result_mean %>% |
|
|
150 |
arrange(r) |
|
|
151 |
|
|
|
152 |
# add significance codes to mean gexp to meth correlation for each gene |
|
|
153 |
mean_corr <- result_mean %>% |
|
|
154 |
mutate(signifCode = ifelse(q<0.0001, "****", |
|
|
155 |
ifelse(q<0.001, "***", |
|
|
156 |
ifelse(q<0.01, "**", |
|
|
157 |
ifelse(q<0.05, "*", ""))))) |
|
|
158 |
|
|
|
159 |
|
|
|
160 |
## ------------------------------------------------------------------------------- |
|
|
161 |
|
|
|
162 |
# Heatmap |
|
|
163 |
|
|
|
164 |
mat <- data.matrix(rpkm_hem_cga_log2[,hem_cga_meth_mean$CCLE_Name]) |
|
|
165 |
mat <- t(apply(mat, 1, scale)) |
|
|
166 |
colnames(mat) <- gsub("_.*", "", colnames(rpkm_hem_cga_log2[,hem_cga_meth_mean$CCLE_Name])) |
|
|
167 |
|
|
|
168 |
mat <- mat[gene_annot_cga$hgnc_symbol,] |
|
|
169 |
|
|
|
170 |
mat |
|
|
171 |
|
|
|
172 |
# colors |
|
|
173 |
cols <- read.table("colors_hemap_immunology.tsv", header = TRUE, sep = "\t", comment.char = " ") |
|
|
174 |
cols <- cols %>% mutate(combinedcolor = ifelse(subtypecolor!="", as.character(subtypecolor), as.character(color))) |
|
|
175 |
cols$sample <- gsub("^BCL", "BCL other", gsub("TCL", "TCL other", cols$sample)) |
|
|
176 |
|
|
|
177 |
# data frames for heatmap annotations |
|
|
178 |
exprs_num <- colSums(rpkm_hem_cga[,hem_cga_meth_mean$CCLE_Name]>0.5) |
|
|
179 |
|
|
|
180 |
meth_mat <- hem_cga_meth_mean[,grep("mean", colnames(hem_cga_meth_mean))] |
|
|
181 |
colnames(meth_mat) <- gsub("_meth_mean", "", colnames(meth_mat)) |
|
|
182 |
meth_mat <- t(meth_mat) |
|
|
183 |
colnames(meth_mat) <- gsub("_.*", "", hem_cga_meth_mean$CCLE_Name) |
|
|
184 |
meth_mat <- meth_mat[,match(colnames(mat), colnames(meth_mat))] |
|
|
185 |
meth_mat[meth_mat>0.5] = 1 |
|
|
186 |
meth_mat[meth_mat<0.5] = 0 |
|
|
187 |
meth_num <- colSums(meth_mat==0, na.rm = T) |
|
|
188 |
|
|
|
189 |
disease_order <- data.frame(disease = unique(hem_cga_meth_mean$Hist.Subtype_hem), rank = c(13, 3, 11, 7, 8, 5, 4, 9, 10, 6, 2, 1, 12)) |
|
|
190 |
disease_order$disease <- as.character(disease_order$disease) |
|
|
191 |
hem_cga_meth_mean <- merge(hem_cga_meth_mean, disease_order, by.x = "Hist.Subtype_hem", by.y = "disease") |
|
|
192 |
|
|
|
193 |
sample_order <- order(hem_cga_meth_mean$rank, exprs_num) |
|
|
194 |
exprs_num <- exprs_num[sample_order] |
|
|
195 |
meth_num <- meth_num[sample_order] |
|
|
196 |
annot_df <- data.frame(subtype = hem_cga_meth_mean$Hist.Subtype_hem[sample_order]) |
|
|
197 |
|
|
|
198 |
# heatmap annotations |
|
|
199 |
|
|
|
200 |
ha1 = HeatmapAnnotation(exprs = anno_barplot(exprs_num, |
|
|
201 |
bar_width = 0.75, |
|
|
202 |
border = FALSE, |
|
|
203 |
axis = TRUE, |
|
|
204 |
axis_param = list(gp = gpar(fontsize = 5, lwd = 0.5)), |
|
|
205 |
gp = gpar(col = NA, fill = "grey50")), |
|
|
206 |
meth = anno_barplot(meth_num, |
|
|
207 |
bar_width = 0.75, |
|
|
208 |
border = FALSE, |
|
|
209 |
axis = TRUE, |
|
|
210 |
axis_param = list(gp = gpar(fontsize = 5, lwd = 0.5)), |
|
|
211 |
gp = gpar(col = NA, fill = "#a52a2a")), |
|
|
212 |
df = annot_df, |
|
|
213 |
col = list(subtype = structure(as.character(cols$combinedcolor[cols$sample %in% hem_cga_meth_mean$Hist.Subtype_hem]), |
|
|
214 |
names = as.character(cols$sample[cols$sample %in% hem_cga_meth_mean$Hist.Subtype_hem]))[as.character(disease_order$disease[order(disease_order$rank)])] |
|
|
215 |
), |
|
|
216 |
annotation_legend_param = list(subtype = list(title = "Cancer type", title_gp = gpar(fontsize = 7, fontface = "bold"), |
|
|
217 |
labels_gp = gpar(fontsize = 7), grid_height = unit(3, "mm"), grid_width = unit(3, "mm")) |
|
|
218 |
), |
|
|
219 |
show_annotation_name = F, |
|
|
220 |
height = unit(2, "cm")) |
|
|
221 |
|
|
|
222 |
meth_corr_df <- data.frame(corr = mean_corr$r[match(rownames(mat), mean_corr$gexp)]) |
|
|
223 |
colnames(meth_corr_df) <- "Correlation to methylation" |
|
|
224 |
|
|
|
225 |
mat <- mat[order(meth_corr_df$`Correlation to methylation`),sample_order] |
|
|
226 |
|
|
|
227 |
meth_corr_df <- data.frame(corr = mean_corr$r[match(rownames(mat), mean_corr$gexp)]) |
|
|
228 |
colnames(meth_corr_df) <- "meth_corr" |
|
|
229 |
|
|
|
230 |
ha2 = HeatmapAnnotation(df = meth_corr_df, |
|
|
231 |
which = "row", |
|
|
232 |
col = list(meth_corr = colorRamp2(c(-1, -0.5, -0.375, -0.25, -0.125, 0, 0.125, 0.25, 0.375, 0.5, 1), rev(brewer.pal(11, "PuOr")))), |
|
|
233 |
annotation_legend_param = list(meth_corr = list(title = "Correlation to methylation", title_gp = gpar(fontsize = 7, fontface = "bold"), |
|
|
234 |
labels_gp = gpar(fontsize = 7)), width = unit(0.1, "cm"), grid_height = unit(2, "mm"), grid_width = unit(2, "mm"), legend_direction = "horizontal", legend_width = unit(2, "cm"), title_position = "topcenter")) |
|
|
235 |
|
|
|
236 |
|
|
|
237 |
# print heatmap |
|
|
238 |
pdf(file = "Figure6D_CCLE_CGA_heatmap.pdf", height = 4, width = 5) |
|
|
239 |
draw( |
|
|
240 |
ha2 + |
|
|
241 |
Heatmap(mat, |
|
|
242 |
name = "Expression Z-score", |
|
|
243 |
col = colorRamp2(seq(-3, 3, length.out = 11), rev(brewer.pal(11, "RdBu"))), |
|
|
244 |
row_names_side = "right", |
|
|
245 |
row_names_gp = gpar(fontsize = 6), |
|
|
246 |
show_column_names = FALSE, |
|
|
247 |
cluster_rows = FALSE, |
|
|
248 |
cluster_columns = FALSE, |
|
|
249 |
top_annotation = ha1, |
|
|
250 |
heatmap_legend_param = list(title_gp = gpar(fontsize = 7, fontface = "bold"), |
|
|
251 |
labels_gp = gpar(fontsize = 7), |
|
|
252 |
legend_direction = "horizontal", |
|
|
253 |
legend_width = unit(2, "cm"), title_position = "topcenter", |
|
|
254 |
grid_height = unit(2, "mm"), |
|
|
255 |
grid_width = unit(2, "mm") |
|
|
256 |
) |
|
|
257 |
), |
|
|
258 |
auto_adjust = F, |
|
|
259 |
padding = unit(c(2, 15, 2, 2), "mm"), heatmap_legend_side = "bottom") |
|
|
260 |
decorate_annotation("exprs", { |
|
|
261 |
grid.text("# CGAs expressed", unit(0, "npc") + unit(1, "mm"), 0.5, |
|
|
262 |
default.units = "npc", just = c("left", "bottom"), gp = gpar(fontsize = 7)) |
|
|
263 |
}) |
|
|
264 |
decorate_annotation("meth", { |
|
|
265 |
grid.text("# CGAs hypomethylated", unit(0, "npc") + unit(1, "mm"), 0.5, |
|
|
266 |
default.units = "npc", just = c("left", "bottom"), gp = gpar(fontsize = 7)) |
|
|
267 |
}) |
|
|
268 |
dev.off() |
|
|
269 |
|