Diff of /vignettes/src/part05.Rmd [000000] .. [226bc8]

Switch to unified view

a b/vignettes/src/part05.Rmd
1
---
2
title: "Part 5"
3
output:
4
  BiocStyle::html_document
5
---
6
7
```{r, message=FALSE, warning=FALSE, include=!exists(".standalone"), eval=!exists(".standalone")}
8
library("BloodCancerMultiOmics2017")
9
library("Biobase")
10
library("dplyr")
11
library("tidyr")
12
library("broom")
13
library("ggplot2")
14
library("grid")
15
library("gridExtra")
16
library("reshape2")
17
library("foreach")
18
library("doParallel")
19
library("scales")
20
library("knitr")
21
registerDoParallel()
22
```
23
24
```{r echo=FALSE}
25
plotDir = ifelse(exists(".standalone"), "", "part05/")
26
if(plotDir!="") if(!file.exists(plotDir)) dir.create(plotDir)
27
```
28
29
```{r}
30
options(stringsAsFactors=FALSE)
31
```
32
33
34
# Associations of drug responses with mutations in CLL (IGHV not included)
35
36
In this part, we use both gene mutations and chromosome aberrations to test for gene-drug response associations. In contrast to the analysis done previously, we exclude IGHV status from testing. Additionally, we use information on patient treatment status to account for its effect on drug response screening.
37
38
## Additional functions
39
40
Accessor functions:
41
```{r}
42
# get drug responsee data
43
get.drugresp <- function(lpd) {
44
  drugresp = t(Biobase::exprs(lpd[fData(lpd)$type == 'viab'])) %>%
45
    dplyr::tbl_df() %>% dplyr::select(-ends_with(":5")) %>%
46
    dplyr::mutate(ID = colnames(lpd)) %>%
47
    tidyr::gather(drugconc, viab, -ID) %>%
48
    dplyr::mutate(drug = drugs[substring(drugconc, 1, 5), "name"],
49
           conc = sub("^D_([0-9]+_)", "", drugconc)) %>%
50
    dplyr::mutate(conc = as.integer(gsub("D_CHK_", "", conc)))
51
  
52
  drugresp
53
}
54
55
# extract mutations and IGHV status
56
get.somatic <- function(lpd) {
57
  somatic = t(Biobase::exprs(lpd[Biobase::fData(lpd)$type == 'gen' | 
58
                                   Biobase::fData(lpd)$type == 'IGHV']))
59
  ## rename IGHV Uppsala to 'IGHV' (simply)
60
  colnames(somatic)[grep("IGHV", colnames(somatic))] = "IGHV"
61
  
62
  ## at least 3 patients should have this mutation
63
  min.samples = which(Matrix::colSums(somatic, na.rm = T) > 2)
64
  somatic = dplyr::tbl_df(somatic[, min.samples]) %>%
65
    dplyr::select(-one_of("del13q14_bi", "del13q14_mono", 
66
                          "Chromothripsis", "RP11-766F14.2")) %>%
67
    dplyr::rename(del13q14 = del13q14_any) %>% 
68
    dplyr::mutate(ID = colnames(lpd)) %>%
69
    tidyr::gather(mutation, mut.value, -ID)
70
  somatic
71
}
72
```
73
74
Define the ggplot themes
75
```{r ggTheme}
76
t1<-theme(                              
77
  plot.background = element_blank(), 
78
  panel.grid.major = element_line(),
79
  panel.grid.major.x = element_line(linetype = "dotted", colour = "grey"),
80
  panel.grid.minor = element_blank(), 
81
  panel.border = element_blank(), 
82
  panel.background = element_blank(),
83
  axis.line = element_line(size=.4),
84
  axis.line.x = element_line(),
85
  axis.line.y = element_line(),
86
  axis.text.x  = element_text(angle=90, size=12, 
87
                              face="bold", hjust = 1, vjust = 0.4),
88
  axis.text.y = element_text(size = 14),
89
  axis.ticks.x = element_line(linetype = "dotted"),
90
  axis.ticks.length = unit(0.3,"cm"),
91
  axis.title.x = element_text(face="bold", size=16), 
92
  axis.title.y = element_text(face="bold", size=20),
93
  plot.title = element_text(face="bold", size=16, hjust = 0.5)
94
)
95
96
## theme for the legend
97
t.leg <-  theme(legend.title = element_text(face='bold', 
98
                                            hjust = 1, size=11),
99
                legend.position = c(0, 0.76),
100
                legend.key = element_blank(),
101
                legend.text = element_text(size=12),
102
                legend.background = element_rect(color = "black"))
103
```
104
105
Define the main color palette: 
106
```{r colorPalette}
107
colors= c("#015872","#3A9C94","#99977D","#ffbf00","#5991C7","#99cc00",
108
          "#D5A370","#801416","#B2221C","#ff5050","#33bbff","#5c5cd6",
109
          "#E394BB","#0066ff","#C0C0C0")
110
```
111
112
Get pretreatment status:
113
```{r}
114
get.pretreat <- function(patmeta, lpd) {
115
  patmeta = patmeta[rownames(patmeta) %in% colnames(lpd),]
116
  data.frame(ID=rownames(patmeta), pretreat=!patmeta$IC50beforeTreatment) %>% 
117
    mutate(pretreat = as.factor(pretreat))
118
  
119
}
120
```
121
122
Merge drug response, pretreatment information and somatic mutation data sets
123
```{r}
124
make.dr <- function(resp, features, patmeta, lpd) {
125
  treat = get.pretreat(patmeta, lpd)
126
  dr = full_join(resp, features) %>% 
127
    inner_join(treat) 
128
}
129
```
130
131
Summarize viabilities using Tukey's medpolish
132
```{r}
133
get.medp <- function(drugresp) {
134
  tab = drugresp %>% group_by(drug, conc) %>% 
135
    do(data.frame(v = .$viab, ID = .$ID)) %>% spread(ID, v)
136
  
137
  med.p = foreach(n=unique(tab$drug), .combine = cbind) %dopar% {
138
    tb = filter(tab, drug == n) %>% ungroup() %>% dplyr::select(-(drug:conc)) %>% 
139
      as.matrix %>% `rownames<-`(1:5)
140
    mdp = stats::medpolish(tb)
141
    df = as.data.frame(mdp$col) + mdp$overall
142
    colnames(df) <- n
143
    df
144
  }
145
  
146
  medp.viab = dplyr::tbl_df(med.p) %>% dplyr::mutate(ID = rownames(med.p)) %>%
147
    tidyr::gather(drug, viab, -ID) 
148
  medp.viab
149
}
150
```
151
152
Process labels for the legend:
153
```{r}
154
get.labels <- function(pvals) {
155
  lev = levels(factor(pvals$mutation))
156
  lev = gsub("^(gain)([0-9]+)([a-z][0-9]+)$", "\\1(\\2)(\\3)", lev)
157
  lev =  gsub("^(del)([0-9]+)([a-z].+)$", "\\1(\\2)(\\3)", lev)
158
  lev = gsub("trisomy12", "trisomy 12", lev)
159
  lev
160
}
161
```
162
163
Get order of mutations
164
```{r}
165
get.mutation.order <- function(lev) {
166
  ord = c("trisomy 12", "TP53",
167
          "del(11)(q22.3)", "del(13)(q14)",
168
          "del(17)(p13)",
169
          "gain(8)(q24)",
170
          "BRAF", "CREBBP", "PRPF8",
171
          "KLHL6", "NRAS", "ABI3BP", "UMODL1")
172
  mut.order = c(match(ord, lev),
173
                grep("Other", lev), grep("Below", lev))
174
  
175
  mut.order
176
}
177
```
178
179
Group drugs by pathway/target
180
```{r}
181
get.drug.order <- function(pvals, drugs) {
182
  ## determine drug order by column sums of log-p values
183
  dr.order = pvals %>% 
184
    mutate(logp = -log10(p.value)) %>% 
185
    group_by(drug) %>% summarise(logsum = sum(logp)) 
186
  
187
  dr.order = inner_join(dr.order, pvals %>%
188
                          group_by(drug) %>% 
189
                          summarise(n = length(unique(mutation)))) %>% 
190
    arrange(desc(n), desc(logsum))
191
  
192
  dr.order = inner_join(dr.order, drugs %>% rename(drug = name))
193
  
194
  dr.order = left_join(dr.order, dr.order %>% 
195
                         group_by(`target_category`) ) %>%
196
    arrange(`target_category`, drug) %>%
197
    filter(! `target_category` %in% c("ALK", "Angiogenesis", "Other")) %>%
198
    filter(!is.na(`target_category`))
199
  
200
  dr.order
201
}
202
```
203
204
Add pathway annotations for selected drug classes
205
```{r}
206
make.annot <- function(g, dr.order) {
207
  # make a color palette for drug pathways
208
  drug.class = c("#273649", "#647184", "#B1B2C8",
209
                 "#A7755D", "#5D2E1C", "#38201C")
210
  pathways = c("BH3","B-cell receptor","DNA damage",
211
               "MAPK", "PI3K", "Reactive oxygen species")
212
  names(pathways) = c("BH3", "BCR inhibitors", "DNA damage",
213
                      "MAPK", "PI3K", "ROS")
214
  
215
  for (i in 1:6) {
216
    prange = grep(pathways[i], dr.order$`target_category`)
217
    path.grob <- grobTree(rectGrob(gp=gpar(fill=drug.class[i])),
218
                          textGrob(names(pathways)[i], 
219
                                   gp = gpar(cex =0.8, col = "white")))
220
    g = g + 
221
      annotation_custom(path.grob, 
222
                        xmin = min(prange) -0.25 - 0.1 * ifelse(i == 2, 1, 0), 
223
                        xmax = max(prange) + 0.25 + 0.1 * ifelse(i == 2, 1, 0), 
224
                        ymin = -0.52, ymax = -0.2)
225
  }
226
  g
227
}
228
```
229
230
Define a function for `glegend`
231
```{r}
232
g_legend<-function(a.gplot){
233
  tmp <- ggplot_gtable(ggplot_build(a.gplot))
234
  leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
235
  legend <- tmp$grobs[[leg]]
236
  legend
237
} ## end define
238
```
239
240
## Data setup
241
242
Load the data.
243
```{r}
244
data(list=c("conctab", "drugs", "lpdAll", "patmeta"))
245
```
246
247
Get drug response data.
248
```{r preprocesslpd}
249
lpdCLL <- lpdAll[ , lpdAll$Diagnosis=="CLL"]
250
## extract viability data for 5 different concentrations
251
drugresp = get.drugresp(lpdCLL)
252
```
253
254
Get somatic mutations and structural variants.
255
```{r preprocessMuts}
256
## extract somatic variants
257
somatic = get.somatic(lpdCLL) %>% 
258
  mutate(mut.value = as.factor(mut.value))
259
```
260
261
## Test for drug-gene associations
262
263
Summarize drug response using median polish.
264
```{r medPolish, warning=FALSE, results='hide'}
265
## compute median polish patient effects and recalculate p-values
266
medp.viab = get.medp(drugresp)
267
dr = make.dr(medp.viab, somatic, patmeta, lpdCLL)
268
```
269
270
Calculate $p$ values and FDR (10%).
271
```{r pvalsAndFDR}
272
pvals = dr %>% group_by(drug, mutation) %>%
273
  do(tidy(t.test(viab ~ mut.value, data = ., var.equal = T))) %>%
274
  dplyr::select(drug, mutation, p.value)
275
276
# compute the FDR threshold
277
fd.thresh = 10
278
padj = p.adjust(pvals$p.value, method = "BH")
279
fdr = max(pvals$p.value[which(padj <= fd.thresh/100)])
280
```
281
282
Remove unnecessary mutations and bad drugs.
283
```{r subsetPvals}
284
# selected mutations
285
select.mutations = c("trisomy12", "TP53",
286
        "del11q22.3", "del13q14",
287
        "del17p13",
288
        "gain8q24",
289
        "BRAF", "CREBBP", "PRPF8",
290
        "KLHL6", "NRAS", "ABI3BP", "UMODL1")
291
292
pvals = filter(pvals, mutation != 'IGHV')
293
pvals = pvals %>% ungroup() %>%
294
  mutate(mutation = ifelse(p.value > fdr, 
295
                           paste0("Below ", fd.thresh,"% FDR"), mutation)) %>%
296
  mutate(mutation = ifelse(!(mutation %in% select.mutations) & 
297
                             !(mutation == paste0("Below ", fd.thresh,"% FDR")), 
298
                           "Other", mutation)) %>%
299
  filter(drug != "bortezomib" & drug != "NSC 74859")
300
```
301
302
Reshape names of genomic rearrangements.
303
```{r renameMuts}
304
## order of mutations
305
lev = get.labels(pvals)
306
folge = get.mutation.order(lev)
307
```
308
309
Set order of drugs.
310
```{r pvalsForGGplot}
311
drugs = drugs[,c("name", "target_category")]
312
# get the drug order
313
dr.order = get.drug.order(pvals, drugs)
314
```
315
316
317
## Plot results
318
319
### Main Figure
320
321
Function for generating the figure.
322
```{r}
323
plot.pvalues <- function(pvals, dr.order, folge, colors, shapes) {
324
  g = ggplot(data = filter(pvals, drug %in% dr.order$drug)) +
325
  geom_point(aes(x = factor(drug, levels = dr.order$drug), y = -log10(p.value), 
326
                 colour = factor(mutation, levels(factor(mutation))[folge]),
327
                 shape  = factor(mutation, levels(factor(mutation))[folge])),
328
             size=5, show.legend = T)  + 
329
  scale_color_manual(name = "Mutations",
330
                     values = colors,
331
                     labels = lev[folge]) + 
332
  scale_shape_manual(name = "Mutations",
333
                     values = shapes,
334
                     labels = lev[folge]) + t1 + 
335
  labs(x = "", y = expression(paste(-log[10], "p")), title = "") +
336
    scale_y_continuous(expression(italic(p)*"-value"),
337
                       breaks=seq(0,10,5),
338
                       labels=math_format(expr=10^.x)(-seq(0,10,5))) 
339
  g
340
}
341
```
342
343
344
```{r pvalsMain, fig.path=plotDir, dev=c("png", "pdf"), fig.width=14, fig.height=10}
345
#FIG# 4A
346
## plot the p-values 
347
g = plot.pvalues(pvals, dr.order, folge, 
348
                 colors, shapes = c(rep(16,13), c(1,1)))
349
350
## add FDR threshold
351
g = g + geom_hline(yintercept = -log10(fdr),
352
                   linetype="dashed", size=0.3)
353
354
g = g + 
355
  annotation_custom(grob = textGrob(label = paste0("FDR", fd.thresh, "%"), 
356
                                    hjust = 1, vjust = 1, 
357
                                    gp = gpar(cex = 0.5,
358
                                              fontface = "bold",
359
                                              fontsize = 25)),
360
                    ymin = -log10(fdr) - 0.2, 
361
                    ymax = -log10(fdr) + 0.5, 
362
                    xmin = -1.3, xmax = 1.5) + 
363
  theme(legend.position = "none")
364
365
# generate pathway/target annotations for certain drug classes
366
#g = make.annot(g, dr.order)
367
368
# legend guide
369
leg.guides <- guides(colour = guide_legend(ncol = 1, 
370
                                           byrow = TRUE,
371
                                           override.aes = list(size = 3),
372
                                           title = "Mutations",
373
                                           label.hjust = 0,
374
                                           keywidth = 0.4,
375
                                           keyheight = 0.8), 
376
                      shape = guide_legend(ncol = 1, 
377
                                           byrow = TRUE,
378
                                           title = "Mutations",
379
                                           label.hjust = 0,
380
                                           keywidth = 0.4,
381
                                           keyheight = 0.8))
382
383
# create a legend grob
384
legend = g_legend(g + t.leg +  leg.guides)
385
386
## arranget the main plot and the legend
387
# using grid graphics
388
gt <- ggplot_gtable(ggplot_build(g + theme(legend.position = 'none')))
389
gt$layout$clip[gt$layout$name == "panel"] <- "off"
390
391
grid.arrange(gt, legend,
392
             ncol=2, nrow=1, widths=c(0.92,0.08))
393
```
394
395
### Supplementary Figure (incl. pretreatment)
396
397
In the supplementary figure we use pretreatment status as a blocking factor, i.e. we model drug sensitivity - gene variant associations as: `lm(viability ~ mutation + pretreatment)`
398
399
```{r}
400
## lm(viab ~ mutation + pretreatment.status)
401
pvals = dr %>% group_by(drug, mutation) %>%
402
  do(tidy(lm(viab ~ mut.value + pretreat, data = .))) %>%
403
  filter(term == 'mut.value1') %>%
404
  dplyr::select(drug, mutation, p.value)
405
406
# compute the FDR threshold
407
fd.thresh = 10
408
padj = p.adjust(pvals$p.value, method = "BH")
409
fdr = max(pvals$p.value[which(padj <= fd.thresh/100)])
410
411
412
pvals = filter(pvals, mutation != 'IGHV')
413
pvals = pvals %>% ungroup() %>%
414
  mutate(mutation = ifelse(p.value > fdr,
415
                           paste0("Below ", fd.thresh,"% FDR"),
416
                           mutation)) %>%
417
  mutate(mutation = ifelse(!(mutation %in% select.mutations) &
418
                             !(mutation == paste0("Below ",
419
                                                  fd.thresh,"% FDR")), 
420
                           "Other", mutation)) %>%
421
  filter(drug != "bortezomib" & drug != "NSC 74859")
422
423
424
lev = get.labels(pvals)
425
folge = get.mutation.order(lev)
426
427
# get the drug order
428
dr.order = get.drug.order(pvals, drugs)
429
430
mut.order = folge[!is.na(folge)]
431
```
432
433
After recomputing the $p$-values (using a linear model that accounts for pretreatment status), plot the figure:
434
```{r pvalsSupp, fig.path=plotDir, dev=c("png", "pdf"), fig.width=14, fig.height=10}
435
#FIG# S19
436
## plot the p-values 
437
g = plot.pvalues(pvals, dr.order, mut.order, 
438
                 colors[which(!is.na(folge))], shapes = c(rep(16,9), c(1,1)))
439
440
## add FDR threshold
441
g = g + geom_hline(yintercept = -log10(fdr),
442
                   linetype="dashed", size=0.3)
443
444
g = g + 
445
  annotation_custom(grob = textGrob(label = paste0("FDR", fd.thresh, "%"), 
446
                                    hjust = 1, vjust = 1, 
447
                                    gp = gpar(cex = 0.5,
448
                                              fontface = "bold",
449
                                              fontsize = 25)),
450
                    ymin = -log10(fdr) - 0.2, 
451
                    ymax = -log10(fdr) + 0.5, 
452
                    xmin = -1.3, xmax = 1.5) + 
453
  theme(legend.position = "none")
454
455
# generate pathway/target annotations for certain drug classes
456
#g = make.annot(g, dr.order)
457
458
# legend guide
459
leg.guides <- guides(colour = guide_legend(ncol = 1, 
460
                                           byrow = TRUE,
461
                                           override.aes = list(size = 3),
462
                                           title = "Mutations",
463
                                           label.hjust = 0,
464
                                           keywidth = 0.4,
465
                                           keyheight = 0.8), 
466
                      shape = guide_legend(ncol = 1, 
467
                                           byrow = TRUE,
468
                                           title = "Mutations",
469
                                           label.hjust = 0,
470
                                           keywidth = 0.4,
471
                                           keyheight = 0.8))
472
473
# create a legend grob
474
legend = g_legend(g + t.leg +  leg.guides)
475
476
## arranget the main plot and the legend
477
# using grid graphics
478
gt <- ggplot_gtable(ggplot_build(g + theme(legend.position = 'none')))
479
gt$layout$clip[gt$layout$name == "panel"] <- "off"
480
481
grid.arrange(gt, legend,
482
             ncol=2, nrow=1, widths=c(0.92,0.08))
483
```
484
485
486
## Comparison of $P$-Values
487
488
```{r}
489
pvals.main = dr %>% group_by(drug, mutation) %>%
490
  do(tidy(t.test(viab ~ mut.value, data = ., var.equal = T))) %>%
491
  dplyr::select(drug, mutation, p.value)
492
493
p.main.adj = p.adjust(pvals.main$p.value, method = "BH")
494
fdr.main = max(pvals.main$p.value[which(p.main.adj <= fd.thresh/100)])
495
496
pvals.main = filter(pvals.main, mutation != "IGHV") %>%
497
             rename(p.main = p.value)
498
499
500
## lm(viab ~ mutation + pretreatment.status)
501
pvals.sup = dr %>% group_by(drug, mutation) %>%
502
  do(tidy(lm(viab ~ mut.value + pretreat, data = .))) %>%
503
  filter(term == 'mut.value1') %>%
504
  dplyr::select(drug, mutation, p.value)
505
506
p.sup.adj = p.adjust(pvals.sup$p.value, method = "BH")
507
fdr.sup = max(pvals.sup$p.value[which(p.sup.adj <= fd.thresh/100)])
508
509
pvals.sup = filter(pvals.sup, mutation != "IGHV") %>%
510
  rename(p.sup = p.value)
511
512
513
pvals = inner_join(pvals.main, pvals.sup)
514
pvals = mutate(pvals, signif = ifelse(p.main > fdr.main, 
515
                                      ifelse(p.sup > fdr.sup, 
516
                                             "Below 10% FDR in both models", 
517
                                             "Significant with pretreatment accounted"), 
518
                                      ifelse(p.sup > fdr.sup, 
519
                                             "Significant without pretreatment in the model", 
520
                                             "Significant in both models")))
521
522
t2<-theme(                              
523
  plot.background = element_blank(), 
524
  panel.grid.major = element_line(),
525
  panel.grid.major.x = element_line(),
526
  panel.grid.minor = element_blank(), 
527
  panel.border = element_blank(), 
528
  panel.background = element_blank(),
529
  axis.line = element_line(size=.4),
530
  axis.line.x = element_line(),
531
  axis.line.y = element_line(),
532
  axis.text.x  = element_text(size=12),
533
  axis.text.y = element_text(size = 12),
534
  axis.title.x = element_text(face="bold", size=12), 
535
  axis.title.y = element_text(face="bold", size=12),
536
  legend.title = element_text(face='bold', 
537
                                            hjust = 1, size=10),
538
  legend.position = c(0.78, 0.11),
539
  legend.key = element_blank(),
540
  legend.text = element_text(size=10),
541
  legend.background = element_rect(color = "black")
542
)
543
```
544
545
546
```{r pvalComparisonScatterplot, fig.path=plotDir, dev=c("png", "pdf"), fig.width=10, fig.height=7}
547
#FIG# S19
548
ggplot(pvals, aes(-log10(p.main), -log10(p.sup), colour = factor(signif))) + 
549
  geom_point() + t2 + labs(x = expression(paste(-log[10], "p, pretreatment not considered", sep = "")),
550
                                   y = expression(paste(-log[10], "p, accounting for pretreatment", sep = ""))) +
551
  coord_fixed() +
552
  scale_x_continuous(breaks = seq(0,9,by = 3)) + 
553
  scale_y_continuous(breaks = seq(0,9,by = 3)) + 
554
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") + 
555
  scale_color_manual(name = "Statistical Significance",
556
                     values = c("#F1BB7B","#669999", "#FD6467", "#5B1A18"))
557
```
558
559
560
What are the drug-mutation pairs that are significant only in one or another model (i.e. only without pretreatment or with pretreatment included)? 
561
562
```{r}
563
signif.in.one = filter(pvals, 
564
                       signif %in% c("Significant with pretreatment accounted",
565
                            "Significant without pretreatment in the model")) %>%
566
        arrange(signif)
567
kable(signif.in.one, digits = 4,
568
      align = c("l", "l", "c", "c", "c"),
569
      col.names = c("Drug", "Mutation", "P-value (Main)",
570
                    "P-value (Supplement)", "Statistical significance"),
571
      format.args = list(width = 14))
572
```
573
574
Produce LaTeX output for the Supplement:
575
```{r, comment=NA, eval=FALSE}
576
print(kable(signif.in.one, format = "latex", digits = 4,
577
       align = c("l", "l", "c", "c", "c"),
578
      col.names = c("Drug", "Mutation", "P-value (Main)",
579
                    "P-value (Supplement)", "Statistical significance")))
580
```
581
582
```{r, include=!exists(".standalone"), eval=!exists(".standalone")}
583
sessionInfo()
584
```
585
586
```{r, message=FALSE, warning=FALSE, include=FALSE}
587
rm(list=ls())
588
```