Diff of /vignettes/src/part03.Rmd [000000] .. [226bc8]

Switch to unified view

a b/vignettes/src/part03.Rmd
1
---
2
title: "Part 3: Heatmap for Fig.3A"
3
output:
4
  BiocStyle::html_document
5
---
6
7
# Global overview of the drug response landscape
8
9
```{r setup, message=FALSE, include=!exists(".standalone"), eval=!exists(".standalone")}
10
options(error = recover)
11
knitr::opts_chunk$set(tidy = FALSE, # cache = TRUE, autodep = TRUE,
12
                      message = FALSE, error = FALSE, warning = TRUE)
13
14
library("BloodCancerMultiOmics2017")
15
library("Biobase")
16
library("dplyr")
17
library("RColorBrewer")
18
library("dendsort")
19
library("nat")  # provides nlapply
20
library("grid")
21
library("magrittr")
22
```
23
24
```{r echo=FALSE}
25
plotDir = ifelse(exists(".standalone"), "", "part03/")
26
if(plotDir!="") if(!file.exists(plotDir)) dir.create(plotDir)
27
```
28
29
30
Load data 
31
```{r load}
32
data("conctab", "lpdAll", "drugs", "patmeta")
33
lpdCLL <- lpdAll[, lpdAll$Diagnosis=="CLL"]
34
lpdAll = lpdAll[, lpdAll$Diagnosis!="hMNC"]
35
```
36
37
## Setup
38
39
### Additional functions
40
41
```{r}
42
someMatch <- function(...) {
43
  rv <- match(...)
44
  if (all(is.na(rv)))
45
    stop(sprintf("`match` failed to match any of the following: %s",
46
                 paste(x[is.na(rv)], collapse=", ")))
47
  rv  
48
}
49
```
50
51
52
### Preprocess `IGHV gene usage`
53
54
Find the largest categories, which we will represent by colours, 
55
and merge the others in a group `other`.
56
57
```{r geneusage}
58
colnames(pData(lpdAll))
59
gu    <- pData(lpdAll)$`IGHV Uppsala gene usage`
60
tabgu <- sort(table(gu), decreasing = TRUE)
61
biggestGroups <- names(tabgu)[1:5]
62
gu[ is.na(match(gu, biggestGroups)) & !is.na(gu) ] <- "other"
63
pData(lpdAll)$`IGHV gene usage` <- factor(gu, levels = c(biggestGroups, "other"))
64
```
65
66
### Some drugs that we are particularly interested in
67
68
```{r targetedDrugNames}
69
stopifnot(is.null(drugs$id))
70
drugs$id <- rownames(drugs)
71
72
targetedDrugNames <- c("ibrutinib", "idelalisib",  "PRT062607 HCl", 
73
   "duvelisib", "spebrutinib", "selumetinib", "MK-2206",  
74
   "everolimus", "encorafenib")
75
id1 <- safeMatch(targetedDrugNames, drugs$name)
76
targetedDrugs <- paste( rep(drugs[id1, "id"], each = 2), 4:5, sep="_" )
77
78
chemoDrugNames <- c("fludarabine", "doxorubicine",  "nutlin-3")
79
id2 <- safeMatch(chemoDrugNames, drugs$name)
80
chemoDrugs <- paste( rep(drugs[id2, "id"], each = 5), 3:5, sep="_" )
81
82
tzselDrugNames <- c("ibrutinib", "idelalisib", "duvelisib", "selumetinib", 
83
    "AZD7762", "MK-2206", "everolimus", "venetoclax", "thapsigargin", 
84
    "AT13387", "YM155", "encorafenib", "tamatinib", "ruxolitinib", 
85
    "PF 477736", "fludarabine", "nutlin-3")
86
id3 <- safeMatch(tzselDrugNames, drugs$name)
87
tzselDrugs <- unlist(lapply(seq(along = tzselDrugNames), function(i)
88
  paste(drugs[id3[i], "id"], 
89
      if (tzselDrugNames[i] %in% c("fludarabine", "nutlin-3")) 2:3 else 4:5, 
90
    sep = "_" )))
91
```
92
93
### Feature weighting and score for dendrogram reordering
94
95
The `weights` are used for weighting the similarity metric used in heatmap clustering. 
96
`weights$hclust` affects the clustering of the patients.
97
`weights$score` affects the dendrogram reordering of the drugs.
98
99
```{r addBCR}
100
bcrDrugs <- c("ibrutinib", "idelalisib", "PRT062607 HCl", "spebrutinib")
101
everolID <- drugs$id[ safeMatch("everolimus",  drugs$name)]
102
bcrID    <- drugs$id[ safeMatch(bcrDrugs, drugs$name)]
103
104
is_BCR  <- 
105
  (fData(lpdAll)$id %in% bcrID)    & (fData(lpdAll)$subtype %in% paste(4:5))
106
is_mTOR <- 
107
  (fData(lpdAll)$id %in% everolID) & (fData(lpdAll)$subtype %in% paste(4:5))
108
109
myin <- function(x, y) as.numeric( (x %in% y) & !is.na(x) )
110
111
weights1 <- data.frame(
112
  hclust = rep(1, nrow(lpdAll)) + 1.75 * is_mTOR, 
113
  score  = as.numeric( is_BCR ), 
114
  row.names = rownames(lpdAll))
115
116
weights2 <- data.frame(
117
  row.names = tzselDrugs,
118
  hclust = myin(drugs$target_category[id3], "B-cell receptor") * 0.3 + 0.7,
119
  score  = rep(1, length(tzselDrugs))) 
120
```
121
122
Remove drugs that failed quality control: NSC 74859, bortezomib.
123
124
```{r badDrugs}
125
badDrugs <- c(bortezomib = "D_008", `NSC 74859` = "D_025")
126
stopifnot(identical(drugs[ badDrugs, "name"], names(badDrugs)))
127
128
candDrugs <- rownames(lpdAll)[  
129
  fData(lpdAll)$type=="viab" & !(fData(lpdAll)$id %in% badDrugs) &
130
  fData(lpdAll)$subtype %in% paste(2:5)
131
]
132
```
133
134
Threshold parameters: drugs are accepted that for at least `effectNum` samples 
135
have a viability effect less than or equal to `effectVal`. On the other hand, the 
136
mean viability effect should not be below `viab`.
137
138
```{r threshs}
139
thresh <- list(effectVal = 0.7, effectNum = 4, viab = 0.6, maxval = 1.1)
140
overallMean  <- rowMeans(Biobase::exprs(lpdAll)[candDrugs, ])
141
nthStrongest <- apply(Biobase::exprs(lpdAll)[candDrugs, ], 1,
142
                      function(x) sort(x)[thresh$effectNum])
143
```
144
```{r hists, fig.width = 8, fig.height = 2.6}
145
par(mfrow = c(1, 3))
146
hist(overallMean,  breaks = 30, col = "pink")
147
abline(v = thresh$viab,      col="blue")
148
hist(nthStrongest, breaks = 30, col = "pink")
149
abline(v = thresh$effectVal, col="blue")
150
plot(overallMean, nthStrongest)
151
abline(h = thresh$effectVal, v = thresh$viab, col = "blue")
152
```
153
154
`seldrugs1` and `d1`: as in the version of 
155
Figure 3A we had in the first submission to JCI. `d2`: two concentrations for each drug in `tzselDrugNames`.
156
157
```{r seldrugs1}
158
seldrugs1 <- candDrugs[ overallMean >= thresh$viab &
159
                        nthStrongest <= thresh$effectVal ] %>%
160
            union(targetedDrugs) %>% 
161
            union(chemoDrugs)
162
163
d1 <- Biobase::exprs(lpdAll[seldrugs1,, drop = FALSE ]) %>%
164
  deckel(lower = 0, upper = thresh$maxval)
165
166
d2 <- Biobase::exprs(lpdAll[tzselDrugs,, drop = FALSE ]) %>%
167
  deckel(lower = 0, upper = thresh$maxval)
168
```
169
170
We are going to scale the data. But was is the best measure of scale (or spread)? Let's explore
171
different measures of spread. We'll see that it does not seem to matter too much which one we use. 
172
We'll use median centering and scaling by mad.
173
174
```{r differentspread, fig.width = 4, fig.height = 4}
175
spreads <- sapply(list(mad = mad, `Q95-Q05` = function(x)
176
  diff(quantile(x, probs = c(0.05, 0.95)))), function(s) apply(d1, 1, s))
177
plot( spreads )
178
179
jj <- names(which( spreads[, "mad"] < 0.15 & spreads[, "Q95-Q05"] > 0.7))
180
jj
181
drugs[ stripConc(jj), "name" ]
182
```
183
184
```{r transf}
185
medianCenter_MadScale <- function(x) {
186
  s <- median(x)
187
  (x - s) / deckel(mad(x, center = s), lower = 0.05, upper = 0.2)
188
}
189
190
scaleDrugResp  <- function(x) t(apply(x, 1, medianCenter_MadScale)) 
191
192
scd1 <- scaleDrugResp(d1)
193
scd2 <- scaleDrugResp(d2)
194
```
195
196
## Define disease groups
197
198
```{r defdisgrp}
199
sort(table(lpdAll$Diagnosis), decreasing = TRUE)
200
201
diseaseGroups <- list(
202
  `CLL` = c("CLL"), 
203
  `MCL` = c("MCL"),
204
  `HCL` = c("HCL", "HCL-V"),
205
  `other B-cell` = c("B-PLL", "MZL", "LPL", "FL"), 
206
  `T-cell` = c("T-PLL", "Sezary", "PTCL-NOS"),
207
  `myeloid` = c("AML"))
208
stopifnot(setequal(unlist(diseaseGroups), unique(lpdAll$Diagnosis)))
209
210
fdg <- factor(rep(NA, ncol(lpdAll)), levels = names(diseaseGroups))
211
for (i in names(diseaseGroups))
212
  fdg[ lpdAll$Diagnosis %in% diseaseGroups[[i]] ] <- i
213
lpdAll$`Disease Group` <- fdg
214
```
215
216
## Code for heatmap 
217
218
### Matrix row / column clustering
219
220
The helper function `matClust` clusters a matrix `x`, 
221
whose columns represent samples and whose rows represent drugs.
222
Its arguments control how the columns are clustered:
223
224
- `weights`: a `data.frame` with a weight for each row of `x`. The weights are used in the computation of distances 
225
  between columns and thus for column sorting. The `data.frame`'s column `hclust` contains the weights 
226
  for `hclust(dist())`. The column `score` contains the weights for computing the score used for dendrogram reordering.
227
  See `weights1` and `weights2` defined above.
228
- `colgroups`: a `factor` by which to first split the columns before clustering
229
- `reorderrows`: logical. `FALSE` for previous behaviour (old Fig. 3A), `TRUE` for reordering the row dendrogram, too.
230
231
```{r matclust}
232
matClust <- function(x, 
233
                     rowweights,
234
                     colgroups   = factor(rep("all", ncol(x))),
235
                     reorderrows = FALSE) {
236
 
237
  stopifnot(is.data.frame(rowweights),
238
            c("hclust", "score") %in% colnames(rowweights),
239
            !is.null(rownames(rowweights)),
240
            !is.null(rownames(x)), 
241
            all(rownames(x) %in% rownames(rowweights)),
242
            is.factor(colgroups), 
243
            !any(is.na(colgroups)), 
244
            length(colgroups) == ncol(x))
245
246
  wgt <- rowweights[ rownames(x), ]
247
  
248
  columnsClust <- function(xk) {
249
    score   <- -svd(xk * wgt[, "score"])$v[, 1]
250
    cmns    <- colSums(xk * wgt[, "score"])
251
    ## make sure that high score = high sensitivity  
252
    if (cor(score, cmns) > 0) score <- (-score)
253
    
254
    ddraw  <- as.dendrogram(hclust(dist(t(xk * wgt[, "hclust"]), 
255
                                        method = "euclidean"), 
256
                                   method = "complete"))
257
    dd  <- reorder(ddraw, wts = -score, agglo.FUN = mean)
258
    ord <- order.dendrogram(dd)
259
    list(dd = dd, ord = ord, score = score)
260
  }
261
262
  sp <- split(seq(along = colgroups), colgroups)
263
  cc <- lapply(sp, function(k) columnsClust(x[, k, drop=FALSE]))
264
  cidx <- unlist(lapply(seq(along = cc), function (i)
265
    sp[[i]][ cc[[i]]$ord ]))
266
  csc  <- unlist(lapply(seq(along = cc), function (i)
267
    cc[[i]]$score[ cc[[i]]$ord ]))
268
269
  rddraw <- as.dendrogram(hclust(dist(x, method = "euclidean"),
270
                                   method = "complete"))
271
  ridx  <- if (reorderrows) {
272
    ww <- (colgroups == "CLL")
273
    stopifnot(!any(is.na(ww)), any(ww))
274
    rowscore <- svd(t(x) * ww)$v[, 1]
275
    dd <- reorder(rddraw, wts = rowscore, agglo.FUN = mean)
276
    order.dendrogram(dd)
277
  } else {
278
    rev(order.dendrogram(dendsort(rddraw)))
279
  }
280
281
  res <- x[ridx, cidx]
282
  
283
  stopifnot(identical(dim(res), dim(x)))
284
  attr(res, "colgap") <- cumsum(sapply(cc, function(x) length(x$score)))
285
  res
286
}
287
```
288
289
### Prepare sample annotations 
290
I.e. the right hand side color bar. `IGHV Uppsala U/M` is implied by 
291
`IGHV Uppsala % SHM` (see sensi2.Rmd).
292
`cut(..., right=FALSE)` will use intervals that are closed on the left 
293
and open on the right.
294
295
```{r}
296
translation = list(IGHV=c(U=0, M=1),
297
                   Methylation_Cluster=c(`LP-CLL`=0, `IP-CLL`=1, `HP-CLL`=2))
298
```
299
300
```{r annosamp}
301
make_pd <- function(cn, ...) {
302
  df <- function(...) data.frame(..., check.names = FALSE)
303
  
304
  x <- lpdAll[, cn]
305
  pd <- df(
306
    t(Biobase::exprs(x)[    c("del17p13", "TP53", "trisomy12"), , drop = FALSE]) %>% 
307
      `colnames<-`(c("del 17p13", "TP53", "trisomy 12")))
308
  
309
  # pd <- df(pd,
310
  #    t(Biobase::exprs(x)[  c("SF3B1", "del11q22.3", "del13q14_any"),, drop = FALSE]) %>% 
311
  #    `colnames<-`(c("SF3B1", "del11q22.3", "del13q14"))) 
312
  
313
  pd <- df(pd,
314
      cbind(as.integer(Biobase::exprs(x)["KRAS",] | Biobase::exprs(x)["NRAS",])) %>% 
315
      `colnames<-`("KRAS | NRAS")) 
316
  
317
  pd <- df(pd,
318
    # IGHV = Biobase::exprs(x)["IGHV Uppsala U/M", ],
319
    `IGHV (%)` = cut(x[["IGHV Uppsala % SHM"]],
320
                     breaks = c(0, seq(92, 100, by = 2), Inf), right = FALSE),
321
    `Meth. Cluster` = names(translation$Methylation_Cluster)[
322
          someMatch(paste(Biobase::exprs(x)["Methylation_Cluster", ]),
323
                    translation$Methylation_Cluster)],
324
    `Gene usage` = x$`IGHV gene usage`)
325
  
326
  if(length(unique(x$Diagnosis)) > 1)  
327
    pd <- df(pd, Diagnosis = x$Diagnosis)
328
  
329
  pd <- df(pd, 
330
    pretreated  = ifelse(patmeta[colnames(x),"IC50beforeTreatment"],"no","yes"), 
331
    alive       = ifelse(patmeta[colnames(x),"died"]>0, "no", "yes"),
332
    sex         = factor(x$Gender))
333
  
334
  rownames(pd) <- colnames(Biobase::exprs(x))
335
  
336
  for (i in setdiff(colnames(pd), "BCR score")) {
337
    if (!is.factor(pd[[i]]))
338
      pd[[i]] <- factor(pd[[i]])
339
    if (any(is.na(pd[[i]]))) {
340
      levels(pd[[i]]) <- c(levels(pd[[i]]), "n.d.")
341
      pd[[i]][ is.na(pd[[i]]) ] <- "n.d."   
342
    }
343
  }
344
  
345
  pd
346
}
347
```
348
349
### Define the annotation colors
350
351
```{r annokey}
352
gucol <- rev(brewer.pal(nlevels(lpdAll$`IGHV gene usage`), "Set3")) %>% 
353
             `names<-`(sort(levels(lpdAll$`IGHV gene usage`)))
354
gucol["IGHV3-21"] <- "#E41A1C"
355
356
make_ann_colors <- function(pd) {
357
  bw <- c(`TRUE` = "darkblue", `FALSE` = "#ffffff")
358
  res <- list(
359
  Btk = bw, Syk = bw, PI3K = bw, MEK = bw)
360
  
361
  if ("exptbatch" %in% colnames(pd))
362
    res$exptbatch <- brewer.pal(nlevels(pd$exptbatch), "Set2") %>%
363
    `names<-`(levels(pd$exptbatch))
364
  
365
  if ("IGHV (%)" %in% colnames(pd))
366
    res$`IGHV (%)` <- 
367
       c(rev(colorRampPalette(
368
         brewer.pal(9, "Blues"))(nlevels(pd$`IGHV (%)`)-1)), "white") %>% 
369
                        `names<-`(levels(pd$`IGHV (%)`))
370
  
371
  if ("CD38" %in% colnames(pd))
372
    res$CD38 <- colorRampPalette(
373
      c("blue", "yellow"))(nlevels(pd$CD38)) %>% `names<-`(levels(pd$CD38))
374
  
375
  if("Gene usage" %in% colnames(pd)) 
376
    res$`Gene usage` <- gucol 
377
378
  if("Meth. Cluster" %in% colnames(pd)) 
379
    res$`Meth. Cluster` <- brewer.pal(9, "Blues")[c(1, 5, 9)] %>% 
380
        `names<-`(names(translation$Methylation_Cluster))
381
  
382
  res <- c(res, BloodCancerMultiOmics2017:::sampleColors)   # from addons.R  
383
  
384
  if("Diagnosis" %in% colnames(pd)) 
385
    res$Diagnosis <- BloodCancerMultiOmics2017:::colDiagS[
386
      names(BloodCancerMultiOmics2017:::colDiagS) %in% levels(pd$Diagnosis) ] %>% 
387
         (function(x) x[order(names(x))])
388
  
389
  for(i in names(res)) {
390
    whnd <- which(names(res[[i]]) == "n.d.")
391
    if(length(whnd)==1)
392
      res[[i]][whnd] <- "#e0e0e0" else 
393
        res[[i]] <- c(res[[i]], `n.d.` = "#e0e0e0")
394
    stopifnot(all(pd[[i]] %in% names(res[[i]])))
395
  }
396
  
397
  res
398
}
399
```
400
401
### Heatmap drawing function
402
403
```{r dm}
404
theatmap <- function(x, cellwidth = 7, cellheight = 11) {
405
  
406
  stopifnot(is.matrix(x))
407
  patDat <- make_pd(colnames(x))
408
  
409
  bpp <- brewer.pal(9, "Set1")
410
  breaks <- 2.3 * c(seq(-1, 1, length.out = 101)) %>% `names<-`(
411
      colorRampPalette(c(rev(brewer.pal(7, "YlOrRd")), 
412
                         "white", "white", "white", 
413
                         brewer.pal(7, "Blues")))(101))
414
415
  if (!is.null(attr(x, "colgap")))
416
    stopifnot(last(attr(x, "colgap")) == ncol(x))
417
418
  pheatmapwh(deckel(x, lower = first(breaks), upper = last(breaks)), 
419
         cluster_rows = FALSE,  
420
         cluster_cols = FALSE,
421
         gaps_col = attr(x, "colgap"),
422
         gaps_row = attr(x, "rowgap"),
423
         scale = "none",
424
         annotation_col = patDat,
425
         annotation_colors = make_ann_colors(patDat),
426
         color = names(breaks),
427
         breaks = breaks, 
428
         show_rownames = TRUE, 
429
         show_colnames = !TRUE, 
430
         cellwidth = cellwidth, cellheight = cellheight, 
431
         fontsize = 10, fontsize_row = 11, fontsize_col = 8,
432
         annotation_legend = TRUE, drop_levels = TRUE)
433
  }
434
```
435
436
## Draw the heatmaps
437
438
Things we see in the plot:
439
440
- separation of U-CLL and M-CLL within CLL
441
- BCR-targeting drugs a cluster at the top
442
- everolimus stands out in M-CLL with a separate sensitivity pattern
443
- encorafenib clusters together and pops out in HCL
444
445
clscd1/2: **cl**ustered and **sc**aled **d**rug **mat**rix
446
447
```{r doheatmaps}
448
clscd1 <- matClust(scd1, rowweights = weights1, 
449
                  colgroups = lpdAll$`Disease Group`)
450
clscd2 <- matClust(scd2, rowweights = weights2, 
451
                  colgroups = lpdAll$`Disease Group`, reorderrows = TRUE)
452
```
453
454
### Identify places where gaps between the rows should be
455
456
```{r gapsrow}
457
setGapPositions <- function(x, gapAt) {
458
  rg <- if (missing(gapAt)) c(0) else {
459
    s <- strsplit(gapAt, split = "--")
460
    stopifnot(all(listLen(s) == 2L))
461
    s <- strsplit(unlist(s), split = ":")
462
    spname <- drugs[safeMatch(sapply(s, `[`, 1), drugs$name), "id"]
463
    spconc <- as.numeric(sapply(s, `[`, 2))
464
    spi <- mapply(function(d, cc) {
465
      i <- which(cc == conctab[d, ])
466
      stopifnot(length(i) == 1)
467
      i
468
     }, spname, spconc)
469
    spdrug <- paste(spname, spi, sep = "_")
470
    mt <- safeMatch(spdrug, rownames(x))
471
    igp <- seq(1, length(mt), by = 2L)
472
    stopifnot(all( mt[igp] - mt[igp + 1] == 1))
473
    #stopifnot(all( mt[igp] - mt[igp + 1] == 1))
474
    c(mt[igp + 1], 0)
475
  }
476
  attr(x, "rowgap") <- rg
477
  x
478
}
479
480
clscd1 %<>% setGapPositions(gapAt = c(
481
  "PF 477736:10--idelalisib:10",
482
  "spebrutinib:2.5--PF 477736:2.5",
483
  "PRT062607 HCl:10--selumetinib:2.5",
484
  "selumetinib:10--MK-2206:2.5",
485
  "MK-2206:0.156--tipifarnib:10",
486
  "AT13387:0.039--encorafenib:10",
487
  "encorafenib:2.5--SD07:1.111",
488
  "doxorubicine:0.016--encorafenib:0.625",
489
  "encorafenib:0.156--rotenone:2.5",
490
  "SCH 900776:0.625--everolimus:0.625",
491
  "everolimus:10--afatinib:1.667",
492
  "arsenic trioxide:1--thapsigargin:5",
493
  "thapsigargin:0.313--fludarabine:0.156"
494
))
495
496
clscd2 %<>% setGapPositions(gapAt = c(
497
  "AT13387:0.039--everolimus:0.156",
498
  "everolimus:0.625--nutlin-3:10", 
499
  "fludarabine:10--thapsigargin:0.078",
500
  "thapsigargin:0.313--encorafenib:0.625", 
501
  "encorafenib:0.156--ruxolitinib:0.156"
502
))
503
```
504
505
```{r Fig3AheatmapV1, dev = c("png", "pdf"), fig.path = plotDir, fig.height = 8.0 + nrow(clscd1) * 0.12, fig.width = 31, fig.wide = TRUE}
506
#FIG# S8
507
rownames(clscd1) <- with(fData(lpdAll)[ rownames(clscd1),, drop = FALSE], 
508
  paste0(drugs[id, "name"], " ", conctab[cbind(id, paste0("c", subtype))], "uM")) 
509
rownames(clscd1)
510
theatmap(clscd1)
511
```
512
513
```{r Fig3AheatmapV2, dev = c("png", "pdf"), fig.path = plotDir, fig.height = 8.0 + nrow(clscd2) * 0.12, fig.width = 31, fig.wide = TRUE}
514
#FIG# 3A
515
rownames(clscd2) <- with(fData(lpdAll)[ rownames(clscd2),, drop = FALSE], 
516
  paste0(drugs[id, "name"], " ", conctab[cbind(id, paste0("c", subtype))], "uM")) 
517
518
rownames(clscd2)
519
theatmap(clscd2)
520
```
521
522
523
```{r, include=!exists(".standalone"), eval=!exists(".standalone")}
524
devtools::session_info()
525
```
526
527
```{r, message=FALSE, warning=FALSE, include=FALSE, eval=exists(".standalone")}
528
rm(list=ls())
529
```