530 lines (427 with data), 17.5 kB
---
title: "Part 3: Heatmap for Fig.3A"
output:
BiocStyle::html_document
---
# Global overview of the drug response landscape
```{r setup, message=FALSE, include=!exists(".standalone"), eval=!exists(".standalone")}
options(error = recover)
knitr::opts_chunk$set(tidy = FALSE, # cache = TRUE, autodep = TRUE,
message = FALSE, error = FALSE, warning = TRUE)
library("BloodCancerMultiOmics2017")
library("Biobase")
library("dplyr")
library("RColorBrewer")
library("dendsort")
library("nat") # provides nlapply
library("grid")
library("magrittr")
```
```{r echo=FALSE}
plotDir = ifelse(exists(".standalone"), "", "part03/")
if(plotDir!="") if(!file.exists(plotDir)) dir.create(plotDir)
```
Load data
```{r load}
data("conctab", "lpdAll", "drugs", "patmeta")
lpdCLL <- lpdAll[, lpdAll$Diagnosis=="CLL"]
lpdAll = lpdAll[, lpdAll$Diagnosis!="hMNC"]
```
## Setup
### Additional functions
```{r}
someMatch <- function(...) {
rv <- match(...)
if (all(is.na(rv)))
stop(sprintf("`match` failed to match any of the following: %s",
paste(x[is.na(rv)], collapse=", ")))
rv
}
```
### Preprocess `IGHV gene usage`
Find the largest categories, which we will represent by colours,
and merge the others in a group `other`.
```{r geneusage}
colnames(pData(lpdAll))
gu <- pData(lpdAll)$`IGHV Uppsala gene usage`
tabgu <- sort(table(gu), decreasing = TRUE)
biggestGroups <- names(tabgu)[1:5]
gu[ is.na(match(gu, biggestGroups)) & !is.na(gu) ] <- "other"
pData(lpdAll)$`IGHV gene usage` <- factor(gu, levels = c(biggestGroups, "other"))
```
### Some drugs that we are particularly interested in
```{r targetedDrugNames}
stopifnot(is.null(drugs$id))
drugs$id <- rownames(drugs)
targetedDrugNames <- c("ibrutinib", "idelalisib", "PRT062607 HCl",
"duvelisib", "spebrutinib", "selumetinib", "MK-2206",
"everolimus", "encorafenib")
id1 <- safeMatch(targetedDrugNames, drugs$name)
targetedDrugs <- paste( rep(drugs[id1, "id"], each = 2), 4:5, sep="_" )
chemoDrugNames <- c("fludarabine", "doxorubicine", "nutlin-3")
id2 <- safeMatch(chemoDrugNames, drugs$name)
chemoDrugs <- paste( rep(drugs[id2, "id"], each = 5), 3:5, sep="_" )
tzselDrugNames <- c("ibrutinib", "idelalisib", "duvelisib", "selumetinib",
"AZD7762", "MK-2206", "everolimus", "venetoclax", "thapsigargin",
"AT13387", "YM155", "encorafenib", "tamatinib", "ruxolitinib",
"PF 477736", "fludarabine", "nutlin-3")
id3 <- safeMatch(tzselDrugNames, drugs$name)
tzselDrugs <- unlist(lapply(seq(along = tzselDrugNames), function(i)
paste(drugs[id3[i], "id"],
if (tzselDrugNames[i] %in% c("fludarabine", "nutlin-3")) 2:3 else 4:5,
sep = "_" )))
```
### Feature weighting and score for dendrogram reordering
The `weights` are used for weighting the similarity metric used in heatmap clustering.
`weights$hclust` affects the clustering of the patients.
`weights$score` affects the dendrogram reordering of the drugs.
```{r addBCR}
bcrDrugs <- c("ibrutinib", "idelalisib", "PRT062607 HCl", "spebrutinib")
everolID <- drugs$id[ safeMatch("everolimus", drugs$name)]
bcrID <- drugs$id[ safeMatch(bcrDrugs, drugs$name)]
is_BCR <-
(fData(lpdAll)$id %in% bcrID) & (fData(lpdAll)$subtype %in% paste(4:5))
is_mTOR <-
(fData(lpdAll)$id %in% everolID) & (fData(lpdAll)$subtype %in% paste(4:5))
myin <- function(x, y) as.numeric( (x %in% y) & !is.na(x) )
weights1 <- data.frame(
hclust = rep(1, nrow(lpdAll)) + 1.75 * is_mTOR,
score = as.numeric( is_BCR ),
row.names = rownames(lpdAll))
weights2 <- data.frame(
row.names = tzselDrugs,
hclust = myin(drugs$target_category[id3], "B-cell receptor") * 0.3 + 0.7,
score = rep(1, length(tzselDrugs)))
```
Remove drugs that failed quality control: NSC 74859, bortezomib.
```{r badDrugs}
badDrugs <- c(bortezomib = "D_008", `NSC 74859` = "D_025")
stopifnot(identical(drugs[ badDrugs, "name"], names(badDrugs)))
candDrugs <- rownames(lpdAll)[
fData(lpdAll)$type=="viab" & !(fData(lpdAll)$id %in% badDrugs) &
fData(lpdAll)$subtype %in% paste(2:5)
]
```
Threshold parameters: drugs are accepted that for at least `effectNum` samples
have a viability effect less than or equal to `effectVal`. On the other hand, the
mean viability effect should not be below `viab`.
```{r threshs}
thresh <- list(effectVal = 0.7, effectNum = 4, viab = 0.6, maxval = 1.1)
overallMean <- rowMeans(Biobase::exprs(lpdAll)[candDrugs, ])
nthStrongest <- apply(Biobase::exprs(lpdAll)[candDrugs, ], 1,
function(x) sort(x)[thresh$effectNum])
```
```{r hists, fig.width = 8, fig.height = 2.6}
par(mfrow = c(1, 3))
hist(overallMean, breaks = 30, col = "pink")
abline(v = thresh$viab, col="blue")
hist(nthStrongest, breaks = 30, col = "pink")
abline(v = thresh$effectVal, col="blue")
plot(overallMean, nthStrongest)
abline(h = thresh$effectVal, v = thresh$viab, col = "blue")
```
`seldrugs1` and `d1`: as in the version of
Figure 3A we had in the first submission to JCI. `d2`: two concentrations for each drug in `tzselDrugNames`.
```{r seldrugs1}
seldrugs1 <- candDrugs[ overallMean >= thresh$viab &
nthStrongest <= thresh$effectVal ] %>%
union(targetedDrugs) %>%
union(chemoDrugs)
d1 <- Biobase::exprs(lpdAll[seldrugs1,, drop = FALSE ]) %>%
deckel(lower = 0, upper = thresh$maxval)
d2 <- Biobase::exprs(lpdAll[tzselDrugs,, drop = FALSE ]) %>%
deckel(lower = 0, upper = thresh$maxval)
```
We are going to scale the data. But was is the best measure of scale (or spread)? Let's explore
different measures of spread. We'll see that it does not seem to matter too much which one we use.
We'll use median centering and scaling by mad.
```{r differentspread, fig.width = 4, fig.height = 4}
spreads <- sapply(list(mad = mad, `Q95-Q05` = function(x)
diff(quantile(x, probs = c(0.05, 0.95)))), function(s) apply(d1, 1, s))
plot( spreads )
jj <- names(which( spreads[, "mad"] < 0.15 & spreads[, "Q95-Q05"] > 0.7))
jj
drugs[ stripConc(jj), "name" ]
```
```{r transf}
medianCenter_MadScale <- function(x) {
s <- median(x)
(x - s) / deckel(mad(x, center = s), lower = 0.05, upper = 0.2)
}
scaleDrugResp <- function(x) t(apply(x, 1, medianCenter_MadScale))
scd1 <- scaleDrugResp(d1)
scd2 <- scaleDrugResp(d2)
```
## Define disease groups
```{r defdisgrp}
sort(table(lpdAll$Diagnosis), decreasing = TRUE)
diseaseGroups <- list(
`CLL` = c("CLL"),
`MCL` = c("MCL"),
`HCL` = c("HCL", "HCL-V"),
`other B-cell` = c("B-PLL", "MZL", "LPL", "FL"),
`T-cell` = c("T-PLL", "Sezary", "PTCL-NOS"),
`myeloid` = c("AML"))
stopifnot(setequal(unlist(diseaseGroups), unique(lpdAll$Diagnosis)))
fdg <- factor(rep(NA, ncol(lpdAll)), levels = names(diseaseGroups))
for (i in names(diseaseGroups))
fdg[ lpdAll$Diagnosis %in% diseaseGroups[[i]] ] <- i
lpdAll$`Disease Group` <- fdg
```
## Code for heatmap
### Matrix row / column clustering
The helper function `matClust` clusters a matrix `x`,
whose columns represent samples and whose rows represent drugs.
Its arguments control how the columns are clustered:
- `weights`: a `data.frame` with a weight for each row of `x`. The weights are used in the computation of distances
between columns and thus for column sorting. The `data.frame`'s column `hclust` contains the weights
for `hclust(dist())`. The column `score` contains the weights for computing the score used for dendrogram reordering.
See `weights1` and `weights2` defined above.
- `colgroups`: a `factor` by which to first split the columns before clustering
- `reorderrows`: logical. `FALSE` for previous behaviour (old Fig. 3A), `TRUE` for reordering the row dendrogram, too.
```{r matclust}
matClust <- function(x,
rowweights,
colgroups = factor(rep("all", ncol(x))),
reorderrows = FALSE) {
stopifnot(is.data.frame(rowweights),
c("hclust", "score") %in% colnames(rowweights),
!is.null(rownames(rowweights)),
!is.null(rownames(x)),
all(rownames(x) %in% rownames(rowweights)),
is.factor(colgroups),
!any(is.na(colgroups)),
length(colgroups) == ncol(x))
wgt <- rowweights[ rownames(x), ]
columnsClust <- function(xk) {
score <- -svd(xk * wgt[, "score"])$v[, 1]
cmns <- colSums(xk * wgt[, "score"])
## make sure that high score = high sensitivity
if (cor(score, cmns) > 0) score <- (-score)
ddraw <- as.dendrogram(hclust(dist(t(xk * wgt[, "hclust"]),
method = "euclidean"),
method = "complete"))
dd <- reorder(ddraw, wts = -score, agglo.FUN = mean)
ord <- order.dendrogram(dd)
list(dd = dd, ord = ord, score = score)
}
sp <- split(seq(along = colgroups), colgroups)
cc <- lapply(sp, function(k) columnsClust(x[, k, drop=FALSE]))
cidx <- unlist(lapply(seq(along = cc), function (i)
sp[[i]][ cc[[i]]$ord ]))
csc <- unlist(lapply(seq(along = cc), function (i)
cc[[i]]$score[ cc[[i]]$ord ]))
rddraw <- as.dendrogram(hclust(dist(x, method = "euclidean"),
method = "complete"))
ridx <- if (reorderrows) {
ww <- (colgroups == "CLL")
stopifnot(!any(is.na(ww)), any(ww))
rowscore <- svd(t(x) * ww)$v[, 1]
dd <- reorder(rddraw, wts = rowscore, agglo.FUN = mean)
order.dendrogram(dd)
} else {
rev(order.dendrogram(dendsort(rddraw)))
}
res <- x[ridx, cidx]
stopifnot(identical(dim(res), dim(x)))
attr(res, "colgap") <- cumsum(sapply(cc, function(x) length(x$score)))
res
}
```
### Prepare sample annotations
I.e. the right hand side color bar. `IGHV Uppsala U/M` is implied by
`IGHV Uppsala % SHM` (see sensi2.Rmd).
`cut(..., right=FALSE)` will use intervals that are closed on the left
and open on the right.
```{r}
translation = list(IGHV=c(U=0, M=1),
Methylation_Cluster=c(`LP-CLL`=0, `IP-CLL`=1, `HP-CLL`=2))
```
```{r annosamp}
make_pd <- function(cn, ...) {
df <- function(...) data.frame(..., check.names = FALSE)
x <- lpdAll[, cn]
pd <- df(
t(Biobase::exprs(x)[ c("del17p13", "TP53", "trisomy12"), , drop = FALSE]) %>%
`colnames<-`(c("del 17p13", "TP53", "trisomy 12")))
# pd <- df(pd,
# t(Biobase::exprs(x)[ c("SF3B1", "del11q22.3", "del13q14_any"),, drop = FALSE]) %>%
# `colnames<-`(c("SF3B1", "del11q22.3", "del13q14")))
pd <- df(pd,
cbind(as.integer(Biobase::exprs(x)["KRAS",] | Biobase::exprs(x)["NRAS",])) %>%
`colnames<-`("KRAS | NRAS"))
pd <- df(pd,
# IGHV = Biobase::exprs(x)["IGHV Uppsala U/M", ],
`IGHV (%)` = cut(x[["IGHV Uppsala % SHM"]],
breaks = c(0, seq(92, 100, by = 2), Inf), right = FALSE),
`Meth. Cluster` = names(translation$Methylation_Cluster)[
someMatch(paste(Biobase::exprs(x)["Methylation_Cluster", ]),
translation$Methylation_Cluster)],
`Gene usage` = x$`IGHV gene usage`)
if(length(unique(x$Diagnosis)) > 1)
pd <- df(pd, Diagnosis = x$Diagnosis)
pd <- df(pd,
pretreated = ifelse(patmeta[colnames(x),"IC50beforeTreatment"],"no","yes"),
alive = ifelse(patmeta[colnames(x),"died"]>0, "no", "yes"),
sex = factor(x$Gender))
rownames(pd) <- colnames(Biobase::exprs(x))
for (i in setdiff(colnames(pd), "BCR score")) {
if (!is.factor(pd[[i]]))
pd[[i]] <- factor(pd[[i]])
if (any(is.na(pd[[i]]))) {
levels(pd[[i]]) <- c(levels(pd[[i]]), "n.d.")
pd[[i]][ is.na(pd[[i]]) ] <- "n.d."
}
}
pd
}
```
### Define the annotation colors
```{r annokey}
gucol <- rev(brewer.pal(nlevels(lpdAll$`IGHV gene usage`), "Set3")) %>%
`names<-`(sort(levels(lpdAll$`IGHV gene usage`)))
gucol["IGHV3-21"] <- "#E41A1C"
make_ann_colors <- function(pd) {
bw <- c(`TRUE` = "darkblue", `FALSE` = "#ffffff")
res <- list(
Btk = bw, Syk = bw, PI3K = bw, MEK = bw)
if ("exptbatch" %in% colnames(pd))
res$exptbatch <- brewer.pal(nlevels(pd$exptbatch), "Set2") %>%
`names<-`(levels(pd$exptbatch))
if ("IGHV (%)" %in% colnames(pd))
res$`IGHV (%)` <-
c(rev(colorRampPalette(
brewer.pal(9, "Blues"))(nlevels(pd$`IGHV (%)`)-1)), "white") %>%
`names<-`(levels(pd$`IGHV (%)`))
if ("CD38" %in% colnames(pd))
res$CD38 <- colorRampPalette(
c("blue", "yellow"))(nlevels(pd$CD38)) %>% `names<-`(levels(pd$CD38))
if("Gene usage" %in% colnames(pd))
res$`Gene usage` <- gucol
if("Meth. Cluster" %in% colnames(pd))
res$`Meth. Cluster` <- brewer.pal(9, "Blues")[c(1, 5, 9)] %>%
`names<-`(names(translation$Methylation_Cluster))
res <- c(res, BloodCancerMultiOmics2017:::sampleColors) # from addons.R
if("Diagnosis" %in% colnames(pd))
res$Diagnosis <- BloodCancerMultiOmics2017:::colDiagS[
names(BloodCancerMultiOmics2017:::colDiagS) %in% levels(pd$Diagnosis) ] %>%
(function(x) x[order(names(x))])
for(i in names(res)) {
whnd <- which(names(res[[i]]) == "n.d.")
if(length(whnd)==1)
res[[i]][whnd] <- "#e0e0e0" else
res[[i]] <- c(res[[i]], `n.d.` = "#e0e0e0")
stopifnot(all(pd[[i]] %in% names(res[[i]])))
}
res
}
```
### Heatmap drawing function
```{r dm}
theatmap <- function(x, cellwidth = 7, cellheight = 11) {
stopifnot(is.matrix(x))
patDat <- make_pd(colnames(x))
bpp <- brewer.pal(9, "Set1")
breaks <- 2.3 * c(seq(-1, 1, length.out = 101)) %>% `names<-`(
colorRampPalette(c(rev(brewer.pal(7, "YlOrRd")),
"white", "white", "white",
brewer.pal(7, "Blues")))(101))
if (!is.null(attr(x, "colgap")))
stopifnot(last(attr(x, "colgap")) == ncol(x))
pheatmapwh(deckel(x, lower = first(breaks), upper = last(breaks)),
cluster_rows = FALSE,
cluster_cols = FALSE,
gaps_col = attr(x, "colgap"),
gaps_row = attr(x, "rowgap"),
scale = "none",
annotation_col = patDat,
annotation_colors = make_ann_colors(patDat),
color = names(breaks),
breaks = breaks,
show_rownames = TRUE,
show_colnames = !TRUE,
cellwidth = cellwidth, cellheight = cellheight,
fontsize = 10, fontsize_row = 11, fontsize_col = 8,
annotation_legend = TRUE, drop_levels = TRUE)
}
```
## Draw the heatmaps
Things we see in the plot:
- separation of U-CLL and M-CLL within CLL
- BCR-targeting drugs a cluster at the top
- everolimus stands out in M-CLL with a separate sensitivity pattern
- encorafenib clusters together and pops out in HCL
clscd1/2: **cl**ustered and **sc**aled **d**rug **mat**rix
```{r doheatmaps}
clscd1 <- matClust(scd1, rowweights = weights1,
colgroups = lpdAll$`Disease Group`)
clscd2 <- matClust(scd2, rowweights = weights2,
colgroups = lpdAll$`Disease Group`, reorderrows = TRUE)
```
### Identify places where gaps between the rows should be
```{r gapsrow}
setGapPositions <- function(x, gapAt) {
rg <- if (missing(gapAt)) c(0) else {
s <- strsplit(gapAt, split = "--")
stopifnot(all(listLen(s) == 2L))
s <- strsplit(unlist(s), split = ":")
spname <- drugs[safeMatch(sapply(s, `[`, 1), drugs$name), "id"]
spconc <- as.numeric(sapply(s, `[`, 2))
spi <- mapply(function(d, cc) {
i <- which(cc == conctab[d, ])
stopifnot(length(i) == 1)
i
}, spname, spconc)
spdrug <- paste(spname, spi, sep = "_")
mt <- safeMatch(spdrug, rownames(x))
igp <- seq(1, length(mt), by = 2L)
stopifnot(all( mt[igp] - mt[igp + 1] == 1))
#stopifnot(all( mt[igp] - mt[igp + 1] == 1))
c(mt[igp + 1], 0)
}
attr(x, "rowgap") <- rg
x
}
clscd1 %<>% setGapPositions(gapAt = c(
"PF 477736:10--idelalisib:10",
"spebrutinib:2.5--PF 477736:2.5",
"PRT062607 HCl:10--selumetinib:2.5",
"selumetinib:10--MK-2206:2.5",
"MK-2206:0.156--tipifarnib:10",
"AT13387:0.039--encorafenib:10",
"encorafenib:2.5--SD07:1.111",
"doxorubicine:0.016--encorafenib:0.625",
"encorafenib:0.156--rotenone:2.5",
"SCH 900776:0.625--everolimus:0.625",
"everolimus:10--afatinib:1.667",
"arsenic trioxide:1--thapsigargin:5",
"thapsigargin:0.313--fludarabine:0.156"
))
clscd2 %<>% setGapPositions(gapAt = c(
"AT13387:0.039--everolimus:0.156",
"everolimus:0.625--nutlin-3:10",
"fludarabine:10--thapsigargin:0.078",
"thapsigargin:0.313--encorafenib:0.625",
"encorafenib:0.156--ruxolitinib:0.156"
))
```
```{r Fig3AheatmapV1, dev = c("png", "pdf"), fig.path = plotDir, fig.height = 8.0 + nrow(clscd1) * 0.12, fig.width = 31, fig.wide = TRUE}
#FIG# S8
rownames(clscd1) <- with(fData(lpdAll)[ rownames(clscd1),, drop = FALSE],
paste0(drugs[id, "name"], " ", conctab[cbind(id, paste0("c", subtype))], "uM"))
rownames(clscd1)
theatmap(clscd1)
```
```{r Fig3AheatmapV2, dev = c("png", "pdf"), fig.path = plotDir, fig.height = 8.0 + nrow(clscd2) * 0.12, fig.width = 31, fig.wide = TRUE}
#FIG# 3A
rownames(clscd2) <- with(fData(lpdAll)[ rownames(clscd2),, drop = FALSE],
paste0(drugs[id, "name"], " ", conctab[cbind(id, paste0("c", subtype))], "uM"))
rownames(clscd2)
theatmap(clscd2)
```
```{r, include=!exists(".standalone"), eval=!exists(".standalone")}
devtools::session_info()
```
```{r, message=FALSE, warning=FALSE, include=FALSE, eval=exists(".standalone")}
rm(list=ls())
```