[226bc8]: / vignettes / src / part03.Rmd

Download this file

530 lines (427 with data), 17.5 kB

---
title: "Part 3: Heatmap for Fig.3A"
output:
  BiocStyle::html_document
---

# Global overview of the drug response landscape

```{r setup, message=FALSE, include=!exists(".standalone"), eval=!exists(".standalone")}
options(error = recover)
knitr::opts_chunk$set(tidy = FALSE, # cache = TRUE, autodep = TRUE,
                      message = FALSE, error = FALSE, warning = TRUE)

library("BloodCancerMultiOmics2017")
library("Biobase")
library("dplyr")
library("RColorBrewer")
library("dendsort")
library("nat")  # provides nlapply
library("grid")
library("magrittr")
```

```{r echo=FALSE}
plotDir = ifelse(exists(".standalone"), "", "part03/")
if(plotDir!="") if(!file.exists(plotDir)) dir.create(plotDir)
```


Load data 
```{r load}
data("conctab", "lpdAll", "drugs", "patmeta")
lpdCLL <- lpdAll[, lpdAll$Diagnosis=="CLL"]
lpdAll = lpdAll[, lpdAll$Diagnosis!="hMNC"]
```

## Setup

### Additional functions

```{r}
someMatch <- function(...) {
  rv <- match(...)
  if (all(is.na(rv)))
    stop(sprintf("`match` failed to match any of the following: %s",
                 paste(x[is.na(rv)], collapse=", ")))
  rv  
}
```


### Preprocess `IGHV gene usage`

Find the largest categories, which we will represent by colours, 
and merge the others in a group `other`.

```{r geneusage}
colnames(pData(lpdAll))
gu    <- pData(lpdAll)$`IGHV Uppsala gene usage`
tabgu <- sort(table(gu), decreasing = TRUE)
biggestGroups <- names(tabgu)[1:5]
gu[ is.na(match(gu, biggestGroups)) & !is.na(gu) ] <- "other"
pData(lpdAll)$`IGHV gene usage` <- factor(gu, levels = c(biggestGroups, "other"))
```

### Some drugs that we are particularly interested in

```{r targetedDrugNames}
stopifnot(is.null(drugs$id))
drugs$id <- rownames(drugs)

targetedDrugNames <- c("ibrutinib", "idelalisib",  "PRT062607 HCl", 
   "duvelisib", "spebrutinib", "selumetinib", "MK-2206",  
   "everolimus", "encorafenib")
id1 <- safeMatch(targetedDrugNames, drugs$name)
targetedDrugs <- paste( rep(drugs[id1, "id"], each = 2), 4:5, sep="_" )

chemoDrugNames <- c("fludarabine", "doxorubicine",  "nutlin-3")
id2 <- safeMatch(chemoDrugNames, drugs$name)
chemoDrugs <- paste( rep(drugs[id2, "id"], each = 5), 3:5, sep="_" )

tzselDrugNames <- c("ibrutinib", "idelalisib", "duvelisib", "selumetinib", 
    "AZD7762", "MK-2206", "everolimus", "venetoclax", "thapsigargin", 
    "AT13387", "YM155", "encorafenib", "tamatinib", "ruxolitinib", 
    "PF 477736", "fludarabine", "nutlin-3")
id3 <- safeMatch(tzselDrugNames, drugs$name)
tzselDrugs <- unlist(lapply(seq(along = tzselDrugNames), function(i)
  paste(drugs[id3[i], "id"], 
      if (tzselDrugNames[i] %in% c("fludarabine", "nutlin-3")) 2:3 else 4:5, 
    sep = "_" )))
```

### Feature weighting and score for dendrogram reordering

The `weights` are used for weighting the similarity metric used in heatmap clustering. 
`weights$hclust` affects the clustering of the patients.
`weights$score` affects the dendrogram reordering of the drugs.

```{r addBCR}
bcrDrugs <- c("ibrutinib", "idelalisib", "PRT062607 HCl", "spebrutinib")
everolID <- drugs$id[ safeMatch("everolimus",  drugs$name)]
bcrID    <- drugs$id[ safeMatch(bcrDrugs, drugs$name)]

is_BCR  <- 
  (fData(lpdAll)$id %in% bcrID)    & (fData(lpdAll)$subtype %in% paste(4:5))
is_mTOR <- 
  (fData(lpdAll)$id %in% everolID) & (fData(lpdAll)$subtype %in% paste(4:5))

myin <- function(x, y) as.numeric( (x %in% y) & !is.na(x) )

weights1 <- data.frame(
  hclust = rep(1, nrow(lpdAll)) + 1.75 * is_mTOR, 
  score  = as.numeric( is_BCR ), 
  row.names = rownames(lpdAll))

weights2 <- data.frame(
  row.names = tzselDrugs,
  hclust = myin(drugs$target_category[id3], "B-cell receptor") * 0.3 + 0.7,
  score  = rep(1, length(tzselDrugs))) 
```

Remove drugs that failed quality control: NSC 74859, bortezomib.

```{r badDrugs}
badDrugs <- c(bortezomib = "D_008", `NSC 74859` = "D_025")
stopifnot(identical(drugs[ badDrugs, "name"], names(badDrugs)))

candDrugs <- rownames(lpdAll)[  
  fData(lpdAll)$type=="viab" & !(fData(lpdAll)$id %in% badDrugs) &
  fData(lpdAll)$subtype %in% paste(2:5)
]
```

Threshold parameters: drugs are accepted that for at least `effectNum` samples 
have a viability effect less than or equal to `effectVal`. On the other hand, the 
mean viability effect should not be below `viab`.

```{r threshs}
thresh <- list(effectVal = 0.7, effectNum = 4, viab = 0.6, maxval = 1.1)
overallMean  <- rowMeans(Biobase::exprs(lpdAll)[candDrugs, ])
nthStrongest <- apply(Biobase::exprs(lpdAll)[candDrugs, ], 1,
                      function(x) sort(x)[thresh$effectNum])
```
```{r hists, fig.width = 8, fig.height = 2.6}
par(mfrow = c(1, 3))
hist(overallMean,  breaks = 30, col = "pink")
abline(v = thresh$viab,      col="blue")
hist(nthStrongest, breaks = 30, col = "pink")
abline(v = thresh$effectVal, col="blue")
plot(overallMean, nthStrongest)
abline(h = thresh$effectVal, v = thresh$viab, col = "blue")
```

`seldrugs1` and `d1`: as in the version of 
Figure 3A we had in the first submission to JCI. `d2`: two concentrations for each drug in `tzselDrugNames`.

```{r seldrugs1}
seldrugs1 <- candDrugs[ overallMean >= thresh$viab &
                        nthStrongest <= thresh$effectVal ] %>%
            union(targetedDrugs) %>% 
            union(chemoDrugs)

d1 <- Biobase::exprs(lpdAll[seldrugs1,, drop = FALSE ]) %>%
  deckel(lower = 0, upper = thresh$maxval)

d2 <- Biobase::exprs(lpdAll[tzselDrugs,, drop = FALSE ]) %>%
  deckel(lower = 0, upper = thresh$maxval)
```

We are going to scale the data. But was is the best measure of scale (or spread)? Let's explore
different measures of spread. We'll see that it does not seem to matter too much which one we use. 
We'll use median centering and scaling by mad.

```{r differentspread, fig.width = 4, fig.height = 4}
spreads <- sapply(list(mad = mad, `Q95-Q05` = function(x)
  diff(quantile(x, probs = c(0.05, 0.95)))), function(s) apply(d1, 1, s))
plot( spreads )

jj <- names(which( spreads[, "mad"] < 0.15 & spreads[, "Q95-Q05"] > 0.7))
jj
drugs[ stripConc(jj), "name" ]
```

```{r transf}
medianCenter_MadScale <- function(x) {
  s <- median(x)
  (x - s) / deckel(mad(x, center = s), lower = 0.05, upper = 0.2)
}

scaleDrugResp  <- function(x) t(apply(x, 1, medianCenter_MadScale)) 

scd1 <- scaleDrugResp(d1)
scd2 <- scaleDrugResp(d2)
```

## Define disease groups

```{r defdisgrp}
sort(table(lpdAll$Diagnosis), decreasing = TRUE)

diseaseGroups <- list(
  `CLL` = c("CLL"), 
  `MCL` = c("MCL"),
  `HCL` = c("HCL", "HCL-V"),
  `other B-cell` = c("B-PLL", "MZL", "LPL", "FL"), 
  `T-cell` = c("T-PLL", "Sezary", "PTCL-NOS"),
  `myeloid` = c("AML"))
stopifnot(setequal(unlist(diseaseGroups), unique(lpdAll$Diagnosis)))

fdg <- factor(rep(NA, ncol(lpdAll)), levels = names(diseaseGroups))
for (i in names(diseaseGroups))
  fdg[ lpdAll$Diagnosis %in% diseaseGroups[[i]] ] <- i
lpdAll$`Disease Group` <- fdg
```

## Code for heatmap 

### Matrix row / column clustering

The helper function `matClust` clusters a matrix `x`, 
whose columns represent samples and whose rows represent drugs.
Its arguments control how the columns are clustered:

- `weights`: a `data.frame` with a weight for each row of `x`. The weights are used in the computation of distances 
  between columns and thus for column sorting. The `data.frame`'s column `hclust` contains the weights 
  for `hclust(dist())`. The column `score` contains the weights for computing the score used for dendrogram reordering.
  See `weights1` and `weights2` defined above.
- `colgroups`: a `factor` by which to first split the columns before clustering
- `reorderrows`: logical. `FALSE` for previous behaviour (old Fig. 3A), `TRUE` for reordering the row dendrogram, too.

```{r matclust}
matClust <- function(x, 
                     rowweights,
                     colgroups   = factor(rep("all", ncol(x))),
                     reorderrows = FALSE) {
 
  stopifnot(is.data.frame(rowweights),
            c("hclust", "score") %in% colnames(rowweights),
            !is.null(rownames(rowweights)),
            !is.null(rownames(x)), 
            all(rownames(x) %in% rownames(rowweights)),
            is.factor(colgroups), 
            !any(is.na(colgroups)), 
            length(colgroups) == ncol(x))

  wgt <- rowweights[ rownames(x), ]
  
  columnsClust <- function(xk) {
    score   <- -svd(xk * wgt[, "score"])$v[, 1]
    cmns    <- colSums(xk * wgt[, "score"])
    ## make sure that high score = high sensitivity  
    if (cor(score, cmns) > 0) score <- (-score)
    
    ddraw  <- as.dendrogram(hclust(dist(t(xk * wgt[, "hclust"]), 
                                        method = "euclidean"), 
                                   method = "complete"))
    dd  <- reorder(ddraw, wts = -score, agglo.FUN = mean)
    ord <- order.dendrogram(dd)
    list(dd = dd, ord = ord, score = score)
  }

  sp <- split(seq(along = colgroups), colgroups)
  cc <- lapply(sp, function(k) columnsClust(x[, k, drop=FALSE]))
  cidx <- unlist(lapply(seq(along = cc), function (i)
    sp[[i]][ cc[[i]]$ord ]))
  csc  <- unlist(lapply(seq(along = cc), function (i)
    cc[[i]]$score[ cc[[i]]$ord ]))

  rddraw <- as.dendrogram(hclust(dist(x, method = "euclidean"),
                                   method = "complete"))
  ridx  <- if (reorderrows) {
    ww <- (colgroups == "CLL")
    stopifnot(!any(is.na(ww)), any(ww))
    rowscore <- svd(t(x) * ww)$v[, 1]
    dd <- reorder(rddraw, wts = rowscore, agglo.FUN = mean)
    order.dendrogram(dd)
  } else {
    rev(order.dendrogram(dendsort(rddraw)))
  }

  res <- x[ridx, cidx]
  
  stopifnot(identical(dim(res), dim(x)))
  attr(res, "colgap") <- cumsum(sapply(cc, function(x) length(x$score)))
  res
}
```

### Prepare sample annotations 
I.e. the right hand side color bar. `IGHV Uppsala U/M` is implied by 
`IGHV Uppsala % SHM` (see sensi2.Rmd).
`cut(..., right=FALSE)` will use intervals that are closed on the left 
and open on the right.

```{r}
translation = list(IGHV=c(U=0, M=1),
                   Methylation_Cluster=c(`LP-CLL`=0, `IP-CLL`=1, `HP-CLL`=2))
```

```{r annosamp}
make_pd <- function(cn, ...) {
  df <- function(...) data.frame(..., check.names = FALSE)
  
  x <- lpdAll[, cn]
  pd <- df(
    t(Biobase::exprs(x)[    c("del17p13", "TP53", "trisomy12"), , drop = FALSE]) %>% 
      `colnames<-`(c("del 17p13", "TP53", "trisomy 12")))
  
  # pd <- df(pd,
  #    t(Biobase::exprs(x)[  c("SF3B1", "del11q22.3", "del13q14_any"),, drop = FALSE]) %>% 
  #    `colnames<-`(c("SF3B1", "del11q22.3", "del13q14"))) 
  
  pd <- df(pd,
      cbind(as.integer(Biobase::exprs(x)["KRAS",] | Biobase::exprs(x)["NRAS",])) %>% 
      `colnames<-`("KRAS | NRAS")) 
  
  pd <- df(pd,
    # IGHV = Biobase::exprs(x)["IGHV Uppsala U/M", ],
    `IGHV (%)` = cut(x[["IGHV Uppsala % SHM"]],
                     breaks = c(0, seq(92, 100, by = 2), Inf), right = FALSE),
    `Meth. Cluster` = names(translation$Methylation_Cluster)[
          someMatch(paste(Biobase::exprs(x)["Methylation_Cluster", ]),
                    translation$Methylation_Cluster)],
    `Gene usage` = x$`IGHV gene usage`)
  
  if(length(unique(x$Diagnosis)) > 1)  
    pd <- df(pd, Diagnosis = x$Diagnosis)
  
  pd <- df(pd, 
    pretreated  = ifelse(patmeta[colnames(x),"IC50beforeTreatment"],"no","yes"), 
    alive       = ifelse(patmeta[colnames(x),"died"]>0, "no", "yes"),
    sex         = factor(x$Gender))
  
  rownames(pd) <- colnames(Biobase::exprs(x))
  
  for (i in setdiff(colnames(pd), "BCR score")) {
    if (!is.factor(pd[[i]]))
      pd[[i]] <- factor(pd[[i]])
    if (any(is.na(pd[[i]]))) {
      levels(pd[[i]]) <- c(levels(pd[[i]]), "n.d.")
      pd[[i]][ is.na(pd[[i]]) ] <- "n.d."   
    }
  }
  
  pd
}
```

### Define the annotation colors

```{r annokey}
gucol <- rev(brewer.pal(nlevels(lpdAll$`IGHV gene usage`), "Set3")) %>% 
             `names<-`(sort(levels(lpdAll$`IGHV gene usage`)))
gucol["IGHV3-21"] <- "#E41A1C"

make_ann_colors <- function(pd) {
  bw <- c(`TRUE` = "darkblue", `FALSE` = "#ffffff")
  res <- list(
  Btk = bw, Syk = bw, PI3K = bw, MEK = bw)
  
  if ("exptbatch" %in% colnames(pd))
    res$exptbatch <- brewer.pal(nlevels(pd$exptbatch), "Set2") %>%
    `names<-`(levels(pd$exptbatch))
  
  if ("IGHV (%)" %in% colnames(pd))
    res$`IGHV (%)` <- 
       c(rev(colorRampPalette(
         brewer.pal(9, "Blues"))(nlevels(pd$`IGHV (%)`)-1)), "white") %>% 
                        `names<-`(levels(pd$`IGHV (%)`))
  
  if ("CD38" %in% colnames(pd))
    res$CD38 <- colorRampPalette(
      c("blue", "yellow"))(nlevels(pd$CD38)) %>% `names<-`(levels(pd$CD38))
  
  if("Gene usage" %in% colnames(pd)) 
    res$`Gene usage` <- gucol 

  if("Meth. Cluster" %in% colnames(pd)) 
    res$`Meth. Cluster` <- brewer.pal(9, "Blues")[c(1, 5, 9)] %>% 
        `names<-`(names(translation$Methylation_Cluster))
  
  res <- c(res, BloodCancerMultiOmics2017:::sampleColors)   # from addons.R  
  
  if("Diagnosis" %in% colnames(pd)) 
    res$Diagnosis <- BloodCancerMultiOmics2017:::colDiagS[
      names(BloodCancerMultiOmics2017:::colDiagS) %in% levels(pd$Diagnosis) ] %>% 
         (function(x) x[order(names(x))])
  
  for(i in names(res)) {
    whnd <- which(names(res[[i]]) == "n.d.")
    if(length(whnd)==1)
      res[[i]][whnd] <- "#e0e0e0" else 
        res[[i]] <- c(res[[i]], `n.d.` = "#e0e0e0")
    stopifnot(all(pd[[i]] %in% names(res[[i]])))
  }
  
  res
}
```

### Heatmap drawing function

```{r dm}
theatmap <- function(x, cellwidth = 7, cellheight = 11) {
  
  stopifnot(is.matrix(x))
  patDat <- make_pd(colnames(x))
  
  bpp <- brewer.pal(9, "Set1")
  breaks <- 2.3 * c(seq(-1, 1, length.out = 101)) %>% `names<-`(
      colorRampPalette(c(rev(brewer.pal(7, "YlOrRd")), 
                         "white", "white", "white", 
                         brewer.pal(7, "Blues")))(101))

  if (!is.null(attr(x, "colgap")))
    stopifnot(last(attr(x, "colgap")) == ncol(x))

  pheatmapwh(deckel(x, lower = first(breaks), upper = last(breaks)), 
         cluster_rows = FALSE,  
         cluster_cols = FALSE,
         gaps_col = attr(x, "colgap"),
         gaps_row = attr(x, "rowgap"),
         scale = "none",
         annotation_col = patDat,
         annotation_colors = make_ann_colors(patDat),
         color = names(breaks),
         breaks = breaks, 
         show_rownames = TRUE, 
         show_colnames = !TRUE, 
         cellwidth = cellwidth, cellheight = cellheight, 
         fontsize = 10, fontsize_row = 11, fontsize_col = 8,
         annotation_legend = TRUE, drop_levels = TRUE)
  }
```

## Draw the heatmaps

Things we see in the plot:

- separation of U-CLL and M-CLL within CLL
- BCR-targeting drugs a cluster at the top
- everolimus stands out in M-CLL with a separate sensitivity pattern
- encorafenib clusters together and pops out in HCL

clscd1/2: **cl**ustered and **sc**aled **d**rug **mat**rix

```{r doheatmaps}
clscd1 <- matClust(scd1, rowweights = weights1, 
                  colgroups = lpdAll$`Disease Group`)
clscd2 <- matClust(scd2, rowweights = weights2, 
                  colgroups = lpdAll$`Disease Group`, reorderrows = TRUE)
```

### Identify places where gaps between the rows should be

```{r gapsrow}
setGapPositions <- function(x, gapAt) {
  rg <- if (missing(gapAt)) c(0) else {
    s <- strsplit(gapAt, split = "--")
    stopifnot(all(listLen(s) == 2L))
    s <- strsplit(unlist(s), split = ":")
    spname <- drugs[safeMatch(sapply(s, `[`, 1), drugs$name), "id"]
    spconc <- as.numeric(sapply(s, `[`, 2))
    spi <- mapply(function(d, cc) {
      i <- which(cc == conctab[d, ])
      stopifnot(length(i) == 1)
      i
     }, spname, spconc)
    spdrug <- paste(spname, spi, sep = "_")
    mt <- safeMatch(spdrug, rownames(x))
    igp <- seq(1, length(mt), by = 2L)
    stopifnot(all( mt[igp] - mt[igp + 1] == 1))
    #stopifnot(all( mt[igp] - mt[igp + 1] == 1))
    c(mt[igp + 1], 0)
  }
  attr(x, "rowgap") <- rg
  x
}

clscd1 %<>% setGapPositions(gapAt = c(
  "PF 477736:10--idelalisib:10",
  "spebrutinib:2.5--PF 477736:2.5",
  "PRT062607 HCl:10--selumetinib:2.5",
  "selumetinib:10--MK-2206:2.5",
  "MK-2206:0.156--tipifarnib:10",
  "AT13387:0.039--encorafenib:10",
  "encorafenib:2.5--SD07:1.111",
  "doxorubicine:0.016--encorafenib:0.625",
  "encorafenib:0.156--rotenone:2.5",
  "SCH 900776:0.625--everolimus:0.625",
  "everolimus:10--afatinib:1.667",
  "arsenic trioxide:1--thapsigargin:5",
  "thapsigargin:0.313--fludarabine:0.156"
))

clscd2 %<>% setGapPositions(gapAt = c(
  "AT13387:0.039--everolimus:0.156",
  "everolimus:0.625--nutlin-3:10", 
  "fludarabine:10--thapsigargin:0.078",
  "thapsigargin:0.313--encorafenib:0.625", 
  "encorafenib:0.156--ruxolitinib:0.156"
))
```

```{r Fig3AheatmapV1, dev = c("png", "pdf"), fig.path = plotDir, fig.height = 8.0 + nrow(clscd1) * 0.12, fig.width = 31, fig.wide = TRUE}
#FIG# S8
rownames(clscd1) <- with(fData(lpdAll)[ rownames(clscd1),, drop = FALSE], 
  paste0(drugs[id, "name"], " ", conctab[cbind(id, paste0("c", subtype))], "uM")) 
rownames(clscd1)
theatmap(clscd1)
```

```{r Fig3AheatmapV2, dev = c("png", "pdf"), fig.path = plotDir, fig.height = 8.0 + nrow(clscd2) * 0.12, fig.width = 31, fig.wide = TRUE}
#FIG# 3A
rownames(clscd2) <- with(fData(lpdAll)[ rownames(clscd2),, drop = FALSE], 
  paste0(drugs[id, "name"], " ", conctab[cbind(id, paste0("c", subtype))], "uM")) 

rownames(clscd2)
theatmap(clscd2)
```


```{r, include=!exists(".standalone"), eval=!exists(".standalone")}
devtools::session_info()
```

```{r, message=FALSE, warning=FALSE, include=FALSE, eval=exists(".standalone")}
rm(list=ls())
```