Diff of /vignettes/src/part10.Rmd [000000] .. [226bc8]

Switch to unified view

a b/vignettes/src/part10.Rmd
1
---
2
title: "Part 10"
3
output:
4
  BiocStyle::html_document
5
---
6
7
```{r, message=FALSE, include=!exists(".standalone"), eval=!exists(".standalone")}
8
library("BloodCancerMultiOmics2017")
9
library("Biobase")
10
library("RColorBrewer")
11
library("colorspace") # hex2RGB
12
#library("ggtern") # ggtern
13
library("reshape2") # melt
14
library("limma")
15
library("pheatmap")
16
library("beeswarm")
17
library("dplyr")
18
library("ggplot2")
19
library("tibble") # as_tibble
20
library("survival")
21
library("SummarizedExperiment")
22
library("DESeq2")
23
```
24
25
```{r echo=FALSE}
26
plotDir = ifelse(exists(".standalone"), "", "part10/")
27
if(plotDir!="") if(!file.exists(plotDir)) dir.create(plotDir)
28
```
29
30
```{r}
31
options(stringsAsFactors=TRUE)
32
```
33
34
# Relative drug effects - ternary diagrams
35
36
Ternary diagrams are a good visualisation tool to compare the relative drug effects of three selected drugs. Here we call the drugs by their targets (ibrutinib = BTK, idelalisib = PI3K, PRT062607 HCl = SYK, everolimus = mTOR and selumetinib = MEK). We compare the drug effects within CLL samples as well as U-CLL and M-CLL separatelly.
37
38
Load the data. 
39
```{r}
40
data("conctab", "lpdAll", "drugs", "patmeta")
41
```
42
43
Select CLL patients.
44
```{r}
45
lpdCLL <- lpdAll[, lpdAll$Diagnosis=="CLL"]
46
```
47
48
## Additional settings
49
50
Function that set the point transparency.
51
```{r}
52
makeTransparent = function(..., alpha=0.18) {
53
  
54
  if(alpha<0 | alpha>1) stop("alpha must be between 0 and 1")
55
  
56
  alpha = floor(255*alpha)  
57
  newColor = col2rgb(col=unlist(list(...)), alpha=FALSE)
58
  
59
  .makeTransparent = function(col, alpha) {
60
    rgb(red=col[1], green=col[2], blue=col[3], alpha=alpha, maxColorValue=255)
61
  }
62
  newColor = apply(newColor, 2, .makeTransparent, alpha=alpha)
63
  
64
  return(newColor)
65
}
66
67
giveColors = function(idx, alpha=1) {
68
  bp = brewer.pal(12, "Paired")
69
  makeTransparent(
70
    sequential_hcl(12, h = coords(as(hex2RGB(bp[idx]), "polarLUV"))[1, "H"])[1],
71
    alpha=alpha)
72
}
73
```
74
75
## Calculating the coordinates
76
77
```{r calculate-input}
78
# calculate  (x+c)/(s+3c), (y+c)/(s+3c), (z+c)/(s+3c)
79
prepareTernaryData = function(lpd, targets, invDrugs) {
80
81
  # calculate values for ternary
82
  df = sapply(targets, function(tg) {
83
    dr = paste(invDrugs[tg], c(4,5), sep="_")
84
    tmp = 1-Biobase::exprs(lpd)[dr,]
85
    tmp = colMeans(tmp)
86
    pmax(tmp, 0)
87
  })
88
  df = data.frame(df, sum=rowSums(df), max=rowMax(df))
89
  
90
  tern = apply(df[,targets], 2, function(x) {
91
    (x+0.005) / (df$sum+3*0.005)
92
  })
93
  colnames(tern) = paste0("tern", 1:3)
94
  # add IGHV status
95
  cbind(df, tern, IGHV=patmeta[rownames(df),"IGHV"],
96
        treatNaive=patmeta[rownames(df),"IC50beforeTreatment"])
97
}
98
```
99
100
## Plot ternaries
101
102
```{r plottingFunction, eval=FALSE}
103
makeTernaryPlot = function(td=ternData, targets, invDrugs) {
104
  
105
  drn = setNames(drugs[invDrugs[targets],"name"], nm=targets)
106
  
107
  plot = ggtern(data=td, aes(x=tern1, y=tern2, z=tern3)) +
108
        #countours
109
        stat_density_tern(geom='polygon', aes(fill=..level..),
110
                          position = "identity", contour=TRUE, n=400,
111
                          weight = 1, base = 'identity', expand = c(1.5, 1.5)) +  
112
        scale_fill_gradient(low='lightblue', high='red', guide = FALSE) +
113
     
114
        #points
115
        geom_mask() + 
116
        geom_point(size=35*td[,"max"],
117
                   fill=ifelse(td[,"treatNaive"],"green","yellow"),
118
                   color="black", shape=21) +
119
  
120
        #themes
121
        theme_rgbw( ) + 
122
        theme_custom(
123
          col.T=giveColors(2),
124
          col.L=giveColors(10),
125
          col.R=giveColors(4),
126
          tern.plot.background="white", base_size = 18 ) +
127
    
128
    labs( x = targets[1], xarrow = drn[targets[1]],
129
              y = targets[2], yarrow = drn[targets[2]],
130
              z = targets[3], zarrow = drn[targets[3]] ) +
131
              theme_showarrows() + theme_clockwise() +
132
    
133
        # lines 
134
        geom_Tline(Tintercept=.5, colour=giveColors(2)) +
135
        geom_Lline(Lintercept=.5, colour=giveColors(10)) +
136
        geom_Rline(Rintercept=.5, colour=giveColors(4)) 
137
138
   plot
139
}
140
```
141
142
```{r, eval=FALSE}
143
# RUN TERNARY
144
makeTernary = function(lpd, targets, ighv=NA) {
145
146
  # list of investigated drugs and their targets
147
  invDrugs = c("PI3K"="D_003", "BTK"="D_002", "SYK"="D_166",
148
               "MTOR"="D_063", "MEK"="D_012")
149
  
150
  ternData = prepareTernaryData(lpd, targets, invDrugs)
151
  if(!is.na(ighv)) ternData = ternData[which(ternData$IGHV==ighv),]
152
  
153
  print(table(ternData$treatNaive))
154
  ternPlot = makeTernaryPlot(ternData, targets, invDrugs)
155
  
156
  ternPlot
157
}
158
```
159
160
### BCR drugs
161
162
```{r BCR-tern-CLL, fig.path=plotDir, dev=c('png','pdf'), fig.width = 7, fig.height = 7, eval=FALSE}
163
#FIG# 3B
164
makeTernary(lpdCLL, c("PI3K", "BTK", "SYK"), ighv=NA)
165
```
166
167
```{r BCR-tern-MCLL, fig.path=plotDir, dev=c('png','pdf'), fig.width = 7, fig.height = 7, eval=FALSE}
168
#FIG# 3B
169
makeTernary(lpdCLL, c("PI3K", "BTK", "SYK"), ighv="M")
170
```
171
172
```{r BCR-tern-UCLL, fig.path=plotDir, dev=c('png','pdf'), fig.width = 7, fig.height = 7, eval=FALSE}
173
#FIG# 3B
174
makeTernary(lpdCLL, c("PI3K", "BTK", "SYK"), ighv="U")
175
```
176
177
### BTK & MEK & MTOR
178
179
```{r MEK-mTOR-tern, fig.path=plotDir, dev=c('png','pdf'), fig.width = 7, fig.height = 7, eval=FALSE}
180
#FIG# 3BC
181
makeTernary(lpdCLL, c("MTOR", "BTK", "MEK"), ighv=NA)
182
```
183
184
```{r MEK-mTOR-tern-MCLL, fig.path=plotDir, dev=c('png','pdf'), fig.width = 7, fig.height = 7, eval=FALSE}
185
#FIG# 3BC
186
makeTernary(lpdCLL, c("MTOR", "BTK", "MEK"), ighv="M")
187
```
188
189
```{r MEK-mTOR-tern-UCLL, fig.path=plotDir, dev=c('png','pdf'), fig.width = 7, fig.height = 7, eval=FALSE}
190
#FIG# 3BC
191
makeTernary(lpdCLL, c("MTOR", "BTK", "MEK"), ighv="U")
192
```
193
194
### PI3K & MEK & MTOR
195
196
All CLL samples included.
197
```{r PI3K-MEK-mTOR-tern-CLL, fig.path=plotDir, dev=c('png','pdf'), fig.width = 7, fig.height = 7, eval=FALSE}
198
#FIG# S9 left
199
makeTernary(lpdCLL, c("MTOR", "PI3K", "MEK"), ighv=NA)
200
```
201
202
### SYK & MEK & MTOR
203
204
All CLL samples included.
205
```{r SYK-MEK-mTOR-tern-CLL, fig.path=plotDir, dev=c('png','pdf'), fig.width = 7, fig.height = 7, eval=FALSE}
206
#FIG# S9 right
207
makeTernary(lpdCLL, c("MTOR", "SYK", "MEK"), ighv=NA)
208
```
209
210
# Comparison of gene expression responses to drug treatments
211
212
12 CLL samples (6 M-CLL and 6 U-CLL) were treated with ibrutinb, idelalisib, selumetinib, everolimus and negative control. Gene expression profiling was performed after 12 hours of drug incubation using Illumina microarrays.
213
214
Load the data.
215
```{r}
216
data("exprTreat", "drugs")
217
```
218
219
Do some cosmetics.
220
```{R}
221
e <- exprTreat
222
colnames( pData(e) ) <- sub( "PatientID", "Patient", colnames( pData(e) ) )
223
colnames( pData(e) ) <- sub( "DrugID", "Drug", colnames( pData(e) ) )
224
pData(e)$Drug[ is.na(pData(e)$Drug) ] <- "none"
225
pData(e)$Drug <- relevel( factor( pData(e)$Drug ), "none" )
226
pData(e)$SampleID <- colnames(e)
227
colnames(e) <- paste( pData(e)$Patient, pData(e)$Drug, sep=":" )
228
229
head( pData(e) )
230
```
231
232
Remove uninteresting fData columns
233
```{r}
234
fData(e) <- fData(e)[ , c( "ProbeID", "Entrez_Gene_ID", "Symbol", 
235
    "Cytoband", "Definition" ) ]
236
```
237
238
Here is a simple heat map of correlation between arrays.
239
```{r}
240
pheatmap( cor(Biobase::exprs(e)), symm=TRUE, cluster_rows = FALSE, cluster_cols = FALSE, 
241
   color = colorRampPalette(c("gray10","lightpink"))(100) )
242
```
243
244
## Differential expression using Limma
245
246
Construct a model matrix with a baseline expression per patient, treatment effects
247
for all drugs, and symmetric (+/- 1/2) effects for U-vs-M differences in drug effects.
248
249
```{r}
250
mm <- model.matrix( ~ 0 + Patient + Drug, pData(e) )
251
colnames(mm) <- sub( "Patient", "", colnames(mm) )
252
colnames(mm) <- sub( "Drug", "", colnames(mm) )
253
head(mm)
254
```
255
256
Run LIMMA on this.
257
258
```{r}
259
fit <- lmFit( e, mm )
260
fit <- eBayes( fit )
261
```
262
263
How many genes do we get that are significantly differentially expressed due to
264
a drug, at 10% FDR?
265
266
```{r}
267
a <- decideTests( fit, p.value = 0.1 )
268
t( apply( a[ , grepl( "D_...", colnames(a) ) ], 2, 
269
          function(x) table( factor(x,levels=c(-1,0,1)) ) ) )
270
```
271
272
What is the % overlap of genes across drugs? 
273
274
```{r overlap}
275
a <-
276
sapply( levels(pData(e)$Drug)[-1], function(dr1) 
277
   sapply( levels(pData(e)$Drug)[-1], function(dr2) 
278
     
279
   100*( length( intersect(
280
          unique( topTable( fit, coef=dr1, p.value=0.1,
281
                            number=Inf )$`Entrez_Gene_ID` ),
282
          unique( topTable( fit, coef=dr2, p.value=0.1,
283
                            number=Inf )$`Entrez_Gene_ID` ) ) ) / 
284
                      length(unique( topTable( fit, coef=dr1, p.value=0.1,
285
                                               number=Inf )$`Entrez_Gene_ID`)))
286
     ) 
287
   )
288
289
rownames(a) <-drugs[ rownames(a), "name" ]
290
colnames(a) <-rownames(a)
291
a <- a[-4, -4]
292
a
293
```
294
295
Correlate top 2000 genes with median largest lfc with each other:
296
297
1. For each patient and drug, compute the LFC (log fold changed) treated/untreated
298
2. Select the 2000 genes that have the highest across all patients and drugs (median absolute LFC)
299
3. Compute the average LFC for each drug across the patients, resulting in 4 vectors of length 2000 (one for each drug) 
300
4. Compute the pairwise correlation between them
301
302
```{r corrGenes}
303
extractGenes = function(fit, coef) {
304
  tmp = topTable(fit, coef=coef, number=Inf )[, c("ProbeID", "logFC")]
305
  rownames(tmp) = tmp$ProbeID
306
  colnames(tmp)[2] = drugs[coef,"name"]
307
  tmp[order(rownames(tmp)),2, drop=FALSE]
308
}
309
310
runExtractGenes = function(fit, drs) {
311
  tmp = do.call(cbind, lapply(drs, function(dr) {
312
    extractGenes(fit, dr)
313
  }))
314
  as.matrix(tmp)
315
}
316
317
mt = runExtractGenes(fit, drs=c("D_002","D_003","D_012","D_063"))
318
```
319
320
```{r}
321
mt <- cbind( mt, median=rowMedians(mt))
322
mt <- mt[order(mt[,"median"]), ]
323
mt <- mt[1:2000, ]
324
mt <- mt[,-5]
325
```
326
327
```{r}
328
(mtcr = cor(mt))
329
```
330
331
```{r plot-corrGenes, fig.path=plotDir, fig.width = 4, fig.height = 4, dev = c("png", "pdf")}
332
#FIG# 3D
333
pheatmap(mtcr, cluster_rows = FALSE, cluster_cols = FALSE,
334
         col=colorRampPalette(c("white", "lightblue","darkblue") )(100))
335
```
336
337
# Co-sensitivity patterns of the four response groups
338
339
Load data.
340
```{r}
341
data("lpdAll", "drugs")
342
```
343
344
```{r}
345
lpdCLL <- lpdAll[ , lpdAll$Diagnosis== "CLL"]
346
```
347
348
## Methodology of building groups
349
350
351
Here we look at the distribution of viabilities for the three drugs concerned and use the mirror method to derive, first, a measure of the background variation of the values for these drugs (`ssd`) and then define a cutoff as multiple (`z_factor`) of that. The mirror method assumes that the observed values are a mixture of two components:
352
353
- a null distribution, which is symmetric about 1, and
354
- responder distribution, which has negligible mass above 1.
355
356
The choice of `z_factor` is, of course, a crucial step.
357
It determines the trade-off between falsely called responders (false positives)
358
versus falsely called non-responders (false negatives).
359
Under normality assumption, it is related to the false positive rate (FPR) by
360
361
$$
362
\text{FPR} = 1 - \text{pnorm}(z)
363
$$
364
365
An FPR of 0.05 thus corresponds to 
366
```{r z_factor}
367
z_factor <- qnorm(0.05, lower.tail = FALSE)
368
z_factor
369
```
370
371
Defining drugs representing BTK, mTOR and MEK inhibition. 
372
```{r seldrugs}
373
drugnames <- c( "ibrutinib", "everolimus", "selumetinib")
374
ib  <- "D_002_4:5" 
375
ev  <- "D_063_4:5" 
376
se  <- "D_012_4:5"
377
stopifnot(identical(fData(lpdAll)[c(ib, ev, se), "name"], drugnames))
378
```
379
380
```{r}
381
df  <- Biobase::exprs(lpdAll)[c(ib, ev, se), lpdAll$Diagnosis=="CLL"] %>%
382
  t %>% as_tibble %>% `colnames<-`(drugnames)
383
```
384
```{r}
385
mdf <- melt(data.frame(df))
386
```
387
388
```{r scatter2, fig.width = 7, fig.height = 3.5, fig.path=plotDir}
389
grid.arrange(ncol = 2,
390
  ggplot(df, aes(x = 1-ibrutinib,  y = 1-everolimus )) + geom_point(),
391
  ggplot(df, aes(x = 1-everolimus, y = 1-selumetinib)) + geom_point()  
392
  )
393
```
394
395
Determine standard deviation using mirror method.
396
```{r mirror}
397
pmdf <- filter(mdf, value >= 1)
398
ssd  <- mean( (pmdf$value - 1) ^ 2 ) ^ 0.5
399
ssd
400
```
401
402
Normal density.
403
```{r dnorm}
404
dn <- tibble(
405
  x = seq(min(mdf$value), max(mdf$value), length.out = 100),
406
  y = dnorm(x, mean = 1, sd = ssd) * 2 * nrow(pmdf) / nrow(mdf) 
407
)
408
```
409
410
First, draw histogram for all three drugs pooled.
411
```{r hist1, fig.width = 10, fig.height = 5, dev = c("png", "pdf"), fig.path=plotDir}
412
#FIG# S10 A
413
thresh   <- 1 - z_factor * ssd
414
thresh
415
hh <- ggplot() +
416
  geom_histogram(aes(x = value, y = ..density..),
417
                 binwidth = 0.025, data = mdf) +
418
  theme_minimal() +
419
  geom_line(aes(x = x, y = y), col = "darkblue", data = dn) +
420
  geom_vline(col = "red", xintercept = thresh)
421
hh
422
```
423
424
Then split by drug.
425
```{r hist2, fig.width = 10, fig.height = 5, dev = c("png", "pdf"), fig.path=plotDir}
426
hh + facet_grid( ~ variable)
427
```
428
429
Decision rule.
430
```{r dec}
431
thresh 
432
433
df <- mutate(df,
434
  group = ifelse(ibrutinib < thresh, "BTK",
435
          ifelse(everolimus < thresh, "mTOR",
436
          ifelse(selumetinib < thresh, "MEK", "Non-responder")))
437
)
438
```
439
440
Present the decision rule in the plots.
441
```{r scatter3, fig.width = 10, fig.height = 5, dev = c("png", "pdf"), fig.path=plotDir, warning=FALSE}
442
#FIG# S10 B
443
mycol <- c(`BTK` = "Royalblue4",
444
           `mTOR` = "chartreuse4",
445
           `MEK` = "mediumorchid4",
446
           `Non-responder` = "grey61")
447
448
plots <- list(
449
  ggplot(df, aes(x = 1-ibrutinib,  y = 1-everolimus)),
450
  ggplot(filter(df, group != "BTK"), aes(x = 1-selumetinib, y = 1-everolimus))
451
)
452
453
plots <- lapply(plots, function(x)
454
  x + geom_point(aes(col = group), size = 1.5) + theme_minimal() +
455
    coord_fixed() + 
456
    scale_color_manual(values = mycol) +
457
    geom_hline(yintercept = 1 - thresh) + 
458
    geom_vline(xintercept = 1- thresh) +
459
    ylim(-0.15, 0.32) + xlim(-0.15, 0.32) 
460
    
461
)
462
463
grid.arrange(ncol = 2, grobs  = plots)
464
```
465
466
The above roules of stratification of patients into drug response groups is contained within `defineResponseGroups` function inside the package.
467
468
```{r}
469
sel = defineResponseGroups(lpd=lpdAll)
470
```
471
472
## Mean of each group
473
474
```{r co-sens-all}
475
# colors
476
c1 = giveColors(2, 0.5)
477
c2 = giveColors(4, 0.85)
478
c3 = giveColors(10, 0.75)
479
480
# vectors
481
p <- vector(); d <- vector();
482
pMTOR <- vector(); pBTK <- vector(); pMEK <- vector(); pNONE <- vector()
483
dMTOR <- vector(); dBTK <- vector(); dMEK <- vector(); dNONE <- vector()
484
dMTOR_NONE <- vector(); pMTOR_NONE <- vector()
485
486
# groups                               
487
sel$grMTOR_NONE <- ifelse(sel$group=="mTOR", "mTOR", NA)
488
sel$grMTOR_NONE <- ifelse(sel$group=="none", "none", sel$grMTOR_NONE)
489
490
sel$grMTOR <- ifelse(sel$group=="mTOR", "mTOR", "rest")
491
sel$col <- ifelse(sel$group=="mTOR", c3, "grey")
492
sel$grBTK  <- ifelse(sel$group=="BTK",  "BTK", "rest")
493
sel$col <- ifelse(sel$group=="BTK",  c1, sel$col)
494
sel$grMEK  <- ifelse(sel$group=="MEK",  "MEK", "rest")
495
sel$col <- ifelse(sel$group=="MEK",  c2, sel$col)
496
sel$grNONE <- ifelse(sel$group=="none", "none", "rest")
497
498
499
500
for (i in 1: max(which(fData(lpdCLL)$type=="viab"))) {
501
  
502
  fit <- aov(Biobase::exprs(lpdCLL)[i, rownames(sel)] ~ sel$group)
503
  p[i] <- summary(fit)[[1]][["Pr(>F)"]][1]
504
  
505
  
506
  calc_p = function(clmn) {
507
    p.adjust(
508
      t.test(Biobase::exprs(lpdCLL)[i, rownames(sel)] ~ sel[,clmn],
509
             alternative = c("two.sided") )$p.value, "BH" )
510
  }
511
  
512
  calc_d = function(clmn) {
513
    diff(
514
      t.test(Biobase::exprs(lpdCLL)[i, rownames(sel)] ~ sel[,clmn])$estimate,
515
      alternative = c("two.sided") )
516
  }
517
  
518
  pMTOR_NONE[i] <- calc_p("grMTOR_NONE")
519
  dMTOR_NONE[i] <- calc_d("grMTOR_NONE")
520
  
521
  pMTOR[i] <- calc_p("grMTOR")
522
  dMTOR[i] <- calc_d("grMTOR")
523
 
524
  pBTK[i] <- calc_p("grBTK")
525
  dBTK[i] <- calc_d("grBTK")
526
 
527
  pMEK[i] <- calc_p("grMEK")
528
  dMEK[i] <- calc_d("grMEK")
529
 
530
  pNONE[i] <- calc_p("grNONE")
531
  dNONE[i] <- calc_d("grNONE")
532
 
533
  # drugnames
534
  d[i] <- rownames(lpdCLL)[i]
535
}
536
```
537
538
539
```{r heatmap_mean_4_5, fig.path=plotDir, fig.width = 6, fig.height = 8, dev = c("png", "pdf")}
540
#FIG# 3F
541
#construct data frame
542
ps <- data.frame(drug=d, pMTOR, pBTK, pMEK, pNONE, p )
543
ds <- data.frame(dMTOR, dBTK, dMEK, dNONE)
544
rownames(ps) <- ps[,1]; rownames(ds) <- ps[,1]
545
546
# selcet only rows for singel concentrations, set non-sig to zero 
547
ps45 <- ps[rownames(ps)[grep(rownames(ps), pattern="_4:5")],2:5 ]
548
for (i in 1:4) { ps45[,i] <- ifelse(ps45[,i]<0.05, ps45[,i], 0) }
549
550
ds45 <- ds[rownames(ds)[grep(rownames(ds), pattern="_4:5")],1:4 ]
551
for (i in 1:4) { ds45[,i] <- ifelse(ps45[,i]<0.05, ds45[,i], 0) }
552
553
# exclude non-significant rows
554
selDS <- rownames(ds45)[rowSums(ps45)>0]
555
selPS <- rownames(ps45)[rowSums(ps45)>0]
556
ps45 <- ps45[selPS, ]
557
ds45 <- ds45[selDS, ]
558
559
groupMean = function(gr) {
560
  rowMeans(Biobase::exprs(lpdCLL)[rownames(ps45), rownames(sel)[sel$group==gr]])
561
}
562
563
MBTK <- groupMean("BTK")
564
MMEK <- groupMean("MEK")
565
MmTOR <- groupMean("mTOR")
566
MNONE <- groupMean("none")
567
568
# create data frame, new colnames
569
ms <- data.frame(BTK=MBTK, MEK=MMEK, mTOR=MmTOR, NONE=MNONE)
570
colnames(ms) <- c("BTK", "MEK", "mTOR", "WEAK")
571
rownames(ms) <- drugs[substr(selPS, 1,5), "name"]
572
573
# select rows with effect sizes group vs. rest >0.05
574
ms <- ms[ which(rowMax(as.matrix(ds45)) > 0.05 ) ,  ]
575
576
# exclude some drugs
577
ms <- ms[-c(
578
  which(rownames(ms) %in%
579
          c("everolimus", "ibrutinib", "selumetinib", "bortezomib"))),]
580
581
pheatmap(ms[, c("MEK", "BTK","mTOR", "WEAK")], cluster_cols = FALSE,
582
         cluster_rows =TRUE, clustering_method = "centroid",
583
         scale = "row",
584
         color=colorRampPalette(
585
           c(rev(brewer.pal(7, "YlOrRd")), "white", "white", "white",
586
             brewer.pal(7, "Blues")))(101)
587
        )
588
```
589
590
## Bee swarm plots for groups 
591
592
For selected drugs, we show differences of drug response between patient response groups.
593
594
```{r sel-beeswarm, fig.path=plotDir, fig.width = 7, fig.height = 5.5, dev = c("png", "pdf")}
595
#FIG# 3G
596
# drug label
597
giveDrugLabel3 <- function(drid) {
598
  vapply(strsplit(drid, "_"), function(x) {
599
    k <- paste(x[1:2], collapse="_")
600
    paste0(drugs[k, "name"])
601
  }, character(1))
602
}
603
604
groups = sel[,"group", drop=FALSE]
605
groups[which(groups=="none"), "group"] = "WEAK"
606
607
# beeswarm function
608
beeDrug <- function(xDrug) {
609
 
610
   par(bty="l", cex.axis=1.5)
611
   beeswarm(
612
     Biobase::exprs(lpdCLL)[xDrug, rownames(sel)] ~ groups$group,
613
     axes=FALSE, cex.lab=1.5, ylab="Viability", xlab="", pch = 16,
614
     pwcol=sel$col, cex=1,
615
     ylim=c(min( Biobase::exprs(lpdCLL)[xDrug, rownames(sel)] ) - 0.05, 1.2) )
616
 
617
   boxplot(Biobase::exprs(lpdCLL)[xDrug, rownames(sel)] ~ groups$group,  add = T,
618
           col="#0000ff22", cex.lab=2, outline = FALSE) 
619
   
620
   mtext(side=3, outer=F, line=0, 
621
       paste0(giveDrugLabel3(xDrug) ), cex=2) 
622
}
623
 
624
625
beeDrug("D_001_4:5")
626
beeDrug("D_081_4:5")
627
beeDrug("D_013_4:5")
628
beeDrug("D_003_4:5")
629
beeDrug("D_020_4:5")
630
beeDrug("D_165_3")
631
```
632
633
634
# Kaplan-Meier plot for groups (time from sample to next treatment)
635
636
```{r surv, fig.path=plotDir, fig.width = 8, fig.height = 8, dev = c("png", "pdf") }
637
#FIG# S11 A
638
patmeta[, "group"] <- sel[rownames(patmeta), "group"]
639
640
c1n <- giveColors(2)
641
c2n <- giveColors(4)
642
c3n <- giveColors(10)
643
c4n <- "lightgrey"
644
645
survplot(Surv(patmeta[ , "T5"],
646
              patmeta[ , "treatedAfter"] == TRUE)  ~ patmeta$group ,
647
  lwd=3, cex.axis = 1.2, cex.lab=1.5, col=c(c1n, c2n, c3n, c4n), 
648
  data = patmeta,  
649
  legend.pos = 'bottomleft', stitle = 'Drug response', 
650
  xlab = 'Time (years)', ylab = 'Patients without treatment (%)',
651
)
652
```
653
654
# Incidence of somatic gene mutations and CNVs in the four groups
655
656
Load data.
657
```{r}
658
data(lpdAll)
659
```
660
661
Select CLL patients.
662
```{r selectCLL}
663
lpdCLL <- lpdAll[ , lpdAll$Diagnosis== "CLL"]
664
```
665
666
Build groups.
667
```{r}
668
sel = defineResponseGroups(lpd=lpdAll)
669
```
670
671
Calculate total number of mutations per patient.
672
```{r}
673
genes <- data.frame(
674
  t(Biobase::exprs(lpdCLL)[fData(lpdCLL)$type %in% c("gen", "IGHV"), rownames(sel)]),
675
  group = factor(sel$group)
676
)
677
678
genes <- genes[!(is.na(rownames(genes))), ]
679
colnames(genes) %<>%
680
  sub("del13q14_any", "del13q14", .) %>%
681
  sub("IGHV.Uppsala.U.M", "IGHV", .)
682
683
Nmut = rowSums(genes[, colnames(genes) != "group"], na.rm = TRUE)
684
685
mf <- sapply(c("BTK", "MEK", "mTOR", "none"), function(i) 
686
  mean(Nmut[genes$group==i]) 
687
)
688
```
689
690
```{r bp, fig.width = 4, fig.height = 3.5}
691
barplot(mf, ylab="Total number of mutations/CNVs per patient", col="darkgreen")
692
```
693
694
Test each mutation, and each group, marginally for an effect. 
695
```{r mutationTests}
696
mutsUse <- setdiff( colnames(genes), "group" )
697
mutsUse <- mutsUse[ colSums(genes[, mutsUse], na.rm = TRUE) >= 4 ]
698
mutationTests <- lapply(mutsUse, function(m) {
699
  tibble(
700
    mutation = m, 
701
    p = fisher.test(genes[, m], genes$group)$p.value)
702
}) %>% bind_rows  %>% mutate(pBH = p.adjust(p, "BH")) %>% arrange(p)
703
704
mutationTests <- mutationTests %>% filter(pBH < 0.1)
705
```
706
707
Number of mutations with the p-value meeting the threshold.
708
```{r numMutationTests, fig.width = 3, fig.height = 2}
709
nrow(mutationTests)
710
```
711
712
```{r groupTests}
713
groupTests <- lapply(unique(genes$group), function(g) {
714
  tibble(
715
    group = g, 
716
    p = fisher.test(
717
      colSums(genes[genes$group == g, mutsUse], na.rm=TRUE),
718
      colSums(genes[genes$group != g, mutsUse], na.rm=TRUE),
719
      simulate.p.value = TRUE, B = 10000)$p.value)
720
}) %>% bind_rows %>% arrange(p)
721
722
groupTests 
723
```
724
725
These results show that __each__ of the groups has an imbalanced mutation distribution, and that each of the above-listed mutations is somehow imbalanced between the groups.
726
727
## Test gene mutations vs. groups
728
729
Fisher.test genes vs. groups function. Below function assumes that `g` is a data.frame one of whose columns is `group` 
730
and all other columns are numeric (i.e., 0 or 1) mutation indicators. 
731
```{r ft}
732
fisher.genes <- function(g, ref) {
733
  stopifnot(length(ref) == 1)
734
  ggg <- ifelse(g$group == ref, ref, "other")
735
  idx <- which(colnames(g) != "group") 
736
  
737
  lapply(idx, function(i)
738
    if (sum(g[, i], na.rm = TRUE) > 2) {
739
      ft  <- fisher.test(ggg, g[, i])
740
      tibble(
741
        p    = ft$p.value,
742
        es   = ft$estimate, 
743
        prop = sum((ggg == ref) & !is.na(g[, i]), na.rm = TRUE),
744
        mut1 = sum((ggg == ref) & (g[, i] != 0),  na.rm = TRUE),
745
        gene = colnames(g)[i])
746
    }  else {
747
      tibble(p = 1, es = 1, prop = 0, mut1 = 0, gene = colnames(g)[i])
748
    }
749
  ) %>% bind_rows
750
}
751
```
752
753
Calculate p values and effects using the Fisher test and group of interest vs. rest.
754
```{r}
755
pMTOR <- fisher.genes(genes, ref="mTOR") 
756
pBTK  <- fisher.genes(genes, ref="BTK") 
757
pMEK  <- fisher.genes(genes, ref="MEK") 
758
pNONE <- fisher.genes(genes, ref="none")
759
760
p    <- cbind(pBTK$p,    pMEK$p,    pMTOR$p,    pNONE$p)
761
es   <- cbind(pBTK$es,   pMEK$es,   pMTOR$es,   pNONE$es)
762
prop <- cbind(pBTK$prop, pMEK$prop, pMTOR$prop, pNONE$prop)
763
mut1 <- cbind(pBTK$mut1, pMEK$mut1, pMTOR$mut1, pNONE$mut1)
764
```
765
766
Prepare matrix for heatmap.
767
```{r prepmat}
768
p <- ifelse(p < 0.05, 1, 0) 
769
p <- ifelse(es <= 1, p, -p) 
770
rownames(p) <- pMTOR$gene
771
colnames(p) <- c("BTK", "MEK", "mTOR", "NONE")
772
773
pM <- p[rowSums(abs(p))!=0, ]
774
propM <- prop[rowSums(abs(p))!=0, ]
775
```
776
777
Cell labels.
778
```{r}
779
N <- cbind( paste0(mut1[,1],"/",prop[,1] ),
780
            paste0(mut1[,2],"/",prop[,2] ), 
781
            paste0(mut1[,3],"/",prop[,3] ),
782
            paste0(mut1[,4],"/",prop[,4] )
783
)
784
785
rownames(N) <- rownames(p)
786
```
787
788
Draw the heatmap only for significant factors in mutUse. 
789
Selection criteria for mutUse are >=4 mutations and adjusted p.value < 0.1 for 4x2 fisher test groups (mtor, mek, btk, none) vs mutation. 
790
```{r heatmap2_1, fig.path=plotDir, fig.width = 7, fig.height = 7, dev = c("png", "pdf")}
791
#FIG# S11 B
792
mutationTests <-
793
  mutationTests[which(!(mutationTests$mutation %in%
794
                          c("del13q14_bi", "del13q14_mono"))), ]
795
pMA <- p[mutationTests$mutation, ]
796
pMA
797
798
pheatmap(pMA, cluster_cols = FALSE, 
799
         cluster_rows = FALSE, legend=TRUE, annotation_legend = FALSE, 
800
         color = c("red", "white", "lightblue"),
801
         display_numbers = N[ rownames(pMA), ]
802
        )
803
```
804
805
# Differences in gene expression profiles between drug-response groups
806
807
```{r}
808
data("dds")
809
810
sel = defineResponseGroups(lpd=lpdAll)
811
sel$group = gsub("none","weak", sel$group)
812
```
813
814
```{r}
815
# select patients with CLL in the main screen data 
816
colnames(dds) <- colData(dds)$PatID
817
pat <- intersect(colnames(lpdCLL), colnames(dds))
818
dds_CLL <- dds[,which(colData(dds)$PatID %in% pat)]
819
820
# add group label 
821
colData(dds_CLL)$group <- factor(sel[colnames(dds_CLL), "group"])
822
colData(dds_CLL)$IGHV = factor(patmeta[colnames(dds_CLL),"IGHV"])
823
```
824
825
Select genes with most variable expression levels.
826
```{r} 
827
vsd <- varianceStabilizingTransformation( assay(dds_CLL) )
828
colnames(vsd) = colData(dds_CLL)$PatID
829
rowVariance <- setNames(rowVars(vsd), nm=rownames(vsd))
830
sortedVar <- sort(rowVariance, decreasing=TRUE)
831
mostVariedGenes <- sortedVar[1:10000]
832
dds_CLL <- dds_CLL[names(mostVariedGenes), ]
833
```
834
835
Run DESeq2.
836
```{r}
837
cb <- combn(unique(colData(dds_CLL)$group), 2)
838
839
gg <- list(); ggM <- list(); ggU <- list()
840
DE <- function(ighv) {
841
 for (i in 1:ncol(cb)) {
842
   dds_sel <- dds_CLL[,which(colData(dds_CLL)$IGHV %in% ighv)]
843
   dds_sel <- dds_sel[,which(colData(dds_sel)$group %in% cb[,i])]
844
   design(dds_sel) = ~group
845
   dds_sel$group <- droplevels(dds_sel$group)
846
   dds_sel$group <- relevel(dds_sel$group, ref=as.character(cb[2,i]) )
847
   dds_sel <- DESeq(dds_sel)
848
   res <- results(dds_sel)
849
   gg[[i]] <- res[order(res$padj, decreasing = FALSE), ]
850
   names(gg)[i] <- paste0(cb[1,i], "_", cb[2,i])
851
 }
852
return(gg)
853
}
854
```
855
856
```{r eval=FALSE}
857
ggM <- DE(ighv="M")
858
ggU <- DE(ighv="U")
859
gg <- DE(ighv=c("M", "U"))
860
```
861
862
```{r echo=FALSE, eval=FALSE}
863
save(ggM, ggU, gg, file=paste0(plotDir,"gexGroups.RData"))
864
```
865
866
The above code is not executed due to long running time. We load the output from the presaved object instead.
867
```{r echo=TRUE, eval=TRUE}
868
load(system.file("extdata","gexGroups.RData", package="BloodCancerMultiOmics2017"))
869
```
870
871
We use biomaRt package to map ensembl gene ids to hgnc gene symbols. The maping requires Internet connection and to omit this obstacle we load the presaved output. For completness, we provide the code used to generate the mapping.
872
873
```{r, eval=FALSE}
874
library("biomaRt")
875
# extract all ensembl ids
876
allGenes = unique(unlist(lapply(gg, function(x) rownames(x))))
877
# get gene ids for ensembl ids
878
genSymbols = getBM(filters="ensembl_gene_id",
879
                   attributes=c("ensembl_gene_id", "hgnc_symbol"),
880
                   values=allGenes, mart=mart)
881
# select first id if more than one is present
882
genSymbols = genSymbols[!duplicated(genSymbols[,"ensembl_gene_id"]),]
883
# set rownames to ens id
884
rownames(genSymbols) = genSymbols[,"ensembl_gene_id"]
885
```
886
887
```{r echo=TRUE, eval=TRUE}
888
load(system.file("extdata","genSymbols.RData", package="BloodCancerMultiOmics2017"))
889
```
890
891
892
Correlation of IL-10 mRNA expression and response to everolimus within the mTOR subgroup.
893
894
```{r IL10, fig.width=6.5, fig.height=6.5, dev = c("png", "pdf"), fig.path=plotDir}
895
#FIG# S14
896
gen="ENSG00000136634" #IL10
897
drug   <- "D_063_4:5" 
898
899
patsel <- intersect( rownames(sel)[sel$group %in% c("mTOR")], colnames(vsd) )
900
901
c <- cor.test( Biobase::exprs(lpdCLL)[drug, patsel], vsd[gen, patsel] )
902
903
# get hgnc_symbol for gen
904
# mart = useDataset("hsapiens_gene_ensembl", useMart("ensembl"))
905
# genSym = getBM(filters="ensembl_gene_id", attributes="hgnc_symbol",
906
# values=gen, mart=mart)
907
genSym = genSymbols[gen, "hgnc_symbol"]
908
909
plot(vsd[gen, patsel], Biobase::exprs(lpdCLL)[drug, patsel],
910
     xlab=paste0(genSym, " expression"),
911
     ylab="Viability (everolimus)", pch=19, ylim=c(0.70, 0.92), col="purple",
912
     main = paste0("mTOR-group", "\n cor = ", round(c$estimate, 3),
913
                   ", p = ", signif(c$p.value,2 )),
914
     cex=1.2)
915
abline(lm(Biobase::exprs(lpdCLL)[drug, patsel] ~ vsd[gen, patsel]))
916
```
917
918
Set colors.
919
```{r}
920
c1 = giveColors(2, 0.4)
921
c2 = giveColors(4, 0.7)
922
c3 = giveColors(10, 0.6)
923
```
924
925
Function which extracts significant genes.
926
```{r}
927
sigEx <- function(real) {
928
  
929
  ggsig = lapply(real, function(x) {
930
    x = data.frame(x)
931
    x = x[which(!(is.na(x$padj))),]
932
    x = x[x$padj<0.1,]
933
    x = x[order(x$padj, decreasing = TRUE),]
934
    x = data.frame(x[ ,c("padj","log2FoldChange")], ensg=rownames(x) )
935
    x$hgnc1 = genSymbols[rownames(x), "hgnc_symbol"]
936
    x$hgnc2 = ifelse(x$hgnc1=="" | x$hgnc1=="T" | is.na(x$hgnc1),
937
                     as.character(x$ensg), x$hgnc1)
938
    x[-(grep(x[,"hgnc2"], pattern="IG")),]
939
  })
940
  return(ggsig)
941
}
942
```
943
944
```{r}
945
barplot1 <- function(real, tit) { 
946
  
947
  # process real diff genes
948
  sigExPlus = sigEx(real)
949
  ng <- lapply(sigExPlus, function(x){ cbind(
950
    up=nrow(x[x$log2FoldChange>0, ]),
951
    dn=nrow(x[x$log2FoldChange<0, ]) ) } ) 
952
  ng = melt(ng)
953
954
  p <- ggplot(ng, aes(reorder(L1, -value)), ylim(-500:500)) + 
955
    geom_bar(data = ng, aes(y =  value, fill=Var2), stat="identity",
956
             position=position_dodge() ) +
957
    scale_fill_brewer(palette="Paired", direction = -1,
958
                      labels = c("up", "down")) +
959
    ggtitle(label=tit) +
960
    
961
    geom_hline(yintercept = 0,colour = "grey90") + 
962
    theme(
963
           panel.grid.minor = element_blank(),
964
           
965
           axis.title.x = element_blank(),
966
           axis.text.x = element_text(size=14, angle = 60, hjust = 1),
967
           axis.ticks.x = element_blank(),
968
           
969
           axis.title.y  = element_blank(),
970
           axis.text.y = element_text(size=14, colour="black"),
971
           axis.line.y = element_line(colour = "black",
972
                                      size = 0.5, linetype = "solid"),
973
           
974
           legend.key = element_rect(fill = "white"),
975
           legend.background = element_rect(fill = "white"),
976
           legend.title = element_blank(),
977
           legend.text = element_text(size=14, colour="black"),
978
  
979
           panel.background = element_rect(fill = "white", color="white")
980
          ) 
981
  
982
  plot(p) 
983
}
984
```
985
986
987
```{r deGroups, fig.width = 10, fig.height = 7, dev = c("png", "pdf"), fig.path=plotDir}
988
#FIG# S11 C
989
barplot1(real=gg, tit="")
990
```
991
992
## Cytokine / chemokine expression in mTOR group
993
994
Here we compare expression levels of cytokines/chemokines within the mTOR group.
995
996
Set helpful functions.
997
```{r}
998
# beeswarm funtion
999
beefun <- function(df, sym) {
1000
 par(bty="l", cex.axis=1.5)
1001
 beeswarm(df$x ~ df$y, axes=FALSE, cex.lab=1.5, col="grey", ylab=sym, xlab="",
1002
          pch = 16, pwcol=sel[colnames(vsd),"col"], cex=1.3)
1003
 
1004
 boxplot(df$x ~ df$y, add = T, col="#0000ff22", cex.lab=1.5) 
1005
}
1006
```
1007
1008
Bulid response groups.
1009
```{r groups}
1010
sel = defineResponseGroups(lpdCLL)
1011
sel[,1:3] = 1-sel[,1:3]
1012
sel$IGHV = pData(lpdCLL)[rownames(sel), "IGHV Uppsala U/M"]
1013
1014
c1 = giveColors(2, 0.5)
1015
c2 = giveColors(4, 0.85)
1016
c3 = giveColors(10, 0.75)
1017
1018
# add colors
1019
sel$col <- ifelse(sel$group=="mTOR", c3, "grey")
1020
sel$col <- ifelse(sel$group=="BTK",  c1, sel$col)
1021
sel$col <- ifelse(sel$group=="MEK",  c2, sel$col)
1022
```
1023
1024
For each cytokine/chemokine we compare level of expression between the response groups. 
1025
1026
```{r}
1027
cytokines <- c("CXCL2","TGFB1","CCL2","IL2","IL12B","IL4","IL6","IL10","CXCL8",
1028
               "TNF")
1029
cyEN =  sapply(cytokines, function(i) {
1030
  genSymbols[which(genSymbols$hgnc_symbol==i)[1],"ensembl_gene_id"]
1031
})
1032
1033
makeEmpty = function() {
1034
  data.frame(matrix(ncol=3, nrow=length(cyEN),
1035
                    dimnames=list(names(cyEN), c("BTK", "MEK", "mTOR"))) )
1036
}
1037
p = makeEmpty()
1038
ef = makeEmpty()
1039
```
1040
1041
```{r CYTOKINES, fig.path=plotDir, fig.width=7, fig.height=5.5, dev=c("png", "pdf")}
1042
for (i in 1:length(cyEN) ) {
1043
 
1044
 geneID <- cyEN[i]
1045
 df <- data.frame(x=vsd[geneID,  ], y=sel[colnames(vsd) ,"group"])
1046
 df$y <- as.factor(df$y)
1047
1048
 beefun(df, sym=names(geneID))
1049
 
1050
 df <- within(df, y <- relevel(y, ref = "none"))
1051
 fit <- lm(x ~y, data=df)
1052
 
1053
 p[i,] <- summary(fit)$coefficients[ 2:4, "Pr(>|t|)"]
1054
 
1055
 abtk = mean(df[df$y=="BTK", "x"], na.rm=TRUE) - mean(df[df$y=="none", "x"],
1056
                                                      na.rm=TRUE)
1057
 amek = mean(df[df$y=="MEK", "x"], na.rm=TRUE) - mean(df[df$y=="none", "x"],
1058
                                                      na.rm=TRUE)
1059
 amtor= mean(df[df$y=="mTOR", "x"], na.rm=TRUE) - mean(df[df$y=="none", "x"],
1060
                                                       na.rm=TRUE)
1061
1062
 ef[i,] <-  c(as.numeric(abtk), as.numeric(amek), as.numeric(amtor)) 
1063
 
1064
 mtext( paste(    "pBTK=", summary(fit)$coefficients[ 2, "Pr(>|t|)"], 
1065
                "\npMEK=", summary(fit)$coefficients[ 3, "Pr(>|t|)"], 
1066
               "\npMTOR=", summary(fit)$coefficients[ 4, "Pr(>|t|)"], 
1067
              side=3 ))
1068
}
1069
```
1070
1071
As a next step, we summarize the above comparisons in one plot.
1072
```{r CYTOKINES-summary, fig.path=plotDir, fig.width=4.5, fig.height=3.5, dev = c("png", "pdf")}
1073
#FIG# S11 D
1074
# log p-values
1075
plog <- apply(p, 2, function(x){-log(x)} )
1076
plog_m <- melt(as.matrix(plog))
1077
ef_m <- melt(as.matrix(ef))
1078
1079
# introduce effect direction
1080
plog_m$value <- ifelse(ef_m$value>0, plog_m$value, -plog_m$value)
1081
1082
rownames(plog_m) <- 1:nrow(plog_m)
1083
1084
# fdr
1085
fdrmin = min( p.adjust(p$mTOR, "fdr") )
1086
1087
### plot ####
1088
colnames(plog_m) <- c("cytokine", "group", "p")
1089
1090
lev = names(sort(tapply(plog_m$p, plog_m$cytokine, function(p) min(p))))
1091
1092
plog_m$cytokine <- factor(plog_m$cytokine, levels=lev)
1093
1094
1095
 ggplot(data=plog_m, mapping=aes(x=cytokine, y=p, color=group)) + 
1096
    scale_colour_manual(values = c(c1, c2, c3)) +
1097
    geom_point( size=3 ) + 
1098
       theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.9),
1099
       panel.grid.minor = element_blank(),
1100
       panel.grid.major = element_blank(),
1101
       panel.background = element_blank(),
1102
       axis.line.x=element_line(),
1103
       axis.line.y=element_line(),
1104
       legend.position="none"
1105
       ) +
1106
    scale_y_continuous(name="-log(p-value)", breaks=seq(-3,7.5,2),
1107
                       limits=c(-3,7.5)) +
1108
    xlab("") +
1109
    geom_hline(yintercept = 0) +
1110
    geom_hline(yintercept = -log(0.004588897), color="purple", linetype="dashed") +
1111
    geom_hline(yintercept = (-log(0.05)), color="grey", linetype="dashed") 
1112
```
1113
1114
Within the mTOR group it is only IL-10 which have significantly increased expression. The other important cytokines/chemokines were not differentially expressed.
1115
1116
1117
```{r, include=!exists(".standalone"), eval=!exists(".standalone")}
1118
sessionInfo()
1119
```
1120
1121
```{r, message=FALSE, warning=FALSE, include=FALSE}
1122
rm(list=ls())
1123
```