Diff of /vignettes/src/part07.Rmd [000000] .. [226bc8]

Switch to unified view

a b/vignettes/src/part07.Rmd
1
---
2
title: "Part 7"
3
output:
4
  BiocStyle::html_document
5
---
6
7
```{r, message=FALSE, include=!exists(".standalone"), eval=!exists(".standalone")}
8
library("BloodCancerMultiOmics2017")
9
library("DESeq2")
10
library("reshape2")
11
library("dplyr")
12
library("tibble")
13
library("Biobase")
14
library("SummarizedExperiment")
15
library("genefilter")
16
library("piano") # loadGSC
17
library("ggplot2")
18
library("gtable")
19
library("grid")
20
```
21
22
```{r echo=FALSE}
23
plotDir = ifelse(exists(".standalone"), "", "part07/")
24
if(plotDir!="") if(!file.exists(plotDir)) dir.create(plotDir)
25
```
26
27
```{r}
28
options(stringsAsFactors=FALSE)
29
```
30
31
32
# Gene set enrichment analysis on BTK, mTOR, MEK groups
33
34
Based on the classification of drug response phenotypes we divided CLL samples into distinct groups driven by the increased sensitivity towards BTK, mTOR and MEK inhibition. Here we perform gene set enrichment analysis to find the causes of distinctive drug response phenotypes in the gene expression data.
35
36
Load objects.
37
```{r}
38
data(list=c("dds", "lpdAll"))
39
40
gmts = list(H=system.file("extdata","h.all.v5.1.symbols.gmt",
41
                          package="BloodCancerMultiOmics2017"),
42
            C6=system.file("extdata","c6.all.v5.1.symbols.gmt",
43
                           package="BloodCancerMultiOmics2017"))
44
```
45
46
Divide patients into response groups.
47
```{r}
48
patGroup = defineResponseGroups(lpd=lpdAll)
49
```
50
51
## Preprocessing RNAseq data
52
53
Subsetting RNAseq data to include the CLL patients for which the drug screen was performed.
54
```{r subset}
55
lpdCLL <- lpdAll[fData(lpdAll)$type=="viab",
56
                 pData(lpdAll)$Diagnosis %in% c("CLL")]
57
58
ddsCLL <- dds[,colData(dds)$PatID %in% colnames(lpdCLL)]
59
```
60
61
Read in group and add annotations to the RNAseq data set.
62
```{r pat_group}
63
ddsCLL <- ddsCLL[,colData(ddsCLL)$PatID %in% rownames(patGroup)]
64
65
#remove genes without gene symbol annotations
66
ddsCLL <- ddsCLL[!is.na(rowData(ddsCLL)$symbol),]
67
ddsCLL <- ddsCLL[rowData(ddsCLL)$symbol != "",]
68
69
#add drug sensitivity annotations to coldata
70
colData(ddsCLL)$group <- factor(patGroup[colData(ddsCLL)$PatID, "group"])
71
```
72
73
Remove rows that contain too few counts.
74
```{r remove_low_counts}
75
#only keep genes that have counts higher than 10 in any sample
76
keep <- apply(counts(ddsCLL), 1, function(x) any(x >= 10)) 
77
ddsCLL <- ddsCLL[keep,]
78
dim(ddsCLL)
79
```
80
81
Remove transcripts which do not show variance across samples.
82
```{r filtering}
83
ddsCLL <- estimateSizeFactors(ddsCLL)
84
sds <- rowSds(counts(ddsCLL, normalized = TRUE))
85
sh <- shorth(sds)
86
ddsCLL <- ddsCLL[sds >= sh,]
87
```
88
89
Variance stabilizing transformation
90
```{r, cache=TRUE}
91
ddsCLL.norm <- varianceStabilizingTransformation(ddsCLL)
92
```
93
94
## Differential gene expression
95
96
Perform differential gene expression using DESeq2.
97
```{r DE, cache=TRUE}
98
DEres <- list()
99
design(ddsCLL) <- ~ group
100
101
rnaRaw <- DESeq(ddsCLL, betaPrior = FALSE)
102
103
#extract results for different comparisons
104
# responders versus weak-responders
105
DEres[["BTKnone"]] <- results(rnaRaw, contrast = c("group","BTK","none"))
106
DEres[["MEKnone"]] <- results(rnaRaw, contrast = c("group","MEK","none"))
107
DEres[["mTORnone"]] <- results(rnaRaw, contrast = c("group","mTOR","none"))
108
```
109
110
## Gene set enrichment analysis
111
112
The gene set enrichment analysis will be performed by using the MSigDB gene set collections C6 and Hallmark (http://software.broadinstitute.org/gsea/msigdb/ ). For each collection we will show the top five enriched gene sets and respective differentially expressed genes. Gene set enrichment analysis will be performed on the ranked gene lists using the Parametric Analysis of Gene Set Enrichment (PAGE).
113
114
115
### Functions for enrichment analysis and plots
116
117
Define cut-off.
118
```{r}
119
pCut = 0.05
120
```
121
122
Function to run GSEA or PAGE in R.
123
```{r gsea_function}
124
runGSEA <- function(inputTab, gmtFile, GSAmethod="gsea", nPerm=1000){
125
    inGMT <- loadGSC(gmtFile,type="gmt")
126
     #re-rank by score
127
    rankTab <- inputTab[order(inputTab[,1],decreasing = TRUE),,drop=FALSE] 
128
    if (GSAmethod == "gsea"){
129
        #readin geneset database
130
        #GSEA analysis
131
        res <- runGSA(geneLevelStats = rankTab,
132
                      geneSetStat = GSAmethod,
133
                      adjMethod = "fdr", gsc=inGMT,
134
                      signifMethod = 'geneSampling', nPerm = nPerm)
135
        GSAsummaryTable(res)
136
    } else if (GSAmethod == "page"){
137
        res <- runGSA(geneLevelStats = rankTab,
138
                      geneSetStat = GSAmethod,
139
                      adjMethod = "fdr", gsc=inGMT,
140
                      signifMethod = 'nullDist')
141
        GSAsummaryTable(res)
142
    }
143
}
144
```
145
146
Function which run the GSE for each response group.
147
```{r}
148
runGSE = function(gmt) {
149
  
150
  Res <- list()
151
  for (i in names(DEres)) {
152
    dataTab <- data.frame(DEres[[i]])
153
    dataTab$ID <- rownames(dataTab)
154
    #filter using pvalues
155
    dataTab <- filter(dataTab, pvalue <= pCut) %>%
156
                  arrange(pvalue) %>% 
157
                  mutate(Symbol = rowData(ddsCLL[ID,])$symbol)
158
    dataTab <- dataTab[!duplicated(dataTab$Symbol),]
159
    statTab <- data.frame(row.names = dataTab$Symbol, stat = dataTab$stat)
160
    resTab <- runGSEA(inputTab=statTab, gmtFile=gmt, GSAmethod="page")
161
    Res[[i]] <- arrange(resTab,desc(`Stat (dist.dir)`))
162
  }
163
  Res
164
}
165
```
166
167
Function to get the list of genes enriched in a set.
168
```{r get_genes_function}
169
getGenes <- function(inputTab, gmtFile){
170
  geneList <- loadGSC(gmtFile,type="gmt")$gsc
171
  enrichedUp <- lapply(geneList, function(x) 
172
    intersect(rownames(inputTab[inputTab[,1] >0,,drop=FALSE]),x))
173
  enrichedDown <- lapply(geneList, function(x)
174
    intersect(rownames(inputTab[inputTab[,1] <0,,drop=FALSE]),x))
175
  return(list(up=enrichedUp, down=enrichedDown))
176
}
177
```
178
179
A function to plot the heat map of intersection of genes in different gene sets.
180
```{r intersection_heatmap}
181
plotSetHeatmap <- 
182
  function(geneTab, enrichTab, topN, gmtFile, tittle="",
183
           asterixList = NULL, anno=FALSE) {
184
185
    if (nrow(enrichTab) < topN) topN <- nrow(enrichTab)
186
    enrichTab <- enrichTab[seq(1,topN),]
187
188
    geneList <- getGenes(geneTab,gmtFile)
189
    
190
    geneList$up <- geneList$up[enrichTab[,1]]
191
    geneList$down <- geneList$down[enrichTab[,1]]
192
    
193
    #form a table 
194
    allGenes <- unique(c(unlist(geneList$up),unlist(geneList$down)))
195
    allSets <- unique(c(names(geneList$up),names(geneList$down)))
196
    plotTable <- matrix(data=NA,ncol = length(allSets),
197
                        nrow = length(allGenes),
198
                        dimnames = list(allGenes,allSets))
199
    for (setName in names(geneList$up)) {
200
      plotTable[geneList$up[[setName]],setName] <- 1
201
    }
202
    for (setName in names(geneList$down)) {
203
      plotTable[geneList$down[[setName]],setName] <- -1
204
    }
205
206
    if(is.null(asterixList)) {
207
      #if no correlation table specified, order by the number of
208
      # significant gene
209
      geneOrder <- rev(
210
        rownames(plotTable[order(rowSums(plotTable, na.rm = TRUE),
211
                                 decreasing =FALSE),]))
212
    } else {
213
      #otherwise, order by the p value of correlation
214
      asterixList <- arrange(asterixList, p)
215
      geneOrder <- filter(
216
        asterixList, symbol %in% rownames(plotTable))$symbol
217
      geneOrder <- c(
218
        geneOrder, rownames(plotTable)[! rownames(plotTable) %in% geneOrder])
219
    }
220
    
221
    plotTable <- melt(plotTable)
222
    colnames(plotTable) <- c("gene","set","value")
223
    plotTable$gene <- as.character(plotTable$gene)
224
    
225
    if(!is.null(asterixList)) {
226
      #add + if gene is positivily correlated with sensitivity, else add "-"
227
      plotTable$ifSig <- asterixList[
228
        match(plotTable$gene, asterixList$symbol),]$coef
229
      plotTable <- mutate(plotTable, ifSig =
230
                            ifelse(is.na(ifSig) | is.na(value), "",
231
                                   ifelse(ifSig > 0, "-", "+")))
232
    }
233
    plotTable$value <- replace(plotTable$value,
234
                               plotTable$value %in% c(1), "Up")
235
    plotTable$value <- replace(plotTable$value,
236
                               plotTable$value %in%  c(-1), "Down")
237
    
238
    allSymbols <- plotTable$gene
239
    
240
    geneSymbol <- geneOrder
241
    
242
    if (anno) { #if add functional annotations in addition to gene names
243
      annoTab <- tibble(symbol = rowData(ddsCLL)$symbol, 
244
                        anno = sapply(rowData(ddsCLL)$description,
245
                                      function(x) unlist(strsplit(x,"[[]"))[1]))
246
      annoTab <- annoTab[!duplicated(annoTab$symbol),]
247
      annoTab$combine <- sprintf("%s (%s)",annoTab$symbol, annoTab$anno)
248
      plotTable$gene <- annoTab[match(plotTable$gene,annoTab$symbol),]$combine
249
      geneOrder <- annoTab[match(geneOrder,annoTab$symbol),]$combine
250
      geneOrder <- rev(geneOrder)
251
    }
252
    
253
    plotTable$gene <- factor(plotTable$gene, levels =geneOrder)
254
    plotTable$set <- factor(plotTable$set, levels = enrichTab[,1])
255
256
257
    g <- ggplot(plotTable, aes(x=set, y = gene)) +
258
      geom_tile(aes(fill=value), color = "black") +
259
      scale_fill_manual(values = c("Up"="red","Down"="blue")) +
260
      xlab("") + ylab("") + theme_classic() +
261
      theme(axis.text.x=element_text(size=7, angle = 60, hjust = 0),
262
            axis.text.y=element_text(size=7),
263
            axis.ticks = element_line(color="white"),
264
            axis.line = element_line(color="white"),
265
            legend.position = "none") +
266
      scale_x_discrete(position = "top") +
267
      scale_y_discrete(position = "right")
268
    
269
    if(!is.null(asterixList)) {
270
      g <- g + geom_text(aes(label = ifSig), vjust =0.40)
271
    }
272
    
273
    # construct the gtable
274
    wdths = c(0.05, 0.25*length(levels(plotTable$set)), 5)
275
    hghts = c(2.8, 0.1*length(levels(plotTable$gene)), 0.05)
276
    gt = gtable(widths=unit(wdths, "in"), heights=unit(hghts, "in"))
277
    ## make grobs
278
    ggr = ggplotGrob(g)
279
    ## fill in the gtable
280
    gt = gtable_add_grob(gt, gtable_filter(ggr, "panel"), 2, 2)
281
    gt = gtable_add_grob(gt, ggr$grobs[[5]], 1, 2) # top axis
282
    gt = gtable_add_grob(gt, ggr$grobs[[9]], 2, 3) # right axis
283
    
284
    return(list(list(plot=gt,
285
                     width=sum(wdths),
286
                     height=sum(hghts),
287
                     genes=geneSymbol)))
288
}    
289
```
290
291
Prepare stats per gene for plotting.
292
```{r}
293
statTab = setNames(lapply(c("mTORnone","BTKnone","MEKnone"), function(gr) {
294
  dataTab <- data.frame(DEres[[gr]])
295
  dataTab$ID <- rownames(dataTab)
296
  #filter using pvalues
297
  dataTab <- filter(dataTab, pvalue <= pCut) %>%
298
    arrange(pvalue) %>%
299
    mutate(Symbol = rowData(ddsCLL[ID,])$symbol) %>%
300
    filter(log2FoldChange > 0)
301
  dataTab <- dataTab[!duplicated(dataTab$Symbol),]
302
  data.frame(row.names = dataTab$Symbol, stat = dataTab$stat)
303
}), nm=c("mTORnone","BTKnone","MEKnone"))
304
```
305
306
### Geneset enrichment based on Hallmark set (H)
307
308
Perform enrichment analysis using PAGE method.
309
```{r run_GSE_hallmark, warning=FALSE, message= FALSE}
310
hallmarkRes = runGSE(gmt=gmts[["H"]])
311
```
312
313
### Geneset enrichment based on oncogenic signature set (C6)
314
315
Perform enrichment analysis using PAGE method.
316
```{r run_GSE_C6, warning=FALSE, message= FALSE}
317
c6Res = runGSE(gmt=gmts[["C6"]])
318
```
319
320
## Everolimus response VS gene expression (within mTOR group)
321
322
To further investiage the association between expression and drug sensitivity group at gene level, correlation test was performed to identify genes whose expressions are correlated with the sensitivity to the mTOR inhibitor (everolimus) sensitivity within the mTOR group.
323
324
### Correlation test
325
Prepare drug sensitivity vector and gene expression matrix
326
```{r}
327
ddsCLL.mTOR <- ddsCLL.norm[,ddsCLL.norm$group %in% "mTOR"]
328
viabMTOR <- Biobase::exprs(lpdCLL["D_063_4:5", ddsCLL.mTOR$PatID])[1,]
329
stopifnot(all(ddsCLL.mTOR$PatID == colnames(viabMTOR)))  
330
```
331
332
Filtering and applying variance stabilizing transformation on RNAseq data
333
```{r}
334
#only keep genes that have counts higher than 10 in any sample
335
keep <- apply(assay(ddsCLL.mTOR), 1, function(x) any(x >= 10)) 
336
ddsCLL.mTOR <- ddsCLL.mTOR[keep,]
337
dim(ddsCLL.mTOR)
338
```
339
340
Association test using Pearson correlation
341
```{r}
342
tmp = do.call(rbind, lapply(1:nrow(ddsCLL.mTOR), function(i) {
343
  res = cor.test(viabMTOR, assay(ddsCLL.mTOR[i,])[1,], method = "pearson")
344
  data.frame(coef=unname(res$estimate), p=res$p.value)
345
}))
346
347
corResult <- tibble(ID = rownames(ddsCLL.mTOR), 
348
                    symbol = rowData(ddsCLL.mTOR)$symbol,
349
                    coef = tmp$coef,
350
                    p = tmp$p)
351
352
corResult <- arrange(corResult, p) %>% mutate(p.adj = p.adjust(p, method="BH"))
353
```
354
355
### Enrichment heatmaps for mTOR group with overlapped genes indicated
356
357
The genes that are positively correlated with everolimus sensitivity are labeled as "+" in the heatmap and the negatively correlated genes are labeled as "-".
358
359
Plot for C6 gene sets
360
```{r}
361
pCut = 0.05
362
corResult.sig <- filter(corResult, p <= pCut)
363
c6Plot <- plotSetHeatmap(geneTab=statTab[["mTORnone"]],
364
                enrichTab=c6Res[["mTORnone"]],
365
                topN=5, gmtFile=gmts[["C6"]],
366
                #add asterix in front of the overlapped genes
367
                asterixList = corResult.sig, 
368
                anno=TRUE, i)
369
```
370
371
```{r fig_mTOR_C6_asterix, echo=FALSE, fig.path=plotDir, dev=c("png", "pdf"), fig.height=c6Plot[[1]][["height"]], fig.width=c6Plot[[1]][["width"]]}
372
#FIG# S13 B left
373
grid.draw(c6Plot[[1]]$plot)
374
```
375
376
Plot for Hallmark gene sets
377
```{r}
378
hallmarkPlot <- plotSetHeatmap(geneTab=statTab[["mTORnone"]],
379
                enrichTab=hallmarkRes[["mTORnone"]],
380
                topN=5, gmtFile=gmts[["H"]],
381
                asterixList = corResult.sig,
382
                anno=TRUE, i)
383
```
384
385
```{r fig_mTOR_H_asterix, echo=FALSE, fig.path=plotDir, dev=c("png", "pdf"), fig.height=hallmarkPlot[[1]][["height"]], fig.width=hallmarkPlot[[1]][["width"]]}
386
#FIG# S13 B right
387
grid.draw(hallmarkPlot[[1]]$plot)
388
```
389
390
## Ibrutinib response VS gene expression (within BTK group)
391
392
Correlation test was performed to identify genes whose expressions are correlated with the sensitivity to the BTK inhibitor (ibrutinib) sensitivity within the BTK  group.
393
394
### Correlation test
395
396
Prepare drug sensitivity vector and gene expression matrix
397
```{r}
398
ddsCLL.BTK <- ddsCLL.norm[,ddsCLL.norm$group %in% "BTK"]
399
viabBTK <- Biobase::exprs(lpdCLL["D_002_4:5", ddsCLL.BTK$PatID])[1,]
400
stopifnot(all(ddsCLL.BTK$PatID == colnames(viabBTK)))  
401
```
402
403
Filtering and applying variance stabilizing transformation on RNAseq data
404
```{r}
405
#only keep genes that have counts higher than 10 in any sample
406
keep <- apply(assay(ddsCLL.BTK), 1, function(x) any(x >= 10)) 
407
ddsCLL.BTK <- ddsCLL.BTK[keep,]
408
dim(ddsCLL.BTK)
409
```
410
411
Association test using Pearson correlation
412
```{r}
413
tmp = do.call(rbind, lapply(1:nrow(ddsCLL.BTK), function(i) {
414
  res = cor.test(viabBTK, assay(ddsCLL.BTK[i,])[1,], method = "pearson")
415
  data.frame(coef=unname(res$estimate), p=res$p.value)
416
}))
417
418
corResult <- tibble(ID = rownames(ddsCLL.BTK), 
419
                    symbol = rowData(ddsCLL.BTK)$symbol,
420
                    coef = tmp$coef,
421
                    p = tmp$p)
422
423
corResult <- arrange(corResult, p) %>% mutate(p.adj = p.adjust(p, method="BH"))
424
```
425
426
### Enrichment heatmaps for BTK group with overlapped genes indicated
427
428
Plot for C6 gene sets
429
```{r}
430
pCut = 0.05
431
corResult.sig <- filter(corResult, p <= pCut)
432
c6Plot <- plotSetHeatmap(geneTab=statTab[["BTKnone"]],
433
                enrichTab=c6Res[["BTKnone"]],
434
                topN=5, gmtFile=gmts[["C6"]],
435
                #add asterix in front of the overlapped genes
436
                asterixList = corResult.sig, 
437
                anno=TRUE, i)
438
```
439
440
```{r fig_BTK_C6_asterix, echo=FALSE, fig.path=plotDir, dev=c("png", "pdf"), fig.height=c6Plot[[1]][["height"]], fig.width=c6Plot[[1]][["width"]]}
441
#FIG# S12 left
442
grid.draw(c6Plot[[1]]$plot)
443
```
444
445
Plot for Hallmark gene sets
446
```{r}
447
hallmarkPlot <- plotSetHeatmap(geneTab=statTab[["BTKnone"]],
448
                enrichTab=hallmarkRes[["BTKnone"]],
449
                topN=5, gmtFile=gmts[["H"]],
450
                asterixList = corResult.sig,
451
                anno=TRUE, i)
452
```
453
454
```{r fig_BTK_H_asterix, echo=FALSE, fig.path=plotDir, dev=c("png", "pdf"), fig.height=hallmarkPlot[[1]][["height"]], fig.width=hallmarkPlot[[1]][["width"]]}
455
#FIG# S12 right
456
grid.draw(hallmarkPlot[[1]]$plot)
457
```
458
459
### Selumetinib response VS gene expression (within MEK group)
460
461
Correlation test was performed to identify genes whose expressions are correlated with the sensitivity to the MEK inhibitor (selumetinib) sensitivity within the MEK  group.
462
463
### Correlation test
464
465
Prepare drug sensitivity vector and gene expression matrix
466
```{r}
467
ddsCLL.MEK <- ddsCLL.norm[,ddsCLL.norm$group %in% "MEK"]
468
viabMEK <- Biobase::exprs(lpdCLL["D_012_4:5", ddsCLL.MEK$PatID])[1,]
469
stopifnot(all(ddsCLL.MEK$PatID == colnames(viabMEK)))  
470
```
471
472
Filtering and applying variance stabilizing transformation on RNAseq data
473
```{r}
474
#only keep genes that have counts higher than 10 in any sample
475
keep <- apply(assay(ddsCLL.MEK), 1, function(x) any(x >= 10)) 
476
ddsCLL.MEK <- ddsCLL.MEK[keep,]
477
dim(ddsCLL.MEK)
478
```
479
480
Association test using Pearson correlation
481
```{r}
482
tmp = do.call(rbind, lapply(1:nrow(ddsCLL.MEK), function(i) {
483
  res = cor.test(viabMEK, assay(ddsCLL.MEK[i,])[1,], method = "pearson")
484
  data.frame(coef=unname(res$estimate), p=res$p.value)
485
}))
486
487
corResult <- tibble(ID = rownames(ddsCLL.MEK), 
488
                    symbol = rowData(ddsCLL.MEK)$symbol,
489
                    coef = tmp$coef,
490
                    p = tmp$p)
491
492
corResult <- arrange(corResult, p) %>% mutate(p.adj = p.adjust(p, method="BH"))
493
```
494
Within MEK group, no gene expression was correlated with Selumetinib response
495
496
### Enrichment heatmaps for MEK group
497
498
Plot for C6 gene sets
499
```{r}
500
pCut = 0.05
501
corResult.sig <- filter(corResult, p <= pCut)
502
c6Plot <- plotSetHeatmap(geneTab=statTab[["MEKnone"]],
503
                enrichTab=c6Res[["MEKnone"]],
504
                topN=5, gmtFile=gmts[["C6"]],
505
                anno=TRUE, i)
506
```
507
508
```{r fig_MEK_C6_asterix, echo=FALSE, fig.path=plotDir, dev=c("png", "pdf"), fig.height=c6Plot[[1]][["height"]], fig.width=c6Plot[[1]][["width"]]}
509
#FIG# S13 A left
510
grid.draw(c6Plot[[1]]$plot)
511
```
512
513
Plot for Hallmark gene sets
514
```{r}
515
hallmarkPlot <- plotSetHeatmap(geneTab=statTab[["MEKnone"]],
516
                enrichTab=hallmarkRes[["MEKnone"]],
517
                topN=5, gmtFile=gmts[["H"]],
518
                asterixList = corResult.sig,
519
                anno=TRUE, i)
520
```
521
522
```{r fig_MEK_H_asterix, echo=FALSE, fig.path=plotDir, dev=c("png", "pdf"), fig.height=hallmarkPlot[[1]][["height"]], fig.width=hallmarkPlot[[1]][["width"]]}
523
#FIG# S13 A right
524
grid.draw(hallmarkPlot[[1]]$plot)
525
```
526
527
528
```{r, include=!exists(".standalone"), eval=!exists(".standalone")}
529
sessionInfo()
530
```
531
532
```{r, message=FALSE, warning=FALSE, include=FALSE}
533
rm(list=ls())
534
```