Diff of /vignettes/src/part13.Rmd [000000] .. [226bc8]

Switch to unified view

a b/vignettes/src/part13.Rmd
1
---
2
title: "Expression profiling analysis of trisomy 12"
3
output:
4
  BiocStyle::html_document
5
---
6
7
```{r, echo=FALSE, include=!exists(".standalone")}
8
knitr::opts_chunk$set(cache = TRUE)
9
```
10
11
```{r, message=FALSE, include=!exists(".standalone"), eval=!exists(".standalone")}
12
library("BloodCancerMultiOmics2017")
13
library("DESeq2")
14
library("piano")
15
library("pheatmap")
16
library("genefilter")
17
library("grid")
18
library("gridExtra")
19
library("RColorBrewer")
20
library("cowplot")
21
library("dplyr")
22
library("ggplot2")
23
library("tibble")
24
```
25
26
```{r echo=FALSE}
27
plotDir = ifelse(exists(".standalone"), "", "part13/")
28
if(plotDir!="") if(!file.exists(plotDir)) dir.create(plotDir)
29
```
30
31
# Expression profiling analysis of trisomy 12
32
33
Load and prepare expression data set.
34
```{r}
35
data(list=c("dds", "patmeta", "mutCOM"))
36
37
#load genesets
38
gmts = list(
39
  H=system.file("extdata","h.all.v5.1.symbols.gmt",
40
                package="BloodCancerMultiOmics2017"),
41
  C6=system.file("extdata","c6.all.v5.1.symbols.gmt",
42
                 package="BloodCancerMultiOmics2017"),
43
  KEGG=system.file("extdata","c2.cp.kegg.v5.1.symbols.gmt",
44
                   package="BloodCancerMultiOmics2017"))
45
```
46
47
48
Choose CLL samples with trisomy 12 annotation from the gene expression data set.
49
```{r}
50
#only choose CLL samples
51
colData(dds)$Diagnosis <- patmeta[match(dds$PatID,rownames(patmeta)),]$Diagnosis
52
ddsCLL <- dds[,dds$Diagnosis %in% "CLL"]
53
54
#add trisomy 12 and IGHV information
55
colData(ddsCLL)$trisomy12 <-
56
  factor(assayData(mutCOM[ddsCLL$PatID,])$binary[,"trisomy12"])
57
colData(ddsCLL)$IGHV <- factor(patmeta[ddsCLL$PatID,]$IGHV)
58
59
#remove samples that do not have trisomy 12 information
60
ddsCLL <- ddsCLL[,!is.na(ddsCLL$trisomy12)]
61
62
#how many genes and samples we have?
63
dim(ddsCLL)
64
```
65
66
Remove transcripts that do not have gene symbol annotations, show low counts or do not show variance across samples. 
67
```{r, cache=TRUE}
68
#remove genes without gene symbol annotations
69
ddsCLL <- ddsCLL[!is.na(rowData(ddsCLL)$symbol),]
70
ddsCLL <- ddsCLL[rowData(ddsCLL)$symbol != "",]
71
72
#only keep genes that have counts higher than 10 in any sample
73
keep <- apply(counts(ddsCLL), 1, function(x) any(x >= 10)) 
74
ddsCLL <- ddsCLL[keep,]
75
76
#Remove transcripts do not show variance across samples
77
ddsCLL <- estimateSizeFactors(ddsCLL)
78
sds <- rowSds(counts(ddsCLL, normalized = TRUE))
79
sh <- shorth(sds)
80
ddsCLL <- ddsCLL[sds >= sh,]
81
82
#variance stabilization
83
ddsCLL.norm <- varianceStabilizingTransformation(ddsCLL, blind=TRUE)
84
85
#how many genes left
86
dim(ddsCLL)
87
```
88
89
## Differential gene expression analysis using DESeq2
90
91
DESeq2 was used to identify genes that are differentially expressed between wild-type CLL samples and samples with trisomy 12.
92
93
Run DESeq2
94
```{r, cache=TRUE}
95
design(ddsCLL) <- ~ trisomy12
96
ddsCLL <- DESeq(ddsCLL, betaPrior = FALSE)
97
DEres <- results(ddsCLL)
98
DEres.shr <- lfcShrink(ddsCLL, type="normal", contrast = c("trisomy12","1","0"),
99
                       res = DEres)
100
```
101
102
Plot gene dosage effect.
103
```{r, warning=FALSE}
104
#FIG# S23 A
105
plotTab <- as.data.frame(DEres)
106
plotTab$onChr12 <- rowData(ddsCLL)$chromosome == 12
107
dosePlot <- ggplot(plotTab) +
108
  geom_density(aes(x=log2FoldChange, col=onChr12, fill=onChr12), alpha=0.4) +
109
  xlim( -3, 3 )
110
dosePlot
111
```
112
The distributions of the logarithmic (base 2) fold change between samples with and without trisomy 12 are shown separately for the genes on chromosome 12 (green) and on other chromosomes (red). The two distributions are shifted with respected to each by an amount that is consistent with log2(3/2) ~ 0.58 and thus with gene dosage effects. 
113
114
### Heatmap plot of differentially expressed genes
115
116
A heat map plot was used to show the normalized expression value (Z-score) of the differentially expressed genes in samples with and without trisomy 12.
117
118
Prepare matrix for heat map plot.
119
```{r}
120
#filter genes
121
fdrCut <- 0.1
122
fcCut <- 1.5
123
124
allDE <- data.frame(DEres.shr) %>%
125
  rownames_to_column(var = "ID") %>% 
126
  mutate(Symbol = rowData(ddsCLL[ID,])$symbol,
127
         Chr = rowData(ddsCLL[ID,])$chromosome) %>% 
128
  filter(padj <= fdrCut & abs(log2FoldChange) > fcCut) %>% 
129
  arrange(pvalue) %>% filter(!duplicated(Symbol)) %>%
130
  mutate(Chr12 = ifelse(Chr == 12, "yes", "no"))
131
132
#get the expression matrix
133
plotMat <- assay(ddsCLL.norm[allDE$ID,])
134
colnames(plotMat) <- ddsCLL.norm$PatID
135
rownames(plotMat) <- allDE$Symbol
136
137
#sort columns of plot matrix based on trisomy 12 status
138
plotMat <- plotMat[,order(ddsCLL.norm$trisomy12)]
139
140
#calculate z-score and scale
141
plotMat <- t(scale(t(plotMat)))
142
plotMat[plotMat >= 4] <- 4
143
plotMat[plotMat <= -4] <- -4
144
```
145
146
Plot the heat map.
147
```{r,  trisomy12_heatmap, dev = c("png", "pdf"), fig.path=plotDir, fig.width = 8, fig.height = 10}
148
#FIG# S23 B
149
#prepare colums and row annotations
150
annoCol <- data.frame(row.names=ddsCLL.norm$PatID, Tris12=ddsCLL.norm$trisomy12)
151
levels(annoCol$Tris12) <- list(wt = 0, mut =1)
152
annoRow <- data.frame(row.names = allDE$Symbol, Chr12 = allDE$Chr12)
153
annoColor <- list(Tris12 = c(wt = "grey80", mut = "black"),
154
                  Chr12 = c(yes="red", no = "grey80"))
155
156
157
pheatmap(plotMat,
158
         color=colorRampPalette(rev(brewer.pal(n=7, name="RdBu")))(100),
159
         cluster_cols = FALSE,
160
         annotation_row = annoRow, annotation_col = annoCol,
161
         show_colnames = FALSE, fontsize_row = 3,
162
         breaks = seq(-4,4, length.out = 101),
163
         annotation_colors = annoColor, border_color = NA)
164
165
```
166
According to the gene expression heat map, the samples with trisomy 12 show distinct expression pattern. 84 genes are significantly up-regulated in trisomy 12 samples and 37 genes are down-regulated in trisomy 12 samples (FDR =0.1 and log2FoldChange > 1.5). Among the 84 up-regulated genes, only 12 genes are from chromosome 12, suggested the distinct expression pattern of trisomy 12 samples can not be merely explained by gene dosage effect. 
167
168
169
## Gene set enrichment analysis
170
171
Gene set enrichment analysis using PAGE (Parametric Analysis of Gene Set Enrichment) was used to unravel the pathway activity changes underlying trisomy 12.
172
173
### Perform enrichment analysis
174
175
Function to run PAGE in R.
176
```{r}
177
runGSEA <- function(inputTab, gmtFile, GSAmethod="gsea", nPerm=1000){
178
    inGMT <- loadGSC(gmtFile,type="gmt")
179
    #re-rank by score
180
    rankTab <- inputTab[order(inputTab[,1],decreasing = TRUE),,drop=FALSE] 
181
    if (GSAmethod == "gsea"){
182
        #readin geneset database
183
        #GSEA analysis
184
        res <- runGSA(geneLevelStats = rankTab,
185
                      geneSetStat = GSAmethod,
186
                      adjMethod = "fdr", gsc=inGMT,
187
                      signifMethod = 'geneSampling', nPerm = nPerm)
188
        GSAsummaryTable(res)
189
    } else if (GSAmethod == "page"){
190
        res <- runGSA(geneLevelStats = rankTab,
191
                      geneSetStat = GSAmethod,
192
                      adjMethod = "fdr", gsc=inGMT,
193
                      signifMethod = 'nullDist')
194
        GSAsummaryTable(res)
195
    }
196
}
197
```
198
199
Function for plotting enrichment bar.
200
```{r}
201
plotEnrichmentBar <- function(resTab, pCut=0.05, ifFDR=FALSE,
202
                              setName="Signatures") {
203
  pList <- list()
204
  rowNum <- c()
205
  for (i in names(resTab)) {
206
    plotTab <- resTab[[i]]
207
    if (ifFDR) {
208
      plotTab <- dplyr::filter(
209
        plotTab, `p adj (dist.dir.up)` <= pCut | `p adj (dist.dir.dn)` <= pCut)
210
    } else {
211
      plotTab <- dplyr::filter(
212
        plotTab, `p (dist.dir.up)` <= pCut | `p (dist.dir.dn)` <= pCut)
213
    }
214
    if (nrow(plotTab) == 0) {
215
      print("No sets passed the criteria")
216
      next
217
    } else {
218
      #firstly, process the result table
219
      plotTab <- apply(plotTab, 1, function(x) {
220
        statSign <- as.numeric(x[3])
221
        data.frame(Name = x[1],
222
                   p = as.numeric(ifelse(statSign >= 0, x[4], x[6])),
223
                   geneNum = ifelse(statSign >= 0, x[8], x[9]),
224
                   Direction = ifelse(statSign > 0, "Up", "Down"),
225
                   stringsAsFactors = FALSE)
226
      }) %>% do.call(rbind,.)
227
228
      plotTab$Name <- sprintf("%s (%s)",plotTab$Name,plotTab$geneNum)
229
      plotTab <- plotTab[with(plotTab,order(Direction, p, decreasing=TRUE)),]
230
      plotTab$Direction <- factor(plotTab$Direction, levels = c("Down","Up"))
231
      plotTab$Name <- factor(plotTab$Name, levels = plotTab$Name)
232
      #plot the barplot
233
      pList[[i]] <- ggplot(data=plotTab, aes(x=Name, y= -log10(p),
234
                                             fill=Direction)) +
235
        geom_bar(position="dodge",stat="identity", width = 0.5) +
236
        scale_fill_manual(values=c(Up = "blue", Down = "red")) +
237
        coord_fixed(ratio = 0.5) + coord_flip() + xlab(setName) +
238
        ggtitle(i) + theme_bw() + theme(
239
          plot.title = element_text(face = "bold", hjust =0.5),
240
          axis.title = element_text(size=15))
241
      rowNum <-c(rowNum,nrow(plotTab))
242
    }
243
  }
244
245
  if (length(pList) == 0) {
246
    print("Nothing to plot")
247
  } else {
248
    rowNum <- rowNum
249
    grobList <- lapply(pList, ggplotGrob)
250
    grobList <- do.call(rbind,c(grobList,size="max"))
251
    panels <- grobList$layout$t[grep("panel", grobList$layout$name)]
252
    grobList$heights[panels] <- unit(rowNum, "null")
253
  }
254
  return(grobList)
255
}
256
```
257
258
Prepare input table for gene set enrichment analysis. A cut-off of raw p value < 0.05 was used to select genes for the analysis.
259
```{r,message=FALSE}
260
pCut <- 0.05
261
262
dataTab <- data.frame(DEres)
263
dataTab$ID <- rownames(dataTab)
264
265
#filter using raw pvalues
266
dataTab <- filter(dataTab, pvalue <= pCut) %>%
267
  arrange(pvalue) %>%
268
  mutate(Symbol = rowData(ddsCLL[ID,])$symbol)
269
dataTab <- dataTab[!duplicated(dataTab$Symbol),]
270
statTab <- data.frame(row.names = dataTab$Symbol, stat = dataTab$stat)
271
```
272
273
Gene set enrichment analysis using Hallmarks gene set from MsigDB.
274
```{r,fig.width=8, fig.height=6, message=FALSE}
275
hallmarkRes <- list()
276
277
#run PAGE
278
resTab <- runGSEA(statTab, gmts$H ,GSAmethod = "page")
279
280
#remove the HALLMARK_
281
resTab$Name <- gsub("HALLMARK_","",resTab$Name)
282
283
hallmarkRes[["Gene set enrichment analysis"]] <- 
284
  arrange(resTab,desc(`Stat (dist.dir)`))
285
286
hallBar <- plotEnrichmentBar(hallmarkRes, pCut = 0.01, ifFDR = TRUE,
287
                             setName = "Hallmark gene sets")
288
```
289
290
Gene set enrichment analysis using kegg gene set from MsigDB.
291
```{r, message=FALSE}
292
keggRes <- list()
293
294
resTab <- runGSEA(statTab,gmts$KEGG,GSAmethod = "page")
295
296
#remove the KEGG_
297
resTab$Name <- gsub("KEGG_","",resTab$Name)
298
299
keggRes[["Gene set enrichment analysis"]] <- resTab
300
301
keggBar <- plotEnrichmentBar(keggRes, pCut = 0.01, ifFDR = TRUE,
302
                             setName = "KEGG gene sets")
303
```
304
305
306
### Heatmap for selected gene sets
307
308
Heatmap plots were used to show the expression values of differentially expressed genes from KEGG_CHEMOKINE_SIGNALING_PATHWAY gene set
309
310
Prepare the matrix for heatmap plot.
311
```{r}
312
#select differentially expressed genes
313
fdrCut <- 0.05
314
cytoDE <- data.frame(DEres) %>% rownames_to_column(var = "ID") %>% 
315
  mutate(Symbol = rowData(ddsCLL[ID,])$symbol,
316
         Chr=rowData(ddsCLL[ID,])$chromosome) %>% 
317
  filter(padj <= fdrCut, log2FoldChange > 0) %>% 
318
  arrange(pvalue) %>% filter(!duplicated(Symbol)) %>%
319
  mutate(Chr12 = ifelse(Chr == 12, "yes", "no"))
320
321
#get the expression matrix
322
plotMat <- assay(ddsCLL.norm[cytoDE$ID,])
323
colnames(plotMat) <- ddsCLL.norm$PatID
324
rownames(plotMat) <- cytoDE$Symbol
325
326
#sort columns of plot matrix based on trisomy 12 status
327
plotMat <- plotMat[,order(ddsCLL.norm$trisomy12)]
328
329
#calculate z-score and sensor
330
plotMat <- t(scale(t(plotMat)))
331
plotMat[plotMat >= 4] <- 4
332
plotMat[plotMat <= -4] <- -4
333
334
annoCol <- data.frame(row.names = ddsCLL.norm$PatID,
335
                      Tris12 = ddsCLL.norm$trisomy12)
336
levels(annoCol$Tris12) <- list(wt = 0, mut =1)
337
annoRow <- data.frame(row.names = cytoDE$Symbol, Chr12 = cytoDE$Chr12)
338
339
```
340
341
Heatmap for genes from KEGG_CHEMOKINE_SIGNALING_PATHWAY geneset.
342
```{r}
343
gsc <- loadGSC(gmts$KEGG)
344
geneList <- gsc$gsc$KEGG_CHEMOKINE_SIGNALING_PATHWAY
345
plotMat.chemo <- plotMat[rownames(plotMat) %in% geneList,]
346
keggHeatmap <- pheatmap(plotMat.chemo,
347
                        color = colorRampPalette(
348
                          rev(brewer.pal(n=7, name="RdBu")))(100),
349
         cluster_cols = FALSE, clustering_method = "ward.D2",
350
         annotation_row = annoRow, annotation_col = annoCol,
351
         show_colnames = FALSE, fontsize_row = 8,
352
         breaks = seq(-4,4, length.out = 101), treeheight_row = 0,
353
         annotation_colors = annoColor, border_color = NA,
354
         main = "CHEMOKINE_SIGNALING_PATHWAY",silent = TRUE)$gtable
355
```
356
357
Combine enrichment plot and heatmap plot.
358
```{r geneEnrichment_result, dev = c("png", "pdf"), fig.path=plotDir, fig.width = 14, fig.height = 13}
359
#FIG# S24 ABC
360
ggdraw() + 
361
  draw_plot(hallBar, 0, 0.7, 0.5, 0.3) + 
362
  draw_plot(keggBar, 0.5, 0.7, 0.5, 0.3) +
363
  draw_plot(keggHeatmap, 0.1, 0, 0.8, 0.65) +
364
  draw_plot_label(c("A","B","C"), c(0, 0.5, 0.1), c(1, 1, 0.7), 
365
                  fontface = "plain", size=20)
366
367
```
368
Based on the gene set enrichment analysis results,  genes from PI3K_ATK_MTOR pathway are significantly up-regulated in the samples with trisomy 12, which partially explained the increased sensitivity of trisomy 12 samples to PI3K and MTOR inhibitors. In addition, genes that are up-regulated in trisomy 12 are enrichment in chemokine signaling pathway. 
369
370
```{r, include=!exists(".standalone"), eval=!exists(".standalone")}
371
sessionInfo()
372
```
373
374
```{r, message=FALSE, warning=FALSE, include=FALSE}
375
rm(list=ls())
376
```