a b/Figures/clonevol/clonevol_P6B_060219.R
1
library(dplyr)
2
library(plyr)
3
library(clonevol)
4
library(fishplot)
5
library(reshape)
6
7
####coverage: from reseq BAM files 
8
####step1: Using mpileup to extract coverage for each site. (Prepared 27/10/18), default parameter
9
10
####step2: Prepare input for pyclone, using segments from ASCAT
11
12
####step3: Run Pyclone twice. The first run is to identify founding cluster, 
13
####       the second run adding --tumour_content based on the cellular prevalence for each biopsy (29/10/18)
14
15
###step4: using clonevol to build evolution tree and model (in this script) (30/10/18)
16
17
###       using Fishplot to visulaize
18
19
##P1: 2 biopsies, clinic transformed
20
21
#T1:LN_right_neck
22
#T2:LN_left_inguinal
23
24
setwd("E:/FL_resequncing/FL_exome_final/fl_latest/11_pyclone/BED_bam/counts_new/pyclone_output_301018/")
25
26
pyclone_out<- read.table(file="ouput_pyclone/output_200X_i2/P6B_i2/tables/loci.tsv",sep="\t",header=T)
27
28
mutations<-cast(pyclone_out[,1:4], mutation_id~sample_id, value="cellular_prevalence")
29
id<-unique(pyclone_out[, c(1,3)] )
30
31
P_case<- merge(id, mutations,by="mutation_id")
32
33
vafs = data.frame(cluster=P_case$cluster_id,
34
                  T1_vaf=(P_case$`JP6_1`/2)*100,
35
                  T2_vaf=(P_case$`JP6_2`/2)*100,
36
                  stringsAsFactors=F)
37
38
vafs$cluster<- vafs$cluster+1
39
40
###needs manully check density plot to identify which cluster is founding cluster
41
##clonevol requires founding cluster=1
42
##in this case founding cluster is #3
43
44
max_id<- max(vafs$cluster)+1
45
46
vafs$cluster[vafs$cluster==1]<-max_id
47
vafs$cluster[vafs$cluster==4]<-1
48
vafs$cluster[vafs$cluster==max_id]<-4
49
50
51
52
samples<-c("JP6_1","JP6_2")
53
samples1<- c("JP6_1\nRelasped_tFL1","JP6_2\nRelapsed_tFL2")
54
55
names(vafs)[2:3] = samples 
56
##step 2: run infer.clonal.models, run twice: 1. include all cluster. 
57
###########2. manual review density plot and exlcude cluster with small number of mutations
58
59
60
dir.create("./clonevol/P6B")
61
##
62
63
64
##first: use all clusters, no consensus models
65
66
res = infer.clonal.models(variants=vafs, cluster.col.name="cluster", vaf.col.names=samples,
67
                          
68
                          subclonal.test="bootstrap", subclonal.test.model="non-parametric",
69
                          founding.cluster=1,
70
                          cluster.center="mean", num.boots=1000,
71
                          min.cluster.vaf=0.01, sum.p=0.01, alpha=0.01)
72
73
74
#Finding consensus models across samples...
75
#Found  0 consensus model(s)
76
#Found 0 consensus model(s)
77
78
79
##second: ignore cluster 5:7 consensus model based on manual review
80
81
##to be able to run clonevol, have to change the cellular_prevalance
82
vafs$P1_2[vafs$cluster=="2"]<- vafs$P1_2[vafs$cluster=="2"]-1.2
83
84
vafs_used<- subset(vafs, !cluster %in% c(3,6,7,9:27))
85
86
vafs_used$cluster[vafs_used$cluster=="8"]<- 3
87
88
89
res = infer.clonal.models(variants=vafs_used, cluster.col.name="cluster", vaf.col.names=samples,
90
                          subclonal.test="bootstrap", subclonal.test.model="non-parametric",
91
                          founding.cluster=1,
92
                          cluster.center="mean", num.boots=1000,
93
                          min.cluster.vaf=0.01, sum.p=0.01, alpha=0.01)
94
95
96
97
res<-convert.consensus.tree.clone.to.branch(res, branch.scale = 'sqrt')
98
99
pdf("./clonevol/P6B/P6B_trees.pdf", useDingbats = FALSE)
100
plot.all.trees.clone.as.branch(res, branch.width = 0.5, node.size = 1, node.label.size = 0.5)
101
dev.off()
102
103
104
105
plot.clonal.models(res,
106
                   # box plot parameters
107
                   box.plot = TRUE,
108
                   fancy.boxplot = TRUE,
109
                   fancy.variant.boxplot.highlight = 'is.driver',
110
                   fancy.variant.boxplot.highlight.shape = 21,
111
                   fancy.variant.boxplot.highlight.fill.color = 'red',
112
                   fancy.variant.boxplot.highlight.color = 'black',
113
                   fancy.variant.boxplot.highlight.note.col.name = 'gene',
114
                   fancy.variant.boxplot.highlight.note.color = 'blue',
115
                   fancy.variant.boxplot.highlight.note.size = 2,
116
                   fancy.variant.boxplot.jitter.alpha = 1,
117
                   fancy.variant.boxplot.jitter.center.color = 'grey50',
118
                   fancy.variant.boxplot.base_size = 12,
119
                   fancy.variant.boxplot.plot.margin = 1,
120
                   fancy.variant.boxplot.vaf.suffix = '.VAF',
121
                   # bell plot parameters
122
                   clone.shape = 'bell',
123
                   bell.event = TRUE,
124
                   bell.event.label.color = 'blue',
125
                   bell.event.label.angle = 60,
126
                   clone.time.step.scale = 1,
127
                   bell.curve.step = 2,
128
                   # node-based consensus tree parameters
129
                   merged.tree.plot = TRUE,
130
                   tree.node.label.split.character = NULL,
131
                   tree.node.shape = 'circle',
132
                   tree.node.size = 30,
133
                   tree.node.text.size = 0.5,
134
                   merged.tree.node.size.scale = 1.25,
135
                   merged.tree.node.text.size.scale = 2,
136
                   merged.tree.cell.frac.ci = FALSE,
137
                   # branch-based consensus tree parameters
138
                   merged.tree.clone.as.branch = TRUE,
139
                   mtcab.event.sep.char = ',',
140
                   mtcab.branch.text.size = 1,
141
                   mtcab.branch.width = 0.75,
142
                   mtcab.node.size = 3,
143
                   mtcab.node.label.size = 1,
144
                   mtcab.node.text.size = 1.5,
145
                   # cellular population parameters
146
                   cell.plot = TRUE,
147
                   num.cells = 100,
148
                   cell.border.size = 0.25,
149
                   cell.border.color = 'black',
150
                   clone.grouping = 'horizontal',
151
                   #meta-parameters
152
                   scale.monoclonal.cell.frac = TRUE,
153
                   show.score = FALSE,
154
                   cell.frac.ci = TRUE,
155
                   disable.cell.frac = TRUE,
156
                   # output figure parameters
157
                   out.dir = './clonevol/P6B/',
158
                   out.format = 'pdf',
159
                   overwrite.output = TRUE,
160
                   width = 10,
161
                   height = 4,
162
                   # vector of width scales for each panel from left to right
163
                   panel.widths = c(1.5,2.5,1.5,2.5,2))
164
dev.off()
165
166
167
plot.clonal.models(res,
168
                   # box plot parameters
169
                   box.plot = TRUE,
170
                   fancy.boxplot = TRUE,
171
                   fancy.variant.boxplot.highlight = 'is.driver',
172
                   fancy.variant.boxplot.highlight.shape = 21,
173
                   fancy.variant.boxplot.highlight.fill.color = 'red',
174
                   fancy.variant.boxplot.highlight.color = 'black',
175
                   fancy.variant.boxplot.highlight.note.col.name = 'gene',
176
                   fancy.variant.boxplot.highlight.note.color = 'blue',
177
                   fancy.variant.boxplot.highlight.note.size = 2,
178
                   fancy.variant.boxplot.jitter.alpha = 1,
179
                   fancy.variant.boxplot.jitter.center.color = 'grey50',
180
                   fancy.variant.boxplot.base_size = 12,
181
                   fancy.variant.boxplot.plot.margin = 1,
182
                   fancy.variant.boxplot.vaf.suffix = '.VAF',
183
                   # bell plot parameters
184
                   clone.shape = 'bell',
185
                   bell.event = TRUE,
186
                   bell.event.label.color = 'blue',
187
                   bell.event.label.angle = 60,
188
                   clone.time.step.scale = 1,
189
                   bell.curve.step = 2,
190
                   # node-based consensus tree parameters
191
                   merged.tree.plot = TRUE,
192
                   tree.node.label.split.character = NULL,
193
                   tree.node.shape = 'circle',
194
                   tree.node.size = 30,
195
                   tree.node.text.size = 0.5,
196
                   merged.tree.node.size.scale = 1.25,
197
                   merged.tree.node.text.size.scale = 2,
198
                   merged.tree.cell.frac.ci = FALSE,
199
                   # branch-based consensus tree parameters
200
                   merged.tree.clone.as.branch = TRUE,
201
                   mtcab.event.sep.char = ',',
202
                   mtcab.branch.text.size = 1,
203
                   mtcab.branch.width = 0.75,
204
                   mtcab.node.size = 3,
205
                   mtcab.node.label.size = 1,
206
                   mtcab.node.text.size = 1.5,
207
                   # cellular population parameters
208
                   cell.plot = TRUE,
209
                   num.cells = 100,
210
                   cell.border.size = 0.25,
211
                   cell.border.color = 'black',
212
                   clone.grouping = 'horizontal',
213
                   #meta-parameters
214
                   scale.monoclonal.cell.frac = TRUE,
215
                   show.score = FALSE,
216
                   cell.frac.ci = TRUE,
217
                   disable.cell.frac = FALSE,
218
                   # output figure parameters
219
                   out.dir = './clonevol/P6B/',
220
                   out.format = 'pdf',
221
                   overwrite.output = TRUE,
222
                   width = 10,
223
                   height = 4,
224
                   # vector of width scales for each panel from left to right
225
                   panel.widths = c(1.5,2.5,1.5,2.5,2))
226
227
###removing cell.frac annotation
228
229
230
231
plot.clonal.models(res,
232
                   # box plot parameters
233
                   box.plot = TRUE,
234
                   fancy.boxplot = TRUE,
235
                   fancy.variant.boxplot.highlight = 'is.driver',
236
                   fancy.variant.boxplot.highlight.shape = 21,
237
                   fancy.variant.boxplot.highlight.fill.color = 'red',
238
                   fancy.variant.boxplot.highlight.color = 'black',
239
                   fancy.variant.boxplot.highlight.note.col.name = 'gene',
240
                   fancy.variant.boxplot.highlight.note.color = 'blue',
241
                   fancy.variant.boxplot.highlight.note.size = 2,
242
                   fancy.variant.boxplot.jitter.alpha = 1,
243
                   fancy.variant.boxplot.jitter.center.color = 'grey50',
244
                   fancy.variant.boxplot.base_size = 12,
245
                   fancy.variant.boxplot.plot.margin = 1,
246
                   fancy.variant.boxplot.vaf.suffix = '.VAF',
247
                   # bell plot parameters
248
                   clone.shape = 'bell',
249
                   bell.event = TRUE,
250
                   bell.event.label.color = 'blue',
251
                   bell.event.label.angle = 60,
252
                   clone.time.step.scale = 1,
253
                   bell.curve.step = 2,
254
                   # node-based consensus tree parameters
255
                   merged.tree.plot = TRUE,
256
                   tree.node.label.split.character = NULL,
257
                   tree.node.shape = 'circle',
258
                   tree.node.size = 30,
259
                   tree.node.text.size = 0.5,
260
                   merged.tree.node.size.scale = 1.25,
261
                   merged.tree.node.text.size.scale = 2,
262
                   merged.tree.cell.frac.ci = FALSE,
263
                   # branch-based consensus tree parameters
264
                   merged.tree.clone.as.branch = TRUE,
265
                   mtcab.event.sep.char = ',',
266
                   mtcab.branch.text.size = 1,
267
                   mtcab.branch.width = 0.75,
268
                   mtcab.node.size = 3,
269
                   mtcab.node.label.size = 1,
270
                   mtcab.node.text.size = 1.5,
271
                   # cellular population parameters
272
                   cell.plot = TRUE,
273
                   num.cells = 100,
274
                   cell.border.size = 0.25,
275
                   cell.border.color = 'black',
276
                   clone.grouping = 'horizontal',
277
                   #meta-parameters
278
                   scale.monoclonal.cell.frac = TRUE,
279
                   show.score = FALSE,
280
                   cell.frac.ci = TRUE,
281
                   disable.cell.frac = TRUE,
282
                   # output figure parameters
283
                   out.dir = './clonevol/P1/',
284
                   out.format = 'pdf',
285
                   overwrite.output = TRUE,
286
                   width = 10,
287
                   height = 4,
288
                   # vector of width scales for each panel from left to right
289
                   panel.widths = c(1.5,2.5,1.5,2.5,2))
290
291
292
##generating fish plot
293
f<- generateFishplotInputs(results = res)
294
fishes=createFishPlotObjects(f)
295
296
297
298
samples1<- c("JP6_1\ntFL1", "P6_2\ntFL2")
299
300
301
samples1<- c("JP6_1\nRelasped_tFL1","JP6_2\nRelapsed_tFL2")
302
303
304
305
306
pdf('./clonevol/P6B/P6B_fish_200x_pyclone_anno_new.pdf', width=14, height=7)
307
for (i in 1:length(fishes)){
308
  
309
  fish = layoutClones(fishes[[i]])
310
  fish = setCol(fish,f$clonevol.clone.colors)
311
  fishPlot(fish,shape="spline", title.btm="P1", cex.title=0.7,cex.vlab = 1.4,
312
           vlines=seq(1, length(samples1)), vlab=samples1, pad.left=0.5)
313
}
314
315
dev<-dev.off()
316
317
##other plots
318
319
pdf("./clonevol/P6B/P6B_box.pdf", width=3, height=3,useDingbats = FALSE, title='')
320
pp<-plot.variant.clusters(vafs_used,
321
                          cluster.col.name = 'cluster',
322
                          show.cluster.size = FALSE,
323
                          cluster.size.text.color = 'blue',
324
                          vaf.col.names = samples,
325
                          vaf.limits = 70,
326
                          sample.title.size = 20,
327
                          violin = FALSE,
328
                          box = FALSE,
329
                          jitter = TRUE,
330
                          jitter.shape = 1,
331
                          
332
                          jitter.size = 3,
333
                          jitter.alpha = 1,
334
                          jitter.center.method = 'median',
335
                          jitter.center.size = 1,
336
                          jitter.center.color = 'darkgray',
337
                          jitter.center.display.value = 'none',
338
                          highlight = 'is.driver',
339
                          highlight.shape = 21,
340
                          highlight.color = 'blue',
341
                          highlight.fill.color = 'green',
342
                          highlight.note.col.name = 'gene',
343
                          highlight.note.size = 2,
344
                          order.by.total.vaf = FALSE)
345
346
dev.off()
347
348
349
350
pdf('./clonevol/P6B/P6B_flow.pdf')
351
plot.cluster.flow(vafs_used, vaf.col.names = samples,
352
                  sample.names = c('tFL1', 'tFL2'))
353
dev.off()
354
355
####checking coverage for cluster 2 in P1_1
356
357
c1<- pyclone_out%>%filter(cluster_id=="1")
358
359
P1_1<- read.table(file="input_pyclone_271018_newpara/200X/GCF0150-0001-N01_200X/GCF0150-0001-T01.txt",sep="\t",header=T)
360
P1_2<- read.table(file="input_pyclone_271018_newpara/200X/GCF0150-0001-N01_200X/GCF0150-0001-T02.txt",sep="\t",header=T)
361
P1<- rbind(P1_1, P1_2)
362
363
c1inP1_1<- subset(P1_1, mutation_id %in% unique(c1$mutation_id))
364
365
########
366
#min (dp) =min(c1inP1_1$var_counts+c1inP1_1$ref_counts)=206
367
368
#max (dp) =max(c1inP1_1$var_counts+c1inP1_1$ref_counts)=1500
369
#median (dp) =median(c1inP1_1$var_counts+c1inP1_1$ref_counts)=463
370
#mean (dp) =mean(c1inP1_1$var_counts+c1inP1_1$ref_counts)=544.9
371
372
library(ggplot2)
373
374
ggplot(P1, aes(x=var_counts+ref_counts), group_by=sample)+geom_histogram()
375
376
pdf("clonevol/P1/coverage.pdf")
377
ggplot(P1, aes(x=var_counts+ref_counts, fill=sample))+geom_histogram()
378
379
dev.off()
380