Switch to unified view

a b/Figures/clonevol/clonevol_P8_311018.R
1
library(dplyr)
2
library(plyr)
3
library(clonevol)
4
library(fishplot)
5
library(reshape)
6
7
####coverage: from reseq BAM files 
8
####step1: Using mpileup to extract coverage for each site. (Prepared 27/10/18), default parameter
9
10
####step2: Prepare input for pyclone, using segments from ASCAT
11
12
####step3: Run Pyclone twice. The first run is to identify founding cluster, 
13
####       the second run adding --tumour_content based on the cellular prevalence for each biopsy (29/10/18)
14
15
###step4: using clonevol to build evolution tree and model (in this script) (30/10/18)
16
17
###       using Fishplot to visulaize
18
19
###P8, 2 biopsies, hist_transform
20
21
22
#T1:LN_right_neck
23
#T2:LN_right_axilla
24
25
26
setwd("F:/FL_exome_final/fl_latest/11_pyclone/BED_bam/counts_new/pyclone_output_301018/")
27
28
pyclone_out<- read.table(file="ouput_pyclone/output_200X_i2/P8_200X_i2/tables/loci.tsv",sep="\t",header=T)
29
30
mutations<-cast(pyclone_out[,1:4], mutation_id~sample_id, value="cellular_prevalence")
31
id<-unique(pyclone_out[, c(1,3)] )
32
33
P_case<- merge(id, mutations,by="mutation_id")
34
35
vafs = data.frame(cluster=P_case$cluster_id,
36
                  T1_vaf=(P_case$`GCF0150-0008-T01`/2)*100,
37
                  T2_vaf=(P_case$`GCF0150-0008-T02`/2)*100,
38
                  stringsAsFactors=F)
39
40
vafs$cluster<- vafs$cluster+1
41
42
###needs manully check density plot to identify which cluster is founding cluster
43
##clonevol requires founding cluster=1
44
45
max_id<- max(vafs$cluster)+1
46
47
vafs$cluster[vafs$cluster==1]<-max_id
48
vafs$cluster[vafs$cluster==2]<-1
49
vafs$cluster[vafs$cluster==max_id]<-2
50
51
52
53
samples<-c("P8_1","P8_2")
54
samples2<- c("P8_1\nPrimary\nLN_right_neck", "P8_2\nTransformed\nLN_right_axilla")
55
56
57
names(vafs)[2:3] = samples 
58
##step 2: run infer.clonal.models, run twice: 1. include all cluster. 
59
###########2. manual review density plot and exlcude cluster with small number of mutations
60
61
62
dir.create("./clonevol/P8")
63
##
64
65
66
##first: use all clusters, no consensus models
67
68
res = infer.clonal.models(variants=vafs, cluster.col.name="cluster", vaf.col.names=samples,
69
                          
70
                          subclonal.test="bootstrap", subclonal.test.model="non-parametric",
71
                          founding.cluster=1,
72
                          cluster.center="mean", num.boots=1000,
73
                          min.cluster.vaf=0.01, sum.p=0.01, alpha=0.01)
74
75
res = infer.clonal.models(variants=vafs, cluster.col.name="cluster", vaf.col.names=samples,
76
                          
77
                          subclonal.test="bootstrap", subclonal.test.model="non-parametric",
78
                          founding.cluster=1,ignore.clusters = c(4,7,8),
79
                          cluster.center="mean", num.boots=1000,
80
                          min.cluster.vaf=0.01, sum.p=0.01, alpha=0.01)
81
82
83
#Finding consensus models across samples...
84
#Found  0 consensus model(s)
85
#Found 0 consensus model(s)
86
87
88
##second: ignore cluster 5:7 consensus model based on manual review
89
90
91
vafs_used<- subset(vafs, !cluster %in% c(4,7,8))
92
93
vafs_used$cluster[vafs_used$cluster=="6"]<- 4
94
95
res = infer.clonal.models(variants=vafs_used, cluster.col.name="cluster", vaf.col.names=samples,
96
                          subclonal.test="bootstrap", subclonal.test.model="non-parametric",
97
                          founding.cluster=1, 
98
                          cluster.center="mean", num.boots=1000,
99
                          min.cluster.vaf=0.01, sum.p=0.01, alpha=0.01)
100
101
102
103
res<-convert.consensus.tree.clone.to.branch(res, branch.scale = 'sqrt')
104
105
pdf("./clonevol/P8/P8_trees.pdf", useDingbats = FALSE)
106
plot.all.trees.clone.as.branch(res, branch.width = 0.5, node.size = 1, node.label.size = 0.5)
107
dev.off()
108
109
110
plot.clonal.models(res,
111
                   # box plot parameters
112
                   box.plot = TRUE,
113
                   fancy.boxplot = TRUE,
114
                   fancy.variant.boxplot.highlight = 'is.driver',
115
                   fancy.variant.boxplot.highlight.shape = 21,
116
                   fancy.variant.boxplot.highlight.fill.color = 'red',
117
                   fancy.variant.boxplot.highlight.color = 'black',
118
                   fancy.variant.boxplot.highlight.note.col.name = 'gene',
119
                   fancy.variant.boxplot.highlight.note.color = 'blue',
120
                   fancy.variant.boxplot.highlight.note.size = 2,
121
                   fancy.variant.boxplot.jitter.alpha = 1,
122
                   fancy.variant.boxplot.jitter.center.color = 'grey50',
123
                   fancy.variant.boxplot.base_size = 12,
124
                   fancy.variant.boxplot.plot.margin = 1,
125
                   fancy.variant.boxplot.vaf.suffix = '.VAF',
126
                   # bell plot parameters
127
                   clone.shape = 'bell',
128
                   bell.event = TRUE,
129
                   bell.event.label.color = 'blue',
130
                   bell.event.label.angle = 60,
131
                   clone.time.step.scale = 1,
132
                   bell.curve.step = 2,
133
                   # node-based consensus tree parameters
134
                   merged.tree.plot = TRUE,
135
                   tree.node.label.split.character = NULL,
136
                   tree.node.shape = 'circle',
137
                   tree.node.size = 30,
138
                   tree.node.text.size = 0.5,
139
                   merged.tree.node.size.scale = 1.25,
140
                   merged.tree.node.text.size.scale = 2,
141
                   merged.tree.cell.frac.ci = FALSE,
142
                   # branch-based consensus tree parameters
143
                   merged.tree.clone.as.branch = TRUE,
144
                   mtcab.event.sep.char = ',',
145
                   mtcab.branch.text.size = 1,
146
                   mtcab.branch.width = 0.75,
147
                   mtcab.node.size = 3,
148
                   mtcab.node.label.size = 1,
149
                   mtcab.node.text.size = 1.5,
150
                   # cellular population parameters
151
                   cell.plot = TRUE,
152
                   num.cells = 100,
153
                   cell.border.size = 0.25,
154
                   cell.border.color = 'black',
155
                   clone.grouping = 'horizontal',
156
                   #meta-parameters
157
                   scale.monoclonal.cell.frac = TRUE,
158
                   show.score = FALSE,
159
                   cell.frac.ci = TRUE,
160
                   disable.cell.frac = FALSE,
161
                   # output figure parameters
162
                   out.dir = './clonevol/P8/',
163
                   out.format = 'pdf',
164
                   overwrite.output = TRUE,
165
                   width = 10,
166
                   height = 4,
167
                   # vector of width scales for each panel from left to right
168
                   panel.widths = c(1.5,2.5,1.5,2.5,2))
169
170
171
172
plot.clonal.models(res,
173
                   # box plot parameters
174
                   box.plot = TRUE,
175
                   fancy.boxplot = TRUE,
176
                   fancy.variant.boxplot.highlight = 'is.driver',
177
                   fancy.variant.boxplot.highlight.shape = 21,
178
                   fancy.variant.boxplot.highlight.fill.color = 'red',
179
                   fancy.variant.boxplot.highlight.color = 'black',
180
                   fancy.variant.boxplot.highlight.note.col.name = 'gene',
181
                   fancy.variant.boxplot.highlight.note.color = 'blue',
182
                   fancy.variant.boxplot.highlight.note.size = 2,
183
                   fancy.variant.boxplot.jitter.alpha = 1,
184
                   fancy.variant.boxplot.jitter.center.color = 'grey50',
185
                   fancy.variant.boxplot.base_size = 12,
186
                   fancy.variant.boxplot.plot.margin = 1,
187
                   fancy.variant.boxplot.vaf.suffix = '.VAF',
188
                   # bell plot parameters
189
                   clone.shape = 'bell',
190
                   bell.event = TRUE,
191
                   bell.event.label.color = 'blue',
192
                   bell.event.label.angle = 60,
193
                   clone.time.step.scale = 1,
194
                   bell.curve.step = 2,
195
                   # node-based consensus tree parameters
196
                   merged.tree.plot = TRUE,
197
                   tree.node.label.split.character = NULL,
198
                   tree.node.shape = 'circle',
199
                   tree.node.size = 30,
200
                   tree.node.text.size = 0.5,
201
                   merged.tree.node.size.scale = 1.25,
202
                   merged.tree.node.text.size.scale = 2,
203
                   merged.tree.cell.frac.ci = FALSE,
204
                   # branch-based consensus tree parameters
205
                   merged.tree.clone.as.branch = TRUE,
206
                   mtcab.event.sep.char = ',',
207
                   mtcab.branch.text.size = 1,
208
                   mtcab.branch.width = 0.75,
209
                   mtcab.node.size = 3,
210
                   mtcab.node.label.size = 1,
211
                   mtcab.node.text.size = 1.5,
212
                   # cellular population parameters
213
                   cell.plot = TRUE,
214
                   num.cells = 100,
215
                   cell.border.size = 0.25,
216
                   cell.border.color = 'black',
217
                   clone.grouping = 'horizontal',
218
                   #meta-parameters
219
                   scale.monoclonal.cell.frac = TRUE,
220
                   show.score = FALSE,
221
                   cell.frac.ci = TRUE,
222
                   disable.cell.frac = TRUE,
223
                   # output figure parameters
224
                   out.dir = './clonevol/P4/',
225
                   out.format = 'pdf',
226
                   overwrite.output = TRUE,
227
                   width = 10,
228
                   height = 4,
229
                   # vector of width scales for each panel from left to right
230
                   panel.widths = c(1.5,2.5,1.5,2.5,2))
231
232
233
234
235
###removing cell.frac annotation
236
237
238
239
plot.clonal.models(res,
240
                   # box plot parameters
241
                   box.plot = TRUE,
242
                   fancy.boxplot = TRUE,
243
                   fancy.variant.boxplot.highlight = 'is.driver',
244
                   fancy.variant.boxplot.highlight.shape = 21,
245
                   fancy.variant.boxplot.highlight.fill.color = 'red',
246
                   fancy.variant.boxplot.highlight.color = 'black',
247
                   fancy.variant.boxplot.highlight.note.col.name = 'gene',
248
                   fancy.variant.boxplot.highlight.note.color = 'blue',
249
                   fancy.variant.boxplot.highlight.note.size = 2,
250
                   fancy.variant.boxplot.jitter.alpha = 1,
251
                   fancy.variant.boxplot.jitter.center.color = 'grey50',
252
                   fancy.variant.boxplot.base_size = 12,
253
                   fancy.variant.boxplot.plot.margin = 1,
254
                   fancy.variant.boxplot.vaf.suffix = '.VAF',
255
                   # bell plot parameters
256
                   clone.shape = 'bell',
257
                   bell.event = TRUE,
258
                   bell.event.label.color = 'blue',
259
                   bell.event.label.angle = 60,
260
                   clone.time.step.scale = 1,
261
                   bell.curve.step = 2,
262
                   # node-based consensus tree parameters
263
                   merged.tree.plot = TRUE,
264
                   tree.node.label.split.character = NULL,
265
                   tree.node.shape = 'circle',
266
                   tree.node.size = 30,
267
                   tree.node.text.size = 0.5,
268
                   merged.tree.node.size.scale = 1.25,
269
                   merged.tree.node.text.size.scale = 2,
270
                   merged.tree.cell.frac.ci = FALSE,
271
                   # branch-based consensus tree parameters
272
                   merged.tree.clone.as.branch = TRUE,
273
                   mtcab.event.sep.char = ',',
274
                   mtcab.branch.text.size = 1,
275
                   mtcab.branch.width = 0.75,
276
                   mtcab.node.size = 3,
277
                   mtcab.node.label.size = 1,
278
                   mtcab.node.text.size = 1.5,
279
                   # cellular population parameters
280
                   cell.plot = TRUE,
281
                   num.cells = 100,
282
                   cell.border.size = 0.25,
283
                   cell.border.color = 'black',
284
                   clone.grouping = 'horizontal',
285
                   #meta-parameters
286
                   scale.monoclonal.cell.frac = TRUE,
287
                   show.score = FALSE,
288
                   cell.frac.ci = TRUE,
289
                   disable.cell.frac = TRUE,
290
                   # output figure parameters
291
                   out.dir = './clonevol/P8/',
292
                   out.format = 'pdf',
293
                   overwrite.output = TRUE,
294
                   width = 10,
295
                   height = 4,
296
                   # vector of width scales for each panel from left to right
297
                   panel.widths = c(1.5,2.5,1.5,2.5,2))
298
299
300
##generating fish plot
301
f<- generateFishplotInputs(results = res)
302
fishes=createFishPlotObjects(f)
303
304
305
samples2<- c("P8_1\nPrimary\nLN_right_neck", "P8_2\nTransformed\nLN_right_axilla")
306
307
pdf('./clonevol/P4/P4_fish_200x_pyclone_anno_loc.pdf', width=14, height=7)
308
for (i in 1:length(fishes)){
309
  
310
  fish = layoutClones(fishes[[i]])
311
  fish = setCol(fish,f$clonevol.clone.colors)
312
  fishPlot(fish,shape="spline", title.btm="P1", cex.title=0.7,cex.vlab = 1.4,
313
           vlines=seq(1, length(samples2)), vlab=samples2, pad.left=0.5)
314
}
315
dev.off()
316
317
318
319
pdf("./clonevol/P8/P8_box.pdf", width=3, height=3,useDingbats = FALSE, title='')
320
pp<-plot.variant.clusters(vafs_used,
321
                          cluster.col.name = 'cluster',
322
                          show.cluster.size = FALSE,
323
                          cluster.size.text.color = 'blue',
324
                          vaf.col.names = samples,
325
                          vaf.limits = 70,
326
                          sample.title.size = 20,
327
                          violin = FALSE,
328
                          box = FALSE,
329
                          jitter = TRUE,
330
                          jitter.shape = 1,
331
                          
332
                          jitter.size = 3,
333
                          jitter.alpha = 1,
334
                          jitter.center.method = 'median',
335
                          jitter.center.size = 1,
336
                          jitter.center.color = 'darkgray',
337
                          jitter.center.display.value = 'none',
338
                          highlight = 'is.driver',
339
                          highlight.shape = 21,
340
                          highlight.color = 'blue',
341
                          highlight.fill.color = 'green',
342
                          highlight.note.col.name = 'gene',
343
                          highlight.note.size = 2,
344
                          order.by.total.vaf = FALSE)
345
346
dev.off()
347
348
349
plot.pairwise(vafs_used, col.names = samples,
350
              out.prefix = './clonevol/P8/P8_variants.pairwise.plot')
351
352
353
pdf('./clonevol/P8/P8_flow.pdf')
354
plot.cluster.flow(vafs_used, vaf.col.names = samples,
355
                  sample.names = c('Primary', 'Transformed'))
356
dev.off()
357
358
359
####checking coverage f
360
361
c1<- pyclone_out%>%filter(cluster_id=="1")
362
363
P8_1<- read.table(file="input_pyclone_271018_newpara/200X/GCF0150-0008-N01_200X/GCF0150-0008-T01.txt",sep="\t",header=T)
364
P8_2<- read.table(file="input_pyclone_271018_newpara/200X/GCF0150-0008-N01_200X/GCF0150-0008-T02.txt",sep="\t",header=T)
365
Pcase<- rbind(P8_1, P8_2)
366
367
368
369
########
370
#min (dp) =min(c1inP4_1$var_counts+c1inP4_1$ref_counts)=273
371
372
#max (dp) =max(c1inP4_1$var_counts+c1inP4_1$ref_counts)=648
373
#median (dp) =median(c1inP4_1$var_counts+c1inP4_1$ref_counts)=380
374
#mean (dp) =mean(c1inP4_1$var_counts+c1inP4_1$ref_counts)=417
375
376
library(ggplot2)
377
378
379
380
pdf("clonevol/P8/coverage.pdf")
381
ggplot(Pcase, aes(x=var_counts+ref_counts, fill=sample))+geom_histogram()
382
383
dev.off()
384