a b/Figures/clonevol/clonevol_P4_311018.R
1
library(dplyr)
2
library(plyr)
3
library(clonevol)
4
library(fishplot)
5
library(reshape)
6
7
####coverage: from reseq BAM files 
8
####step1: Using mpileup to extract coverage for each site. (Prepared 27/10/18), default parameter
9
10
####step2: Prepare input for pyclone, using segments from ASCAT
11
12
####step3: Run Pyclone twice. The first run is to identify founding cluster, 
13
####       the second run adding --tumour_content based on the cellular prevalence for each biopsy (29/10/18)
14
15
###step4: using clonevol to build evolution tree and model (in this script) (30/10/18)
16
17
###       using Fishplot to visulaize
18
19
###P4, clinic_transform,2 biopsies
20
#T1:LN_left_axilla
21
#T2:LN_abdominal
22
23
24
setwd("F:/FL_exome_final/fl_latest/11_pyclone/BED_bam/counts_new/pyclone_output_301018/")
25
26
pyclone_out<- read.table(file="ouput_pyclone/output_200X_i2/P4_200X_i2/tables/loci.tsv",sep="\t",header=T)
27
28
mutations<-cast(pyclone_out[,1:4], mutation_id~sample_id, value="cellular_prevalence")
29
id<-unique(pyclone_out[, c(1,3)] )
30
31
P_case<- merge(id, mutations,by="mutation_id")
32
33
vafs = data.frame(cluster=P_case$cluster_id,
34
                  T1_vaf=(P_case$`GCF0150-0004-T01`/2)*100,
35
                  T2_vaf=(P_case$`GCF0150-0004-T02`/2)*100,
36
                  stringsAsFactors=F)
37
38
vafs$cluster<- vafs$cluster+1
39
40
###needs manully check density plot to identify which cluster is founding cluster
41
##clonevol requires founding cluster=1
42
43
44
45
46
samples<-c("P4_1","P4_2")
47
48
49
names(vafs)[2:3] = samples 
50
##step 2: run infer.clonal.models, run twice: 1. include all cluster. 
51
###########2. manual review density plot and exlcude cluster with small number of mutations
52
53
54
dir.create("./clonevol/P4")
55
##
56
57
58
##first: use all clusters, no consensus models
59
60
res = infer.clonal.models(variants=vafs, cluster.col.name="cluster", vaf.col.names=samples,
61
                          
62
                          subclonal.test="bootstrap", subclonal.test.model="non-parametric",
63
                          founding.cluster=1,
64
                          cluster.center="mean", num.boots=1000,
65
                          min.cluster.vaf=0.01, sum.p=0.01, alpha=0.01)
66
67
68
#Finding consensus models across samples...
69
#Found  0 consensus model(s)
70
#Found 0 consensus model(s)
71
72
73
##second: ignore cluster 5:7 consensus model based on manual review
74
75
76
vafs_used<- subset(vafs, !cluster %in% c(5,7))
77
78
vafs_used$cluster[vafs_used$cluster=="6"]<- 5
79
80
res = infer.clonal.models(variants=vafs_used, cluster.col.name="cluster", vaf.col.names=samples,
81
                          subclonal.test="bootstrap", subclonal.test.model="non-parametric",
82
                          founding.cluster=1,
83
                          cluster.center="mean", num.boots=1000,
84
                          min.cluster.vaf=0.01, sum.p=0.01, alpha=0.01)
85
86
87
88
res<-convert.consensus.tree.clone.to.branch(res, branch.scale = 'sqrt')
89
90
pdf("./clonevol/P4/P4_trees.pdf", useDingbats = FALSE)
91
plot.all.trees.clone.as.branch(res, branch.width = 0.5, node.size = 1, node.label.size = 0.5)
92
dev.off()
93
94
95
plot.clonal.models(res,
96
                   # box plot parameters
97
                   box.plot = TRUE,
98
                   fancy.boxplot = TRUE,
99
                   fancy.variant.boxplot.highlight = 'is.driver',
100
                   fancy.variant.boxplot.highlight.shape = 21,
101
                   fancy.variant.boxplot.highlight.fill.color = 'red',
102
                   fancy.variant.boxplot.highlight.color = 'black',
103
                   fancy.variant.boxplot.highlight.note.col.name = 'gene',
104
                   fancy.variant.boxplot.highlight.note.color = 'blue',
105
                   fancy.variant.boxplot.highlight.note.size = 2,
106
                   fancy.variant.boxplot.jitter.alpha = 1,
107
                   fancy.variant.boxplot.jitter.center.color = 'grey50',
108
                   fancy.variant.boxplot.base_size = 12,
109
                   fancy.variant.boxplot.plot.margin = 1,
110
                   fancy.variant.boxplot.vaf.suffix = '.VAF',
111
                   # bell plot parameters
112
                   clone.shape = 'bell',
113
                   bell.event = TRUE,
114
                   bell.event.label.color = 'blue',
115
                   bell.event.label.angle = 60,
116
                   clone.time.step.scale = 1,
117
                   bell.curve.step = 2,
118
                   # node-based consensus tree parameters
119
                   merged.tree.plot = TRUE,
120
                   tree.node.label.split.character = NULL,
121
                   tree.node.shape = 'circle',
122
                   tree.node.size = 30,
123
                   tree.node.text.size = 0.5,
124
                   merged.tree.node.size.scale = 1.25,
125
                   merged.tree.node.text.size.scale = 2,
126
                   merged.tree.cell.frac.ci = FALSE,
127
                   # branch-based consensus tree parameters
128
                   merged.tree.clone.as.branch = TRUE,
129
                   mtcab.event.sep.char = ',',
130
                   mtcab.branch.text.size = 1,
131
                   mtcab.branch.width = 0.75,
132
                   mtcab.node.size = 3,
133
                   mtcab.node.label.size = 1,
134
                   mtcab.node.text.size = 1.5,
135
                   # cellular population parameters
136
                   cell.plot = TRUE,
137
                   num.cells = 100,
138
                   cell.border.size = 0.25,
139
                   cell.border.color = 'black',
140
                   clone.grouping = 'horizontal',
141
                   #meta-parameters
142
                   scale.monoclonal.cell.frac = TRUE,
143
                   show.score = FALSE,
144
                   cell.frac.ci = TRUE,
145
                   disable.cell.frac = FALSE,
146
                   # output figure parameters
147
                   out.dir = './clonevol/P4/',
148
                   out.format = 'pdf',
149
                   overwrite.output = TRUE,
150
                   width = 10,
151
                   height = 4,
152
                   # vector of width scales for each panel from left to right
153
                   panel.widths = c(1.5,2.5,1.5,2.5,2))
154
155
156
157
plot.clonal.models(res,
158
                   # box plot parameters
159
                   box.plot = TRUE,
160
                   fancy.boxplot = TRUE,
161
                   fancy.variant.boxplot.highlight = 'is.driver',
162
                   fancy.variant.boxplot.highlight.shape = 21,
163
                   fancy.variant.boxplot.highlight.fill.color = 'red',
164
                   fancy.variant.boxplot.highlight.color = 'black',
165
                   fancy.variant.boxplot.highlight.note.col.name = 'gene',
166
                   fancy.variant.boxplot.highlight.note.color = 'blue',
167
                   fancy.variant.boxplot.highlight.note.size = 2,
168
                   fancy.variant.boxplot.jitter.alpha = 1,
169
                   fancy.variant.boxplot.jitter.center.color = 'grey50',
170
                   fancy.variant.boxplot.base_size = 12,
171
                   fancy.variant.boxplot.plot.margin = 1,
172
                   fancy.variant.boxplot.vaf.suffix = '.VAF',
173
                   # bell plot parameters
174
                   clone.shape = 'bell',
175
                   bell.event = TRUE,
176
                   bell.event.label.color = 'blue',
177
                   bell.event.label.angle = 60,
178
                   clone.time.step.scale = 1,
179
                   bell.curve.step = 2,
180
                   # node-based consensus tree parameters
181
                   merged.tree.plot = TRUE,
182
                   tree.node.label.split.character = NULL,
183
                   tree.node.shape = 'circle',
184
                   tree.node.size = 30,
185
                   tree.node.text.size = 0.5,
186
                   merged.tree.node.size.scale = 1.25,
187
                   merged.tree.node.text.size.scale = 2,
188
                   merged.tree.cell.frac.ci = FALSE,
189
                   # branch-based consensus tree parameters
190
                   merged.tree.clone.as.branch = TRUE,
191
                   mtcab.event.sep.char = ',',
192
                   mtcab.branch.text.size = 1,
193
                   mtcab.branch.width = 0.75,
194
                   mtcab.node.size = 3,
195
                   mtcab.node.label.size = 1,
196
                   mtcab.node.text.size = 1.5,
197
                   # cellular population parameters
198
                   cell.plot = TRUE,
199
                   num.cells = 100,
200
                   cell.border.size = 0.25,
201
                   cell.border.color = 'black',
202
                   clone.grouping = 'horizontal',
203
                   #meta-parameters
204
                   scale.monoclonal.cell.frac = TRUE,
205
                   show.score = FALSE,
206
                   cell.frac.ci = TRUE,
207
                   disable.cell.frac = TRUE,
208
                   # output figure parameters
209
                   out.dir = './clonevol/P4/',
210
                   out.format = 'pdf',
211
                   overwrite.output = TRUE,
212
                   width = 10,
213
                   height = 4,
214
                   # vector of width scales for each panel from left to right
215
                   panel.widths = c(1.5,2.5,1.5,2.5,2))
216
217
218
219
220
###removing cell.frac annotation
221
222
223
224
plot.clonal.models(res,
225
                   # box plot parameters
226
                   box.plot = TRUE,
227
                   fancy.boxplot = TRUE,
228
                   fancy.variant.boxplot.highlight = 'is.driver',
229
                   fancy.variant.boxplot.highlight.shape = 21,
230
                   fancy.variant.boxplot.highlight.fill.color = 'red',
231
                   fancy.variant.boxplot.highlight.color = 'black',
232
                   fancy.variant.boxplot.highlight.note.col.name = 'gene',
233
                   fancy.variant.boxplot.highlight.note.color = 'blue',
234
                   fancy.variant.boxplot.highlight.note.size = 2,
235
                   fancy.variant.boxplot.jitter.alpha = 1,
236
                   fancy.variant.boxplot.jitter.center.color = 'grey50',
237
                   fancy.variant.boxplot.base_size = 12,
238
                   fancy.variant.boxplot.plot.margin = 1,
239
                   fancy.variant.boxplot.vaf.suffix = '.VAF',
240
                   # bell plot parameters
241
                   clone.shape = 'bell',
242
                   bell.event = TRUE,
243
                   bell.event.label.color = 'blue',
244
                   bell.event.label.angle = 60,
245
                   clone.time.step.scale = 1,
246
                   bell.curve.step = 2,
247
                   # node-based consensus tree parameters
248
                   merged.tree.plot = TRUE,
249
                   tree.node.label.split.character = NULL,
250
                   tree.node.shape = 'circle',
251
                   tree.node.size = 30,
252
                   tree.node.text.size = 0.5,
253
                   merged.tree.node.size.scale = 1.25,
254
                   merged.tree.node.text.size.scale = 2,
255
                   merged.tree.cell.frac.ci = FALSE,
256
                   # branch-based consensus tree parameters
257
                   merged.tree.clone.as.branch = TRUE,
258
                   mtcab.event.sep.char = ',',
259
                   mtcab.branch.text.size = 1,
260
                   mtcab.branch.width = 0.75,
261
                   mtcab.node.size = 3,
262
                   mtcab.node.label.size = 1,
263
                   mtcab.node.text.size = 1.5,
264
                   # cellular population parameters
265
                   cell.plot = TRUE,
266
                   num.cells = 100,
267
                   cell.border.size = 0.25,
268
                   cell.border.color = 'black',
269
                   clone.grouping = 'horizontal',
270
                   #meta-parameters
271
                   scale.monoclonal.cell.frac = TRUE,
272
                   show.score = FALSE,
273
                   cell.frac.ci = TRUE,
274
                   disable.cell.frac = TRUE,
275
                   # output figure parameters
276
                   out.dir = './clonevol/P1/',
277
                   out.format = 'pdf',
278
                   overwrite.output = TRUE,
279
                   width = 10,
280
                   height = 4,
281
                   # vector of width scales for each panel from left to right
282
                   panel.widths = c(1.5,2.5,1.5,2.5,2))
283
284
285
##generating fish plot
286
f<- generateFishplotInputs(results = res)
287
fishes=createFishPlotObjects(f)
288
289
290
samples2<- c("P4_1\nPretreat1\nLN_left_axilla", "P4_2\nPretreat2\nLN_abdominal")
291
292
pdf('./clonevol/P4/P4_fish_200x_pyclone_anno_loc.pdf', width=14, height=7)
293
for (i in 1:length(fishes)){
294
  
295
  fish = layoutClones(fishes[[i]])
296
  fish = setCol(fish,f$clonevol.clone.colors)
297
  fishPlot(fish,shape="spline", title.btm="P1", cex.title=0.7,cex.vlab = 1.4,
298
           vlines=seq(1, length(samples2)), vlab=samples2, pad.left=0.5)
299
}
300
dev.off()
301
302
303
304
##other plots
305
306
pdf("./clonevol/P4/P4_box.pdf", width=3, height=3,useDingbats = FALSE, title='')
307
pp<-plot.variant.clusters(vafs_used,
308
                          cluster.col.name = 'cluster',
309
                          show.cluster.size = FALSE,
310
                          cluster.size.text.color = 'blue',
311
                          vaf.col.names = samples,
312
                          vaf.limits = 70,
313
                          sample.title.size = 20,
314
                          violin = FALSE,
315
                          box = FALSE,
316
                          jitter = TRUE,
317
                          jitter.shape = 1,
318
                          
319
                          jitter.size = 3,
320
                          jitter.alpha = 1,
321
                          jitter.center.method = 'median',
322
                          jitter.center.size = 1,
323
                          jitter.center.color = 'darkgray',
324
                          jitter.center.display.value = 'none',
325
                          highlight = 'is.driver',
326
                          highlight.shape = 21,
327
                          highlight.color = 'blue',
328
                          highlight.fill.color = 'green',
329
                          highlight.note.col.name = 'gene',
330
                          highlight.note.size = 2,
331
                          order.by.total.vaf = FALSE)
332
333
dev.off()
334
335
336
337
pdf('./clonevol/P4/P4_flow.pdf')
338
plot.cluster.flow(vafs_used, vaf.col.names = samples,
339
                  sample.names = c('Pretreat1', 'Pretreat2'))
340
dev.off()
341
342
####checking coverage for cluster 2 in P1_1
343
344
c1<- pyclone_out%>%filter(cluster_id=="1")
345
346
P4_1<- read.table(file="input_pyclone_271018_newpara/200X/GCF0150-0004-N01_200X/GCF0150-0004-T01.txt",sep="\t",header=T)
347
P4_2<- read.table(file="input_pyclone_271018_newpara/200X/GCF0150-0004-N01_200X/GCF0150-0004-T02.txt",sep="\t",header=T)
348
Pcase<- rbind(P4_1, P4_2)
349
350
c1inP4_1<- subset(P4_1, mutation_id %in% unique(c1$mutation_id))
351
352
########
353
#min (dp) =min(c1inP4_1$var_counts+c1inP4_1$ref_counts)=273
354
355
#max (dp) =max(c1inP4_1$var_counts+c1inP4_1$ref_counts)=648
356
#median (dp) =median(c1inP4_1$var_counts+c1inP4_1$ref_counts)=380
357
#mean (dp) =mean(c1inP4_1$var_counts+c1inP4_1$ref_counts)=417
358
359
library(ggplot2)
360
361
362
363
pdf("clonevol/P4/coverage.pdf")
364
ggplot(Pcase, aes(x=var_counts+ref_counts, fill=sample))+geom_histogram()
365
366
dev.off()
367