a b/Figures/clonevol/clonevol_P75_021118.R
1
library(dplyr)
2
library(plyr)
3
library(clonevol)
4
library(fishplot)
5
library(reshape)
6
####step1: Using mpileup to extract coverage for each site. (Prepared 13/08/18)
7
8
####step2: Prepare input for pyclone, using segments from ASCAT
9
10
####step3: Run Pyclone twice. The first run is to identify founding cluster, 
11
####       the second run adding --tumour_content based on the cellular prevalence for each biopsy (21/08/18)
12
13
###step4: using clonevol to build evolution tree and model (in this script) (22/08/18)
14
15
###       using Fishplot to visulaize
16
17
###P75, 4 biopsies, nFL
18
#T1:LN_right_neck
19
#T2:LN_right_inguinal
20
#T3:LN_left_supraclav
21
#T4:EN_right_subcutaneous
22
23
setwd("G:/FL_exome_final/fl_latest/11_pyclone/BED_bam/counts_new/pyclone_output_301018/")
24
25
pyclone_out<- read.table(file="ouput_pyclone/output_200X_i2/P75_200X_i2/tables/loci.tsv",sep="\t",header=T)
26
27
28
mutations<-cast(pyclone_out[,1:4], mutation_id~sample_id, value="cellular_prevalence")
29
id<-unique(pyclone_out[, c(1,3)] )
30
31
P_case<- merge(id, mutations,by="mutation_id")
32
33
vafs = data.frame(cluster=P_case$cluster_id,
34
                  T1_vaf=(P_case$`GCF0150-0075-T01`/2*100),
35
                  T2_vaf=(P_case$`GCF0150-0075-T02`/2*100),
36
                  T3_vaf=(P_case$`GCF0150-0075-T03`/2*100),
37
                  T4_vaf=(P_case$`GCF0150-0075-T04`/2*100),
38
                  stringsAsFactors=F)
39
40
vafs$cluster<- vafs$cluster+1
41
42
###needs manully check density plot to identify which cluster is founding cluster
43
##clonevol requires founding cluster=1
44
##in this case founding cluster is #1, 
45
46
max_id<- max(vafs$cluster)+1
47
48
vafs$cluster[vafs$cluster==1]<-max_id
49
vafs$cluster[vafs$cluster==3]<-1
50
vafs$cluster[vafs$cluster==max_id]<-3
51
52
53
54
samples<-c("P75_1","P75_2", "P75_3", "P75_4")
55
samples1<-c("P75_1\nPretreat\nLN_right_neck","P75_2\nRelapsed_1\nLN_right_inguinal", 
56
            "P75_3\nRelapsed_2\nLN_left_supraclav", "P75_4\nRelapsed_3\nEN_right_subcutaneous")
57
58
names(vafs)[2:5] = samples 
59
##step 2: run infer.clonal.models, run twice: 1. include all cluster. 
60
###########2. manual review density plot and exlcude cluster with small number of mutations
61
62
63
dir.create("./clonevol/P75_final")
64
##
65
66
67
##first: use all clusters, no consensus models
68
69
#vafs$P44_4[vafs$cluster==7]<-vafs$P44_4[vafs$cluster==7]-1
70
71
res = infer.clonal.models(variants=vafs, cluster.col.name="cluster", vaf.col.names=samples,
72
                          
73
                          subclonal.test="bootstrap", subclonal.test.model="non-parametric",
74
                          founding.cluster=1,
75
                          cluster.center="mean", num.boots=1000,
76
                          min.cluster.vaf=0.01, sum.p=0.01, alpha=0.01)
77
res = infer.clonal.models(variants=vafs, cluster.col.name="cluster", vaf.col.names=samples,
78
                          subclonal.test="bootstrap", subclonal.test.model="non-parametric",
79
                          founding.cluster=1,  
80
                          cluster.center="mean", num.boots=1000,ignore.clusters = c(2,8:11),
81
                          min.cluster.vaf=0.01,
82
                          sum.p=0.01, alpha=0.01)
83
84
85
86
87
88
89
#Finding consensus models across samples...
90
#Found  0 consensus model(s)
91
#Found 0 consensus model(s)
92
93
##second: ignore cluster ... consensus model based on manual review
94
95
96
#removing the clusters
97
98
vafs_used<- subset(vafs, !cluster %in% c(2,8:11))
99
100
#####rename clusters, cluster identities must be contiguous integer starting at 1
101
102
vafs_used$cluster[vafs_used$cluster==7]<-2
103
104
res = infer.clonal.models(variants=vafs_used, cluster.col.name="cluster", vaf.col.names=samples,
105
                          subclonal.test="bootstrap", subclonal.test.model="non-parametric",
106
                          founding.cluster=1,
107
                          cluster.center="mean", num.boots=1000,
108
                          min.cluster.vaf=0.01, sum.p=0.01, alpha=0.01)
109
110
111
112
res<-convert.consensus.tree.clone.to.branch(res, branch.scale = 'sqrt')
113
114
pdf("./clonevol/P75_final/P75_trees.pdf", useDingbats = FALSE)
115
plot.all.trees.clone.as.branch(res, branch.width = 0.5, node.size = 1, node.label.size = 0.5)
116
dev.off()
117
118
119
plot.clonal.models(res,
120
                   # box plot parameters
121
                   box.plot = TRUE,
122
                   fancy.boxplot = TRUE,
123
                   fancy.variant.boxplot.highlight = 'is.driver',
124
                   fancy.variant.boxplot.highlight.shape = 21,
125
                   fancy.variant.boxplot.highlight.fill.color = 'red',
126
                   fancy.variant.boxplot.highlight.color = 'black',
127
                   fancy.variant.boxplot.highlight.note.col.name = 'gene',
128
                   fancy.variant.boxplot.highlight.note.color = 'blue',
129
                   fancy.variant.boxplot.highlight.note.size = 2,
130
                   fancy.variant.boxplot.jitter.alpha = 1,
131
                   fancy.variant.boxplot.jitter.center.color = 'grey50',
132
                   fancy.variant.boxplot.base_size = 12,
133
                   fancy.variant.boxplot.plot.margin = 1,
134
                   fancy.variant.boxplot.vaf.suffix = '.VAF',
135
                   # bell plot parameters
136
                   clone.shape = 'bell',
137
                   bell.event = TRUE,
138
                   bell.event.label.color = 'blue',
139
                   bell.event.label.angle = 60,
140
                   clone.time.step.scale = 1,
141
                   bell.curve.step = 2,
142
                   # node-based consensus tree parameters
143
                   merged.tree.plot = TRUE,
144
                   tree.node.label.split.character = NULL,
145
                   tree.node.shape = 'circle',
146
                   tree.node.size = 30,
147
                   tree.node.text.size = 0.5,
148
                   merged.tree.node.size.scale = 1.25,
149
                   merged.tree.node.text.size.scale = 2.5,
150
                   merged.tree.cell.frac.ci = FALSE,
151
                   # branch-based consensus tree parameters
152
                   merged.tree.clone.as.branch = TRUE,
153
                   mtcab.event.sep.char = ',',
154
                   mtcab.branch.text.size = 1,
155
                   mtcab.branch.width = 0.75,
156
                   mtcab.node.size = 3,
157
                   mtcab.node.label.size = 1,
158
                   mtcab.node.text.size = 1.5,
159
                   # cellular population parameters
160
                   cell.plot = TRUE,
161
                   num.cells = 100,
162
                   cell.border.size = 0.25,
163
                   cell.border.color = 'black',
164
                   clone.grouping = 'horizontal',
165
                   #meta-parameters
166
                   scale.monoclonal.cell.frac = TRUE,
167
                   show.score = FALSE,
168
                   cell.frac.ci = TRUE,
169
                   disable.cell.frac = FALSE,
170
                   # output figure parameters
171
                   out.dir = './clonevol/P75_final/',
172
                   out.format = 'pdf',
173
                   overwrite.output = TRUE,
174
                   width = 12,
175
                   height = 7,
176
                   # vector of width scales for each panel from left to right
177
                   panel.widths = c(3,4,2,4,2))
178
179
180
plot.clonal.models(res,
181
                   # box plot parameters
182
                   box.plot = TRUE,
183
                   fancy.boxplot = TRUE,
184
                   fancy.variant.boxplot.highlight = 'is.driver',
185
                   fancy.variant.boxplot.highlight.shape = 21,
186
                   fancy.variant.boxplot.highlight.fill.color = 'red',
187
                   fancy.variant.boxplot.highlight.color = 'black',
188
                   fancy.variant.boxplot.highlight.note.col.name = 'gene',
189
                   fancy.variant.boxplot.highlight.note.color = 'blue',
190
                   fancy.variant.boxplot.highlight.note.size = 2,
191
                   fancy.variant.boxplot.jitter.alpha = 1,
192
                   fancy.variant.boxplot.jitter.center.color = 'grey50',
193
                   fancy.variant.boxplot.base_size = 12,
194
                   fancy.variant.boxplot.plot.margin = 1,
195
                   fancy.variant.boxplot.vaf.suffix = '.VAF',
196
                   # bell plot parameters
197
                   clone.shape = 'bell',
198
                   bell.event = TRUE,
199
                   bell.event.label.color = 'blue',
200
                   bell.event.label.angle = 60,
201
                   clone.time.step.scale = 1,
202
                   bell.curve.step = 2,
203
                   # node-based consensus tree parameters
204
                   merged.tree.plot = TRUE,
205
                   tree.node.label.split.character = NULL,
206
                   tree.node.shape = 'circle',
207
                   tree.node.size = 30,
208
                   tree.node.text.size = 0.5,
209
                   merged.tree.node.size.scale = 1.25,
210
                   merged.tree.node.text.size.scale = 2.5,
211
                   merged.tree.cell.frac.ci = FALSE,
212
                   # branch-based consensus tree parameters
213
                   merged.tree.clone.as.branch = TRUE,
214
                   mtcab.event.sep.char = ',',
215
                   mtcab.branch.text.size = 1,
216
                   mtcab.branch.width = 0.75,
217
                   mtcab.node.size = 3,
218
                   mtcab.node.label.size = 1,
219
                   mtcab.node.text.size = 1.5,
220
                   # cellular population parameters
221
                   cell.plot = TRUE,
222
                   num.cells = 100,
223
                   cell.border.size = 0.25,
224
                   cell.border.color = 'black',
225
                   clone.grouping = 'horizontal',
226
                   #meta-parameters
227
                   scale.monoclonal.cell.frac = TRUE,
228
                   show.score = FALSE,
229
                   cell.frac.ci = TRUE,
230
                   disable.cell.frac = TRUE,
231
                   # output figure parameters
232
                   out.dir = './clonevol/P75_final/',
233
                   out.format = 'pdf',
234
                   overwrite.output = TRUE,
235
                   width = 12,
236
                   height = 7,
237
                   # vector of width scales for each panel from left to right
238
                   panel.widths = c(3,4,2,4,2))
239
240
241
##generating fish plot
242
f<- generateFishplotInputs(results = res)
243
fishes=createFishPlotObjects(f)
244
245
246
pdf('./clonevol/P75_final/P75_fish_200x_pyclone_anno.pdf', width=14, height=7)
247
for (i in 1:length(fishes)){
248
  
249
  fish = layoutClones(fishes[[i]])
250
  fish = setCol(fish,f$clonevol.clone.colors)
251
  fishPlot(fish,shape="spline", title.btm="P1", cex.title=0.7,cex.vlab = 1.4,
252
           vlines=seq(1, length(samples1)), vlab=samples1, pad.left=0.5)
253
}
254
255
dev.off()
256
257
258
##other plots
259
260
pdf("./clonevol/P75_final/P75_box.pdf", width=3, height=3,useDingbats = FALSE, title='')
261
pp<-plot.variant.clusters(vafs_used,
262
                          cluster.col.name = 'cluster',
263
                          show.cluster.size = FALSE,
264
                          cluster.size.text.color = 'blue',
265
                          vaf.col.names = samples,
266
                          vaf.limits = 70,
267
                          sample.title.size = 20,
268
                          violin = FALSE,
269
                          box = FALSE,
270
                          jitter = TRUE,
271
                          jitter.shape = 1,
272
                          
273
                          jitter.size = 3,
274
                          jitter.alpha = 1,
275
                          jitter.center.method = 'median',
276
                          jitter.center.size = 1,
277
                          jitter.center.color = 'darkgray',
278
                          jitter.center.display.value = 'none',
279
                          highlight = 'is.driver',
280
                          highlight.shape = 21,
281
                          highlight.color = 'blue',
282
                          highlight.fill.color = 'green',
283
                          highlight.note.col.name = 'gene',
284
                          highlight.note.size = 2,
285
                          order.by.total.vaf = FALSE)
286
287
dev.off()
288
289
290
plot.pairwise(vafs_used, col.names = samples,
291
              out.prefix = './clonevol/P75_final/P75_variants.pairwise.plot')
292
293
294
pdf('./clonevol/P75_final/P75_flow.pdf')
295
plot.cluster.flow(vafs_used, vaf.col.names = samples,
296
                  sample.names = samples1)
297
dev.off()
298
299
300
Pcase<-do.call("rbind", lapply( list.files("input_pyclone_271018_newpara/200X/GCF0150-0075-N01_200X/",full=TRUE),
301
                                read.table, header=TRUE, sep="\t"))
302
library(ggplot2)
303
304
305
306
pdf("clonevol/P75_final/coverage.pdf")
307
ggplot(Pcase, aes(x=(var_counts+ref_counts)))+
308
  geom_histogram(position="dodge")+
309
  facet_grid(~sample)
310
311
dev.off()
312
save.image("G:/FL_resequncing/FL_exome_final/fl_latest/11_pyclone/BED_bam/counts_new/pyclone_output_301018/P75.RData")