Switch to unified view

a b/Figures/clonevol/clonevol_P46_191118.R
1
library(dplyr)
2
library(plyr)
3
library(clonevol)
4
library(fishplot)
5
library(reshape)
6
7
####coverage: from reseq BAM files 
8
####step1: Using mpileup to extract coverage for each site. (Prepared 27/10/18), default parameter
9
10
####step2: Prepare input for pyclone, using segments from ASCAT
11
12
####step3: Run Pyclone twice. The first run is to identify founding cluster, 
13
####       the second run adding --tumour_content based on the cellular prevalence for each biopsy (29/10/18)
14
15
###step4: using clonevol to build evolution tree and model (in this script) (30/10/18)
16
17
###       using Fishplot to visulaize
18
19
###       using Fishplot to visulaize
20
21
###P46, 3 biopsies, nFL
22
#T1:LN_left_inguinal
23
#T2:LN_left_neck
24
#T3:LN_left_neck
25
26
27
28
29
30
31
setwd("G:/FL_resequncing/FL_exome_final/fl_latest/11_pyclone/BED_bam/counts_new/pyclone_output_301018")
32
33
pyclone_out<- read.table(file="ouput_pyclone/output_200X_i2/P46_200X_i2/tables/loci.tsv",sep="\t",header=T)
34
mutations<-cast(pyclone_out[,1:4], mutation_id~sample_id, value="cellular_prevalence")
35
id<-unique(pyclone_out[, c(1,3)] )
36
37
P_case<- merge(id, mutations,by="mutation_id")
38
39
vafs = data.frame(cluster=P_case$cluster_id,
40
                  T1_vaf=(P_case$`GCF0150-0046-T01`/2)*100,
41
                  T2_vaf=(P_case$`GCF0150-0046-T02`/2)*100,
42
                  T3_vaf=(P_case$`GCF0150-0046-T03`/2)*100,
43
                  stringsAsFactors=F)
44
45
vafs$cluster<- vafs$cluster+1
46
47
samples<-c("P46_1","P46_2", "P46_3")
48
samples1<-c("P46_1\nPrimary","P46_2\nRelapsed_1", "P46_3\nRelapsed_2")
49
samples2<-c("P46_1\nPrimary\nLN_left_inguinal","P46_2\nRelapsed_1\nLN_left_neck", "P46_3\nRelapsed_2\nLN_left_neck")
50
51
max_id<- max(vafs$cluster)+1
52
53
vafs$cluster[vafs$cluster==1]<-max_id
54
vafs$cluster[vafs$cluster==4]<-1
55
vafs$cluster[vafs$cluster==max_id]<-4
56
57
58
59
###needs manully check density plot to identify which cluster is founding cluster
60
##clonevol requires founding cluster=1
61
62
63
64
65
names(vafs)[2:4] = samples 
66
##step 2: run infer.clonal.models, run twice: 1. include all cluster. 
67
###########2. manual review density plot and exlcude cluster with small number of mutations
68
##step 2: run infer.clonal.models, run twice: 1. include all cluster. 
69
###########2. manual review density plot and exlcude cluster with small number of mutations
70
71
72
dir.create("./clonevol/P46")
73
##
74
75
76
##first: use all clusters, no consensus models
77
78
vafs$P46_3[vafs$cluster=="1"]<-vafs$P46_3[vafs$cluster=="1"]+ 0.5
79
#vafs$P27_2[vafs$cluster=="1"]<-vafs$P27_2[vafs$cluster=="1"]+ 0.5
80
#vafs$P27_3[vafs$cluster=="3"]<-vafs$P27_3[vafs$cluster=="3"]+1
81
82
res = infer.clonal.models(variants=vafs, cluster.col.name="cluster", vaf.col.names=samples,
83
                          
84
                          subclonal.test="bootstrap", subclonal.test.model="non-parametric",
85
                          founding.cluster=1,
86
                          cluster.center="mean", num.boots=1000,
87
                          min.cluster.vaf=0.01, sum.p=0.01, alpha=0.01)
88
89
res = infer.clonal.models(variants=vafs, cluster.col.name="cluster", vaf.col.names=samples,
90
                          
91
                          subclonal.test="bootstrap", subclonal.test.model="non-parametric",
92
                          founding.cluster=1,ignore.clusters = c(6,8,10,11:14),
93
                          cluster.center="mean", num.boots=1000,
94
                          min.cluster.vaf=0.01, sum.p=0.01, alpha=0.01)
95
96
vafs_used<- subset(vafs, !cluster %in% c(6,8,10,11:14))
97
vafs_used$cluster[vafs_used$cluster==9]<-6
98
99
res = infer.clonal.models(variants=vafs_used, cluster.col.name="cluster", vaf.col.names=samples,
100
                          
101
                          subclonal.test="bootstrap", subclonal.test.model="non-parametric",
102
                          founding.cluster=1,
103
                          cluster.center="mean", num.boots=1000,
104
                          min.cluster.vaf=0.01, sum.p=0.01, alpha=0.01)
105
106
107
108
109
res<-convert.consensus.tree.clone.to.branch(res, branch.scale = 'sqrt')
110
111
pdf("./clonevol/P46/P46_trees.pdf", useDingbats = FALSE)
112
plot.all.trees.clone.as.branch(res, branch.width = 0.5, node.size = 1, node.label.size = 0.5)
113
dev.off()
114
115
116
plot.clonal.models(res,
117
                   # box plot parameters
118
                   box.plot = TRUE,
119
                   fancy.boxplot = TRUE,
120
                   fancy.variant.boxplot.highlight = 'is.driver',
121
                   fancy.variant.boxplot.highlight.shape = 21,
122
                   fancy.variant.boxplot.highlight.fill.color = 'red',
123
                   fancy.variant.boxplot.highlight.color = 'black',
124
                   fancy.variant.boxplot.highlight.note.col.name = 'gene',
125
                   fancy.variant.boxplot.highlight.note.color = 'blue',
126
                   fancy.variant.boxplot.highlight.note.size = 2,
127
                   fancy.variant.boxplot.jitter.alpha = 1,
128
                   fancy.variant.boxplot.jitter.center.color = 'grey50',
129
                   fancy.variant.boxplot.base_size = 12,
130
                   fancy.variant.boxplot.plot.margin = 1,
131
                   fancy.variant.boxplot.vaf.suffix = '.VAF',
132
                   # bell plot parameters
133
                   clone.shape = 'bell',
134
                   bell.event = TRUE,
135
                   bell.event.label.color = 'blue',
136
                   bell.event.label.angle = 60,
137
                   clone.time.step.scale = 1,
138
                   bell.curve.step = 2,
139
                   # node-based consensus tree parameters
140
                   merged.tree.plot = TRUE,
141
                   tree.node.label.split.character = NULL,
142
                   tree.node.shape = 'circle',
143
                   tree.node.size = 30,
144
                   tree.node.text.size = 0.5,
145
                   merged.tree.node.size.scale = 1.25,
146
                   merged.tree.node.text.size.scale = 2,
147
                   merged.tree.cell.frac.ci = FALSE,
148
                   # branch-based consensus tree parameters
149
                   merged.tree.clone.as.branch = TRUE,
150
                   mtcab.event.sep.char = ',',
151
                   mtcab.branch.text.size = 1,
152
                   mtcab.branch.width = 0.75,
153
                   mtcab.node.size = 3,
154
                   mtcab.node.label.size = 1,
155
                   mtcab.node.text.size = 1.5,
156
                   # cellular population parameters
157
                   cell.plot = TRUE,
158
                   num.cells = 100,
159
                   cell.border.size = 0.25,
160
                   cell.border.color = 'black',
161
                   clone.grouping = 'horizontal',
162
                   #meta-parameters
163
                   scale.monoclonal.cell.frac = TRUE,
164
                   show.score = FALSE,
165
                   cell.frac.ci = TRUE,
166
                   disable.cell.frac = FALSE,
167
                   # output figure parameters
168
                   out.dir = './clonevol/P46/',
169
                   out.format = 'pdf',
170
                   overwrite.output = TRUE,
171
                   width = 10,
172
                   height = 4,
173
                   # vector of width scales for each panel from left to right
174
                   panel.widths = c(1.5,2.5,1.5,2.5,2))
175
176
###removing cell.frac annotation
177
178
plot.clonal.models(res,
179
                   # box plot parameters
180
                   box.plot = TRUE,
181
                   fancy.boxplot = TRUE,
182
                   fancy.variant.boxplot.highlight = 'is.driver',
183
                   fancy.variant.boxplot.highlight.shape = 21,
184
                   fancy.variant.boxplot.highlight.fill.color = 'red',
185
                   fancy.variant.boxplot.highlight.color = 'black',
186
                   fancy.variant.boxplot.highlight.note.col.name = 'gene',
187
                   fancy.variant.boxplot.highlight.note.color = 'blue',
188
                   fancy.variant.boxplot.highlight.note.size = 2,
189
                   fancy.variant.boxplot.jitter.alpha = 1,
190
                   fancy.variant.boxplot.jitter.center.color = 'grey50',
191
                   fancy.variant.boxplot.base_size = 12,
192
                   fancy.variant.boxplot.plot.margin = 1,
193
                   fancy.variant.boxplot.vaf.suffix = '.VAF',
194
                   # bell plot parameters
195
                   clone.shape = 'bell',
196
                   bell.event = TRUE,
197
                   bell.event.label.color = 'blue',
198
                   bell.event.label.angle = 60,
199
                   clone.time.step.scale = 1,
200
                   bell.curve.step = 2,
201
                   # node-based consensus tree parameters
202
                   merged.tree.plot = TRUE,
203
                   tree.node.label.split.character = NULL,
204
                   tree.node.shape = 'circle',
205
                   tree.node.size = 30,
206
                   tree.node.text.size = 0.5,
207
                   merged.tree.node.size.scale = 1.25,
208
                   merged.tree.node.text.size.scale = 2,
209
                   merged.tree.cell.frac.ci = FALSE,
210
                   # branch-based consensus tree parameters
211
                   merged.tree.clone.as.branch = TRUE,
212
                   mtcab.event.sep.char = ',',
213
                   mtcab.branch.text.size = 1,
214
                   mtcab.branch.width = 0.75,
215
                   mtcab.node.size = 3,
216
                   mtcab.node.label.size = 1,
217
                   mtcab.node.text.size = 1.5,
218
                   # cellular population parameters
219
                   cell.plot = TRUE,
220
                   num.cells = 100,
221
                   cell.border.size = 0.25,
222
                   cell.border.color = 'black',
223
                   clone.grouping = 'horizontal',
224
                   #meta-parameters
225
                   scale.monoclonal.cell.frac = TRUE,
226
                   show.score = FALSE,
227
                   cell.frac.ci = TRUE,
228
                   disable.cell.frac = TRUE,
229
                   # output figure parameters
230
                   out.dir = './clonevol/P46/',
231
                   out.format = 'pdf',
232
                   overwrite.output = TRUE,
233
                   width = 10,
234
                   height = 4,
235
                   # vector of width scales for each panel from left to right
236
                   panel.widths = c(1.5,2.5,1.5,2.5,2))
237
238
239
##generating fish plot
240
f<- generateFishplotInputs(results = res)
241
fishes=createFishPlotObjects(f)
242
243
244
pdf('./clonevol/P46/P46_fish_200x_pyclone_anno_loc.pdf', width=14, height=7)
245
for (i in 1:(length(fishes))){
246
  
247
  fish = layoutClones(fishes[[i]])
248
  fish = setCol(fish,f$clonevol.clone.colors)
249
  fishPlot(fish,shape="bezier",  cex.title=0.7,cex.vlab = 1.4,
250
           vlines=seq(1, length(samples2)), vlab=samples2, pad.left=0.5)
251
}
252
dev.off()
253
254
255
256
pdf("./clonevol/P46/P46_box.pdf", width=3, height=3,useDingbats = FALSE, title='')
257
pp<-plot.variant.clusters(vafs_used,
258
                          cluster.col.name = 'cluster',
259
                          show.cluster.size = FALSE,
260
                          cluster.size.text.color = 'blue',
261
                          vaf.col.names = samples,
262
                          vaf.limits = 70,
263
                          sample.title.size = 20,
264
                          violin = FALSE,
265
                          box = FALSE,
266
                          jitter = TRUE,
267
                          jitter.shape = 1,
268
                          
269
                          jitter.size = 3,
270
                          jitter.alpha = 1,
271
                          jitter.center.method = 'median',
272
                          jitter.center.size = 1,
273
                          jitter.center.color = 'darkgray',
274
                          jitter.center.display.value = 'none',
275
                          highlight = 'is.driver',
276
                          highlight.shape = 21,
277
                          highlight.color = 'blue',
278
                          highlight.fill.color = 'green',
279
                          highlight.note.col.name = 'gene',
280
                          highlight.note.size = 2,
281
                          order.by.total.vaf = FALSE)
282
283
dev.off()
284
285
286
plot.pairwise(vafs_used, col.names = samples,
287
              out.prefix = './clonevol/P46/P46_variants.pairwise.plot')
288
289
290
pdf('./clonevol/P46/P46_flow.pdf')
291
plot.cluster.flow(vafs, vaf.col.names = samples,
292
                  sample.names = c('Primary', 'Relapsed_1', "Relapsed_2"))
293
dev.off()
294
295
296
####checking coverage 
297
Pcase<-do.call("rbind", lapply( list.files("input_pyclone_271018_newpara/200X/GCF0150-0046-N01_200X/",full=TRUE),
298
                                read.table, header=TRUE, sep="\t"))
299
300
301
302
303
########
304
#min (dp) =min(c1inP4_1$var_counts+c1inP4_1$ref_counts)=273
305
306
#max (dp) =max(c1inP4_1$var_counts+c1inP4_1$ref_counts)=648
307
#median (dp) =median(c1inP4_1$var_counts+c1inP4_1$ref_counts)=380
308
#mean (dp) =mean(c1inP4_1$var_counts+c1inP4_1$ref_counts)=417
309
310
library(ggplot2)
311
312
313
314
pdf("clonevol/P46/coverage.pdf")
315
ggplot(Pcase, aes(x=(var_counts+ref_counts)))+
316
  geom_histogram(position="dodge")+
317
  facet_grid(~sample)
318
319
dev.off()
320
321
save.image("G:/FL_resequncing/FL_exome_final/fl_latest/11_pyclone/BED_bam/counts_new/pyclone_output_301018/P46.RData")
322
323