Diff of /Figures/Figure1_drivers.R [000000] .. [4ff317]

Switch to unified view

a b/Figures/Figure1_drivers.R
1
2
################################################
3
#  File Name:Figure1_drivers.R
4
#  Author: Baoyan Bai
5
6
7
#################################################
8
9
10
########Manually plot driver genes (DO NOT USE GENVISR)
11
########date: 20211012, plot 51 genes with VAF adjusted for tumor% >=0.15
12
########add clinic features: group, POD24 infor
13
########genes ordered based on mutation frequency
14
15
library(maftools)
16
library(plyr)
17
library(dplyr)
18
library(pheatmap)
19
library(xlsx)
20
library(reshape)
21
library(readxl)
22
library(ggplot2)
23
library(RColorBrewer)
24
library(egg)
25
setwd("~/Desktop/FL/fl_latest")
26
flreseq<- read.table(file="16_combined/fl44_reseq_final_060619.txt",sep="\t",header = T, 
27
                     stringsAsFactors = F,quote="")
28
29
drivers<- read.table(file="28_drivergenes/51drivers.txt",header=T,stringsAsFactors = F)
30
31
32
###clinical information
33
34
clinic_biopsy<- read.table(file="2_clinic_infor/FL94biopsies_Sigve_20211125.txt",sep="\t",header=T,stringsAsFactors = F)
35
clinic_biopsy$type[clinic_biopsy$type=="pre&transform"]<- "transform"
36
37
38
39
40
clinic_biopsy_redced<- clinic_biopsy%>%select(Sample_name, type, group, POD24.1)
41
names(clinic_biopsy_redced)<- c("sample", "type", "group", "POD24")
42
clinic_biopsy_redced.melt<- melt(clinic_biopsy_redced, id.vars = "sample")
43
clinic_biopsy_redced.melt$variable<- factor(clinic_biopsy_redced.melt$variable, levels=c("group","POD24","type"))
44
45
####clinic information
46
47
48
49
50
###making data.frame for waterfall plot
51
52
plot_df_all<- unique(flreseq%>%filter(Region=="Coding"& VARIANT!="Silent")%>%select(SAMPLE, SYMBOL,VARIANT))
53
names(plot_df_all)<- c("sample", "gene", "variant_class")
54
plot_df_all$variant_class[plot_df_all$variant_class=="Frame_Shift_Ins" |plot_df_all$variant_class=="Frame_Shift_Del"]<- "Frame_Shift"
55
plot_df_all$variant_class[plot_df_all$variant_class=="In_Frame_Ins" |plot_df_all$variant_class=="In_Frame_Del"]<- "In_Frame"
56
57
sample_order<- c(as.character(clinic_biopsy_redced$sample[clinic_biopsy_redced$group=="nFL"]),
58
                 as.character(clinic_biopsy_redced$sample[clinic_biopsy_redced$group=="tFL"& clinic_biopsy_redced$type!="transform"]) ,
59
                 as.character(clinic_biopsy_redced$sample[clinic_biopsy_redced$group=="tFL" & clinic_biopsy_redced$type=="transform"]))
60
61
remove_sample<- c("P34_1", "P35_1", "P58_2")
62
sample_order_94<- setdiff(sample_order, remove_sample )
63
64
65
66
67
68
69
70
plot_df_all$variant_class[plot_df_all$variant_class=="Translation_Start_Site" |plot_df_all$variant_class=="Nonstop_Mutation"]<- "Translation_Site"
71
72
plot_df_all$variant_class[plot_df_all$variant_class=="Missense_Mutation"]<- "Missense"
73
74
plot_df_all$variant_class[plot_df_all$variant_class=="Nonsense_Mutation"]<- "Nonsense"
75
76
plot_df_all$variant_class<- factor(plot_df_all$variant_class, 
77
                                   levels=c("Frame_Shift","Nonsense", "Splice_Site", "In_Frame", "Translation_Site", "Missense"))
78
79
##########
80
plot_df<- ddply(plot_df_all, .(sample, gene), mutate, variant_class_correct=min(as.integer( variant_class )))
81
82
83
84
plot_df$variant_1<-levels(plot_df$variant_class)[plot_df$variant_class_correct]
85
86
87
plot_df.reduced<- unique(plot_df%>%select(sample, gene, variant_1))
88
plot_df.cast<- cast(plot_df.reduced, gene~ sample)
89
90
plot_df.melt<- melt(plot_df.cast, id.vars="gene")
91
92
plot_df.drivers<- subset(plot_df.melt, gene %in%drivers$SYMBOL)
93
94
95
plot_df.drivers$sample<-factor(plot_df.drivers$sample, levels=sample_order_94)
96
plot_df.drivers$value<- as.character(plot_df.drivers$value)
97
98
99
plot_df.drivers$value<- factor(plot_df.drivers$value, 
100
                               levels=c("Frame_Shift", "Nonsense","Splice_Site", "In_Frame", "Translation_Site","Missense"))
101
102
103
#####sort genes based on patient-level frequency
104
105
plot_df_pat<- unique(flreseq%>%filter(Region=="Coding"& VARIANT!="Silent")%>%select(Patient, SYMBOL)%>% filter(SYMBOL %in% drivers$SYMBOL))
106
107
###########
108
109
drivers.sum.pat<- ddply(plot_df_pat, .(SYMBOL), summarise,, Patient=length(Patient))
110
111
drivers.sum.pat$SYMBOL<- reorder(drivers.sum.pat$SYMBOL, drivers.sum.pat$Patient)
112
113
114
115
###
116
plot_df.drivers$gene<- factor(plot_df.drivers$gene, levels=levels(drivers.sum.pat$SYMBOL))
117
118
119
mutation<-ggplot(plot_df.drivers, aes(x=sample, y=gene,fill=value))+geom_tile(color="gray", size=0.1)+
120
  scale_fill_manual(values=c("#e7298a","firebrick","navyblue","#1b9e77", "#7570b3","#66a61e","white"),na.translate=FALSE)+scale_x_discrete(drop=FALSE)+
121
  theme(panel.background = element_blank(),
122
        legend.text = element_text(size=14,face="bold"),
123
        legend.title = element_text(size=16,face="bold"),
124
        axis.ticks.x=element_blank(),
125
        axis.ticks.y=element_blank(),
126
        legend.position = "none",
127
        axis.text.x =element_blank(),
128
        axis.text.y =element_text(size=10,hjust =0),
129
        plot.margin = unit(c(0.5,0.5,-0.2,0.5), "cm"))+ xlab("")+ylab("")
130
guides(fill=guide_legend(title="Mutation Type"))
131
132
133
134
#####frequency plot
135
pat<- unique(clinic_biopsy%>% select(Patient, group))
136
137
drivers.pat.clinic<-merge(plot_df_pat, pat,by="Patient")
138
drivers.pat.clinic$SYMBOL<- factor(drivers.pat.clinic$SYMBOL, levels=levels(plot_df.drivers$gene))
139
drivers.pat.clinic$group<- factor(drivers.pat.clinic$group, levels=c("tFL", "nFL"))
140
141
plot_freq<- ggplot(drivers.pat.clinic, aes(x=SYMBOL, fill=group))+ geom_bar()+  scale_fill_manual(values=c("red","blue"))+
142
  coord_flip()+theme_bw()+ theme(axis.text.y = element_blank() ,
143
                                 axis.text.x=element_text(size=12),
144
                                 legend.position = c(0.80, 0.1),
145
                                 axis.ticks.y=element_blank(),
146
                                 plot.margin = unit(c(0.3,0.3,-0.8,-0.5), "cm"))+
147
  ylab("Number of patients")+xlab("")
148
149
freq<-plot_freq+ guides(fill = guide_legend(reverse=TRUE))
150
151
152
153
154
155
#######clinic information
156
clinic_biopsy_redced.melt$sample<- factor(clinic_biopsy_redced.melt$sample, levels=sample_order_94)
157
clinic.final<- subset(clinic_biopsy_redced.melt,sample %in%sample_order_94)
158
clinic.final$variable<- as.character(clinic.final$variable)
159
160
clinic.final$variable[clinic.final$variable=="group"]<- "Group"
161
clinic.final$variable[clinic.final$variable=="type"]<- "Biopsy"
162
clinic.final$variable<- factor(clinic.final$variable,levels=c("Group", "POD24", "Biopsy"))
163
164
clinic.final$value<- factor(clinic.final$value, levels=c("pre", "relapse", "transform", "nFL", "tFL","0", "1"))
165
clinic.plot<- ggplot(clinic.final, aes(x=sample, y=variable, fill=value))+geom_tile()+
166
  scale_fill_manual(values=c("dark cyan","#fbb4ae", "purple","#998EC3","#F1A340", "grey", "black"))+
167
  theme(panel.background = element_blank(),
168
        axis.ticks.x=element_blank(),
169
        legend.text = element_text(size=12,face="bold"),
170
        legend.title = element_text(size=14,face="bold"),
171
        axis.ticks.y=element_blank(),
172
        axis.text.x=element_blank(),
173
        legend.position="right",
174
        axis.text.y =element_text(size=14,face="bold"),
175
        plot.margin = unit(c(0.1,0.2,0,2,0.5), "cm"))+ xlab("")+ylab("")+
176
  guides(fill=guide_legend(ncol=2, title="Biopsy      Group"))
177
178
###select colors
179
180
"#998EC3","#F1A340",
181
     
182
clinic.plot.nolegend<- ggplot(clinic.final, aes(x=sample, y=variable, fill=value))+geom_tile()+
183
  scale_fill_manual(values=c("dark cyan","#fbb4ae", "purple","#998EC3","#F1A340", "grey", "black"))+
184
  theme(panel.background = element_blank(),
185
        axis.ticks.x=element_blank(),
186
        legend.text = element_text(size=12,face="bold"),
187
        legend.title = element_text(size=14,face="bold"),
188
        axis.ticks.y=element_blank(),
189
        axis.text.x=element_blank(),
190
        legend.position="null",
191
        axis.text.y =element_text(size=12),
192
        plot.margin = unit(c(-0.2,0.5,0.5,0.5), "cm"))+ xlab("")+ylab("")
193
194
##########
195
196
#####arrange the plots
197
198
199
pdf(file="figures/Final_figures/51drivers_complete_clinic_20211130.pdf", width=13,height=8)
200
ggarrange(mutation,  clinic.plot.nolegend, nrow=2, ncol=1,  heights=c(10,1))
201
202
dev.off()
203
204
###########stop
205
206
#####making forest plot
207
clinic<- read_excel("./2_clinic_infor/FL_Baoyan_clinical_file_FINAL_020719_use.xlsx", sheet = 1)
208
209
clinic$group<- ifelse(is.na(clinic$Date_transf), "nFL", "tFL")
210
211
clinic.info<- clinic%>%select(Patient_ID, group)
212
213
names(clinic.info)<- c("Patient", "Group")
214
215
flreseq$chrom<- as.character(lapply(strsplit(as.character(flreseq$VAR_ID), "\\_"),"[",1))
216
flreseq$pos<-as.character(lapply(strsplit(as.character(flreseq$VAR_ID), "\\_"),"[",2))
217
flreseq$ref<-as.character(lapply(strsplit(as.character(flreseq$VAR_ID), "\\_"),"[",3))
218
flreseq$alt<-as.character(lapply(strsplit(as.character(flreseq$VAR_ID), "\\_"),"[",4))
219
220
221
222
flreseq_sample.maf<- unique(flreseq%>%select(Patient, SYMBOL, chrom, pos, ref, alt, VARIANT,VARIANT_CLASS, HGVSp_short))
223
224
225
226
names(flreseq_sample.maf)<- c("Tumor_Sample_Barcode",
227
                              "Hugo_Symbol", "Chromosome", "Start_Position", "Reference_Allele", 
228
                              "Tumor_Seq_Allele2", "Variant_Classification", "Variant_Type", "HGVSp_short")
229
230
flreseq_sample.maf$End_Position<- flreseq_sample.maf$Start_Position  
231
flreseq_sample.maf$Variant_Type[flreseq_sample.maf$Variant_Type=="snv"]<- "SNP"
232
flreseq_sample.maf$Variant_Classification[flreseq_sample.maf$Variant_Classification=="3' UTR"]<- "3'UTR"
233
flreseq_sample.maf$Variant_Classification[flreseq_sample.maf$Variant_Classification=="5' UTR"]<- "5'UTR"
234
235
236
flreseq_sample.maf$Variant_Type[flreseq_sample.maf$Variant_Type=="deletion"]<- "DEL"
237
flreseq_sample.maf$Variant_Type[flreseq_sample.maf$Variant_Type=="inDEL"]<- "DEL"
238
flreseq_sample.maf$Variant_Type[flreseq_sample.maf$Variant_Type=="insertion"]<- "INS"
239
flreseq_sample.maf$Variant_Type[flreseq_sample.maf$Variant_Type=="INdel"]<- "INS"
240
flreseq_sample.maf$Variant_Type[flreseq_sample.maf$Variant_Type=="substitution"]<- "SUB"
241
flreseq_sample.maf$Variant_Type[flreseq_sample.maf$Variant_Type=="SNV"]<- "SNP"
242
#######seperate nFL and tFL
243
244
245
246
flreseq.nFL<- subset(flreseq_sample.maf, Tumor_Sample_Barcode%in% clinic.info$Patient[clinic.info$Group=="nFL"])
247
flreseq.tFL<- subset(flreseq_sample.maf, Tumor_Sample_Barcode%in% clinic.info$Patient[clinic.info$Group=="tFL"])
248
249
###taking only drivers
250
flreseq.nFL.drivers<- flreseq.nFL%>%filter (Hugo_Symbol%in% drivers$SYMBOL)
251
flreseq.tFL.drivers<- flreseq.tFL%>%filter (Hugo_Symbol%in% drivers$SYMBOL)
252
253
#####
254
255
256
nFL.maf<- read.maf(flreseq.nFL.drivers)
257
tFL.maf<- read.maf(flreseq.tFL.drivers)
258
nFL.vs.tFL<- mafCompare(m1=nFL.maf, m2=tFL.maf, m1Name = "nFL group", m2Name = "tFL group", minMut=0)
259
print(nFL.vs.tFL)
260
261
262
263
drivers.sum.pat$SYMBOL<- reorder(drivers.sum.pat$SYMBOL, drivers.sum.pat$Patient)
264
265
trace(forestPlot, edit=TRUE)
266
###change
267
#m.sigs$Hugo_Symbol <- factor(m.sigs$Hugo_Symbol, levels = levels(drivers.sum.pat$SYMBOL))
268
#m.sigs = m.sigs[order(m.sigs$Hugo_Symbol)]
269
270
#m.sigs$or_new = ifelse(test = m.sigs$or > 3, yes = 3, no = m.sigs$or)
271
#m.sigs$upper = ifelse(test = m.sigs$ci.up > 3, yes = 3, 
272
#                      no = m.sigs$ci.up)
273
#m.sigs$lower = ifelse(test = m.sigs$ci.low > 3, yes = 3, 
274
#                      no = m.sigs$ci.low)
275
276
#####arrage
277
278
#if (is.null(fdr)) {
279
#  m.sigs = mafCompareRes$results[pval < pVal]
280
#}
281
#else {
282
#  m.sigs = mafCompareRes$results[adjPval < fdr]
283
#}
284
285
pdf(file="2021_codes/figures/51drivers_complete_clinic_20211125.pdf", width=13, height=8)
286
ggarrange(mutation,  clinic.plot.nolegend, nrow=2,   heights=c(8.5,1))
287
288
dev.off()
289
290
pdf(file="figures/51drivers_forest_20211130.pdf",width=6, height=10)
291
forestPlot(mafCompareRes = nFL.vs.tFL, color=c("#F1A340","#998EC3"),geneFontSize = 1,pVal = 1)
292
dev.off()
293
294
295
296
##################################
297
flreseq.ns<- unique(flreseq%>%filter(Region=="Coding" & VARIANT !="Silent")%>%select(Patient, SYMBOL))
298
clinic<- read.xlsx(file="2_clinic_infor/FL_Baoyan_clinical_file_FINAL_020719 - survival.xlsx",sheetIndex = 1,header=T)
299
fl.drivers<- subset(flreseq.ns, SYMBOL %in% drivers$Genes)
300
301
clinic$trans<- ifelse(is.na(clinic$Date_transf),"0","1")
302
patient<- clinic%>%select(Patient_ID, trans)
303
names(patient)<- c("Patient", "Trans_status")
304
305
fl.drivers.clinic<- merge(fl.drivers, patient, by="Patient")
306
fl.drivers.clinic$SYMBOL_ordered<- factor(fl.drivers.clinic$SYMBOL, levels=drivers$Genes)
307
308
fl.drivers.summary<-ddply(fl.drivers.clinic,.(SYMBOL, Trans_status), summarise, num_pat=length(Patient))
309
310
fl.drivers.sum.cast<- cast(fl.drivers.summary, SYMBOL~ Trans_status)
311
fl.drivers.sum.cast[is.na(fl.drivers.sum.cast)]<-0
312
313
fl.drivers.melt<- melt(fl.drivers.sum.cast, id.vars = 'SYMBOL')
314
315
fl.drivers.melt$value[fl.drivers.melt$Trans_status=="0"]<- fl.drivers.melt$value[fl.drivers.melt$Trans_status=="0"]*-1
316
317
fl.drivers.melt$SYMBOL<- factor(fl.drivers.melt$SYMBOL, levels=rev(drivers$Genes))
318
319
320
321
322
323
mutation_nolegend<-mutation<-ggplot(plot_df.drivers, aes(x=sample, y=gene,fill=value))+geom_tile(color="gray", size=0.1)+
324
  scale_fill_manual(values=c("#e7298a","firebrick","navyblue","#1b9e77", "#7570b3","#66a61e","white"),na.translate=FALSE)+scale_x_discrete(drop=FALSE)+
325
  theme(panel.background = element_blank(),
326
        legend.text = element_text(size=14,face="bold"),
327
        legend.title = element_text(size=16,face="bold"),
328
        axis.ticks.x=element_blank(),
329
        legend.position = "",
330
        axis.text.x =element_blank(),
331
        axis.ticks.y=element_blank(),
332
        axis.text.y=element_blank(),
333
        
334
        plot.margin = unit(c(0.2,-0.2,-0.5,0.1), "cm"))+ xlab("")+ylab("")+
335
  
336
  guides(fill=guide_legend(title="Mutation Type"))
337
338
clinic.nolegend<- ggplot(clinic.final, aes(x=sample, y=variable, fill=value))+geom_tile()+
339
  scale_fill_manual(values=c("dark cyan","#fbb4ae", "purple","blue", "red"))+
340
  theme(panel.background = element_blank(),
341
        axis.ticks.x=element_blank(),
342
        legend.text = element_text(size=14,face="bold"),
343
        legend.title = element_text(size=16,face="bold"),
344
        axis.ticks.y=element_blank(),
345
        axis.text.x=element_blank(),
346
        legend.position="",
347
        axis.text.y =element_text(size=16,face="bold"),
348
        plot.margin = unit(c(-0.5,0.2,0,2,0.1), "cm"))+ xlab("")+ylab("")+
349
  guides(fill=guide_legend(ncol=2, title="Biopsy      Group"))
350
351
352
353
354
355
356
357
358
359
360
mutation_legend<-mutation<-ggplot(plot_df.drivers, aes(x=sample, y=gene,fill=value))+geom_tile(color="gray", size=0.1)+
361
  scale_fill_manual(values=c("#e7298a","firebrick","navyblue","#1b9e77", "#7570b3","#66a61e","white"),na.translate=FALSE)+scale_x_discrete(drop=FALSE)+
362
  theme(panel.background = element_blank(),
363
        legend.text = element_text(size=14,face="bold"),
364
        legend.title = element_text(size=16,face="bold"),
365
        axis.ticks.x=element_blank(),
366
        legend.position = "left",
367
        axis.text.x =element_blank(),
368
        axis.ticks.y=element_blank(),
369
        axis.text.y=element_blank(),
370
        
371
        plot.margin = unit(c(0.2,-0.2,-0.5,0.1), "cm"))+ xlab("")+ylab("")+
372
  
373
  guides(fill=guide_legend(title="Mutation Type"))
374
375
376
clinic.legend<- ggplot(clinic.final, aes(x=sample, y=variable, fill=value))+geom_tile()+
377
  scale_fill_manual(values=c("dark cyan","#fbb4ae", "purple","blue", "red"))+
378
  theme(panel.background = element_blank(),
379
        axis.ticks.x=element_blank(),
380
        legend.text = element_text(size=14,face="bold"),
381
        legend.title = element_text(size=16,face="bold"),
382
        axis.ticks.y=element_blank(),
383
        axis.text.x=element_blank(),
384
        legend.position="left",
385
        axis.text.y =element_text(size=16,face="bold"),
386
        plot.margin = unit(c(-0.5,0.2,0,2,0.1), "cm"))+ xlab("")+ylab("")+
387
  guides(fill=guide_legend(ncol=2, title="Biopsy      Group"))
388
389