Diff of /Figures/Figure2_GD.R [000000] .. [4ff317]

Switch to unified view

a b/Figures/Figure2_GD.R
1
2
3
################################################
4
#  File Name:Figure2_GD.R
5
#  Author: Baoyan Bai
6
7
8
#################################################
9
10
11
12
##load required libraries
13
library("plyr")
14
library(dplyr)
15
library(reshape)
16
library(distances)
17
18
library(ggplot2)
19
library(xlsx)
20
library("gridExtra")
21
22
23
setwd("~/Desktop/FL/fl_latest")
24
#####Patients with mutiple biopsies
25
26
27
#####using clonal mutation from pyclone to calcuate genomic distance
28
#####Input file: output file from pyclone
29
setwd("~/Desktop/FL/11_pyclone/BED_bam/counts_new/pyclone_output_301018/ouput_pyclone/output_200X_i2")
30
31
empty<- setNames(data.frame(matrix(ncol = 3, nrow = 0)), c("Pair", "Pat",  "distance"))
32
33
for (i in pat) {
34
  
35
  cluster<- read.table(file=paste(i, "/tables/cluster.tsv",sep=""), sep="\t", header=T, stringsAsFactors = F)
36
  cluster.cast<- data.frame(cast(cluster, cluster_id ~ sample_id, value= "mean"))
37
  cluster.cast$max<- do.call(pmax, cluster.cast[c(-1)])
38
  
39
  cluster.filter<- cluster.cast%>% dplyr::filter(max>0.4)
40
  
41
  loci<- read.table(file=paste(i, "/tables/loci.tsv",sep=""), header=T, sep="\t", stringsAsFactors = F)
42
  
43
  loci.fil<- subset(loci, cluster_id %in% cluster.filter$cluster_id)
44
  
45
  loci.fil.tmp<- loci.fil%>% select(mutation_id, sample_id, cellular_prevalence)
46
  loci.cast<- cast(loci.fil.tmp, mutation_id ~ sample_id, value="cellular_prevalence")
47
  
48
  loci.cast<- as.matrix(loci.cast)
49
  dis<- melt(matrix(dist(t(loci.cast))))
50
  names(dis)<- c("Pair", "Pat", "distance")
51
  dis$Pat<- i
52
  empty<- rbind(empty, dis)
53
}
54
write.table(empty, file="~/Desktop/FL/fl_latest/34_GDist/FL_genomicdist_clonal.txt", sep="\t", row.name=F,quote=F)
55
56
57
####Plot genomic distance, with clinical information
58
library(cowplot)
59
library(forcats)
60
library(plyr)
61
fldist<- read.table(file="FL_gDistance_update.txt",sep="\t",header=T, stringsAsFactors = F)
62
63
64
65
fldist$Group[fldist$Group=="clinial tFL"]<- "tFL"
66
67
68
pat.order<- ddply(fldist, .(Pat), summarise, largest_dist=max(Distance_CF))
69
70
pat.order$Pat<- reorder(pat.order$Pat, -pat.order$largest_dist)
71
fldist$Pat<- factor(fldist$Pat, levels=levels(pat.order$Pat))
72
names(fldist)[names(fldist)=="classify"]<- "Comparison"
73
fldist$Comparison[fldist$Comparison=="FL3B-tFL"]<- "FL-tFL"
74
fldist$Comparison[fldist$Comparison=="tFL-tFL"]<- "FL-tFL"
75
76
library(tidyverse)
77
dis_plot<-ggplot(fldist, aes(x=Pat, y=Distance_CF, color=Comparison,size=Interval))+ 
78
  geom_point(position=position_jitter(h=0.1, w=0.1),
79
             shape = 19,alpha=0.7)+
80
  scale_color_manual(values=c("dark blue", "dark red"))+
81
  theme(axis.title.x=element_blank(),
82
        axis.text.x=element_blank(),
83
        axis.text.y=element_text(size=12),
84
        axis.ticks.x=element_blank())+ ylab("Genomic Distance")+
85
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
86
        panel.background = element_blank(), axis.line = element_line(colour = "black"))
87
88
89
library(reshape)
90
clinic<- xlsx2dfs("../2_clinic_infor/FL_Baoyan_clinical_file_FINAL_020719 - survival.xlsx")
91
clinic<- clinic$FL_Baoyan_clinical_file_FINAL_0
92
clinic$Patient_ID<- row.names(clinic)
93
clinic$Transf<- ifelse(is.na(clinic$Date_transf), "N", "Y")
94
clinic.info<- clinic%>%select(Patient_ID, Transf, POD24)
95
clinic.info.melt<- melt(clinic.info, id.vars = "Patient_ID")
96
names(clinic.info.melt)<- c("sample", "variable", "value")
97
98
clinic.info.melt$sample<- as.character(clinic.info.melt$sample)
99
100
pats.info<- unique(fldist%>%dplyr::select(Pat, Group))
101
pats.info$variable<- "Group"
102
103
104
clinic.info.30 <- subset(clinic.info.melt, sample %in% pats.info$Pat)
105
clinic.info.30$sample<- factor(clinic.info.30$sample, levels=levels(fldist$Pat))
106
107
clinic.info.30$value[clinic.info.30$value==0]<- "No"
108
clinic.info.30$value[clinic.info.30$value==1]<- "Yes"
109
clinic.info.30$value[clinic.info.30$value=="N"]<- "nFL"
110
clinic.info.30$value[clinic.info.30$value=="Y"]<- "tFL"
111
clinic.info.30$variable<- as.character(clinic.info.30$variable)
112
113
clinic.info.30$variable[clinic.info.30$variable=="Transf"]<- "Group"
114
clinic.info.30$variable<- factor(clinic.info.30$variable, levels=c( "Group", "POD24"))
115
clinic.info.30$value<- factor(clinic.info.30$value, levels=c("No", "Yes", "nFL", "tFL"))
116
117
118
clinic.anno<- ggplot(clinic.info.30, aes(x=sample,y=variable, fill=value))+ geom_tile(color="white")+
119
  scale_fill_manual(values = c("grey","black", "#998EC3","#F1A340"))+
120
  theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
121
                     panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
122
                     axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank()
123
  )+theme_void()+
124
  guides(fill=guide_legend(ncol=2))+ylab("")
125
126
pdf(file="fl_dist_sorted_20211213.pdf", width =7, height=5)
127
plot_grid(dis_plot, clinic.anno, ncol = 1, align = "v", rel_heights=c(6,2))
128
dev.off()
129
130
#########Violin plot
131
####violin plot to show the genomic distance between nFL and tFL groups
132
133
p1<- ggplot(fldist, aes(x=Group, y=Distance_CF)) + 
134
  geom_violin(trim=FALSE)
135
136
pdf(file="fldist_group_violin_20210609.pdf", width=5, height=5)
137
p1+geom_dotplot( binaxis='y', stackdir='center', dotsize=1)+ theme_bw() + 
138
  theme(panel.border = element_blank(), panel.grid.major = element_blank(),
139
        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
140
        axis.text.x = element_text(size=12),
141
        axis.title.x =element_text(size=14),
142
        axis.title.y =element_text(size=14),
143
        axis.text.y = element_text(size=12))+ylab("Genomic Distance")+stat_compare_means()
144
# print: p-value 0.6
145
146
dev.off()
147
####violin plot to show the genomic distance between different comparisons
148
fldist$classify[fldist$classify=="tFL-tFL"]<- "FL-tFL"
149
fldist$classify[fldist$classify=="FL3B-tFL"]<- "tFL-tFL"
150
151
p2<- ggplot(fldist, aes(x=classify, y=Distance_CF)) + 
152
  geom_violin(trim=FALSE)
153
154
######Type of comparison
155
pdf(file="fldist_type_violin_202100609.pdf", width=5, height=5)
156
p2+geom_dotplot( binaxis='y', stackdir='center', dotsize=1)+ theme_bw() + 
157
  theme(panel.border = element_blank(), panel.grid.major = element_blank(),
158
        axis.text.x = element_text(size=12),
159
        axis.text.y =element_text(size=12),
160
        axis.title.x = element_text(size=14),
161
        axis.title.y =element_text(size=14),
162
        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))+ 
163
  xlab("Type of Comparison")+ylab("Genomic Distance")+stat_compare_means()
164
#print: pvalue =0.37
165
dev.off()
166
167
###Based on POD status
168
169
170
p3<-ggplot(fldist, aes(x=POD24, y=Distance_CF)) + 
171
  geom_violin(trim=FALSE)
172
pdf(file="fldist_POD24_violin_20230913.pdf", width=5, height=5)
173
p3+geom_dotplot( binaxis='y', stackdir='center', dotsize=1)+ theme_bw() + 
174
  theme(panel.border = element_blank(), panel.grid.major = element_blank(),
175
        axis.text.x = element_text(size=12),
176
        axis.text.y =element_text(size=12),
177
        axis.title.x = element_text(size=14),
178
        axis.title.y =element_text(size=14),
179
        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))+ 
180
  xlab("POD24 status")+ylab("Genomic Distance")+stat_compare_means()
181
dev.off()
182
183
pdf(file="fldist_POD24_violin_20230913_wPval.pdf", width=5, height=5)
184
p3+geom_dotplot( binaxis='y', stackdir='center', dotsize=1)+ theme_bw() + 
185
  theme(panel.border = element_blank(), panel.grid.major = element_blank(),
186
        axis.text.x = element_text(size=12),
187
        axis.text.y =element_text(size=12),
188
        axis.title.x = element_text(size=14),
189
        axis.title.y =element_text(size=14),
190
        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))+ 
191
  xlab("POD24 status")+ylab("Genomic Distance")+stat_compare_means()
192
dev.off()
193
194
p1<- ggplot(fl.dist, aes(x=Defi_2020, y=Distance_CF))+geom_violin(trim=FALSE)
195
pdf(file="evolution_gDIST_group_wPvalue.pdf", width=5, height=5)
196
p1+geom_dotplot(binaxis='y', stackdir='center', dotsize=1)+ theme_bw() + 
197
  theme(panel.border = element_blank(), panel.grid.major = element_blank(),
198
        axis.text.x = element_text(size=12),
199
        axis.text.y =element_text(size=12),
200
        axis.title.x = element_text(size=14),
201
        axis.title.y =element_text(size=14),
202
        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))+ xlab("Evolution patten")+ylab("Genomic Distance")+
203
  facet_wrap(~Group, scale="free")+stat_compare_means()
204
dev.off()
205
206
pdf(file="evolution_gDIST_group_woPvalue.pdf", width=8, height=5)
207
p1+geom_dotplot(binaxis='y', stackdir='center', dotsize=1)+ theme_bw() + 
208
  theme(panel.border = element_blank(), panel.grid.major = element_blank(),
209
        axis.text.x = element_text(size=12),
210
        axis.text.y =element_text(size=12),
211
        axis.title.x = element_text(size=14),
212
        axis.title.y =element_text(size=14),
213
        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))+ xlab("Evolution patten")+ylab("Genomic Distance")+
214
  facet_wrap(~Group, scale="free")
215
dev.off()
216
217
pdf(file="evolution_gDIST_all_wPvalue.pdf", width=5, height=5)
218
p1+geom_dotplot(binaxis='y', stackdir='center', dotsize=1)+ theme_bw() + 
219
  theme(panel.border = element_blank(), panel.grid.major = element_blank(),
220
        axis.text.x = element_text(size=12),
221
        axis.text.y =element_text(size=12),
222
        axis.title.x = element_text(size=14),
223
        axis.title.y =element_text(size=14),
224
        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))+ xlab("Evolution patten")+ylab("Genomic Distance")+
225
  stat_compare_means()
226
dev.off()
227
228
229
230
231
pdf(file="evolution_gDIST_all_woPvalue.pdf", width=5, height=5)
232
p1+geom_dotplot(binaxis='y', stackdir='center', dotsize=1)+ theme_bw() + 
233
  theme(panel.border = element_blank(), panel.grid.major = element_blank(),
234
        axis.text.x = element_text(size=12),
235
        axis.text.y =element_text(size=12),
236
        axis.title.x = element_text(size=14),
237
        axis.title.y =element_text(size=14),
238
        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))+ xlab("Evolution patten")+ylab("Genomic Distance")
239
240
dev.off()
241
242
#pval : 6.9e-05