|
a |
|
b/Figures/Figure2_GD.R |
|
|
1 |
|
|
|
2 |
|
|
|
3 |
################################################ |
|
|
4 |
# File Name:Figure2_GD.R |
|
|
5 |
# Author: Baoyan Bai |
|
|
6 |
|
|
|
7 |
|
|
|
8 |
################################################# |
|
|
9 |
|
|
|
10 |
|
|
|
11 |
|
|
|
12 |
##load required libraries |
|
|
13 |
library("plyr") |
|
|
14 |
library(dplyr) |
|
|
15 |
library(reshape) |
|
|
16 |
library(distances) |
|
|
17 |
|
|
|
18 |
library(ggplot2) |
|
|
19 |
library(xlsx) |
|
|
20 |
library("gridExtra") |
|
|
21 |
|
|
|
22 |
|
|
|
23 |
setwd("~/Desktop/FL/fl_latest") |
|
|
24 |
#####Patients with mutiple biopsies |
|
|
25 |
|
|
|
26 |
|
|
|
27 |
#####using clonal mutation from pyclone to calcuate genomic distance |
|
|
28 |
#####Input file: output file from pyclone |
|
|
29 |
setwd("~/Desktop/FL/11_pyclone/BED_bam/counts_new/pyclone_output_301018/ouput_pyclone/output_200X_i2") |
|
|
30 |
|
|
|
31 |
empty<- setNames(data.frame(matrix(ncol = 3, nrow = 0)), c("Pair", "Pat", "distance")) |
|
|
32 |
|
|
|
33 |
for (i in pat) { |
|
|
34 |
|
|
|
35 |
cluster<- read.table(file=paste(i, "/tables/cluster.tsv",sep=""), sep="\t", header=T, stringsAsFactors = F) |
|
|
36 |
cluster.cast<- data.frame(cast(cluster, cluster_id ~ sample_id, value= "mean")) |
|
|
37 |
cluster.cast$max<- do.call(pmax, cluster.cast[c(-1)]) |
|
|
38 |
|
|
|
39 |
cluster.filter<- cluster.cast%>% dplyr::filter(max>0.4) |
|
|
40 |
|
|
|
41 |
loci<- read.table(file=paste(i, "/tables/loci.tsv",sep=""), header=T, sep="\t", stringsAsFactors = F) |
|
|
42 |
|
|
|
43 |
loci.fil<- subset(loci, cluster_id %in% cluster.filter$cluster_id) |
|
|
44 |
|
|
|
45 |
loci.fil.tmp<- loci.fil%>% select(mutation_id, sample_id, cellular_prevalence) |
|
|
46 |
loci.cast<- cast(loci.fil.tmp, mutation_id ~ sample_id, value="cellular_prevalence") |
|
|
47 |
|
|
|
48 |
loci.cast<- as.matrix(loci.cast) |
|
|
49 |
dis<- melt(matrix(dist(t(loci.cast)))) |
|
|
50 |
names(dis)<- c("Pair", "Pat", "distance") |
|
|
51 |
dis$Pat<- i |
|
|
52 |
empty<- rbind(empty, dis) |
|
|
53 |
} |
|
|
54 |
write.table(empty, file="~/Desktop/FL/fl_latest/34_GDist/FL_genomicdist_clonal.txt", sep="\t", row.name=F,quote=F) |
|
|
55 |
|
|
|
56 |
|
|
|
57 |
####Plot genomic distance, with clinical information |
|
|
58 |
library(cowplot) |
|
|
59 |
library(forcats) |
|
|
60 |
library(plyr) |
|
|
61 |
fldist<- read.table(file="FL_gDistance_update.txt",sep="\t",header=T, stringsAsFactors = F) |
|
|
62 |
|
|
|
63 |
|
|
|
64 |
|
|
|
65 |
fldist$Group[fldist$Group=="clinial tFL"]<- "tFL" |
|
|
66 |
|
|
|
67 |
|
|
|
68 |
pat.order<- ddply(fldist, .(Pat), summarise, largest_dist=max(Distance_CF)) |
|
|
69 |
|
|
|
70 |
pat.order$Pat<- reorder(pat.order$Pat, -pat.order$largest_dist) |
|
|
71 |
fldist$Pat<- factor(fldist$Pat, levels=levels(pat.order$Pat)) |
|
|
72 |
names(fldist)[names(fldist)=="classify"]<- "Comparison" |
|
|
73 |
fldist$Comparison[fldist$Comparison=="FL3B-tFL"]<- "FL-tFL" |
|
|
74 |
fldist$Comparison[fldist$Comparison=="tFL-tFL"]<- "FL-tFL" |
|
|
75 |
|
|
|
76 |
library(tidyverse) |
|
|
77 |
dis_plot<-ggplot(fldist, aes(x=Pat, y=Distance_CF, color=Comparison,size=Interval))+ |
|
|
78 |
geom_point(position=position_jitter(h=0.1, w=0.1), |
|
|
79 |
shape = 19,alpha=0.7)+ |
|
|
80 |
scale_color_manual(values=c("dark blue", "dark red"))+ |
|
|
81 |
theme(axis.title.x=element_blank(), |
|
|
82 |
axis.text.x=element_blank(), |
|
|
83 |
axis.text.y=element_text(size=12), |
|
|
84 |
axis.ticks.x=element_blank())+ ylab("Genomic Distance")+ |
|
|
85 |
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), |
|
|
86 |
panel.background = element_blank(), axis.line = element_line(colour = "black")) |
|
|
87 |
|
|
|
88 |
|
|
|
89 |
library(reshape) |
|
|
90 |
clinic<- xlsx2dfs("../2_clinic_infor/FL_Baoyan_clinical_file_FINAL_020719 - survival.xlsx") |
|
|
91 |
clinic<- clinic$FL_Baoyan_clinical_file_FINAL_0 |
|
|
92 |
clinic$Patient_ID<- row.names(clinic) |
|
|
93 |
clinic$Transf<- ifelse(is.na(clinic$Date_transf), "N", "Y") |
|
|
94 |
clinic.info<- clinic%>%select(Patient_ID, Transf, POD24) |
|
|
95 |
clinic.info.melt<- melt(clinic.info, id.vars = "Patient_ID") |
|
|
96 |
names(clinic.info.melt)<- c("sample", "variable", "value") |
|
|
97 |
|
|
|
98 |
clinic.info.melt$sample<- as.character(clinic.info.melt$sample) |
|
|
99 |
|
|
|
100 |
pats.info<- unique(fldist%>%dplyr::select(Pat, Group)) |
|
|
101 |
pats.info$variable<- "Group" |
|
|
102 |
|
|
|
103 |
|
|
|
104 |
clinic.info.30 <- subset(clinic.info.melt, sample %in% pats.info$Pat) |
|
|
105 |
clinic.info.30$sample<- factor(clinic.info.30$sample, levels=levels(fldist$Pat)) |
|
|
106 |
|
|
|
107 |
clinic.info.30$value[clinic.info.30$value==0]<- "No" |
|
|
108 |
clinic.info.30$value[clinic.info.30$value==1]<- "Yes" |
|
|
109 |
clinic.info.30$value[clinic.info.30$value=="N"]<- "nFL" |
|
|
110 |
clinic.info.30$value[clinic.info.30$value=="Y"]<- "tFL" |
|
|
111 |
clinic.info.30$variable<- as.character(clinic.info.30$variable) |
|
|
112 |
|
|
|
113 |
clinic.info.30$variable[clinic.info.30$variable=="Transf"]<- "Group" |
|
|
114 |
clinic.info.30$variable<- factor(clinic.info.30$variable, levels=c( "Group", "POD24")) |
|
|
115 |
clinic.info.30$value<- factor(clinic.info.30$value, levels=c("No", "Yes", "nFL", "tFL")) |
|
|
116 |
|
|
|
117 |
|
|
|
118 |
clinic.anno<- ggplot(clinic.info.30, aes(x=sample,y=variable, fill=value))+ geom_tile(color="white")+ |
|
|
119 |
scale_fill_manual(values = c("grey","black", "#998EC3","#F1A340"))+ |
|
|
120 |
theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(), |
|
|
121 |
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"), |
|
|
122 |
axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank() |
|
|
123 |
)+theme_void()+ |
|
|
124 |
guides(fill=guide_legend(ncol=2))+ylab("") |
|
|
125 |
|
|
|
126 |
pdf(file="fl_dist_sorted_20211213.pdf", width =7, height=5) |
|
|
127 |
plot_grid(dis_plot, clinic.anno, ncol = 1, align = "v", rel_heights=c(6,2)) |
|
|
128 |
dev.off() |
|
|
129 |
|
|
|
130 |
#########Violin plot |
|
|
131 |
####violin plot to show the genomic distance between nFL and tFL groups |
|
|
132 |
|
|
|
133 |
p1<- ggplot(fldist, aes(x=Group, y=Distance_CF)) + |
|
|
134 |
geom_violin(trim=FALSE) |
|
|
135 |
|
|
|
136 |
pdf(file="fldist_group_violin_20210609.pdf", width=5, height=5) |
|
|
137 |
p1+geom_dotplot( binaxis='y', stackdir='center', dotsize=1)+ theme_bw() + |
|
|
138 |
theme(panel.border = element_blank(), panel.grid.major = element_blank(), |
|
|
139 |
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"), |
|
|
140 |
axis.text.x = element_text(size=12), |
|
|
141 |
axis.title.x =element_text(size=14), |
|
|
142 |
axis.title.y =element_text(size=14), |
|
|
143 |
axis.text.y = element_text(size=12))+ylab("Genomic Distance")+stat_compare_means() |
|
|
144 |
# print: p-value 0.6 |
|
|
145 |
|
|
|
146 |
dev.off() |
|
|
147 |
####violin plot to show the genomic distance between different comparisons |
|
|
148 |
fldist$classify[fldist$classify=="tFL-tFL"]<- "FL-tFL" |
|
|
149 |
fldist$classify[fldist$classify=="FL3B-tFL"]<- "tFL-tFL" |
|
|
150 |
|
|
|
151 |
p2<- ggplot(fldist, aes(x=classify, y=Distance_CF)) + |
|
|
152 |
geom_violin(trim=FALSE) |
|
|
153 |
|
|
|
154 |
######Type of comparison |
|
|
155 |
pdf(file="fldist_type_violin_202100609.pdf", width=5, height=5) |
|
|
156 |
p2+geom_dotplot( binaxis='y', stackdir='center', dotsize=1)+ theme_bw() + |
|
|
157 |
theme(panel.border = element_blank(), panel.grid.major = element_blank(), |
|
|
158 |
axis.text.x = element_text(size=12), |
|
|
159 |
axis.text.y =element_text(size=12), |
|
|
160 |
axis.title.x = element_text(size=14), |
|
|
161 |
axis.title.y =element_text(size=14), |
|
|
162 |
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))+ |
|
|
163 |
xlab("Type of Comparison")+ylab("Genomic Distance")+stat_compare_means() |
|
|
164 |
#print: pvalue =0.37 |
|
|
165 |
dev.off() |
|
|
166 |
|
|
|
167 |
###Based on POD status |
|
|
168 |
|
|
|
169 |
|
|
|
170 |
p3<-ggplot(fldist, aes(x=POD24, y=Distance_CF)) + |
|
|
171 |
geom_violin(trim=FALSE) |
|
|
172 |
pdf(file="fldist_POD24_violin_20230913.pdf", width=5, height=5) |
|
|
173 |
p3+geom_dotplot( binaxis='y', stackdir='center', dotsize=1)+ theme_bw() + |
|
|
174 |
theme(panel.border = element_blank(), panel.grid.major = element_blank(), |
|
|
175 |
axis.text.x = element_text(size=12), |
|
|
176 |
axis.text.y =element_text(size=12), |
|
|
177 |
axis.title.x = element_text(size=14), |
|
|
178 |
axis.title.y =element_text(size=14), |
|
|
179 |
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))+ |
|
|
180 |
xlab("POD24 status")+ylab("Genomic Distance")+stat_compare_means() |
|
|
181 |
dev.off() |
|
|
182 |
|
|
|
183 |
pdf(file="fldist_POD24_violin_20230913_wPval.pdf", width=5, height=5) |
|
|
184 |
p3+geom_dotplot( binaxis='y', stackdir='center', dotsize=1)+ theme_bw() + |
|
|
185 |
theme(panel.border = element_blank(), panel.grid.major = element_blank(), |
|
|
186 |
axis.text.x = element_text(size=12), |
|
|
187 |
axis.text.y =element_text(size=12), |
|
|
188 |
axis.title.x = element_text(size=14), |
|
|
189 |
axis.title.y =element_text(size=14), |
|
|
190 |
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))+ |
|
|
191 |
xlab("POD24 status")+ylab("Genomic Distance")+stat_compare_means() |
|
|
192 |
dev.off() |
|
|
193 |
|
|
|
194 |
p1<- ggplot(fl.dist, aes(x=Defi_2020, y=Distance_CF))+geom_violin(trim=FALSE) |
|
|
195 |
pdf(file="evolution_gDIST_group_wPvalue.pdf", width=5, height=5) |
|
|
196 |
p1+geom_dotplot(binaxis='y', stackdir='center', dotsize=1)+ theme_bw() + |
|
|
197 |
theme(panel.border = element_blank(), panel.grid.major = element_blank(), |
|
|
198 |
axis.text.x = element_text(size=12), |
|
|
199 |
axis.text.y =element_text(size=12), |
|
|
200 |
axis.title.x = element_text(size=14), |
|
|
201 |
axis.title.y =element_text(size=14), |
|
|
202 |
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))+ xlab("Evolution patten")+ylab("Genomic Distance")+ |
|
|
203 |
facet_wrap(~Group, scale="free")+stat_compare_means() |
|
|
204 |
dev.off() |
|
|
205 |
|
|
|
206 |
pdf(file="evolution_gDIST_group_woPvalue.pdf", width=8, height=5) |
|
|
207 |
p1+geom_dotplot(binaxis='y', stackdir='center', dotsize=1)+ theme_bw() + |
|
|
208 |
theme(panel.border = element_blank(), panel.grid.major = element_blank(), |
|
|
209 |
axis.text.x = element_text(size=12), |
|
|
210 |
axis.text.y =element_text(size=12), |
|
|
211 |
axis.title.x = element_text(size=14), |
|
|
212 |
axis.title.y =element_text(size=14), |
|
|
213 |
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))+ xlab("Evolution patten")+ylab("Genomic Distance")+ |
|
|
214 |
facet_wrap(~Group, scale="free") |
|
|
215 |
dev.off() |
|
|
216 |
|
|
|
217 |
pdf(file="evolution_gDIST_all_wPvalue.pdf", width=5, height=5) |
|
|
218 |
p1+geom_dotplot(binaxis='y', stackdir='center', dotsize=1)+ theme_bw() + |
|
|
219 |
theme(panel.border = element_blank(), panel.grid.major = element_blank(), |
|
|
220 |
axis.text.x = element_text(size=12), |
|
|
221 |
axis.text.y =element_text(size=12), |
|
|
222 |
axis.title.x = element_text(size=14), |
|
|
223 |
axis.title.y =element_text(size=14), |
|
|
224 |
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))+ xlab("Evolution patten")+ylab("Genomic Distance")+ |
|
|
225 |
stat_compare_means() |
|
|
226 |
dev.off() |
|
|
227 |
|
|
|
228 |
|
|
|
229 |
|
|
|
230 |
|
|
|
231 |
pdf(file="evolution_gDIST_all_woPvalue.pdf", width=5, height=5) |
|
|
232 |
p1+geom_dotplot(binaxis='y', stackdir='center', dotsize=1)+ theme_bw() + |
|
|
233 |
theme(panel.border = element_blank(), panel.grid.major = element_blank(), |
|
|
234 |
axis.text.x = element_text(size=12), |
|
|
235 |
axis.text.y =element_text(size=12), |
|
|
236 |
axis.title.x = element_text(size=14), |
|
|
237 |
axis.title.y =element_text(size=14), |
|
|
238 |
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))+ xlab("Evolution patten")+ylab("Genomic Distance") |
|
|
239 |
|
|
|
240 |
dev.off() |
|
|
241 |
|
|
|
242 |
#pval : 6.9e-05 |