[8e0848]: / Statistical_analysis_FIMM_AML_RRBS.R

Download this file

200 lines (169 with data), 7.7 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
library(RnBeads)
# Set some analysis options
rnb.options(
filtering.sex.chromosomes.removal=T,
identifiers.column="SampleID",
replicate.id.column="treatment",
import.bed.style="bismarkCov",
enforce.memory.management=T,
assembly="hg38",
differential.report.sites=FALSE,
filtering.sex.chromosomes.removal = TRUE,
import.table.separator="\t"
)
setwd("/research/groups/sysgen/PROJECTS/HEMAP_IMMUNOLOGY/petri_work/HEMAP_IMMUNOLOGY/Published_data_figures")
options(fftempdir=file.path(getwd(), "temp"))
if(!dir.exists(file.path(getwd(), "temp")))dir.create(file.path(getwd(), "temp"))
rnb.set=load.rnb.set("analysis.zip", temp.dir=file.path(getwd(), "temp"))
diffmeth=load.rnb.diffmeth("analysis/")
# laad custom annot:
rnb.load.annotation("CpGregion_annotations.Rdata", "CpGregion")
rnb.load.annotation("CpGregion_CIITA_annotations.Rdata", "CpGregion_CIITA")
rnb.load.annotation("CpGexpanded_annotations.Rdata", "CpG.expanded")
#****************** volcanoplot to see differential methylation for each comparison: ******************
library(EnhancedVolcano)
# type="promoters"
# type="tiling"
# type="genes"
# type="cpgislands"
# type="CpG.expanded"
type="CpGregion"
comparison <- get.comparisons(diffmeth)[1]
tab.sites <- get.table(diffmeth, comparison, type, return.data.frame=TRUE)
which.diffmeth <- abs(tab.sites$mean.mean.diff)>0.1 & tab.sites$comb.p.adj.fdr<0.05
tab.sites$significant=which.diffmeth
lab = paste(tab.sites$Chromosome, tab.sites$Start, tab.sites$End, sep=":")
aa <- annotation(object = rnb.set, type = type)
annotated.dmrs <- data.frame(aa, tab.sites)
annotated.dmrs[grep("CpG:41.91|CpG:21.172|CpG:20.179",rownames(annotated.dmrs)),]
pdf("Fig4G_CIITA_CpG_Volcanoplot.pdf", width=5, height=5)
EnhancedVolcano(tab.sites, title = gsub(" \\(.*.", "", comparison), subtitle = type, xlab = "mean difference",
# lab = paste(tab.sites$Chromosome, tab.sites$Start, tab.sites$End, sep=":"), drawConnectors=T,
lab =rownames(annotated.dmrs), drawConnectors=F,widthConnectors = 1,
# selectLab = grep("CpG:41.91",rownames(annotated.dmrs), value=T),
x = 'mean.mean.diff', ylim=c(0,5),
y = 'comb.p.adj.fdr', col=c("grey75", "grey75","grey75","grey75"),
pointSize = 2,vline = -0.058,vlineCol = "red",vlineType = "solid",
pCutoff = 0, FCcutoff = 0)
EnhancedVolcano(tab.sites[grepl("CpG:41.91|CpG:21.172|CpG:20.179",rownames(annotated.dmrs)),], title = gsub(" \\(.*.", "", comparison), subtitle = type, xlab = "mean difference",
# lab = paste(tab.sites$Chromosome, tab.sites$Start, tab.sites$End, sep=":"), drawConnectors=T,
lab =rownames(annotated.dmrs)[grepl("CpG:41.91|CpG:21.172|CpG:20.179",rownames(annotated.dmrs))], drawConnectors=F, widthConnectors = 1,
selectLab = grep("CpG:41.91",rownames(annotated.dmrs), value=T),
x = 'mean.mean.diff', pointSize = 3,vline = -0.058,vlineCol = "red",vlineType = "solid",
y = 'comb.p.adj.fdr', col=c("red2", "red2","red2","red2"),
pCutoff = 0, FCcutoff = 0, xlim = c(-1,1), ylim=c(0,5))
dev.off()
#
# # Check some global
# rnb.set.m <- meth(rnb.set, type="cpgislands")
#
# rnb.set.m2=t(rnb.set.m[!rowSums(is.na(rnb.set.m))>20,])
#
# d=do.call(rbind, lapply(1:24, function(i)prop.table(table(rnb.set.m2[i,]<0.1))))
# rownames(d)=rnb.set@pheno$treatment
#
# d2=do.call(rbind, lapply(1:24, function(i)prop.table(table(rnb.set.m2[i,]>0.9))))
# rownames(d2)=rnb.set@pheno$treatment
#
# res.cor <- cor(t(rnb.set.m2), method = "pearson", use = "complete.obs")
#
# # Load required packages
# library(magrittr)
# library(dplyr)
# library(ggpubr)
# # Cmpute MDS
# mds <- rnb.set.m2 %>%
# dist() %>%
# cmdscale() %>%
# as_tibble()
# colnames(mds) <- c("Dim.1", "Dim.2")
# # Plot MDS
#
# pdf("MDS.pdf", width = 8, height = 8)
# ggscatter(mds, x = "Dim.1", y = "Dim.2",
# label = rownames(rnb.set.m2),
# size = 1,
# repel = TRUE)
# dev.off()
#
# res.cor <- 1-cor(t(rnb.set.m2), method = "pearson", use = "complete.obs")
dist.m=dist(rnb.set.m2, method = "euclidean")
fit <- hclust(dist.m, method="ward")
pdf("FigS4L_Euclidean.pdf")
plot(fit) # display dendogram
dev.off()
# CIITA CpG island
rnb.set.m <- meth(rnb.set, type="CpGregion_CIITA")
aa <- annotation(rnb.set, type="CpGregion_CIITA")
rownames(rnb.set.m)=rownames(aa)
comparison <- get.comparisons(diffmeth)[1]
tab.promoters <- get.table(diffmeth, comparison, "CpGregion_CIITA", return.data.frame=TRUE)
plots=lapply(c(2,1,3,5,4), function(i){
group=gsub("_1|_2|_3","", colnames(rnb.set.m))
group=ifelse(grepl("DC", colnames(rnb.set.m)),"DAC", "Rest")
dat.list=lapply(unique(group), function(g)rnb.set.m[i,group%in%g])
names(dat.list)=paste(unique(group), rownames(aa)[i])
return(dat.list)
})
plots=unlist(plots, recursive = F)
a=plot.boxplot.list(plots, color.v = rep("grey75",length(plots)), ylab = "Methylation beta", spread = T, outlier.size = 1, y.range = c(0,1))
ggsave(plot = a, filename = "Fig4H_boxplot_CIITA_CpG.pdf",device = "pdf", height = 4, width = 4)
ind=4
# group=gsub("_1|_2|_3","", colnames(rnb.set.m))
# group=ifelse(grepl("DC", colnames(rnb.set.m)),"DAC", "Rest")
#
# dat.list=lapply(unique(group), function(g)rnb.set.m[ind,group%in%g])
# names(dat.list)=unique(group)
# a=plot.boxplot.list(dat.list, color.v = rep("grey75",length(dat.list)), ylab = "Methylation beta", spread = T, outlier.size = 1.5, y.range = c(0,1))
#
# ggsave(plot = a, filename = "boxplot_CIITA_CpG.pdf",device = "pdf", height = 3, width = 3)
#
# write.table(rnb.set.m, "CIITA_CpGisland.txt", quote = F, row.names = T, col.names = T, sep="\t")
#
# # CIITA promoter
# rnb.set.m <- meth(rnb.set, type="promoters")
# aa <- annotation(rnb.set, type="promoters")
#
# ind=aa$symbol%in%"CIITA"
# group=gsub("_1|_2|_3","", colnames(rnb.set.m))
# group=ifelse(grepl("DC", colnames(rnb.set.m)),"DAC", "Rest")
#
# dat.list=lapply(unique(group), function(g)rnb.set.m[ind,group%in%g])
# names(dat.list)=unique(group)
# plot.boxplot.list(dat.list, color.v = rep("grey75",length(dat.list)), ylab = "Methylation beta")
#
#
# # region analysis
# type="CpGregion"
# rnb.set.m <- meth(rnb.set, type="CpGregion")
# comparison <- get.comparisons(diffmeth)[1]
# tab.sites <- get.table(diffmeth, comparison, type, return.data.frame=TRUE)
# which.diffmeth <- abs(tab.sites$mean.mean.diff)>0.05 & tab.sites$comb.p.adj.fdr<0.01
#
# aa <- annotation(object = rnb.set, type = type)
# annotated.dmrs <- data.frame(aa, tab.sites)
# annotated.dmrs$significant=which.diffmeth
#
# save(list = c("annotated.dmrs", "rnb.set.m"), file="annotated.dmrs.CpGregion.Rdata")
#
# # region analysis
# type="promoters"
# comparison <- get.comparisons(diffmeth)[1]
# rnb.set.m <- meth(rnb.set, type="promoters")
# tab.sites <- get.table(diffmeth, comparison, type, return.data.frame=TRUE)
# which.diffmeth <- abs(tab.sites$mean.mean.diff)>0.05 & tab.sites$comb.p.adj.fdr<0.01
#
# aa <- annotation(object = rnb.set, type = type)
# annotated.dmrs <- data.frame(aa, tab.sites)
# annotated.dmrs$significant=which.diffmeth
#
# save(list = c("annotated.dmrs", "rnb.set.m"), file="annotated.dmrs.promoters.Rdata")
#
#
# region=cbind(as.character(annotated.dmrs$Chromosome), annotated.dmrs$Start, annotated.dmrs$End, rownames(annotated.dmrs))
# region=region[which.diffmeth,]
# region[,4]=gsub("@.*.", "", region[,4])
#
# region=region[!duplicated(region[,4]),]
#
# write.table(region[,4], "significant_CpG.bed", quote=F, sep="\t", row.names = F, col.names = F)