--- a
+++ b/preprocessing/Preprocessing_FIMM_AML_RRBS.R
@@ -0,0 +1,188 @@
+# sbatch
+# https://www.sciencedirect.com/science/article/pii/S0168165617315936
+# RnBeads chosen as it uses limma style statistics suitable if you have replicates.
+
+# BiocManager::install("RnBeads")
+# BiocManager::install("RnBeads.hg38")
+
+library(RnBeads)
+logger.start(fname=NA)
+parallel.setup(8)
+parallel.isEnabled()
+
+# set dirs:
+data.dir <- file.path(getwd(), "input_data")
+analysis.dir=file.path(getwd(), "analysis")
+report.dir <-file.path(getwd(), "report_ttest")
+
+# Set some analysis options
+rnb.options(
+  filtering.sex.chromosomes.removal=T,
+  identifiers.column="SampleID",
+  replicate.id.column="treatment",
+  import.bed.style="bismarkCov",
+  differential.site.test.method = "ttest",
+  differential.enrichment.go=TRUE,
+  differential.enrichment.lola=TRUE,
+  filtering.coverage.threshold=10,
+  assembly="hg38",
+  differential.report.sites=FALSE,
+  filtering.sex.chromosomes.removal = TRUE,
+  import.table.separator="\t"
+)
+
+# add CpGisland shore and shelves to analysis:
+d=data.table::fread("cpgIslandExt.hg38.bed", data.table = F, fill = T)
+
+CpG.expanded <- data.frame(
+  Chromosome = d$V1,
+  Start = as.integer(d$V2-4000),
+  End = as.integer(d$V3+4000),
+  row.names = paste(make.unique(d$V4), "island.shore.shelf", sep="@"), stringsAsFactors = F)
+
+shelf.left <- data.frame(
+  Chromosome = d$V1,
+  Start = as.integer(d$V2-2000),
+  End = as.integer(d$V2),
+  row.names = paste(make.unique(d$V4), "shelf.l", sep="@"), stringsAsFactors = F)
+
+shelf.right <- data.frame(
+  Chromosome = d$V1,
+  Start = as.integer(d$V3),
+  End = as.integer(d$V3+2000),
+  row.names = paste(make.unique(d$V4), "shelf.r", sep="@"), stringsAsFactors = F)
+
+shore.left <- data.frame(
+  Chromosome = d$V1,
+  Start = as.integer(d$V2-4000),
+  End = as.integer(d$V2-2000),
+  row.names = paste(make.unique(d$V4), "shore.l", sep="@"), stringsAsFactors = F)
+
+shore.right <- data.frame(
+  Chromosome = d$V1,
+  Start = as.integer(d$V3+2000),
+  End = as.integer(d$V3+4000),
+  row.names = paste(make.unique(d$V4), "shore.r", sep="@"), stringsAsFactors = F)
+
+cpg <- data.frame(
+  Chromosome = d$V1,
+  Start = as.integer(d$V2),
+  End = as.integer(d$V3),
+  row.names = paste(make.unique(d$V4), "island", sep="@"), stringsAsFactors = F)
+
+
+CpG_shelf_shore=rbind(cpg, shelf.left, shelf.right, shore.left, shore.right)
+CpG_shelf_shore=CpG_shelf_shore[!(CpG_shelf_shore[,2]<0|CpG_shelf_shore[,3]<0),]
+
+txt <- "CpG islands + CpG shelves + CpG shores"
+rnb.set.annotation(type = "CpGregion", regions = CpG_shelf_shore, description = txt, assembly = "hg38")
+
+rnb.save.annotation("CpGregion_annotations.Rdata", "CpGregion", assembly = "hg38")
+
+write.table(cbind(CpG_shelf_shore[,c(1:3)], rownames(CpG_shelf_shore)), "CpG_shelf_shore.bed", quote = F, row.names = F, col.names = F, sep="\t")
+
+
+rnb.set.annotation(type = "CpG.expanded", regions = CpG.expanded, description = txt, assembly = "hg38")
+
+rnb.save.annotation("CpGexpanded_annotations.Rdata", "CpG.expanded", assembly = "hg38")
+
+# CIITA CpG
+CIITA_CpG=CpG_shelf_shore[grep("CpG:41.91|CpG:21.172|CpG:20.179",rownames(CpG_shelf_shore)),]
+rnb.set.annotation(type = "CpGregion_CIITA", regions = CIITA_CpG, description = txt, assembly = "hg38")
+rnb.save.annotation("CpGregion_CIITA_annotations.Rdata", "CpGregion_CIITA", assembly = "hg38")
+
+# options(fftempdir=file.path(getwd(), "temp"))
+# getOption("fftempdir")
+
+fileList=list.files(data.dir, pattern = ".cov", full.names = T)
+sample.id=gsub("_FRB.*.-1a.bismark.cov|M13_","", basename(fileList))
+treatment=gsub("M13_|_1$|_2$|_3$", "", sample.id)
+fileList=list.files(data.dir, pattern = ".cov", full.names = F)
+
+# compare DC treated to DMSO/IFN treated
+DAC_REST=ifelse(grepl("DC", treatment), "DC/DCIFN", "DMSO/IFN")
+
+# CIITA 
+pData = data.frame(
+  files=fileList,
+  SampleID = sample.id,
+  treatment = treatment,
+  row.names = sample.id,
+  DAC_REST,
+  stringsAsFactors = FALSE)
+
+sample.annotation <- file.path(getwd(), "pData.txt")
+
+# write pData
+write.table(pData, sample.annotation, quote = F, sep = "\t", row.names = F, col.names = T)
+
+data.source <- c(data.dir, sample.annotation)
+
+# initialize
+rnb.initialize.reports(report.dir)
+
+result=rnb.run.import(data.source,data.type = "bs.bed.dir", dir.reports=report.dir, init.configuration = !file.exists(file.path(report.dir, "configuration")), close.report = TRUE, show.report = FALSE)
+rnb.set <- result$rnb.set
+
+## Quality Control
+rnb.run.qc(rnb.set, report.dir)
+
+rnb.set <- rnb.run.preprocessing(rnb.set, dir.reports=report.dir)$rnb.set
+
+save.dir <- file.path(report.dir, "analysis")
+save.rnb.set(rnb.set, path=save.dir, archive=TRUE)
+
+rnb.options("differential.variability"=TRUE)
+dm <- rnb.execute.computeDiffMeth(rnb.set,pheno.cols=c("DAC_REST"))
+
+save.rnb.diffmeth(dm, save.dir)
+
+## Data export
+rnb.run.tnt(rnb.set, report.dir)
+
+
+
+# OLD 
+# sample.id=gsub("_FRB.*.-1a.bismark.cov|M13_","", basename(fileList))
+# treatment=gsub("M13_|_1$|_2$|_3$", "", sample.id)
+# fileList=list.files(data.dir, pattern = ".cov", full.names = F)
+# 
+# DAC_DMSO=treatment
+# DAC_DMSO[!DAC_DMSO%in%c("Ctrl_DC", "Ctrl_DMSO")]=NA
+# 
+# IFNG_DMSO=treatment
+# IFNG_DMSO[!IFNG_DMSO%in%c("Ctrl_IFN", "Ctrl_DMSO")]=NA
+# 
+# DAC_IFNG_DMSO=treatment
+# DAC_IFNG_DMSO[!DAC_IFNG_DMSO%in%c("Ctrl_DCIFN", "Ctrl_DMSO")]=NA
+# 
+# # compare DAC + IFNG stimulus when sgCIITA and wtCIITA
+# sgCIITA_DMSO=treatment
+# sgCIITA_DMSO[!sgCIITA_DMSO%in%c("CIITA_DMSO", "Ctrl_DMSO")]=NA
+# 
+# sgCIITA_DAC_DMSO=treatment
+# sgCIITA_DAC_DMSO[!sgCIITA_DAC_DMSO%in%c("CIITA_DC", "Ctrl_DC")]=NA
+# 
+# sgCIITA_IFNG_DMSO=treatment
+# sgCIITA_IFNG_DMSO[!sgCIITA_IFNG_DMSO%in%c("CIITA_IFN", "Ctrl_IFN")]=NA
+# 
+# sgCIITA_DAC_IFNG_DMSO=treatment
+# sgCIITA_DAC_IFNG_DMSO[!sgCIITA_DAC_IFNG_DMSO%in%c("CIITA_DCIFN", "Ctrl_DCIFN")]=NA
+# 
+# # compare DAC + IFNG stimulus when sgTET2 and wtTET2
+# gTET2_DMSO=treatment
+# gTET2_DMSO[!gTET2_DMSO%in%c("TET2_DMSO", "Ctrl_DMSO")]=NA
+# 
+# sgTET2_DAC_DMSO=treatment
+# sgTET2_DAC_DMSO[!sgTET2_DAC_DMSO%in%c("TET2_DC", "Ctrl_DC")]=NA
+# 
+# sgTET2_IFNG_DMSO=treatment
+# sgTET2_IFNG_DMSO[!sgTET2_IFNG_DMSO%in%c("TET2_IFN", "Ctrl_IFN")]=NA
+# 
+# sgTET2_DAC_IFNG_DMSO=treatment
+# sgTET2_DAC_IFNG_DMSO[!sgTET2_DAC_IFNG_DMSO%in%c("TET2_DCIFN", "Ctrl_DCIFN")]=NA
+# 
+# DAC_REST=ifelse(grepl("DC", treatment), "DC", "REST")
+# 
+# DACIFN_DMSOIFN=ifelse(grepl("DCIFN", treatment), "DCIFN", "DMSO_IFN")
+# DACIFN_DMSOIFN[grepl("_DC$", treatment)]=NA