807 lines (599 with data), 30.7 kB
---
title: "deepStats - compareCurves 0.2"
output: html_notebook
---
THIS NOTEBOOK IS NOT UP TO DATE YET
This tool assesses if multiple genomics signals ( ChIP-seq, ATAC-seq... ) are significantly different or
not between conditions ( control, KO1, KO2, etc ). DeepStats compareCurves uses bootstraps and corrected
Wilcoxon Rank-sum tests to do so. The input of this tool corresponds to the output of deepTools
`computeMatrix --outFileNameMatrix`. If multiple region sets have been used in deepTools, one plot and
tab-delimited table will be produced for each set of regions.
## Loading dependencies
Be sure to install all the dependencies. Please check https://github.com/gtrichard/deepStats.
```{r}
suppressMessages( library( 'optparse' ) )
suppressMessages( library( 'boot' ) )
suppressMessages( library( 'ggplot2' ) )
suppressMessages( library( 'cowplot' ) )
suppressMessages( library( 'showtext' ) )
suppressMessages( library( 'tidyverse' ) )
suppressMessages( library( 'dichromat' ) )
suppressMessages( library( 'purrr' ) )
```
## Set working directory
```{r}
setwd("/home/user")
```
## I/O and parameters
```{r}
opt = list()
opt$input = "file.txt"
# DeepTools file obtained from computeMatrix --outFileNameMatrix.
# Alternatively, a .boots file from previous compareCurves runs can be provided
# for replotting purposes and to avoid the bootstraps computation once more.
opt$output = "file"
# Output prefix. Three files will be generated, a .pdf file containing the plot
# and a .boots file containing the bootsraps information ( RDS file ). If a .boots file
# is provided as input, only the plot will be produced as pdf.
opt$comparison = NULL
# When specifying 'regions' or 'scores', force a given comparison. The correct comparison to
# perform is otherwise automatically detected.
opt$scoreLabel = NULL
# Names of the scores.
opt$regionLabel = NULL
# Names of the regions.
opt$bootstraps = 1000
# Number of bootstraps to perform. Default: 1000.
opt$predictionsNumber = NULL
# Number of regions randomly drawn at each bootstrap ( predictions ).
# For more information please see:
# https://www.rdocumentation.org/packages/boot/versions/1.3-20/topics/boot
opt$bootstrapsCI = 0.95
# Confidence intervals ( CI ) threshold in percent. Default: 0.95.
opt$CPU = 4
# Number of CPU to use. Default: 4.
opt$wilcoxStartBin = NULL
# The start bin number used to restrict the corrected
# wilcoxon test to a given portion of the plot. An integer must be provided.
# Default: 1, the first bin.
opt$wilcoxEndBin = NULL
# The end bin number used to restrict the corrected
# wilcoxon test to a given portion of the plot. An integer must be provided.
# Default: the last bin.
opt$wilcoxThreshold = 0.05
# Threshold used to define significant bins on the Wilcoxon rank-sum
# test plot. Default: 0.05
opt$signalName = "Genomic signal"
# Name given to the signal, for instance 'H3K4me3 log2input'.
# Default: 'Genomic signal'
opt$firstRegionName = "TSS"
# Name of the central or left region. Default: 'TSS'.
opt$secondRegionName = "TES"
# Name of the right region, only used when `deeptools computeMatrix`
# ran in scaled-regions mode. Default: 'TES'.
opt$bootPlotColors = NULL
# Change the bootstraps plot color palette to a user-provided one. The file must
# be tab-delimited and contain for each line two HTML color codes ( #3366CC #769EF2 ).
# The first column corresponds to the mean color, the second column corresponds to the
# color of the bootstrap confidence interval shadowed area. The default color scale
# contains 6 colors that are color blind friendly using the dichromat R package.
opt$bootPlotWidth = 5.2
# How large the bootstraps plot should be. Default: 5.2
opt$bootPlotHeight = 3.7
# How tall the bootstraps plot should be. Default: 3.7
opt$wilcoxPlotWidth = 4.6
# How large the Wilcoxon rank-sum test plot should be. Default: 4.6
opt$wilcoxPlotHeight = 4.6
# How tall the Wilcoxon rank-sum test plot should be. Default: 4.6
opt$font = NULL
# Font used for plotting, given a TTF file. Default is usually Helvetica.
if ( is.null( opt$input ) ) {
print_help( opt_parser )
stop( "An input file must be supplied", call.=FALSE )
}
if ( is.null( opt$output ) ) {
print_help( opt_parser )
stop( "An output prefix must be supplied", call.=FALSE )
}
substrRight <- function( x, n ) {
substr( x, nchar( x )-n+1, nchar( x ) )
}
if ( substrRight( opt$input,6 ) == ".boots" ) {
mode="boots"
cat( as.character( Sys.time() ),".boots file provided as input, skipping statistical analysis.\n" )
}else{mode="deeptools"}
```
## compareCurves analysis
```{r}
#### SET WORKING DIR AND ABSOLUTE PATHS ####
opt$input <- normalizePath( opt$input )
opt$output <- paste( normalizePath( dirname( opt$output ) ),"/",basename( opt$output ),sep="" )
setwd( normalizePath( dirname( opt$output ) ) )
#### METADATA EXTRACTION ####
if ( mode == "deeptools" ) {
master_list <- list()
metadata <- list()
cat( as.character( Sys.time() ),"Extracting metadata from file:\n ", opt$input )
#---
regions_data <- readLines( opt$input,n = 1 )
regions_data <-strsplit( x = regions_data,split = "\t" )
regions_data<-lapply( regions_data,function( x ) strsplit( x,split=":" ) )
regions_data[[1]][[1]][1] <- gsub( pattern = "#",replacement = "", x = regions_data[[1]][[1]][1] )
even_indexes<-seq( 2,length( regions_data[[1]] )*2,2 )
odd_indexes<-seq( 1,length( regions_data[[1]] )*2,2 )
group_names<-as.character( as.data.frame( unlist( regions_data[[1]] ) )[odd_indexes,] )
group_length<-as.numeric( as.character( as.data.frame( unlist( regions_data[[1]] ) )[even_indexes,] ) )
metadata[[1]]<-group_names
metadata[[2]]<-group_length
#---
xaxis_data <- readLines( opt$input,n = 2 )[2]
xaxis_data <-strsplit( x = xaxis_data,split = "\t" )
xaxis_data <-lapply( xaxis_data,function( x ) strsplit( x,split=":" ) )
xaxis_data[[1]][[1]][1] <- gsub( pattern = "#",replacement = "", x = xaxis_data[[1]][[1]][1] )
even_indexes<-seq( 2,length( xaxis_data[[1]] )*2,2 )
odd_indexes<-seq( 1,length( xaxis_data[[1]] )*2,2 )
xaxis_names <- as.character( as.data.frame( unlist( xaxis_data[[1]] ) )[odd_indexes,] )
xaxis_values <- as.numeric( as.character( as.data.frame( unlist( xaxis_data[[1]] ) )[even_indexes,] ) )
metadata[[3]]<-xaxis_names
metadata[[4]]<-xaxis_values
#---
score_data <- readLines( opt$input,n = 3 )[3]
score_data <- unlist( strsplit( score_data,'\t' ) )
score_data<-score_data[-grep( pattern =":",x = score_data )]
metadata[["scores"]]<-unique( score_data )
names( metadata )<-c( "group_names","group_length","xaxis_names","xaxis_values","scores" )
cat( " ... Done!\n" )
}
#### SETUP INPUT FILES IN CASE A BOOTS FILE IS PROVIDED ####
if ( mode == "boots" ) {
cat( as.character( Sys.time() ),"Extracting data from file:\n ", opt$input )
master_list <- readRDS( file=opt$input )
metadata <- master_list$metadata
master_list$metadata <- NULL
cat( " ... Done!\n" )
}
### CHANGE LABELS IF NEEDED
if ( is.null(opt$scoreLabels)==FALSE ) {
metadata$scores <- unlist(str_split(opt$scoreLabels,";"))
}
if ( is.null(opt$regionLabels)==FALSE ) {
metadata$group_names <- unlist(str_split(opt$regionLabels,";"))
}
#### LINEBREAKS FOR LONG LEGENDS ####
for (i in 1:length(metadata$group_names)){
if(nchar(metadata$group_names[[i]]) >=21 ){
metadata$group_names[[i]] <- paste(strwrap(metadata$group_names[[i]],width = 20), collapse="\n")
}
}
for (i in 1:length(metadata$scores)){
if(nchar(metadata$scores[[i]]) >=21 ){
metadata$scores[[i]] <- paste(strwrap(metadata$scores[[i]],width = 20), collapse="\n")
}
}
#### GLOBAL FLAG DETERMINATION FOR COMPARISON ####
if(is.null(opt$comparison)){
if( length( metadata$scores )>1 & length( metadata$group_names )==1 ) {
cat( as.character( Sys.time() ),"Multiple scores detected but only one set of region found: comparing scores.\n" )
global_flag="compare_scores"
}
if( length( metadata$scores )==1 & length( metadata$group_names )>1 ) {
cat( as.character( Sys.time() ),"Multiple set of regions detected but only one score found: comparing regions.\n" )
global_flag="compare_regions"
}
if( length( metadata$scores )>1 & length( metadata$group_names )>1 ) {
cat( as.character( Sys.time() ),"Multiple scores and set of regions detected: comparing regions per score.\n" )
global_flag="compare_scores"
}
}else{
if(opt$comparison=="score" || opt$comparison=="scores" || opt$comparison=="Score" || opt$comparison=="Scores"){
cat( as.character( Sys.time() ),"--comparison is set: comparing scores.\n" )
global_flag="compare_scores"
}
if(opt$comparison=="region" || opt$comparison=="regions" || opt$comparison=="Region" || opt$comparison=="Regions"){
cat( as.character( Sys.time() ),"--comparison is set: comparing regions.\n" )
global_flag="compare_regions"
}
}
#### BOOSTRAPPING ####
if ( mode=="deeptools" ) {
bootstraps_to_plot<-list()
dat <- read.table( file=opt$input, header=F, sep=( "\t" ),skip=3 ) #loading the input file content
nbbin <- as.numeric( ncol( dat ) / length( metadata$scores ) ) # number of bins to analyze ( a bin = a column in the file )
scores_length <- rep( x=nbbin,times=length( metadata$scores ) )
line_index_end <- c( cumsum( metadata$group_length ) )
line_index_start <- c( 1,cumsum( metadata$group_length )+1 )
col_index_end <- c( cumsum( scores_length ) )
col_index_start <- c( 1,cumsum( scores_length )+1 )
cat( as.character( Sys.time() ), "Boostraps starting with", opt$CPU,"cores, for each group:\n" )
cat( " Number of bootstraps to perform =", opt$bootstraps,"\n" )
cat( " Number of bins to analyze =", nbbin,"\n" )
for( score in 1:length( metadata$scores ) ) {
bootstrap_dfs<-list()
for ( group in 1:length( metadata$group_names ) ) {
cat( as.character( Sys.time() ), "Starting to boostrap values for the following group:\n" )
cat( " Genomic score used:",metadata$scores[[score]],"\n" )
dat_group <- dat[line_index_start[group]:line_index_end[group],]
dat_group <- dat_group[,col_index_start[score]:col_index_end[score]]
nbgene <- as.numeric( nrow( dat_group ) ) # number of genes/regions in the dataset
label <- metadata$group_names[[group]]
output.name <- opt$output
cat( " Region set used:",label,"\n" )
cat( " Number of regions to analyze:", nbgene )
sample.colmeans <- function( x, d ) {
return( apply( x[d,], 2, mean, na.rm=T ) )
}
b = boot( dat_group, sample.colmeans, R=opt$bootstraps, ncpus=opt$CPU ,parallel = "multicore" )
mini <- vector( length = nbbin )
maxi <- vector( length = nbbin )
ci_min <- ( 1-opt$bootstrapsCI )/2
ci_max <- opt$bootstrapsCI+ci_min
for ( i in 1:nbbin ) {
mini[i] <- quantile( b$t[,i], ci_min, na.rm=T ) # get CI
maxi[i] <- quantile( b$t[,i], ci_max, na.rm=T )
}
cat( "... Done!\n" )
region_name<-rep( x=label,nbbin )
score_name<-rep( x=metadata$scores[[score]],nbbin )
bin<-seq( from=1, to=nbbin, by=1 )
bootstraps_to_plot<-cbind( region_name,score_name,bin,b$t0,mini,maxi )
bootstraps_to_plot<-as.data.frame( unname( bootstraps_to_plot ) )
colnames( bootstraps_to_plot )<-c("Region","Score","Bin","Mean","Min","Max" )
for ( column in 3:ncol( bootstraps_to_plot ) ) {
bootstraps_to_plot[,column]<-as.numeric( as.character( bootstraps_to_plot[,column] ) )
}
bootstrap_dfs[[group]] <- bootstraps_to_plot
}
names( bootstrap_dfs )<- metadata$group_names
#bootstraps_to_plot <- do.call( rbind,bootstrap_dfs )
master_list[[metadata$scores[score]]] <- list()
master_list[[score]][["bootstraps"]] <- bootstrap_dfs
}
}
#### WILCOXON TESTS CORRECTED ####
if ( mode=="deeptools" ) {
#currently not implemented but it doesn't hurt to keep.
if ( is.null( opt$wilcoxStartBin )==TRUE && is.null( opt$wilcoxEndBin )==FALSE ) {
print_help( opt_parser )
stop( "Both --wilcoxStartBin and --wilcoxEndBin must be given.", call.=FALSE )
}
if ( is.null( opt$wilcoxStartBin )==FALSE && is.null( opt$wilcoxEndBin )==TRUE ) {
print_help( opt_parser )
stop( "Both --wilcoxStartBin and --wilcoxEndBin must be given.", call.=FALSE )
}
if ( is.null( opt$wilcoxStartBin )==FALSE && is.null( opt$wilcoxEndBin )==FALSE ) {
if ( opt$wilcoxStartBin > opt$wilcoxEndBin ) {
print_help( opt_parser )
stop( "--wilcoxStartBin must be superior to --wilcoxEndBin.", call.=FALSE )
}
if ( opt$wilcoxStartBin > opt$wilcoxEndBin ) {
cat( as.character( Sys.time() ), "Starting Wilcoxon rank-sum test on the following bins: from",opt$wilcoxStartBin,
"to",opt$wilcoxEndBin, "\n" )
wilcox_df <- dat[,opt$wilcoxStartBin:opt$wilcoxEndBin]
#wilcox_dfs <- lapply( bootstrap_dfs, function( x ) {x[which( x$Bin >= opt$wilcoxStartBin & x$Bin <= opt$wilcoxEndBin ),]} )
}
}else{
cat( as.character( Sys.time() ), "Starting Wilcoxon rank-sum test.\n" )
wilcox_df <- dat
}
if (global_flag=="compare_regions") {
for( score in 1:length( metadata$scores ) ) {
wilcox_groups_indexes <- paste( line_index_start[1:length( line_index_start )-1],line_index_end,sep=":" )
wilcox_comparisons <- as.data.frame( expand.grid( wilcox_groups_indexes,wilcox_groups_indexes ) )
wilcox_score_indexes <- paste( col_index_start[1:length( col_index_start )-1],col_index_end,sep=":" ) #this is type dependent
wilcox_matrix <- matrix( nrow = length( wilcox_groups_indexes ), ncol = length( wilcox_groups_indexes ) )
row.names( wilcox_matrix )<-wilcox_groups_indexes
colnames( wilcox_matrix )<-wilcox_groups_indexes
wilcox_padj_vectors <- list()
for ( comps in ( 1:nrow( wilcox_comparisons ) ) ) {
group_1_name <- as.character( wilcox_comparisons[comps,1] )
group_2_name <- as.character( wilcox_comparisons[comps,2] )
group_1_index <- unlist( strsplit( group_1_name,split = ":" ) )
group_2_index <- unlist( strsplit( group_2_name,split = ":" ) )
score_index <- unlist( strsplit( as.character(wilcox_score_indexes[score]),split = ":" ) )
group_1_data <- wilcox_df[group_1_index[1]:group_1_index[2],score_index[1]:score_index[2]]
group_2_data <- wilcox_df[group_2_index[1]:group_2_index[2],score_index[1]:score_index[2]]
wilcox_res <- vector()
for ( bins in ( 1:ncol( group_1_data ) ) ) {
wilcox_res[bins] <- wilcox.test( group_1_data[,bins],group_2_data[,bins] )$p.value
}
wilcox_adjust <- p.adjust( wilcox_res,method = "bonferroni" )
wilcox_padj_vectors[[ paste( group_1_name,group_2_name,sep="_" )]] <- wilcox_adjust
}
max_val=max( as.vector( unlist( lapply( wilcox_padj_vectors, function( x ) max( -log10( x ) ) ) ) ) )
wilcox_plot_list <- list()
wilcox_to_plot <- list()
group_names_mapping <- as.data.frame( unique( wilcox_comparisons[,1] ) )
group_names_mapping["names"] <- metadata$group_names
colnames( group_names_mapping )<-c( "indexes","names" )
for ( i in 1:length( wilcox_padj_vectors ) ) {
wilcox_to_plot[[i]]<-as.data.frame( -log10( wilcox_padj_vectors[[i]] ) )
wilcox_to_plot[[i]]["xaxis"]<-as.numeric( as.character( rownames( wilcox_to_plot[[i]] ) ) )
colnames( wilcox_to_plot[[i]] )<-c( "vals","xaxis" )
wilcox_plot_group1<-as.data.frame( unlist( strsplit( names( wilcox_padj_vectors )[i],split = "_" ) )[1] )
colnames( wilcox_plot_group1 )<-"indexes"
wilcox_plot_group1<- merge( wilcox_plot_group1,group_names_mapping,by="indexes" )[1,2]
wilcox_plot_group2<-as.data.frame( unlist( strsplit( names( wilcox_padj_vectors )[i],split = "_" ) )[2] )
colnames( wilcox_plot_group2 )<-"indexes"
wilcox_plot_group2<- merge( wilcox_plot_group2,group_names_mapping,by="indexes" )[1,2]
wilcox_to_plot[[i]]["Group1"]<-wilcox_plot_group1
wilcox_to_plot[[i]]["Group2"]<-wilcox_plot_group2
wilcox_to_plot[[i]]["Constant"]<-metadata$scores[[score]]
}
master_list[[score]][["wilcoxon"]] <- wilcox_to_plot
}
}
if (global_flag=="compare_scores") {
store_wilcox_scores<-list()
for( region in 1:length( metadata$group_names ) ) {
wilcox_score_indexes <- paste( col_index_start[1:length( col_index_start )-1],col_index_end,sep=":" ) #this is type dependent
wilcox_comparisons <- as.data.frame( expand.grid( wilcox_score_indexes,wilcox_score_indexes ) ) #this is type dependent
wilcox_groups_indexes <- paste( line_index_start[1:length( line_index_start )-1],line_index_end,sep=":" )
wilcox_matrix <- matrix( nrow = length( wilcox_score_indexes ), ncol = length( wilcox_score_indexes ) )
row.names( wilcox_matrix )<-wilcox_score_indexes
colnames( wilcox_matrix )<-wilcox_score_indexes
wilcox_padj_vectors <- list()
for ( comps in ( 1:nrow( wilcox_comparisons ) ) ) {
group_1_name <- as.character( wilcox_comparisons[comps,1] )
group_2_name <- as.character( wilcox_comparisons[comps,2] )
group_1_index <- unlist( strsplit( group_1_name,split = ":" ) )
group_2_index <- unlist( strsplit( group_2_name,split = ":" ) )
regions_index <- unlist( strsplit( wilcox_groups_indexes[region],split = ":" ) )
group_1_data <- wilcox_df[regions_index[1]:regions_index[2],group_1_index[1]:group_1_index[2]] #this is type dependent
group_2_data <- wilcox_df[regions_index[1]:regions_index[2],group_2_index[1]:group_2_index[2]] #this is type dependent
wilcox_res <- vector()
for ( bins in ( 1:ncol( group_1_data ) ) ) {
wilcox_res[bins] <- wilcox.test( group_1_data[,bins],group_2_data[,bins] )$p.value
}
wilcox_adjust <- p.adjust( wilcox_res,method = "bonferroni" )
wilcox_padj_vectors[[ paste( group_1_name,group_2_name,sep="_" )]] <- wilcox_adjust
}
max_val=max( as.vector( unlist( lapply( wilcox_padj_vectors, function( x ) max( -log10( x ) ) ) ) ) )
wilcox_plot_list <- list()
wilcox_to_plot <- list()
group_names_mapping <- as.data.frame( unique( wilcox_comparisons[,1] ) )
group_names_mapping["names"] <- metadata$scores #this is type dependent
colnames( group_names_mapping )<-c( "indexes","names" ) #this is type dependent
for ( i in 1:length( wilcox_padj_vectors ) ) {
wilcox_to_plot[[i]]<-as.data.frame( -log10( wilcox_padj_vectors[[i]] ) )
wilcox_to_plot[[i]]["xaxis"]<-as.numeric( as.character( rownames( wilcox_to_plot[[i]] ) ) )
colnames( wilcox_to_plot[[i]] )<-c( "vals","xaxis" )
wilcox_plot_group1<-as.data.frame( unlist( strsplit( names( wilcox_padj_vectors )[i],split = "_" ) )[1] )
colnames( wilcox_plot_group1 )<-"indexes"
wilcox_plot_group1<- merge( wilcox_plot_group1,group_names_mapping,by="indexes" )[1,2]
wilcox_plot_group2<-as.data.frame( unlist( strsplit( names( wilcox_padj_vectors )[i],split = "_" ) )[2] )
colnames( wilcox_plot_group2 )<-"indexes"
wilcox_plot_group2<- merge( wilcox_plot_group2,group_names_mapping,by="indexes" )[1,2]
wilcox_to_plot[[i]]["Group1"]<-wilcox_plot_group1
wilcox_to_plot[[i]]["Group2"]<-wilcox_plot_group2
wilcox_to_plot[[i]]["Constant"]<-metadata$group_names[[region]]
}
store_wilcox_scores[[region]]<- do.call( "rbind",wilcox_to_plot )
}
for( score in 1:length( metadata$scores ) ) {
master_list[[score]][["wilcoxon"]] <- store_wilcox_scores
}
}
cat( as.character( Sys.time() ), "Wilcoxon test finished.\n" )
}
```
## Exportation of the data for future runs
```{r}
if ( mode=="deeptools" ) {
cat( as.character( Sys.time() ),"Exporting data to the following RDS object:\n ",paste( opt$output,".dscc",sep="" ) )
to_export <- master_list
to_export[["metadata"]]<-metadata
saveRDS( to_export,file = paste( opt$output,".dscc",sep="" ) )
rm(to_export)
cat( " ... Done!\n" )
}
```
## X axis creation for plotting
```{r}
cat( as.character( Sys.time() )," Generating the X axis for plots.",sep="" )
xaxis<-list()
linebreaker <- function( x,... ) {
gsub( '\\s','\n',x )
}
if ( metadata$xaxis_values[3]==0 ) {
#Centered region
#Start and end definition
xaxis["xaxis_start"][1] <- as.numeric( 1 ) # position
xaxis$xaxis_start[2] <- paste( ( -1 * metadata$xaxis_values[2] / 1000 ),"kb",sep = "" )
xaxis["xaxis_end"][1] <- as.numeric( max( bootstraps_to_plot$Bin ) ) # position
xaxis$xaxis_end[2] <- paste( ( metadata$xaxis_values[1] / 1000 ),"kb",sep = "" )
xaxis["first_region"][1] <- round( as.integer( max( bootstraps_to_plot$Bin ) ) / 2 )
xaxis$first_region[2] <- linebreaker( opt$firstRegionName )
xaxis$second_region <- NULL
}else{
#Scaled region
xaxis["xaxis_start"][1] <- 0 # position
xaxis$xaxis_start[2] <- paste( ( -1 * metadata$xaxis_values[2] / 1000 ),"kb",sep = "" )
xaxis["xaxis_end"][1] <- as.numeric( max( bootstraps_to_plot$Bin ) ) # position
xaxis$xaxis_end[2] <- paste( ( metadata$xaxis_values[1] / 1000 ),"kb",sep = "" )
xaxis["first_region"] <- metadata$xaxis_values[2] / metadata$xaxis_values[4]
xaxis$first_region[2] <- linebreaker( opt$firstRegionName )
xaxis["second_region"] <- as.numeric(xaxis$xaxis_end[1]) - (metadata$xaxis_values[1]) / metadata$xaxis_values[4]
xaxis$second_region[2] <- linebreaker( opt$secondRegionName )
}
if ( metadata$xaxis_values[5]>0 ) {
xaxis["unscaled5"][1] <- (metadata$xaxis_values[2] + metadata$xaxis_values[5]) / metadata$xaxis_values[4]
xaxis$unscaled5[2] <- paste( ( -1 * metadata$xaxis_values[5] / 1000 ),"kb",sep = "" )
}
if ( metadata$xaxis_values[6]>0 ) {
xaxis["unscaled3"][1] <- as.numeric(xaxis$xaxis_end[1]) - (metadata$xaxis_values[1] + metadata$xaxis_values[6]) / metadata$xaxis_values[4]
xaxis$unscaled3[2] <- paste( ( -1 * metadata$xaxis_values[6] / 1000 ),"kb",sep = "" )
}
xaxis["breaks"]<-as.data.frame( as.numeric( c( xaxis$xaxis_start[1],
xaxis$unscaled5[1],
xaxis$first_region[1],
xaxis$second_region[1],
xaxis$unscaled3[1],
xaxis$xaxis_end[1] ) ) )
xaxis["names"]<-as.data.frame( as.character( c( xaxis$xaxis_start[2],
xaxis$unscaled5[2],
xaxis$first_region[2],
xaxis$second_region[2],
xaxis$unscaled3[2],
xaxis$xaxis_end[2] ) ) )
cat( " ... Done!\n" )
```
## Bootstraps plotting
```{r}
cat( as.character( Sys.time() ), "Starting Bootstraps plotting to the following file:\n ",paste( opt$output,"_bootstraps.pdf\n",sep="" ) )
pdf( file = paste( opt$output,"_bootstraps.pdf",sep="" ),width = opt$bootPlotWidth,height = opt$bootPlotHeight )
color_palette<-c( "#000000",
colorschemes$Categorical.12[[12]],
colorschemes$Categorical.12[[8]],
colorschemes$Categorical.12[[2]],
colorschemes$Categorical.12[[10]],
colorschemes$Categorical.12[[6]] )
fill_palette<-c( "#919191",
colorschemes$Categorical.12[[11]],
colorschemes$Categorical.12[[7]],
colorschemes$Categorical.12[[1]],
colorschemes$Categorical.12[[9]],
colorschemes$Categorical.12[[5]] )
boot_plot_data <- list()
for (group in 1:length(metadata$group_names)){ # One plot per region
data_frame_storage <- list()
for (score in 1:length(metadata$scores)) {
score_name <- metadata$scores[score]
group_name <- metadata$group_names[group]
data_frame_storage[[score]] <- master_list[[score_name]]$bootstraps[[group_name]]
}
bootstraps_to_plot <- do.call( rbind,data_frame_storage)
boot_plot_data[[group]]<-bootstraps_to_plot
}
bootstraps_to_plot<-do.call("rbind", boot_plot_data)
rm(boot_plot_data)
opt$bootPlotShareY=FALSE
make_boot_plot<-function(x,type){
if(is.null(opt$bootPlotShareY)){
ratio <- with(x, diff(range(x$Bin))*opt$bootPlotRatio/diff(c(min(x$Min), max(x$Max))))
}else{
ratio <- with(bootstraps_to_plot, diff(range(bootstraps_to_plot$Bin))*opt$bootPlotRatio /
diff(c(min(bootstraps_to_plot$Min), max(bootstraps_to_plot$Max) )))
}
ggplot(x,aes( x=x$Bin, color=x[,type], fill=x[,type] ))+
geom_ribbon( aes( ymin = x$Min, ymax = x$Max ),alpha=0.5,linetype=0 )+
geom_line( aes( y=x$Mean ) )+
scale_color_manual( legend_title, values = color_palette )+
scale_fill_manual( legend_title, values = fill_palette )+
scale_x_continuous( NULL , breaks=xaxis$breaks, labels=xaxis$names )+
{if(is.null(opt$bootPlotShareY)==FALSE) ylim(min(bootstraps_to_plot$Min),max(bootstraps_to_plot$Max))}+
ylab( opt$signalName )+
panel_border( size=1, remove=FALSE )+
coord_fixed(ratio=ratio)+
theme( axis.text.y = element_text( size = 12, color = "#383838" ),
axis.text.x = element_text( size = 12, color = "#383838" ),
axis.title.x = element_text( size = 13 ),
plot.title = element_text( size = 13, face = "plain" ),
legend.title = element_text( size = 12, color = "#383838" ),
line = element_line( size=1 ),
strip.placement = "outside",
strip.background = element_blank(),
plot.background = element_rect( colour = "black" ),
panel.border = element_rect( colour = "black" ),
axis.line = element_blank(),
panel.spacing = unit( 1, "line" ) )
}
boot_plot_list <- list()
if(global_flag == "compare_regions") {
legend_title="Regions"
for (score in 1:length(unique(bootstraps_to_plot$Score))){
data <- bootstraps_to_plot[which(bootstraps_to_plot$Score==unique(bootstraps_to_plot$Score)[score]),]
boot_plot_list[[score]] <- make_boot_plot(data,"Region") + ggtitle(as.character(unique(data$Score)))
if ( is.null( opt$font )==FALSE ) {
font_add( family = "plot_font", regular = opt$font )
showtext_auto()
boot_plot_list[[score]] <- boot_plot_list[[score]] + theme( text = element_text( family = "plot_font" ) )
}
if( is.null( opt$bootPlotColors )==FALSE ) {
color_pal<-read.table( opt$bootPlotColors,header = F,sep = "\t" )
boot_plot_list[[score]] <- boot_plot_list[[score]] +
scale_color_manual( "",values = color_pal[,1] ) +
scale_fill_manual( "",values = color_pal[,2] )
}
}
}
if(global_flag == "compare_scores") {
legend_title="Scores"
for (region in 1:length(unique(bootstraps_to_plot$Region))){
data <- bootstraps_to_plot[which(bootstraps_to_plot$Region==unique(bootstraps_to_plot$Region)[region]),]
boot_plot_list[[region]] <- make_boot_plot(data,"Score") + ggtitle(as.character(unique(data$Region)))
if ( is.null( opt$font )==FALSE ) {
font_add( family = "plot_font", regular = opt$font )
showtext_auto()
boot_plot_list[[score]] <- boot_plot_list[[score]] + theme( text = element_text( family = "plot_font" ) )
}
if( is.null( opt$bootPlotColors )==FALSE ) {
color_pal<-read.table( opt$bootPlotColors,header = F,sep = "\t" )
boot_plot_list[[score]] <- boot_plot_list[[score]] +
scale_color_manual( "",values = color_pal[,1] ) +
scale_fill_manual( "",values = color_pal[,2] )
}
}
}
boot_plot_list
invisible( dev.off() )
cat( " ... Done!\n" )
boot_plot_list
```
## Wilcoxon rank-sum test plotting
```{r}
cat( as.character( Sys.time() ), "Starting Wilcoxon corrected tests plotting to the following file:\n ",paste( opt$output,"_wilcoxon.pdf\n",sep="" ) )
pdf( file = paste( opt$output,"_wilcoxon.pdf",sep="" ),width = opt$wilcoxPlotWidth,height = opt$wilcoxPlotHeight )
make_wilcox_plot<-function(x){
rec_start<-as.numeric( xaxis$xaxis_start[[1]] )
rec_end<-as.numeric( xaxis$xaxis_end[[1]] )
full <- do.call(rbind,wilcox_to_plot)
ratio <- with(x, diff(range(full$xaxis))/diff(range(full$vals)))
ggplot( x,aes( x=x$xaxis,y=x$vals ) )+
geom_rect( aes( xmin = rec_start, xmax = rec_end, ymin = -0.1, ymax =-log10( opt$wilcoxThreshold ) ), fill = "#CECECE" ) +
geom_line( color="#ff6100" )+
facet_grid( Group1 ~ Group2 )+
coord_fixed(ratio=ratio)+
scale_x_continuous( "Genomic position",breaks=xaxis$breaks,labels=xaxis$names )+
ylim(-0.1,max(full$vals))+
ylab( "-log10( Bonferroni corrected \n Wilcoxon rank-sum p-value )" )+
panel_border( size=1,remove=FALSE )+
theme( axis.text = element_text( size=11 ),
axis.text.y = element_text( size=10 ),
axis.text.x = element_text( size=10,angle=50,hjust=1,vjust=1 ),
plot.title = element_text( size = 13, face = "plain" ),
line = element_line( size=1 ),
strip.placement = "outside",
strip.background = element_blank(),
plot.background = element_rect( colour = "black" ),
panel.border = element_rect( colour = "black" ),
axis.line = element_blank(),
panel.spacing = unit( 1,"line" ) )
}
wilcox_plot_list <- list()
if(global_flag == "compare_scores") {
wilcox_to_plot <- master_list[[1]]$wilcoxon
for (i in 1:length(wilcox_to_plot)) {
wilcox_plot_list[[i]] <- make_wilcox_plot(wilcox_to_plot[[i]]) + ggtitle(unique(wilcox_to_plot[[i]]$Constant))
if ( is.null( opt$font )==FALSE ) {
font_add( family = "plot_font", regular = opt$font )
showtext_auto()
wilcox_plot_list[[i]] <- wilcox_plot_list[[i]] + theme( text = element_text( family = "plot_font" ) )
}
}
}
if(global_flag == "compare_regions") {
for (score in 1:length(master_list)){
wilcox_to_plot<-do.call(rbind,master_list[[score]]$wilcoxon)
wilcox_plot_list[[score]] <- make_wilcox_plot(wilcox_to_plot) + ggtitle(unique(wilcox_to_plot$Constant))
if ( is.null( opt$font )==FALSE ) {
font_add( family = "plot_font", regular = opt$font )
showtext_auto()
wilcox_plot_list[[score]] <- wilcox_plot_list[[score]] + theme( text = element_text( family = "plot_font" ) )
}
}
}
wilcox_plot_list
invisible( dev.off() )
cat( " ... Done!\n" )
wilcox_plot_list
```