788 lines (584 with data), 34.2 kB
#!/usr/bin/env Rscript
# BOOTSTRAPS ON DEEPTOOLS COMPUTE MATRIX OUTPUT AND PLOT
suppressMessages( library( 'argparse' ) )
suppressMessages( library( 'boot' ) )
suppressMessages( library( 'ggplot2' ) )
suppressMessages( library( 'cowplot' ) )
suppressMessages( library( 'showtext' ) )
suppressMessages( library( 'tidyverse' ) )
suppressMessages( library( 'dichromat' ) )
suppressMessages( library( 'purrr' ) )
suppressMessages( library( 'vroom' ) )
options( show.error.locations = TRUE )
#### ARGUMENTS ####
parser <- ArgumentParser(description= "dsCompareCurves assesses if multiple genomics signals ( ChIP-seq, ATAC-seq... ) are significantly different or not between conditions ( control, KO1, KO2, etc ). `dsCompareCurves` uses bootstraps and corrected Wilcoxon Rank-sum tests to do so. The input of this tool corresponds to the output of deepTools `computeMatrix --outFileNameMatrix`. If multiple region sets have been used in deepTools, one plot and tab-delimited table will be produced for each set of regions.",
usage = "dsCompareCurves --input file.txt --output results")
parser$add_argument("--input","-i", type="character", default=NULL,
help="DeepTools file obtained from computeMatrix --outFileNameMatrix. Alternatively, a .dscc file from previous dsCompareCurves runs can be provided for replotting purposes and to avoid the bootstraps computation once more.", metavar="character" )
parser$add_argument("--output", "-o", type="character", default=NULL,
help="Output prefix. Three files will be generated, a .pdf file containing the plot and a .dscc file containing the bootsraps information ( RDS file ). If a .dscc file is provided as input, only the plot will be produced as pdf.", metavar="character" )
parser$add_argument("--comparison", "-c", type="character", default=NULL,
help="When specifying 'regions' or 'scores', force a given comparison. The correct comparison to perform is otherwise automatically detected.", metavar="character" )
# nargs = '+' should be used at some point, bug changing this would break galaxy and the documentation and would require "" for each label.
parser$add_argument("--scoreLabels", type="character", default=NULL,
help="Names of the scores to be displayed on the plots. It must be provided as text seperated by semi-colons, i.e. 'Score A;Score B;Score C'.", metavar="character" )
parser$add_argument("--regionLabels", type="character", default=NULL,
help="Names of the regions to be displayed on the plots. It must be provided as text seperated by semi-colons, i.e. 'Regions A; Regions B; Regions C'.", metavar="character" )
parser$add_argument("--signalName", type="character", default="Genomic signal",
help="Name given to the signal, for instance 'H3K4me3 log2input'. Default: 'Genomic signal'", metavar="character" )
parser$add_argument("--bootstraps", "-b", type="integer", default=1000,
help="Number of bootstraps to perform. Default: 1000.", metavar="integer" )
parser$add_argument("--bootstrapsCI", type="double", default=0.95,
help="Confidence intervals (CI) threshold for bootstraps. Default: 0.95.", metavar="numeric" )
parser$add_argument("--CPU", "-p", type="integer", default=4,
help="Number of CPU to use. Default: 4.", metavar="integer" )
# parser$add_argument("--wilcoxStartBin", type="numeric", default=NULL,
# help="The start bin number used to restrict the corrected
# wilcoxon test to a given portion of the plot. An integer must be provided.
# Default: first bin.", metavar="numeric" )
# parser$add_argument("--wilcoxEndBin", type="numeric", default=NULL,
# help="The end bin number used to restrict the corrected
# wilcoxon test to a given portion of the plot. An integer must be provided.
# Default: last bin.", metavar="numeric" )
parser$add_argument("--wilcoxThreshold", type="double", default=0.05,
help="Threshold used to define significant bins on the Wilcoxon rank-sum test plot. Default: 0.05", metavar="numeric" )
parser$add_argument("--firstRegionName", type="character", default="TSS",
help="Name of the central or left region. Default: TSS", metavar="character" )
parser$add_argument("--secondRegionName", type="character", default="TES",
help="Name of the right region, only used when deeptools computeMatrix ran in scaled-regions mode. Default: TES", metavar="character" )
parser$add_argument("--bootPlotShareY", type="character", default="TRUE",
help="Given TRUE or FALSE, defines if the bootstraps plots should share the same scale on the y axis or not. Default: TRUE", metavar="character" )
parser$add_argument("--bootPlotColors", type="character", default=NULL,
help="Change the bootstraps plot color palette to a user-provided one. The file must be tab-delimited and contain for each line two HTML color codes ( #3366CC #769EF2 ). The first column corresponds to the mean color, the second column corresponds to the color of the bootstrap confidence interval shadowed area. The default color scale contains 6 colors that are color blind friendly using the dichromat R package.", metavar="character" )
parser$add_argument("--bootPlotRatio", type="double", default=0.85,
help="Changes the aspect ratio of the plot. A value < 1 results in a wide plot, a value > 1 results in a narrow plot. Default: 0.85.", metavar="numeric" )
parser$add_argument("--bootPlotWidth", type="double", default=5.2,
help="How large the bootstraps plot should be. Default: 5.2", metavar="numeric" )
parser$add_argument("--bootPlotHeight", type="double", default=3.7,
help="How tall the bootstraps plot should be. Default: 3.7", metavar="numeric" )
parser$add_argument("--wilcoxPlotWidth", type="double", default=4.6,
help="How large the Wilcoxon rank-sum test plot should be. Default: 4.6", metavar="numeric" )
parser$add_argument("--wilcoxPlotHeight", type="double", default=4.6,
help="How tall the Wilcoxon rank-sum test plot should be. Default: 4.6", metavar="numeric" )
parser$add_argument("--font", type="character", default=NULL,
help="Font used for plotting, given a TTF file. Default is usually Helvetica.", metavar="character" )
opt <- parser$parse_args()
#### SANITY CHECK ####
if ( is.null( opt$input ) ) {
print_help( opt_parser )
stop( "An input file must be supplied", call.=FALSE )
}
if ( is.null( opt$output ) ) {
print_help( opt_parser )
stop( "An output prefix must be supplied", call.=FALSE )
}
substrRight <- function( x, n ) {
substr( x, nchar( x )-n+1, nchar( x ) )
}
if ( substrRight( opt$input,5 ) == ".dscc" ) {
mode="dscc"
cat( as.character( Sys.time() ),".dscc file provided as input, skipping statistical analysis.\n" )
}else{mode="deeptools"}
#### SET WORKING DIR AND ABSOLUTE PATHS ####
opt$input <- normalizePath( opt$input )
opt$output <- paste( normalizePath( dirname( opt$output ) ),"/",basename( opt$output ),sep="" )
setwd( normalizePath( dirname( opt$output ) ) )
#### REMOVE BAD LABELS ####
#if ( opt$scoreLabels== NULL ) {
# opt$scoreLabels <- NULL
#}
#if ( opt$regionLabels=="" ) {
# opt$regionLabels <- NULL
#}
#### METADATA EXTRACTION ####
if ( mode == "deeptools" ) {
master_list <- list()
metadata <- list()
cat( as.character( Sys.time() ),"Extracting metadata from file:\n ", opt$input )
#---
regions_data <- readLines( opt$input,n = 1 )
regions_data <-strsplit( x = regions_data,split = "\t" )
regions_data<-lapply( regions_data,function( x ) strsplit( x,split=":" ) )
regions_data[[1]][[1]][1] <- gsub( pattern = "#",replacement = "", x = regions_data[[1]][[1]][1] )
even_indexes<-seq( 2,length( regions_data[[1]] )*2,2 )
odd_indexes<-seq( 1,length( regions_data[[1]] )*2,2 )
group_names<-as.character( as.data.frame( unlist( regions_data[[1]] ) )[odd_indexes,] )
group_length<-as.numeric( as.character( as.data.frame( unlist( regions_data[[1]] ) )[even_indexes,] ) )
metadata[[1]]<-group_names
metadata[[2]]<-group_length
#---
xaxis_data <- readLines( opt$input,n = 2 )[2]
xaxis_data <-strsplit( x = xaxis_data,split = "\t" )
xaxis_data <-lapply( xaxis_data,function( x ) strsplit( x,split=":" ) )
xaxis_data[[1]][[1]][1] <- gsub( pattern = "#",replacement = "", x = xaxis_data[[1]][[1]][1] )
even_indexes<-seq( 2,length( xaxis_data[[1]] )*2,2 )
odd_indexes<-seq( 1,length( xaxis_data[[1]] )*2,2 )
xaxis_names <- as.character( as.data.frame( unlist( xaxis_data[[1]] ) )[odd_indexes,] )
xaxis_values <- as.numeric( as.character( as.data.frame( unlist( xaxis_data[[1]] ) )[even_indexes,] ) )
metadata[[3]]<-xaxis_names
metadata[[4]]<-xaxis_values
#---
score_data <- readLines( opt$input,n = 3 )[3]
score_data <- unlist( strsplit( score_data,'\t' ) )
score_data<-score_data[-grep( pattern =":",x = score_data )]
metadata[["scores"]]<-unique( score_data )
names( metadata )<-c( "group_names","group_length","xaxis_names","xaxis_values","scores" )
cat( " ... Done!\n" )
}
#### SETUP INPUT FILES IN CASE A BOOTS FILE IS PROVIDED ####
if ( mode == "dscc" ) {
cat( as.character( Sys.time() ),"Extracting data from file:\n ", opt$input )
master_list <- readRDS( file=opt$input )
metadata <- master_list$metadata
master_list$metadata <- NULL
cat( " ... Done!\n" )
}
### CHANGE LABELS IF NEEDED
if ( is.null(opt$scoreLabels)==FALSE ) {
metadata$scores <- unlist(str_split(opt$scoreLabels,";"))
}
if ( is.null(opt$regionLabels)==FALSE ) {
metadata$group_names <- unlist(str_split(opt$regionLabels,";"))
}
#### LINEBREAKS FOR LONG LEGENDS ####
for (i in 1:length(metadata$group_names)){
nchar(metadata$group_names[[i]])
if(nchar(metadata$group_names[[i]]) >20 ){
metadata$group_names[[i]] <- gsub('(.{20})', "\\1\n", metadata$group_names[[i]], perl = TRUE)
}
}
for (i in 1:length(metadata$scores)){
if(nchar(metadata$scores[[i]]) >20 ){
metadata$scores[[i]] <- gsub('(.{20})', "\\1\n", metadata$scores[[i]], perl = TRUE)
}
}
#### GLOBAL FLAG DETERMINATION FOR COMPARISON ####
if(is.null(opt$comparison)){
if( length( metadata$scores )>1 & length( metadata$group_names )==1 ) {
cat( as.character( Sys.time() ),"Multiple scores detected but only one set of region found: comparing scores.\n" )
global_flag="compare_scores"
}
if( length( metadata$scores )==1 & length( metadata$group_names )>1 ) {
cat( as.character( Sys.time() ),"Multiple set of regions detected but only one score found: comparing regions.\n" )
global_flag="compare_regions"
}
if( length( metadata$scores )>1 & length( metadata$group_names )>1 ) {
cat( as.character( Sys.time() ),"Multiple scores and set of regions detected: comparing regions per score.\n" )
global_flag="compare_scores"
}
}else{
if(opt$comparison=="score" || opt$comparison=="scores" || opt$comparison=="Score" || opt$comparison=="Scores"){
cat( as.character( Sys.time() ),"--comparison is set: comparing scores.\n" )
global_flag="compare_scores"
}
if(opt$comparison=="region" || opt$comparison=="regions" || opt$comparison=="Region" || opt$comparison=="Regions"){
cat( as.character( Sys.time() ),"--comparison is set: comparing regions.\n" )
global_flag="compare_regions"
}
}
#### BOOSTRAPPING ####
if ( mode=="deeptools" ) {
bootstraps_to_plot<-list()
suppressMessages( dat <- vroom(opt$input,skip = 3,col_names = FALSE) ) #loading the input file content
nbbin <- as.numeric( ncol( dat ) / length( metadata$scores ) ) # number of bins to analyze ( a bin = a column in the file )
scores_length <- rep( x=nbbin,times=length( metadata$scores ) )
line_index_end <- c( cumsum( metadata$group_length ) )
line_index_start <- c( 1,cumsum( metadata$group_length )+1 )
col_index_end <- c( cumsum( scores_length ) )
col_index_start <- c( 1,cumsum( scores_length )+1 )
cat( as.character( Sys.time() ), "Boostraps starting with", opt$CPU,"cores, for each group:\n" )
cat( " Number of bootstraps to perform =", opt$bootstraps,"\n" )
cat( " Number of bins to analyze =", nbbin,"\n" )
for( score in 1:length( metadata$scores ) ) {
bootstrap_dfs<-list()
for ( group in 1:length( metadata$group_names ) ) {
cat( as.character( Sys.time() ), "Starting to boostrap values for the following group:\n" )
cat( " Genomic score used:",metadata$scores[[score]],"\n" )
dat_group <- dat[line_index_start[group]:line_index_end[group],]
dat_group <- dat_group[,col_index_start[score]:col_index_end[score]]
nbgene <- as.numeric( nrow( dat_group ) ) # number of genes/regions in the dataset
label <- metadata$group_names[[group]]
output.name <- opt$output
cat( " Region set used:",label,"\n" )
cat( " Number of regions to analyze:", nbgene )
sample.colmeans <- function( x, d ) {
return( apply( x[d,], 2, mean, na.rm=T ) )
}
b = boot( dat_group, sample.colmeans, R=opt$bootstraps, ncpus=opt$CPU ,parallel = "multicore" )
mini <- vector( length = nbbin )
maxi <- vector( length = nbbin )
ci_min <- ( 1-opt$bootstrapsCI )/2
ci_max <- opt$bootstrapsCI+ci_min
for ( i in 1:nbbin ) {
mini[i] <- quantile( b$t[,i], ci_min, na.rm=T ) # get CI
maxi[i] <- quantile( b$t[,i], ci_max, na.rm=T )
}
cat( "... Done!\n" )
region_name<-rep( x=label,nbbin )
score_name<-rep( x=metadata$scores[[score]],nbbin )
bin<-seq( from=1, to=nbbin, by=1 )
bootstraps_to_plot<-cbind( region_name,score_name,bin,b$t0,mini,maxi )
bootstraps_to_plot<-as.data.frame( unname( bootstraps_to_plot ) )
colnames( bootstraps_to_plot )<-c("Region","Score","Bin","Mean","Min","Max" )
for ( column in 3:ncol( bootstraps_to_plot ) ) {
bootstraps_to_plot[,column]<-as.numeric( as.character( bootstraps_to_plot[,column] ) )
}
bootstrap_dfs[[group]] <- bootstraps_to_plot
}
names( bootstrap_dfs )<- metadata$group_names
#bootstraps_to_plot <- do.call( rbind,bootstrap_dfs )
master_list[[metadata$scores[score]]] <- list()
master_list[[score]][["bootstraps"]] <- bootstrap_dfs
}
}
#### WILCOXON TESTS CORRECTED ####
if ( mode=="deeptools" ) {
#currently not implemented but it doesn't hurt to keep.
if ( is.null( opt$wilcoxStartBin )==TRUE && is.null( opt$wilcoxEndBin )==FALSE ) {
print_help( opt_parser )
stop( "Both --wilcoxStartBin and --wilcoxEndBin must be given.", call.=FALSE )
}
if ( is.null( opt$wilcoxStartBin )==FALSE && is.null( opt$wilcoxEndBin )==TRUE ) {
print_help( opt_parser )
stop( "Both --wilcoxStartBin and --wilcoxEndBin must be given.", call.=FALSE )
}
if ( is.null( opt$wilcoxStartBin )==FALSE && is.null( opt$wilcoxEndBin )==FALSE ) {
if ( opt$wilcoxStartBin > opt$wilcoxEndBin ) {
print_help( opt_parser )
stop( "--wilcoxStartBin must be superior to --wilcoxEndBin.", call.=FALSE )
}
if ( opt$wilcoxStartBin > opt$wilcoxEndBin ) {
cat( as.character( Sys.time() ), "Starting Wilcoxon rank-sum test on the following bins: from",opt$wilcoxStartBin,
"to",opt$wilcoxEndBin, "\n" )
wilcox_df <- dat[,opt$wilcoxStartBin:opt$wilcoxEndBin]
#wilcox_dfs <- lapply( bootstrap_dfs, function( x ) {x[which( x$Bin >= opt$wilcoxStartBin & x$Bin <= opt$wilcoxEndBin ),]} )
}
}else{
cat( as.character( Sys.time() ), "Starting Wilcoxon rank-sum test.\n" )
wilcox_df <- dat
}
if (global_flag=="compare_regions") {
for( score in 1:length( metadata$scores ) ) {
wilcox_groups_indexes <- paste( line_index_start[1:length( line_index_start )-1],line_index_end,sep=":" )
wilcox_comparisons <- as.data.frame( expand.grid( wilcox_groups_indexes,wilcox_groups_indexes ) )
wilcox_score_indexes <- paste( col_index_start[1:length( col_index_start )-1],col_index_end,sep=":" ) #this is type dependent
wilcox_matrix <- matrix( nrow = length( wilcox_groups_indexes ), ncol = length( wilcox_groups_indexes ) )
row.names( wilcox_matrix )<-wilcox_groups_indexes
colnames( wilcox_matrix )<-wilcox_groups_indexes
wilcox_padj_vectors <- list()
for ( comps in ( 1:nrow( wilcox_comparisons ) ) ) {
group_1_name <- as.character( wilcox_comparisons[comps,1] )
group_2_name <- as.character( wilcox_comparisons[comps,2] )
group_1_index <- unlist( strsplit( group_1_name,split = ":" ) )
group_2_index <- unlist( strsplit( group_2_name,split = ":" ) )
score_index <- unlist( strsplit( as.character(wilcox_score_indexes[score]),split = ":" ) )
group_1_data <- wilcox_df[group_1_index[1]:group_1_index[2],score_index[1]:score_index[2]]
group_2_data <- wilcox_df[group_2_index[1]:group_2_index[2],score_index[1]:score_index[2]]
wilcox_res <- vector()
for ( bins in ( 1:ncol( group_1_data ) ) ) {
wilcox_res[bins] <- wilcox.test( group_1_data[,bins][[1]],group_2_data[,bins][[1]] )$p.value
}
wilcox_adjust <- p.adjust( wilcox_res,method = "bonferroni" )
wilcox_padj_vectors[[ paste( group_1_name,group_2_name,sep="_" )]] <- wilcox_adjust
}
max_val=max( as.vector( unlist( lapply( wilcox_padj_vectors, function( x ) max( -log10( x ) ) ) ) ) )
wilcox_plot_list <- list()
wilcox_to_plot <- list()
group_names_mapping <- as.data.frame( unique( wilcox_comparisons[,1] ) )
group_names_mapping["names"] <- metadata$group_names
colnames( group_names_mapping )<-c( "indexes","names" )
for ( i in 1:length( wilcox_padj_vectors ) ) {
wilcox_to_plot[[i]]<-as.data.frame( -log10( wilcox_padj_vectors[[i]] ) )
wilcox_to_plot[[i]]["xaxis"]<-as.numeric( as.character( rownames( wilcox_to_plot[[i]] ) ) )
colnames( wilcox_to_plot[[i]] )<-c( "vals","xaxis" )
wilcox_plot_group1<-as.data.frame( unlist( strsplit( names( wilcox_padj_vectors )[i],split = "_" ) )[1] )
colnames( wilcox_plot_group1 )<-"indexes"
wilcox_plot_group1<- merge( wilcox_plot_group1,group_names_mapping,by="indexes" )[1,2]
wilcox_plot_group2<-as.data.frame( unlist( strsplit( names( wilcox_padj_vectors )[i],split = "_" ) )[2] )
colnames( wilcox_plot_group2 )<-"indexes"
wilcox_plot_group2<- merge( wilcox_plot_group2,group_names_mapping,by="indexes" )[1,2]
wilcox_to_plot[[i]]["Group1"]<-wilcox_plot_group1
wilcox_to_plot[[i]]["Group2"]<-wilcox_plot_group2
wilcox_to_plot[[i]]["Constant"]<-metadata$scores[[score]]
}
master_list[[score]][["wilcoxon"]] <- wilcox_to_plot
}
}
if (global_flag=="compare_scores") {
store_wilcox_scores<-list()
for( region in 1:length( metadata$group_names ) ) {
wilcox_score_indexes <- paste( col_index_start[1:length( col_index_start )-1],col_index_end,sep=":" ) #this is type dependent
wilcox_comparisons <- as.data.frame( expand.grid( wilcox_score_indexes,wilcox_score_indexes ) ) #this is type dependent
wilcox_groups_indexes <- paste( line_index_start[1:length( line_index_start )-1],line_index_end,sep=":" )
wilcox_matrix <- matrix( nrow = length( wilcox_score_indexes ), ncol = length( wilcox_score_indexes ) )
row.names( wilcox_matrix )<-wilcox_score_indexes
colnames( wilcox_matrix )<-wilcox_score_indexes
wilcox_padj_vectors <- list()
for ( comps in ( 1:nrow( wilcox_comparisons ) ) ) {
group_1_name <- as.character( wilcox_comparisons[comps,1] )
group_2_name <- as.character( wilcox_comparisons[comps,2] )
group_1_index <- unlist( strsplit( group_1_name,split = ":" ) )
group_2_index <- unlist( strsplit( group_2_name,split = ":" ) )
regions_index <- unlist( strsplit( wilcox_groups_indexes[region],split = ":" ) )
group_1_data <- wilcox_df[regions_index[1]:regions_index[2],group_1_index[1]:group_1_index[2]] #this is type dependent
group_2_data <- wilcox_df[regions_index[1]:regions_index[2],group_2_index[1]:group_2_index[2]] #this is type dependent
wilcox_res <- vector()
for ( bins in ( 1:ncol( group_1_data ) ) ) {
wilcox_res[bins] <- wilcox.test( group_1_data[,bins][[1]],group_2_data[,bins][[1]] )$p.value
}
wilcox_adjust <- p.adjust( wilcox_res,method = "bonferroni" )
wilcox_padj_vectors[[ paste( group_1_name,group_2_name,sep="_" )]] <- wilcox_adjust
}
max_val=max( as.vector( unlist( lapply( wilcox_padj_vectors, function( x ) max( -log10( x ) ) ) ) ) )
wilcox_plot_list <- list()
wilcox_to_plot <- list()
group_names_mapping <- as.data.frame( unique( wilcox_comparisons[,1] ) )
group_names_mapping["names"] <- metadata$scores #this is type dependent
colnames( group_names_mapping )<-c( "indexes","names" ) #this is type dependent
for ( i in 1:length( wilcox_padj_vectors ) ) {
wilcox_to_plot[[i]]<-as.data.frame( -log10( wilcox_padj_vectors[[i]] ) )
wilcox_to_plot[[i]]["xaxis"]<-as.numeric( as.character( rownames( wilcox_to_plot[[i]] ) ) )
colnames( wilcox_to_plot[[i]] )<-c( "vals","xaxis" )
wilcox_plot_group1<-as.data.frame( unlist( strsplit( names( wilcox_padj_vectors )[i],split = "_" ) )[1] )
colnames( wilcox_plot_group1 )<-"indexes"
wilcox_plot_group1<- merge( wilcox_plot_group1,group_names_mapping,by="indexes" )[1,2]
wilcox_plot_group2<-as.data.frame( unlist( strsplit( names( wilcox_padj_vectors )[i],split = "_" ) )[2] )
colnames( wilcox_plot_group2 )<-"indexes"
wilcox_plot_group2<- merge( wilcox_plot_group2,group_names_mapping,by="indexes" )[1,2]
wilcox_to_plot[[i]]["Group1"]<-wilcox_plot_group1
wilcox_to_plot[[i]]["Group2"]<-wilcox_plot_group2
wilcox_to_plot[[i]]["Constant"]<-metadata$group_names[[region]]
}
store_wilcox_scores[[region]]<- do.call( "rbind",wilcox_to_plot )
}
for( score in 1:length( metadata$scores ) ) {
master_list[[score]][["wilcoxon"]] <- store_wilcox_scores
}
}
cat( as.character( Sys.time() ), "Wilcoxon test finished.\n" )
}
#### EXPORTATION ####
if ( mode=="deeptools" ) {
cat( as.character( Sys.time() ),"Exporting data to the following RDS object:\n ",paste( opt$output,".dscc",sep="" ) )
to_export <- master_list
to_export[["metadata"]]<-metadata
saveRDS( to_export,file = paste( opt$output,".dscc",sep="" ) )
rm(to_export)
cat( " ... Done!\n" )
}
##### X AXIS CREATION
cat( as.character( Sys.time() )," Generating the X axis for plots.",sep="" )
xaxis<-list()
linebreaker <- function( x,... ) {
gsub( '\\s','\n',x )
}
if ( metadata$xaxis_values[3]==0 ) {
#Centered region
#Start and end definition
xaxis["xaxis_start"][1] <- as.numeric( 1 ) # position
xaxis$xaxis_start[2] <- paste( ( -1 * metadata$xaxis_values[2] / 1000 ),"kb",sep = "" )
xaxis["xaxis_end"][1] <- as.numeric( max( master_list[[1]][[1]][[1]]$Bin ) ) # position
xaxis$xaxis_end[2] <- paste( ( metadata$xaxis_values[1] / 1000 ),"kb",sep = "" )
xaxis["first_region"][1] <- round( as.integer( max( master_list[[1]][[1]][[1]]$Bin) ) / 2 )
xaxis$first_region[2] <- linebreaker( opt$firstRegionName )
xaxis$second_region <- NULL
}else{
#Scaled region
xaxis["xaxis_start"][1] <- 0 # position
xaxis$xaxis_start[2] <- paste( ( -1 * metadata$xaxis_values[2] / 1000 ),"kb",sep = "" )
xaxis["xaxis_end"][1] <- as.numeric( max( master_list[[1]][[1]][[1]]$Bin ) ) # position
xaxis$xaxis_end[2] <- paste( ( metadata$xaxis_values[1] / 1000 ),"kb",sep = "" )
xaxis["first_region"] <- metadata$xaxis_values[2] / metadata$xaxis_values[4]
xaxis$first_region[2] <- linebreaker( opt$firstRegionName )
xaxis["second_region"] <- as.numeric(xaxis$xaxis_end[1]) - (metadata$xaxis_values[1]) / metadata$xaxis_values[4]
xaxis$second_region[2] <- linebreaker( opt$secondRegionName )
}
if ( metadata$xaxis_values[5]>0 ) {
xaxis["unscaled5"][1] <- (metadata$xaxis_values[2] + metadata$xaxis_values[5]) / metadata$xaxis_values[4]
xaxis$unscaled5[2] <- paste( ( -1 * metadata$xaxis_values[5] / 1000 ),"kb",sep = "" )
}
if ( metadata$xaxis_values[6]>0 ) {
xaxis["unscaled3"][1] <- as.numeric(xaxis$xaxis_end[1]) - (metadata$xaxis_values[1] + metadata$xaxis_values[6]) / metadata$xaxis_values[4]
xaxis$unscaled3[2] <- paste( ( -1 * metadata$xaxis_values[6] / 1000 ),"kb",sep = "" )
}
xaxis["breaks"]<-as.data.frame( as.numeric( c( xaxis$xaxis_start[1],
xaxis$unscaled5[1],
xaxis$first_region[1],
xaxis$second_region[1],
xaxis$unscaled3[1],
xaxis$xaxis_end[1] ) ) )
xaxis["names"]<-as.data.frame( as.character( c( xaxis$xaxis_start[2],
xaxis$unscaled5[2],
xaxis$first_region[2],
xaxis$second_region[2],
xaxis$unscaled3[2],
xaxis$xaxis_end[2] ) ) )
cat( " ... Done!\n" )
##### BOOTSTRAP PLOT
cat( as.character( Sys.time() ), "Starting Bootstraps plotting to the following file:\n ",paste( opt$output,"_bootstraps.pdf\n",sep="" ) )
pdf( file = paste( opt$output,"_bootstraps.pdf",sep="" ),width = opt$bootPlotWidth,height = opt$bootPlotHeight )
color_palette<-c( "#000000",
colorschemes$Categorical.12[[12]],
colorschemes$Categorical.12[[8]],
colorschemes$Categorical.12[[2]],
colorschemes$Categorical.12[[10]],
colorschemes$Categorical.12[[6]] )
fill_palette<-c( "#919191",
colorschemes$Categorical.12[[11]],
colorschemes$Categorical.12[[7]],
colorschemes$Categorical.12[[1]],
colorschemes$Categorical.12[[9]],
colorschemes$Categorical.12[[5]] )
boot_plot_data <- list()
for (group in 1:length(metadata$group_names)){ # One plot per region
data_frame_storage <- list()
for (score in 1:length(metadata$scores)) {
data_frame_storage[[score]] <- master_list[[score]]$bootstraps[[group]]
}
bootstraps_to_plot <- do.call( rbind,data_frame_storage)
boot_plot_data[[group]]<-bootstraps_to_plot
}
bootstraps_to_plot<-do.call("rbind", boot_plot_data)
rm(boot_plot_data)
make_boot_plot<-function(x,type){
if ( opt$bootPlotShareY == FALSE ){
ratio <- with(x, diff(range(x$Bin))/diff(c(min(x$Min), max(x$Max)))*as.numeric(opt$bootPlotRatio))
}else{
ratio <- with(bootstraps_to_plot, diff(range(bootstraps_to_plot$Bin))*as.numeric(opt$bootPlotRatio) /
diff(c(min(bootstraps_to_plot$Min), max(bootstraps_to_plot$Max) )))
}
ggplot(x,aes( x = Bin, color = x[,type], fill = x[,type] ))+
geom_ribbon( aes( ymin = Min, ymax = Max ),alpha = 0.5, linetype = 0 )+
geom_line( aes( y = Mean ) )+
scale_color_manual( legend_title, values = color_palette )+
scale_fill_manual( legend_title, values = fill_palette )+
scale_x_continuous( NULL , breaks = xaxis$breaks, labels = xaxis$names )+
{if( opt$bootPlotShareY == FALSE ) scale_y_continuous(labels = function(x) sprintf("%.2f", x)) }+
{if( opt$bootPlotShareY == TRUE ) scale_y_continuous(labels = function(x) sprintf("%.2f", x),
limits = c( min ( bootstraps_to_plot$Min ) ,max ( bootstraps_to_plot$Max )))}+
ylab( opt$signalName )+
panel_border ( size = 1, remove = FALSE )+
coord_fixed ( ratio = ratio )+
theme( axis.text.y = element_text( size = 12, color = "#383838" ),
axis.text.x = element_text( size = 12, color = "#383838" ),
axis.title.y = element_text( size = 13 ),
axis.title.x = element_text( size = 13 ),
plot.title = element_text( size = 13, face = "plain", hjust = 0.5, vjust=0.5),
legend.title = element_text( size = 12, color = "#383838" ),
legend.text = element_text( size = 12, color = "black" ),
line = element_line( size = 1 ),
strip.placement = "outside",
strip.background = element_blank(),
plot.background = element_blank(),
panel.border = element_rect( colour = "black" ),
axis.line = element_blank(),
panel.spacing = unit( 1, "line" ),
panel.grid = element_blank(),
panel.background = element_blank(),
axis.ticks = element_line(color = "black", size = 0.6),
axis.ticks.length = unit( 3, "pt")
)
}
boot_plot_list <- list()
if(global_flag == "compare_regions") {
legend_title="Regions"
for (score in 1:length(unique(bootstraps_to_plot$Score))){
data <- bootstraps_to_plot[which(bootstraps_to_plot$Score==unique(bootstraps_to_plot$Score)[score]),]
boot_plot_list[[score]] <- make_boot_plot(data,"Region") + ggtitle(as.character(unique(data$Score)))
if ( is.null( opt$font )==FALSE ) {
font_add( family = "plot_font", regular = opt$font )
showtext_auto()
boot_plot_list[[score]] <- boot_plot_list[[score]] + theme( text = element_text( family = "plot_font" ) )
}
if( is.null( opt$bootPlotColors )==FALSE ) {
color_pal<-read.table( opt$bootPlotColors,header = F,sep = "\t" )
boot_plot_list[[score]] <- boot_plot_list[[score]] +
scale_color_manual( "",values = color_pal[,1] ) +
scale_fill_manual( "",values = color_pal[,2] )
}
}
}
if(global_flag == "compare_scores") {
legend_title="Scores"
for (region in 1:length(unique(bootstraps_to_plot$Region))){
data <- bootstraps_to_plot[which(bootstraps_to_plot$Region==unique(bootstraps_to_plot$Region)[region]),]
boot_plot_list[[region]] <- make_boot_plot(data,"Score") + ggtitle(as.character(unique(data$Region)))
if ( is.null( opt$font )==FALSE ) {
font_add( family = "plot_font", regular = opt$font )
showtext_auto()
boot_plot_list[[score]] <- boot_plot_list[[score]] + theme( text = element_text( family = "plot_font" ) )
}
if( is.null( opt$bootPlotColors )==FALSE ) {
color_pal<-read.table( opt$bootPlotColors,header = F,sep = "\t" )
boot_plot_list[[score]] <- boot_plot_list[[score]] +
scale_color_manual( "",values = color_pal[,1] ) +
scale_fill_manual( "",values = color_pal[,2] )
}
}
}
boot_plot_list
invisible( dev.off() )
cat( " ... Done!\n" )
##### WILCOXON PLOT
cat( as.character( Sys.time() ), "Starting Wilcoxon corrected tests plotting to the following file:\n ",paste( opt$output,"_wilcoxon.pdf\n",sep="" ) )
pdf( file = paste( opt$output,"_wilcoxon.pdf",sep="" ),width = opt$wilcoxPlotWidth,height = opt$wilcoxPlotHeight )
make_wilcox_plot<-function(x){
full[ which( full$vals == Inf ), 'vals' ] <- max( full[ which( !is.infinite( full$vals ) ), 1 ] )
rec_start<-as.numeric( xaxis$xaxis_start[[1]] )
rec_end<-as.numeric( xaxis$xaxis_end[[1]] )
ratio <- with(x, diff(range(full$xaxis))/diff(c(min(full$vals),max(full$vals, -log10( opt$wilcoxThreshold )))))
ggplot( x,aes( x = xaxis,y = vals ) )+
geom_rect( aes( xmin = rec_start, xmax = rec_end, ymin = -0.1, ymax =-log10( opt$wilcoxThreshold ) ), fill = "#CECECE" ) +
geom_line( color="#ff6100" )+
facet_grid( Group1 ~ Group2 )+
coord_fixed(ratio=ratio)+
scale_x_continuous( "Genomic position",breaks=xaxis$breaks,labels=xaxis$names )+
ylim(-0.1, max(full$vals, -log10( opt$wilcoxThreshold )))+
ylab( "-log10( Bonferroni corrected \n Wilcoxon rank-sum p-value )" )+
panel_border( size=1,remove=FALSE )+
theme( axis.text = element_text( size=11 ),
axis.text.y = element_text( size=10 ),
axis.text.x = element_text( size=10,angle=50,hjust=1,vjust=1 ),
plot.title = element_text( size = 13, face = "plain", hjust = 0.5, vjust=0.5),
line = element_line( size=1 ),
strip.placement = "outside",
strip.background = element_blank(),
plot.background = element_blank(),
panel.border = element_rect( colour = "black" ),
axis.line = element_blank(),
panel.spacing = unit( 1,"line" ),
panel.grid = element_blank(),
panel.background = element_blank(),
axis.ticks = element_line(color = "black", size = 0.6),
axis.ticks.length = unit( 3, "pt")
)
}
wilcox_plot_list <- list()
if(global_flag == "compare_scores") {
wilcox_to_plot <- master_list[[1]]$wilcoxon
full <- do.call(rbind,wilcox_to_plot)
for (i in 1:length(wilcox_to_plot)) {
wilcox_plot_list[[i]] <- make_wilcox_plot(wilcox_to_plot[[i]]) + ggtitle(unique(wilcox_to_plot[[i]]$Constant))
if ( is.null( opt$font )==FALSE ) {
font_add( family = "plot_font", regular = opt$font )
showtext_auto()
wilcox_plot_list[[i]] <- wilcox_plot_list[[i]] + theme( text = element_text( family = "plot_font" ) )
}
}
}
if(global_flag == "compare_regions") {
for (score in 1:length(master_list)){
wilcox_to_plot<-Reduce(rbind,master_list[[score]]$wilcoxon)
full <- wilcox_to_plot
wilcox_plot_list[[score]] <- make_wilcox_plot(wilcox_to_plot) + ggtitle(unique(wilcox_to_plot$Constant))
if ( is.null( opt$font )==FALSE ) {
font_add( family = "plot_font", regular = opt$font )
showtext_auto()
wilcox_plot_list[[score]] <- wilcox_plot_list[[score]] + theme( text = element_text( family = "plot_font" ) )
}
}
}
wilcox_plot_list
invisible( dev.off() )
cat( " ... Done!\n" )
cat( as.character( Sys.time() ), "Job finished succesfully!\n" )