--- a
+++ b/exseek/config/default_config.yaml
@@ -0,0 +1,141 @@
+# RNA types for sequential mapping in the small-RNA pipeline
+rna_types: [univec, rRNA, lncRNA, mature_miRNA, miRNA, mRNA, piRNA,
+  snoRNA, snRNA, srpRNA, tRNA, tucpRNA, Y_RNA]
+
+# A domain counts as recurrent when it is called in more than this fraction of samples
+cov_threshold: 0.05
+
+# Maximum number of features to select
+n_features_to_select: [10]
+
+# Parameters for evaluation of features
+evaluation_features:
+  classifier: logistic_regression
+  classifier_params:  # per-classifier settings, keyed by classifier name
+    logistic_regression:  # same name as the `classifier` value above
+      penalty: l2
+
+# Type of counts for feature selection
+#   domains_combined: combine miRNA/piRNA with long RNA domains
+#   transcript: transcript-level features
+#   featurecounts: gene-level features counted using featureCounts
+count_method: mirna_and_long_fragments  # NOTE(review): not one of the values documented above — confirm
+# Define low expression value as read counts below this value
+filtercount: 5
+# Threshold for filtering low expression features
+filterexpv: 0
+# Quantification method for low expression filter
+filtermethod: filtercount  # NOTE(review): same name as the filtercount key above — presumably selects that threshold; confirm
+# Keep features with high expression in fraction of samples above this value
+filtersample: 0.2
+# Imputation methods to try (set to "null" to skip imputation)
+#imputation_methods: ["viper_count", "null"]
+imputation_method: ["null"]  # quoted on purpose: the string "null", not a YAML null
+# Read depth normalization methods to try
+normalization_method: ["TMM"]
+# Batch effect removal methods to try (set "null" to skip batch effect removal)
+batch_removal_method: ["ComBat"]
+# Column index of batch effect in batch_info.txt to consider for ComBat
+batch_index: 1
+
+# Root directory
+root_dir: '.'
+# Directory for sequences and annotations
+genome_dir: 'genome/hg38'
+# Temporary directory (e.g. samtools sort, sort)
+temp_dir: 'tmp'
+# Directory for third-party tools
+tools_dir: 'tools'
+# Directory for exSeek scripts
+bin_dir: 'bin'
+# Directory for spike-in sequences and index
+spikein_dir: 'genome/hg38/spikein'
+# bin path to R
+# r_dir: "/usr/bin"
+# Input files are clean reads
+input_clean_reads: false
+
+# Number of threads for uncompression and compression
+threads_compress: 1
+# Default number of threads to use
+threads: 1  # threads_compress (above) and threads_mapping are set by their own keys
+# Alignment software to use (valid choices: bowtie, star)
+aligner: bowtie2  # NOTE(review): the choices listed above are bowtie and star; bowtie2 is not among them — confirm
+# Remove 3'-end adaptor sequence from single-end reads
+adaptor: ""
+# Remove 5'-end adaptor sequence from single-end reads
+adaptor_5p: ""
+# Remove 3'-end adaptor sequence from the first read in a pair
+adaptor1: ""
+# Remove 3'-end adaptor sequence from the second read in a pair
+adaptor2: ""
+# Remove 5'-end adaptor sequence from the first read in a pair
+adaptor1_5p: ""
+# Remove 5'-end adaptor sequence from the second read in a pair
+adaptor2_5p: ""
+# Exact number of bases to trim from 5'-end
+trim_5p: 0
+# Exact number of bases to trim from 3'-end
+trim_3p: 0
+# Trim exact number of bases after adapter trimming
+trim_after_adapter: false
+# Discard reads of length below this value
+min_read_length: 16
+# Maximum read length
+max_read_length: 100
+# Trim bases with quality below this value from 3'-end
+min_base_quality: 30  # NOTE(review): described as 3'-end, same as min_base_quality_3p below — confirm how they differ
+# Trim bases with quality below this value from 5'-end
+min_base_quality_5p: 30
+# Trim bases with quality below this value from 3'-end
+min_base_quality_3p: 30
+# Quality encoding in FASTQ files
+quality_base: 33
+# Strandness (valid choices: forward, reverse, no)
+strandness: forward  # quote "no" if chosen — YAML 1.1 loaders read a bare no as boolean false
+# Filter out reads with mapping quality below this value
+min_mapping_quality: 0
+# Only consider longest transcript for transcriptome mapping
+use_longest_transcript: true
+# Expected read length for mapping using STAR
+star_genome_generate:
+  sjdbOverhang: 100  # NOTE(review): the "expected read length" comment above presumably refers to this value — confirm
+  limitGenomeGenerateRAM: 31000000000  # presumably forwarded to STAR's --limitGenomeGenerateRAM, which is in bytes (31e9 here) — confirm
+# Number of threads for mapping
+threads_mapping: 4
+# Remove duplicates for long RNA-seq before feature counting
+remove_duplicates_long: true
+# Input reads are paired-end
+paired_end: false
+# Use small RNA-seq pipeline (sequential mapping)
+small_rna: true
+# Remove UMI tags (leading nucleotides)
+umi_tags: false
+# Length of the UMI barcode
+umi_length: 0
+# Evaluate published biomarkers
+evaluate_features_preprocess_methods: []  # NOTE(review): empty list; the comment above does not obviously describe this key — confirm
+# Differential expression method
+# Available methods: deseq2, edger_glmlrt, edger_glmqlf, edger_exact, wilcox
+diffexp_method: [deseq2, edger_glmlrt]
+# Count multi-mapping reads
+count_multimap_reads: true
+# Count overlapping features
+count_overlapping_features: true
+
+# Base URL for IGV web server
+igv_base_url: http://127.0.0.1:5000
+
+# Configuration for containers (singularity or udocker)
+container:  # holds the singularity and udocker paths plus the image and wrapper locations
+  singularity_path: singularity
+  udocker_path: udocker
+  image: singularity/exseek.simg
+  wrapper_dir: singularity/wrappers
+
+# Configuration for cluster jobs
+cluster:  # {cluster.*} fields in submit_command are placeholders — presumably filled from config_file; confirm
+  # Command template for submitting a job to cluster
+  submit_command: 'bsub -q {cluster.queue} -J {cluster.name} -e {cluster.stderr} -o {cluster.stdout} -R {cluster.resources} -n {cluster.threads}'
+  # Snakemake configuration file for cluster jobs
+  config_file: config/cluster.yaml