exSEEK / Git / [4c33d4] /exseek/config/default

Models:
DanielG/
exSEEK
Downloads: 1
[4c33d4]: / exseek / config / default_config.yaml
History
Download this file
142 lines (132 with data), 4.8 kB

# RNA types for sequential mapping in small-RNA pipeline
# (one entry per line; the list order is unchanged)
rna_types:
  - univec
  - rRNA
  - lncRNA
  - mature_miRNA
  - miRNA
  - mRNA
  - piRNA
  - snoRNA
  - snRNA
  - srpRNA
  - tRNA
  - tucpRNA
  - Y_RNA

# A domain is defined as recurrent when it is called in a fraction of samples
# above this value
cov_threshold: 0.05
 
# Maximum number of features to select (given as a list of values)
n_features_to_select: [10]

# Parameters for evaluation of features
evaluation_features:
  # Name of the classifier; the same name is used as a key under
  # classifier_params below
  classifier: logistic_regression
  # Classifier-specific parameters, keyed by classifier name
  classifier_params:
    logistic_regression:
      penalty: l2

# Type of counts for feature selection
#   domains_combined: combine miRNA/piRNA with long RNA domains
#   transcript: transcript-level features
#   featurecounts: gene-level features counted using featureCounts
# NOTE(review): the value set below (mirna_and_long_fragments) is not among the
# choices listed above -- confirm the list of valid values
count_method: mirna_and_long_fragments
# Define low expression value as read counts below this value
filtercount: 5
# Threshold for filtering low expression features
filterexpv: 0
# Quantification method for low expression filter
filtermethod: filtercount
# Keep features with high expression in fraction of samples above this value
filtersample: 0.2
# Imputation methods to try (set to "null" to skip imputation)
# NOTE(review): the commented-out example below uses the key
# "imputation_methods" while the active key is "imputation_method" -- confirm
# which name is read before re-enabling it
#imputation_methods: ["viper_count", "null"]
imputation_method: ["null"]
# Read depth normalization methods to try
normalization_method: ["TMM"]
# Batch effect removal methods to try (set "null" to skip batch effect removal)
batch_removal_method: ["ComBat"]
# Column index of batch effect in batch_info.txt to consider for ComBat
batch_index: 1
    
# Root directory
root_dir: "."
# Directory for sequences and annotations
genome_dir: "genome/hg38"
# Temporary directory (e.g. for samtools sort and sort)
temp_dir: "tmp"
# Directory for third-party tools
tools_dir: "tools"
# Directory for exSeek scripts
bin_dir: "bin"
# Directory for spike-in sequences and index
spikein_dir: "genome/hg38/spikein"
# Path to the bin directory of R (commented out here; uncomment to set it)
# r_dir: "/usr/bin"
# Input files are clean reads
input_clean_reads: false

# Number of threads for decompression and compression
threads_compress: 1
# Default number of threads to use
threads: 1
# Alignment software to use (valid choices: bowtie, star)
# NOTE(review): the value set below is bowtie2, not bowtie -- confirm the exact
# names accepted by the pipeline
aligner: bowtie2
# Remove 3'-end adaptor sequence from single-end reads
adaptor: ""
# Remove 5'-end adaptor sequence from single-end reads
adaptor_5p: ""
# Remove 3'-end adaptor sequence from the first read in a pair
adaptor1: ""
# Remove 3'-end adaptor sequence from the second read in a pair
adaptor2: ""
# Remove 5'-end adaptor sequence from the first read in a pair
adaptor1_5p: ""
# Remove 5'-end adaptor sequence from the second read in a pair
adaptor2_5p: ""
# Exact number of bases to trim from 5'-end
trim_5p: 0
# Exact number of bases to trim from 3'-end
trim_3p: 0
# Trim exact number of bases after adapter trimming
trim_after_adapter: false
# Discard reads of length below this value
min_read_length: 16
# Maximum read length
max_read_length: 100
# Trim bases with quality below this value from 3'-end
# NOTE(review): this description is identical to the one on
# min_base_quality_3p below -- confirm how the two settings differ
min_base_quality: 30
# Trim bases with quality below this value from 5'-end
min_base_quality_5p: 30
# Trim bases with quality below this value from 3'-end
min_base_quality_3p: 30
# Quality encoding in FASTQ files
quality_base: 33
# Strandness (valid choices: forward, reverse, no)
strandness: forward
# Filter out reads with mapping quality below this value
min_mapping_quality: 0
# Only consider longest transcript for transcriptome mapping
use_longest_transcript: true
# Settings for STAR genome generation
# NOTE(review): the key names match STAR's --sjdbOverhang and
# --limitGenomeGenerateRAM options; presumably they are passed through to
# STAR -- confirm
star_genome_generate:
  # Expected read length for mapping using STAR
  sjdbOverhang: 100
  limitGenomeGenerateRAM: 31000000000
# Number of threads for mapping
threads_mapping: 4
# Remove duplicates for long RNA-seq before feature counting
remove_duplicates_long: true
# Input reads are paired-end
paired_end: false
# Use small RNA-seq pipeline (sequential mapping)
small_rna: true
# Remove UMI tags (leading nucleotides)
umi_tags: false
# Length of the UMI barcode
umi_length: 0
# Evaluate published biomarkers
# NOTE(review): the key name refers to preprocessing methods while the comment
# mentions published biomarkers -- confirm which description is intended
evaluate_features_preprocess_methods: []
# Differential expression methods (given as a list)
# Available methods: deseq2, edger_glmlrt, edger_glmqlf, edger_exact, wilcox
diffexp_method: [deseq2, edger_glmlrt]
# Count multi-mapping reads
count_multimap_reads: true
# Count overlapping features
count_overlapping_features: true

# Base URL of the IGV web server
igv_base_url: "http://127.0.0.1:5000"

# Configuration for containers (singularity and udocker)
container:
  # Path or command name of the singularity executable
  singularity_path: singularity
  # Path or command name of the udocker executable
  udocker_path: udocker
  # Container image file
  image: singularity/exseek.simg
  # Directory of wrapper scripts
  # NOTE(review): presumably wrappers that run tools inside the container --
  # confirm
  wrapper_dir: singularity/wrappers

# Configuration for cluster jobs
cluster:
  # Command template for submitting a job to cluster.
  # The {cluster.*} fields in braces are placeholders, not literal values.
  # NOTE(review): presumably filled in per job from config_file below --
  # confirm
  submit_command: 'bsub -q {cluster.queue} -J {cluster.name} -e {cluster.stderr} -o {cluster.stdout} -R {cluster.resources} -n {cluster.threads}'
  # Snakemake configuration file for cluster jobs
  config_file: config/cluster.yaml