[41c1e8]: / exseek / snakefiles / mapping_circ.snakemake

Download this file

131 lines (115 with data), 4.0 kB

shell.prefix('set -x;')

shell.prefix('set -x;')

with open(config['sample_id_file'], 'r') as f:
    sample_ids = f.read().split()

data_dir = config['data_dir']
output_dir = config['output_dir']
python2 = config['python2']
tools_dir = config['tools_dir']

def get_all_inputs(wildcards):
    available_inputs = dict(

    )
    enabled_inputs = list(available_inputs.keys())
    inputs = []
    for key, l in available_inputs.items():
        if key in enabled_inputs:
            inputs += l
    return inputs


rule all:
    input:
        get_all_inputs


rule map_genome:
    input:
        fastq='{output_dir}/unmapped/{sample_id}/rRNA.fastq',
        index=genome_dir + '/genome_index/bowtie2/genome'
    output:
        bam='{output_dir}/circ/map_genome/{sample_id}.bam'
    params:
        temp_dir=config['temp_dir']
    threads:
        config['threads']
    shell:
        '''bowtie2 -p {threads} --very-sensitive --score-min=C,-15,0 --mm \
            -x {input.index} -q -U {inpu.fastq} \
            | samtools view -hbuS - | samtools sort -T {params.temp_dir} - -o {output.bam}
        '''

rule get_unmapped:
    input:
        bam='{output_dir}/circ/bowtie2_1pass/{sample_id}.bam'
    output:
        bam='{output_dir}/circ/unmapped/{sample_id}.bam'
    shell:
        '''samtools view -hf 4 {input.bam} | samtools view -Sb - > {output.bam}
        '''

rule unmapped_to_anchors:
    input:
        '{output_dir}/circ/unmapped/{sample_id}.bam'
    output:
        '{output_dir}/circ/anchors/{sample_id}.fastq'
    shell:
        '''{python2} {tools_dir}/find_circ-1.2/unmapped2anchors.py {input} > {output}
        '''

rule find_circ:
    input:
        fastq='{output_dir}/circ/anchors/{sample_id}.fastq',
        index=genome_dir + '/genome_index/bowtie2/genome',
        genome_fasta=genome_dir + '/fasta/genome.fa'
    output:
        stats='{output_dir}/circ/find_circ/{sample_id}.stats',
        reads='{output_dir}/circ/find_circ/{sample_id}.fa',
        bed='{output_dir}/circ/find_circ/{sample_id}.bed'
    threads:
        config['threads']
    shell:
        '''bowtie2 -p8 --very-sensitive --score-min=C,-15,0 --reorder --mm \
            -q -U {input.fastq} -x {input.index} \
            | {python2} {tools_dir}/find_circ-1.2/find_circ.py --genome {input.genome_fasta} \
            --name={wildcards.sample_id}  --prefix={wildcards.sample_id}_ \
            --stats={output.stats} \
            --reads={output.reads} \
            > {output.bed}
        '''

rule filter_circ:
    input:
        '{output_dir}/circ/find_circ/{sample_id}.bed'
    output:
        '{output_dir}/circ/filter_circ/{sample_id}.bed'
    shell:
        '''grep CIRCULAR {input} | grep -v chrM | awk '$5>=1' \
            | grep UNAMBIGUOUS_BP | grep ANCHOR_UNIQUE \
            | {python2} {tools_dir}/find_circ-1.2/maxlength.py 100000 \
            > {output}
        '''

rule circexplorer_map_pe:
    input:
        reads1='',
        reads2=''
    params:
        index=genome_dir + '/index/star_long/',
        output_prefix=''
    shell:
        '''STAR --genomeDir /path/to/database/hg19_star \
            --readFilesIn {input.reads1} {input.reads2} \
            --runThreadN 3 \
            --chimSegmentMin 20 \
            --chimScoreMin 1 \
            --alignIntronMax 100000 \
            --outFilterMismatchNmax 4 \
            --alignTranscriptsPerReadNmax 100000 \
            --outFilterMultimapNmax 2 \
            --outFileNamePrefix {params.output_prefix} \
            --outSAMtype BAM Unsorted \
            --readFilesCommand zcat
        '''

rule circexplorer_pe:
    input:
        junction='',
        genome='',
        ref=''
    output:
        junction='',
        circexplorer=''
    shell:
        '''{tools_dir}/CIRCexplorer/circ/star_parse.py {input.junction} {output.junction}
        {tools_dir}/CIRCexplorer/circ/CIRCexplorer.py -j {output.junction} -g {input.genome} -r {input.ref} -o {output.circexplorer}
        '''