[41c1e8]: / exseek / snakefiles / create_index_small.snakemake

Download this file

87 lines (79 with data), 3.3 kB

include: 'common.snakemake'

# RNA types for which get_all_inputs requests a per-type RSEM reference
# and a per-type bowtie2 index.
rna_types = ['Y_RNA', 'lncRNA', 'mRNA', 'miRNA', 'piRNA', 'rRNA', 'snRNA', 'snoRNA', 'srpRNA', 'tRNA', 'tucpRNA']

def get_all_inputs(wildcards):
    """Return the flat list of index files that rule ``all`` depends on.

    The list is built from ``config['genome_dir']`` and contains, in order:
    the genome FASTA index (``.fai``), one RSEM ``transcripts.fa`` per entry
    of ``rna_types``, the genome bowtie2 index (``genome.1.bt2``), and one
    bowtie2 ``.1.bt2`` file per entry of ``rna_types``.

    ``wildcards`` is part of the input-function signature and is not used.
    """
    genome_dir = config['genome_dir']
    # Keys only label each group of targets; the values are what is returned.
    target_groups = {
        'fasta_index': expand(
            '{genome_dir}/fasta/genome.fa.fai',
            genome_dir=genome_dir),
        'rsem_index': expand(
            '{genome_dir}/rsem_index/bowtie2/{rna_type}.transcripts.fa',
            genome_dir=genome_dir, rna_type=rna_types),
        'genome_index_bowtie2': expand(
            '{genome_dir}/genome_index/bowtie2/genome.1.bt2',
            genome_dir=genome_dir),
        'bowtie2_index': expand(
            '{genome_dir}/index/bowtie2/{rna_type}.1.bt2',
            genome_dir=genome_dir, rna_type=rna_types),
    }
    # Flatten the per-group lists into one list, preserving group order.
    return [path for group in target_groups.values() for path in group]

# Aggregate target: depends on every index file returned by get_all_inputs
# and runs no command of its own.
rule all:
    input:
        get_all_inputs

# Index the genome FASTA with `samtools faidx`; the declared output is the
# .fai file next to the input FASTA.
rule fasta_index:
    input:
        '{genome_dir}/fasta/genome.fa'
    output:
        '{genome_dir}/fasta/genome.fa.fai'
    shell:
        '''samtools faidx {input}
        '''

# Build a bowtie2 index from the FASTA file of a single RNA type.
rule transcript_index_bowtie2:
    input:
        '{genome_dir}/fasta/{rna_type}.fa'
    output:
        # Only these two index files are declared as outputs.
        # NOTE(review): bowtie2-build normally writes further .bt2 files
        # under the same prefix; they are not tracked here.
        bt2_1='{genome_dir}/index/bowtie2/{rna_type}.1.bt2',
        bt2rev_1='{genome_dir}/index/bowtie2/{rna_type}.rev.1.bt2'
    params:
        # Index prefix handed to bowtie2-build (output paths minus the suffix).
        output_prefix='{genome_dir}/index/bowtie2/{rna_type}'
    threads:
        config['threads']
    shell:
        '''bowtie2-build --threads {threads} {input} {params.output_prefix}
        '''

# Build a bowtie2 index from the genome FASTA.
rule genome_index_bowtie2:
    input:
        '{genome_dir}/fasta/genome.fa'
    output:
        # Only these two index files are declared as outputs.
        # NOTE(review): bowtie2-build normally writes further .bt2 files
        # under the same prefix; they are not tracked here.
        bt2_1='{genome_dir}/genome_index/bowtie2/genome.1.bt2',
        bt2rev_1='{genome_dir}/genome_index/bowtie2/genome.rev.1.bt2'
    params:
        # Index prefix handed to bowtie2-build (output paths minus the suffix).
        output_prefix='{genome_dir}/genome_index/bowtie2/genome'
    threads:
        config['threads']
    shell:
        '''bowtie2-build --threads {threads} {input} {params.output_prefix}
        '''

# Prepare an RSEM reference (with bowtie2 index) for a single RNA type from
# the genome FASTA and that RNA type's GTF annotation.
rule rsem_index_bowtie2:
    input:
        fasta='{genome_dir}/fasta/genome.fa',
        # The GTF directory depends on config['use_longest_transcript']:
        # True -> gtf_longest_transcript, False -> gtf_by_biotype.
        # A value equal to neither True nor False raises KeyError in the lookup.
        gtf=lambda wildcards: expand('{genome_dir}/{gtf}/{rna_type}.gtf',
            genome_dir=wildcards.genome_dir, 
            gtf={True: 'gtf_longest_transcript', False: 'gtf_by_biotype'}[config['use_longest_transcript']],
            rna_type=wildcards.rna_type)
    output:
        # All outputs share the prefix given in params.output_prefix.
        chrlist='{genome_dir}/rsem_index/{aligner}/{rna_type}.chrlist',
        grp='{genome_dir}/rsem_index/{aligner}/{rna_type}.grp',
        idx_fa='{genome_dir}/rsem_index/{aligner}/{rna_type}.idx.fa',
        transcripts_fa='{genome_dir}/rsem_index/{aligner}/{rna_type}.transcripts.fa',
        bt2_1='{genome_dir}/rsem_index/{aligner}/{rna_type}.1.bt2',
        bt2_2='{genome_dir}/rsem_index/{aligner}/{rna_type}.2.bt2',
        bt2_3='{genome_dir}/rsem_index/{aligner}/{rna_type}.3.bt2',
        bt2_4='{genome_dir}/rsem_index/{aligner}/{rna_type}.4.bt2',
        bt2_rev_1='{genome_dir}/rsem_index/{aligner}/{rna_type}.rev.1.bt2',
        bt2_rev_2='{genome_dir}/rsem_index/{aligner}/{rna_type}.rev.2.bt2'
    params:
        # Reference name (path prefix) passed to rsem-prepare-reference.
        output_prefix='{genome_dir}/rsem_index/{aligner}/{rna_type}'
    wildcard_constraints:
        # This rule only matches paths whose aligner directory is 'bowtie2'.
        aligner='bowtie2'
    # NOTE(review): unlike the bowtie2-build rules in this file, no `threads`
    # directive is set here and no thread count is passed to the command.
    shell:
        '''rsem-prepare-reference --gtf {input.gtf} --bowtie2 {input.fasta} {params.output_prefix}
        '''