--- a +++ b/exseek/snakefiles/create_index_small.snakemake @@ -0,0 +1,86 @@ +include: 'common.snakemake' + +rna_types = ['Y_RNA', 'lncRNA', 'mRNA', 'miRNA', 'piRNA', 'rRNA', 'snRNA', 'snoRNA', 'srpRNA', 'tRNA', 'tucpRNA'] + +def get_all_inputs(wildcards): + inputs = dict( + fasta_index=expand('{genome_dir}/fasta/genome.fa.fai', + genome_dir=config['genome_dir']), + rsem_index=expand('{genome_dir}/rsem_index/bowtie2/{rna_type}.transcripts.fa', + genome_dir=config['genome_dir'], rna_type=rna_types), + genome_index_bowtie2=expand('{genome_dir}/genome_index/bowtie2/genome.1.bt2', + genome_dir=config['genome_dir']), + bowtie2_index=expand('{genome_dir}/index/bowtie2/{rna_type}.1.bt2', + genome_dir=config['genome_dir'], rna_type=rna_types), + ) + all_inputs = [] + for l in inputs.values(): + all_inputs += l + return all_inputs + +rule all: + input: + get_all_inputs + +rule fasta_index: + input: + '{genome_dir}/fasta/genome.fa' + output: + '{genome_dir}/fasta/genome.fa.fai' + shell: + '''samtools faidx {input} + ''' + +rule transcript_index_bowtie2: + input: + '{genome_dir}/fasta/{rna_type}.fa' + output: + bt2_1='{genome_dir}/index/bowtie2/{rna_type}.1.bt2', + bt2rev_1='{genome_dir}/index/bowtie2/{rna_type}.rev.1.bt2' + params: + output_prefix='{genome_dir}/index/bowtie2/{rna_type}' + threads: + config['threads'] + shell: + '''bowtie2-build --threads {threads} {input} {params.output_prefix} + ''' + +rule genome_index_bowtie2: + input: + '{genome_dir}/fasta/genome.fa' + output: + bt2_1='{genome_dir}/genome_index/bowtie2/genome.1.bt2', + bt2rev_1='{genome_dir}/genome_index/bowtie2/genome.rev.1.bt2' + params: + output_prefix='{genome_dir}/genome_index/bowtie2/genome' + threads: + config['threads'] + shell: + '''bowtie2-build --threads {threads} {input} {params.output_prefix} + ''' + +rule rsem_index_bowtie2: + input: + fasta='{genome_dir}/fasta/genome.fa', + gtf=lambda wildcards: expand('{genome_dir}/{gtf}/{rna_type}.gtf', + genome_dir=wildcards.genome_dir, + gtf={True: 'gtf_longest_transcript', False: 'gtf_by_biotype'}[config['use_longest_transcript']], + rna_type=wildcards.rna_type) + output: + chrlist='{genome_dir}/rsem_index/{aligner}/{rna_type}.chrlist', + grp='{genome_dir}/rsem_index/{aligner}/{rna_type}.grp', + idx_fa='{genome_dir}/rsem_index/{aligner}/{rna_type}.idx.fa', + transcripts_fa='{genome_dir}/rsem_index/{aligner}/{rna_type}.transcripts.fa', + bt2_1='{genome_dir}/rsem_index/{aligner}/{rna_type}.1.bt2', + bt2_2='{genome_dir}/rsem_index/{aligner}/{rna_type}.2.bt2', + bt2_3='{genome_dir}/rsem_index/{aligner}/{rna_type}.3.bt2', + bt2_4='{genome_dir}/rsem_index/{aligner}/{rna_type}.4.bt2', + bt2_rev_1='{genome_dir}/rsem_index/{aligner}/{rna_type}.rev.1.bt2', + bt2_rev_2='{genome_dir}/rsem_index/{aligner}/{rna_type}.rev.2.bt2' + params: + output_prefix='{genome_dir}/rsem_index/{aligner}/{rna_type}' + wildcard_constraints: + aligner='bowtie2' + shell: + '''rsem-prepare-reference --gtf {input.gtf} --bowtie2 {input.fasta} {params.output_prefix} + '''