# 87 lines (79 with data), 3.3 kB
include: 'common.snakemake'
# RNA type names substituted for the {rna_type} placeholder when the
# per-type index targets are expanded.
rna_types = [
    'Y_RNA',
    'lncRNA',
    'mRNA',
    'miRNA',
    'piRNA',
    'rRNA',
    'snRNA',
    'snoRNA',
    'srpRNA',
    'tRNA',
    'tucpRNA',
]
def get_all_inputs(wildcards):
    """Return the flat list of index files requested by the default target.

    The list contains, in this order: the genome FASTA index, the RSEM
    transcript FASTA for every entry of ``rna_types``, the first bowtie2
    genome index file, and the first bowtie2 index file for every entry of
    ``rna_types``. All paths are rooted at ``config['genome_dir']``.

    ``wildcards`` is accepted because this function is used as a rule
    input function; its value is not read.
    """
    genome_dir = config['genome_dir']
    targets_by_kind = {
        'fasta_index': expand(
            '{genome_dir}/fasta/genome.fa.fai',
            genome_dir=genome_dir),
        'rsem_index': expand(
            '{genome_dir}/rsem_index/bowtie2/{rna_type}.transcripts.fa',
            genome_dir=genome_dir, rna_type=rna_types),
        'genome_index_bowtie2': expand(
            '{genome_dir}/genome_index/bowtie2/genome.1.bt2',
            genome_dir=genome_dir),
        'bowtie2_index': expand(
            '{genome_dir}/index/bowtie2/{rna_type}.1.bt2',
            genome_dir=genome_dir, rna_type=rna_types),
    }
    # Flatten the per-kind lists, keeping the dictionary's insertion order.
    return [path for paths in targets_by_kind.values() for path in paths]
# Aggregate target: its inputs are whatever get_all_inputs returns.
rule all:
    input:
        get_all_inputs
# Run `samtools faidx` on the genome FASTA; the declared output is the
# matching .fai file next to it.
rule fasta_index:
    input:
        "{genome_dir}/fasta/genome.fa"
    output:
        "{genome_dir}/fasta/genome.fa.fai"
    shell:
        "samtools faidx {input}\n"
# Run `bowtie2-build` on the FASTA of a single RNA type.
# Only the .1.bt2 and .rev.1.bt2 files are declared as outputs; the index
# prefix handed to bowtie2-build is passed through `params`.
rule transcript_index_bowtie2:
    input:
        "{genome_dir}/fasta/{rna_type}.fa"
    output:
        bt2_1="{genome_dir}/index/bowtie2/{rna_type}.1.bt2",
        bt2rev_1="{genome_dir}/index/bowtie2/{rna_type}.rev.1.bt2"
    params:
        output_prefix="{genome_dir}/index/bowtie2/{rna_type}"
    threads:
        config["threads"]
    shell:
        "bowtie2-build --threads {threads} {input} {params.output_prefix}\n"
# Run `bowtie2-build` on the genome FASTA.
# Only the .1.bt2 and .rev.1.bt2 files are declared as outputs; the index
# prefix handed to bowtie2-build is passed through `params`.
rule genome_index_bowtie2:
    input:
        "{genome_dir}/fasta/genome.fa"
    output:
        bt2_1="{genome_dir}/genome_index/bowtie2/genome.1.bt2",
        bt2rev_1="{genome_dir}/genome_index/bowtie2/genome.rev.1.bt2"
    params:
        output_prefix="{genome_dir}/genome_index/bowtie2/genome"
    threads:
        config["threads"]
    shell:
        "bowtie2-build --threads {threads} {input} {params.output_prefix}\n"
# Run `rsem-prepare-reference --bowtie2` for one RNA type, using the genome
# FASTA and that type's GTF annotation.
rule rsem_index_bowtie2:
    input:
        fasta="{genome_dir}/fasta/genome.fa",
        # The GTF directory depends on config['use_longest_transcript']:
        # True -> gtf_longest_transcript, False -> gtf_by_biotype.
        # Any other value raises KeyError from the dictionary lookup.
        gtf=lambda wildcards: expand(
            "{genome_dir}/{gtf}/{rna_type}.gtf",
            genome_dir=wildcards.genome_dir,
            gtf={
                True: "gtf_longest_transcript",
                False: "gtf_by_biotype"
            }[config["use_longest_transcript"]],
            rna_type=wildcards.rna_type)
    output:
        chrlist="{genome_dir}/rsem_index/{aligner}/{rna_type}.chrlist",
        grp="{genome_dir}/rsem_index/{aligner}/{rna_type}.grp",
        idx_fa="{genome_dir}/rsem_index/{aligner}/{rna_type}.idx.fa",
        transcripts_fa="{genome_dir}/rsem_index/{aligner}/{rna_type}.transcripts.fa",
        bt2_1="{genome_dir}/rsem_index/{aligner}/{rna_type}.1.bt2",
        bt2_2="{genome_dir}/rsem_index/{aligner}/{rna_type}.2.bt2",
        bt2_3="{genome_dir}/rsem_index/{aligner}/{rna_type}.3.bt2",
        bt2_4="{genome_dir}/rsem_index/{aligner}/{rna_type}.4.bt2",
        bt2_rev_1="{genome_dir}/rsem_index/{aligner}/{rna_type}.rev.1.bt2",
        bt2_rev_2="{genome_dir}/rsem_index/{aligner}/{rna_type}.rev.2.bt2"
    params:
        output_prefix="{genome_dir}/rsem_index/{aligner}/{rna_type}"
    # Restrict {aligner} so this rule only matches the bowtie2 directory.
    wildcard_constraints:
        aligner="bowtie2"
    shell:
        "rsem-prepare-reference --gtf {input.gtf} --bowtie2 {input.fasta} {params.output_prefix}\n"