131 lines (115 with data), 4.0 kB
shell.prefix('set -x;')
shell.prefix('set -x;')
with open(config['sample_id_file'], 'r') as f:
sample_ids = f.read().split()
data_dir = config['data_dir']
output_dir = config['output_dir']
python2 = config['python2']
tools_dir = config['tools_dir']
def get_all_inputs(wildcards):
available_inputs = dict(
)
enabled_inputs = list(available_inputs.keys())
inputs = []
for key, l in available_inputs.items():
if key in enabled_inputs:
inputs += l
return inputs
rule all:
input:
get_all_inputs
rule map_genome:
input:
fastq='{output_dir}/unmapped/{sample_id}/rRNA.fastq',
index=genome_dir + '/genome_index/bowtie2/genome'
output:
bam='{output_dir}/circ/map_genome/{sample_id}.bam'
params:
temp_dir=config['temp_dir']
threads:
config['threads']
shell:
'''bowtie2 -p {threads} --very-sensitive --score-min=C,-15,0 --mm \
-x {input.index} -q -U {inpu.fastq} \
| samtools view -hbuS - | samtools sort -T {params.temp_dir} - -o {output.bam}
'''
rule get_unmapped:
input:
bam='{output_dir}/circ/bowtie2_1pass/{sample_id}.bam'
output:
bam='{output_dir}/circ/unmapped/{sample_id}.bam'
shell:
'''samtools view -hf 4 {input.bam} | samtools view -Sb - > {output.bam}
'''
rule unmapped_to_anchors:
input:
'{output_dir}/circ/unmapped/{sample_id}.bam'
output:
'{output_dir}/circ/anchors/{sample_id}.fastq'
shell:
'''{python2} {tools_dir}/find_circ-1.2/unmapped2anchors.py {input} > {output}
'''
rule find_circ:
input:
fastq='{output_dir}/circ/anchors/{sample_id}.fastq',
index=genome_dir + '/genome_index/bowtie2/genome',
genome_fasta=genome_dir + '/fasta/genome.fa'
output:
stats='{output_dir}/circ/find_circ/{sample_id}.stats',
reads='{output_dir}/circ/find_circ/{sample_id}.fa',
bed='{output_dir}/circ/find_circ/{sample_id}.bed'
threads:
config['threads']
shell:
'''bowtie2 -p8 --very-sensitive --score-min=C,-15,0 --reorder --mm \
-q -U {input.fastq} -x {input.index} \
| {python2} {tools_dir}/find_circ-1.2/find_circ.py --genome {input.genome_fasta} \
--name={wildcards.sample_id} --prefix={wildcards.sample_id}_ \
--stats={output.stats} \
--reads={output.reads} \
> {output.bed}
'''
rule filter_circ:
input:
'{output_dir}/circ/find_circ/{sample_id}.bed'
output:
'{output_dir}/circ/filter_circ/{sample_id}.bed'
shell:
'''grep CIRCULAR {input} | grep -v chrM | awk '$5>=1' \
| grep UNAMBIGUOUS_BP | grep ANCHOR_UNIQUE \
| {python2} {tools_dir}/find_circ-1.2/maxlength.py 100000 \
> {output}
'''
rule circexplorer_map_pe:
input:
reads1='',
reads2=''
params:
index=genome_dir + '/index/star_long/',
output_prefix=''
shell:
'''STAR --genomeDir /path/to/database/hg19_star \
--readFilesIn {input.reads1} {input.reads2} \
--runThreadN 3 \
--chimSegmentMin 20 \
--chimScoreMin 1 \
--alignIntronMax 100000 \
--outFilterMismatchNmax 4 \
--alignTranscriptsPerReadNmax 100000 \
--outFilterMultimapNmax 2 \
--outFileNamePrefix {params.output_prefix} \
--outSAMtype BAM Unsorted \
--readFilesCommand zcat
'''
rule circexplorer_pe:
input:
junction='',
genome='',
ref=''
output:
junction='',
circexplorer=''
shell:
'''{tools_dir}/CIRCexplorer/circ/star_parse.py {input.junction} {output.junction}
{tools_dir}/CIRCexplorer/circ/CIRCexplorer.py -j {output.junction} -g {input.genome} -r {input.ref} -o {output.circexplorer}
'''