Download this file

89 lines (66 with data), 2.4 kB

from glob import glob
import collections
import os
from singlecellmultiomics.utils import get_contig_list_from_fasta
"""
This workflow:
    - Runs bcftools on the supplied bam file
    - Merges results
"""
################## configuration ##################
configfile: "config.json"
# config

# Obtain contigs:
job_size = 5_000_000 # Bases per job

contigs, sizes = get_contig_list_from_fasta(config['reference_file'], with_length=True)
contig_sizes = dict(zip(contigs,sizes))


def get_target_vcf_list(wildcards):
    """
    Obtain a list of target vcf paths
    """
    global contig_sizes
    global job_size

    target_list = []
    for contig, size in contig_sizes.items():
        if size is None:
            continue

        # The bcftools -r argument is 1 based and inclusive
        target_list += ['variant_calls/TEMP/%s_%s_%s.vcf.gz' % (contig, bin_start, min(bin_start+job_size-1, size))
        for bin_start in  range(1,size+1,job_size)]


    return target_list


#[contig for contig in get_contig_list_from_fasta(config['reference_file']) if contig!='MISC_ALT_CONTIGS_SCMO']

rule all:
    input: 'variants/unfiltered_variants.vcf.gz'

rule bcftools_call:
    input:
        scbam=config['bam'],

    output:
        vcf = "variant_calls/TEMP/{contig}_{start}_{end}.vcf.gz",
        vcfindex = "variant_calls/TEMP/{contig}_{start}_{end}.vcf.gz.csi"

    log:
        stdout="log/variant_calls/{contig}_{start}_{end}.stdout",
        stderr="log/variant_calls/{contig}_{start}_{end}.stderr"

    threads: 1
    params:
        runtime="60h",
        reference=config['reference_file'],

    resources:
        mem_mb = lambda wildcards, attempt, input: attempt * 10000,
        runtime = lambda wildcards, attempt, input: attempt * 24

    shell:
        """bcftools mpileup -r {wildcards.contig}:{wildcards.start}-{wildcards.end} -Ou {input.scbam} -f {params.reference} -d 1000000  | bcftools call -mv -Oz -o {output.vcf} > {log.stdout} 2> {log.stderr};
        bcftools index {output.vcf}
        """


rule bcftools_gather:
    input:
        chr_vcfs = get_target_vcf_list

    output:
        vcf = 'variants/unfiltered_variants.vcf.gz'

    threads: 1
    resources:
        runtime=lambda wildcards, attempt, input: ( attempt * 24),
        mem_mb = 2000,
    message:
        'Merging contig VCF files'

    shell:
        "bcftools concat -Oz -o {output.vcf} {input.chr_vcfs}; bcftools index {output.vcf}"