[0ce940]: / tests / Snakefile

Download this file

56 lines (42 with data), 1.2 kB

"""Download the required files
"""

from bpnet.cli.schemas import DataSpec

# Root URL of the remote directory that the download rules fetch from.
base_url = "http://mitra.stanford.edu/kundaje/avsec/chipnexus/paper/data"

# DataSpec YAML files whose listed files are requested by `rule all`.
dataspecs = [
    "models/dataspec.yml",
]

# Tag embedded in the file names of the subset genome and subset peaks.
SUBSET = "subset"

def _dataspec_targets():
    """Return every file listed by the DataSpecs as a normalised path under ``data``.

    Each entry of ``dataspecs`` is loaded with ``DataSpec.load`` and asked for
    all of its files (peaks included); every returned path is joined onto
    ``data`` and normalised.
    """
    targets = []
    for spec_path in dataspecs:
        spec = DataSpec.load(spec_path)
        for rel_path in spec.list_all_files(include_peaks=True):
            targets.append(os.path.normpath(os.path.join('data', rel_path)))
    return targets


# Default target: all files required by the DataSpecs (placed under ./data),
# plus the subset genome FASTA and the subset peak file.
rule all:
    input:
        _dataspec_targets(),
        f"data/mm10.{SUBSET}.fa",
        f"data/chip-nexus/peaks.{SUBSET}.1kb.bed.gz"

# -----------------------


rule download:
    """Download a single file from the remote data directory.

    The requested output ``data/<tf>/<path>`` is fetched from
    ``<base_url>/<tf>/<path>``, i.e. the local layout below ``data/`` mirrors
    the remote layout below ``base_url``.
    """
    output:
        f = 'data/{tf}/{path}'
    params:
        base_url = base_url
    # The `:q` specifier shell-quotes each interpolated value, so file names
    # containing spaces or shell metacharacters cannot split or alter the
    # command. Values made only of safe characters are left unchanged.
    shell:
        "wget {params.base_url:q}/{wildcards.tf:q}/{wildcards.path:q} -O {output.f:q}"


rule download_fasta:
    """Download the gzipped mm10 subset FASTA (``mm10.<SUBSET>.fa.gz``) from ``base_url``.
    """
    output:
        f = 'data/mm10.%s.fa.gz' % SUBSET
    params:
        # Module-level values are passed through params so the shell
        # template can reference them.
        base_url = base_url,
        SUBSET = SUBSET
    shell:
        "wget {params.base_url}/mm10.{params.SUBSET}.fa.gz -O {output.f}"


# extract the fasta file

rule unzip:
    """Decompress ``<path>.fa.gz`` into ``<path>.fa``.

    The decompressed data is streamed to stdout and redirected into the
    output file; the ``.fa.gz`` input is left in place.
    """
    input:
        f = "{path}.fa.gz"
    output:
        f = "{path}.fa"
    # `gzip -dc` is used rather than `zcat` because some BSD/macOS `zcat`
    # variants only accept `.Z` files. The `:q` specifier shell-quotes the
    # paths so names with spaces or metacharacters stay a single argument.
    shell:
        "gzip -dc {input.f:q} > {output.f:q}"