[d45a3a]: / examples / Snakefile

Download this file

64 lines (48 with data), 1.5 kB

"""Download the required files
"""

from bpnet.dataspecs import DataSpec

# Remote directory that hosts every file fetched by the download rules
base_url = 'http://mitra.stanford.edu/kundaje/avsec/chipnexus/paper/data'

# DataSpec files whose referenced files are requested by rule `all`
dataspecs = [
    'chip-nexus/dataspec.yml',
    'chip-seq/dataspec.yml',
]

# Request every file listed by each DataSpec (peaks included), placed under
# ./data, together with the extracted data/mm10.subset.fa FASTA file.
rule all:
    input:
        [
            os.path.normpath(os.path.join('data', rel_path))
            for spec_path in dataspecs
            for rel_path in DataSpec.load(spec_path).list_all_files(include_peaks=True)
        ],
        "data/mm10.subset.fa"

rule chip_nexus:
    """Request only the files listed by chip-nexus/dataspec.yml (peaks included)
    """
    input:
        [
            os.path.normpath(os.path.join('data', rel_path))
            for rel_path in DataSpec.load('chip-nexus/dataspec.yml').list_all_files(include_peaks=True)
        ]


rule chip_seq:
    """Request only the files listed by chip-seq/dataspec.yml (peaks included)

    The spec loaded here must be the ChIP-seq one; loading the ChIP-nexus
    spec would make this target a duplicate of rule `chip_nexus`.
    """
    input:
        [os.path.normpath(os.path.join('data', f))
         for f in DataSpec.load('chip-seq/dataspec.yml').list_all_files(include_peaks=True)]


# -----------------------


rule download:
    """Download an individual file

    Any requested path of the form data/<tf>/<path> is fetched with wget
    from <base_url>/<tf>/<path> and written to that same local path.
    """
    output:
        f = 'data/{tf}/{path}'
    params:
        # expose the module-level base_url to the shell command
        base_url = base_url
    shell:
        "wget {params.base_url}/{wildcards.tf}/{wildcards.path} -O {output.f}"


rule download_fasta:
    """Download the compressed FASTA file

    Fetches <base_url>/mm10.subset.fa.gz with wget and stores it as
    data/mm10.subset.fa.gz.
    """
    output:
        f = 'data/mm10.subset.fa.gz'
    params:
        # expose the module-level base_url to the shell command
        base_url = base_url
    shell:
        "wget {params.base_url}/mm10.subset.fa.gz -O {output.f}"

# Extract a fasta file: any requested <path>.fa is produced by decompressing
# the matching <path>.fa.gz (e.g. data/mm10.subset.fa from data/mm10.subset.fa.gz).

rule unzip:
    input:
        f = "{path}.fa.gz"
    output:
        f = "{path}.fa"
    shell:
        # decompressed stream is redirected into the output file
        "zcat {input.f} > {output.f}"