a b/examples/Snakefile
1
"""Download the required files
2
"""
3
4
from bpnet.dataspecs import DataSpec
5
6
# Root URL under which all example data files are hosted.
base_url = 'http://mitra.stanford.edu/kundaje/avsec/chipnexus/paper/data'

# Dataspec files whose referenced files get downloaded.
dataspecs = ['chip-nexus/dataspec.yml',
             'chip-seq/dataspec.yml']
10
11
# Download all the files required by the DataSpecs to ./data,
# plus the extracted data/mm10.subset.fa FASTA file.
rule all:
    input:
        # Every file listed by each dataspec, re-rooted under data/.
        [os.path.normpath(os.path.join('data', f))
         for ds in dataspecs
         for f in DataSpec.load(ds).list_all_files(include_peaks=True)],
        "data/mm10.subset.fa"
18
19
rule chip_nexus:
    """Download only the files listed by chip-nexus/dataspec.yml
    """
    input:
        [os.path.normpath(os.path.join('data', f))
         for f in DataSpec.load('chip-nexus/dataspec.yml').list_all_files(include_peaks=True)]
23
24
25
rule chip_seq:
    """Download only the files listed by chip-seq/dataspec.yml
    """
    input:
        # Must load the chip-seq dataspec; the chip-nexus one belongs to
        # the `chip_nexus` rule.
        [os.path.normpath(os.path.join('data', f))
         for f in DataSpec.load('chip-seq/dataspec.yml').list_all_files(include_peaks=True)]
29
30
31
# -----------------------
32
33
34
rule download:
    """Download an individual file

    Fetches <base_url>/<tf>/<path> with wget and writes it to
    data/<tf>/<path>.
    """
    output:
        f = 'data/{tf}/{path}'
    params:
        base_url = base_url
    shell:
        "wget {params.base_url}/{wildcards.tf}/{wildcards.path} -O {output.f}"
43
44
45
rule download_fasta:
    """Download the gzipped mm10.subset.fa.gz FASTA file

    Fetches <base_url>/mm10.subset.fa.gz with wget and writes it to
    data/mm10.subset.fa.gz.
    """
    output:
        f = 'data/mm10.subset.fa.gz'
    params:
        base_url = base_url
    shell:
        "wget {params.base_url}/mm10.subset.fa.gz -O {output.f}"
54
55
# Extract a gzipped fasta file: <path>.fa.gz -> <path>.fa
# The compressed input is left in place.
rule unzip:
    input:
        f = "{path}.fa.gz"
    output:
        f = "{path}.fa"
    shell:
        # `gzip -dc` is equivalent to GNU `zcat` but also works on systems
        # where `zcat` only accepts `.Z` files (e.g. BSD/macOS).
        "gzip -dc {input.f} > {output.f}"