|
a |
|
b/examples/Snakefile |
|
|
1 |
"""Download the required files |
|
|
2 |
""" |
|
|
3 |
|
|
|
4 |
from bpnet.dataspecs import DataSpec |
|
|
5 |
|
|
|
6 |
# Root URL that the download rules below prepend to every remote file path.
base_url = 'http://mitra.stanford.edu/kundaje/avsec/chipnexus/paper/data'

# Dataspec files whose listed files are requested by `rule all`.
dataspecs = [
    'chip-nexus/dataspec.yml',
    'chip-seq/dataspec.yml',
]
|
|
10 |
|
|
|
11 |
# download all the files required by the DataSpecs to ./data |
|
|
12 |
rule all:
    """Request every file listed by the dataspecs plus the extracted FASTA subset
    """
    input:
        # one normalised path under data/ per file reported by each dataspec
        [os.path.normpath(os.path.join('data', rel_path))
         for spec_file in dataspecs
         for rel_path in DataSpec.load(spec_file).list_all_files(include_peaks=True)],
        "data/mm10.subset.fa"
|
|
18 |
|
|
|
19 |
rule chip_nexus:
    """Request only the files listed by the ChIP-nexus dataspec
    """
    input:
        # one normalised path under data/ per file reported by the dataspec
        [os.path.normpath(os.path.join('data', rel_path))
         for rel_path in DataSpec.load('chip-nexus/dataspec.yml').list_all_files(include_peaks=True)]
|
|
23 |
|
|
|
24 |
|
|
|
25 |
rule chip_seq:
    """Request only the files listed by the ChIP-seq dataspec
    """
    input:
        # Load the ChIP-seq dataspec here; the ChIP-nexus one belongs to
        # `rule chip_nexus`.
        [os.path.normpath(os.path.join('data', f))
         for f in DataSpec.load('chip-seq/dataspec.yml').list_all_files(include_peaks=True)]
|
|
29 |
|
|
|
30 |
|
|
|
31 |
# ----------------------- |
|
|
32 |
|
|
|
33 |
|
|
|
34 |
rule download:
    """Download an individual file

    The target `data/{tf}/{path}` is fetched from `<base_url>/{tf}/{path}`.
    Every interpolated value is shell-quoted with Snakemake's `:q` format
    spec so that file names containing spaces or shell metacharacters do not
    break the command.
    """
    output:
        f = 'data/{tf}/{path}'
    params:
        base_url = base_url
    shell:
        "wget {params.base_url:q}/{wildcards.tf:q}/{wildcards.path:q} -O {output.f:q}"
|
|
43 |
|
|
|
44 |
|
|
|
45 |
rule download_fasta:
    """Download the gzipped mm10 subset FASTA to data/mm10.subset.fa.gz
    """
    params:
        base_url = base_url
    output:
        f = 'data/mm10.subset.fa.gz'
    shell:
        "wget {params.base_url}/mm10.subset.fa.gz -O {output.f}"
|
|
54 |
|
|
|
55 |
# extract the fasta file |
|
|
56 |
|
|
|
57 |
rule unzip:
    """Decompress `{path}.fa.gz` into `{path}.fa`

    Uses `gzip -dc` instead of `zcat`: the BSD/macOS `zcat` appends `.Z` to
    its argument and therefore fails on `.gz` inputs. Output goes to stdout,
    so the compressed input file is left in place. Both paths are
    shell-quoted via Snakemake's `:q` format spec.
    """
    input:
        f = "{path}.fa.gz"
    output:
        f = "{path}.fa"
    shell:
        "gzip -dc {input.f:q} > {output.f:q}"