56 lines (42 with data), 1.2 kB
"""Download the required files
"""
import os

from bpnet.cli.schemas import DataSpec
# Tag embedded in the FASTA and peak file names requested by the workflow.
SUBSET = "subset"
# Remote root that every download URL is built from.
base_url = "http://mitra.stanford.edu/kundaje/avsec/chipnexus/paper/data"
# DataSpec YAML files whose referenced files must be fetched.
dataspecs = ["models/dataspec.yml"]
# Default target: every file listed by the DataSpecs (peaks included), located
# under ./data, plus the subset FASTA and the subset 1kb peak file.
rule all:
    input:
        [
            os.path.normpath(os.path.join("data", rel_path))
            for spec_file in dataspecs
            for rel_path in DataSpec.load(spec_file).list_all_files(include_peaks=True)
        ],
        f"data/mm10.{SUBSET}.fa",
        f"data/chip-nexus/peaks.{SUBSET}.1kb.bed.gz"
# -----------------------
rule download:
    """Download an individual file from the remote data directory.

    The local target mirrors the remote layout: the part of the path after
    the leading data/ directory is appended to the base URL.
    """
    output:
        f = 'data/{tf}/{path}'
    params:
        base_url = base_url
    # The URL and the destination are double-quoted so that characters special
    # to the shell (spaces, '&', '?', ...) in a wildcard value are not split
    # or interpreted before wget sees them.
    shell:
        'wget "{params.base_url}/{wildcards.tf}/{wildcards.path}" -O "{output.f}"'
rule download_fasta:
    """Download the gzip-compressed mm10 FASTA for the configured SUBSET.

    Unlike the generic download rule, this file sits directly under the base
    URL rather than in a sub-directory.
    """
    output:
        f = f'data/mm10.{SUBSET}.fa.gz'
    params:
        base_url = base_url,
        SUBSET = SUBSET
    # Quote both arguments so the shell passes them to wget unmodified.
    shell:
        'wget "{params.base_url}/mm10.{params.SUBSET}.fa.gz" -O "{output.f}"'
# extract the fasta file
rule unzip:
    """Decompress a gzipped FASTA file, writing the result beside the archive.

    The compressed input is left in place; only the .fa output is created.
    """
    input:
        f = "{path}.fa.gz"
    output:
        f = "{path}.fa"
    # `gzip -dc` is used instead of `zcat` because on some systems (e.g. macOS)
    # `zcat` only accepts `.Z` file names; the output is the same where both
    # work. Paths are quoted so unusual characters survive the shell.
    shell:
        'gzip -dc "{input.f}" > "{output.f}"'