--- a +++ b/examples/Snakefile @@ -0,0 +1,63 @@ +"""Download the required files +""" + +from bpnet.dataspecs import DataSpec + +base_url = 'http://mitra.stanford.edu/kundaje/avsec/chipnexus/paper/data' + +dataspecs = ['chip-nexus/dataspec.yml', + 'chip-seq/dataspec.yml'] + +# download all the required files by the DataSpec to ./data +rule all: + input: + [os.path.normpath(os.path.join('data', f)) + for ds in dataspecs + for f in DataSpec.load(ds).list_all_files(include_peaks=True)], + "data/mm10.subset.fa" + +rule chip_nexus: + input: + [os.path.normpath(os.path.join('data', f)) + for f in DataSpec.load('chip-nexus/dataspec.yml').list_all_files(include_peaks=True)] + + +rule chip_seq: + input: + [os.path.normpath(os.path.join('data', f)) + for f in DataSpec.load('chip-nexus/dataspec.yml').list_all_files(include_peaks=True)] + + +# ----------------------- + + +rule download: + """Download an individual file + """ + output: + f = 'data/{tf}/{path}' + params: + base_url = base_url + shell: + "wget {params.base_url}/{wildcards.tf}/{wildcards.path} -O {output.f}" + + +rule download_fasta: + """Download an individual file + """ + output: + f = 'data/mm10.subset.fa.gz' + params: + base_url = base_url + shell: + "wget {params.base_url}/mm10.subset.fa.gz -O {output.f}" + +# extract the fasta file + +rule unzip: + input: + f = "{path}.fa.gz" + output: + f = "{path}.fa" + shell: + "zcat {input.f} > {output.f}"