Diff of /examples/Snakefile [000000] .. [d45a3a]

Switch to side-by-side view

--- a
+++ b/examples/Snakefile
@@ -0,0 +1,63 @@
+"""Download the required files
+"""
+
+from bpnet.dataspecs import DataSpec
+
+# Root URL of the remote data directory; the download rules below append
+# a relative file path to it.
+base_url = 'http://mitra.stanford.edu/kundaje/avsec/chipnexus/paper/data'
+
+# DataSpec YAML files whose listed files are requested by the `all` rule.
+dataspecs = ['chip-nexus/dataspec.yml',
+             'chip-seq/dataspec.yml']
+
+# Aggregate target (first rule in the file): request every file listed by
+# each DataSpec in `dataspecs` (peaks included), re-rooted under ./data,
+# plus the extracted FASTA "data/mm10.subset.fa".
+rule all:
+    input:
+        [
+            os.path.normpath(os.path.join('data', rel_path))
+            for spec_path in dataspecs
+            for rel_path in DataSpec.load(spec_path).list_all_files(include_peaks=True)
+        ],
+        "data/mm10.subset.fa"
+
+# Target rule: request only the files listed by chip-nexus/dataspec.yml
+# (peaks included), re-rooted under ./data.
+rule chip_nexus:
+    input:
+        [
+            os.path.normpath(os.path.join('data', rel_path))
+            for rel_path in DataSpec.load('chip-nexus/dataspec.yml').list_all_files(include_peaks=True)
+        ]
+
+
+# Target rule: request only the files listed by chip-seq/dataspec.yml
+# (peaks included), re-rooted under ./data.
+# This must load the chip-seq spec; loading the chip-nexus spec here would
+# make the rule an exact duplicate of `chip_nexus`.
+rule chip_seq:
+    input:
+        [os.path.normpath(os.path.join('data', f))
+         for f in DataSpec.load('chip-seq/dataspec.yml').list_all_files(include_peaks=True)]
+
+
+# -----------------------
+
+
+rule download:
+    """Download a single data file from the remote server.
+
+    Produces any requested path of the form data/<tf>/<path> by fetching
+    <base_url>/<tf>/<path> with wget and writing it to that output path.
+    """
+    output:
+        f = 'data/{tf}/{path}'
+    params:
+        # pass the module-level base_url through so the shell command can use it
+        base_url = base_url
+    shell:
+        "wget {params.base_url}/{wildcards.tf}/{wildcards.path} -O {output.f}"
+
+
+rule download_fasta:
+    """Download the gzipped FASTA file mm10.subset.fa.gz.
+
+    Fetches <base_url>/mm10.subset.fa.gz with wget and writes it to
+    data/mm10.subset.fa.gz.
+    """
+    output:
+        f = 'data/mm10.subset.fa.gz'
+    params:
+        # pass the module-level base_url through so the shell command can use it
+        base_url = base_url
+    shell:
+        "wget {params.base_url}/mm10.subset.fa.gz -O {output.f}"
+
+# Extract a gzipped FASTA file.
+# NOTE(review): some BSD/macOS `zcat` builds expect a ".Z" suffix; "gzip -dc"
+# is the more portable spelling -- confirm the target platforms.
+
+rule unzip:
+    """Decompress <path>.fa.gz into <path>.fa.
+
+    The compressed input is streamed through zcat and redirected to the
+    output path.
+    """
+    input:
+        f = "{path}.fa.gz"
+    output:
+        f = "{path}.fa"
+    shell:
+        "zcat {input.f} > {output.f}"