Diff of /tests/test_data.py [000000] .. [d7cf27]

Switch to side-by-side view

--- a
+++ b/tests/test_data.py
@@ -0,0 +1,111 @@
+import os
+
+import matplotlib
+import pkg_resources
+import pytest
+
+from janggu.data import Bioseq
+from janggu.data import split_train_test
+from janggu.data import subset
+from janggu.data import view
+from janggu.data.data import _data_props
+
+matplotlib.use('AGG')
+
+
+def test_dna_props_extraction(tmpdir):
+    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
+    data_path = pkg_resources.resource_filename('janggu', 'resources/')
+    bed_file = os.path.join(data_path, 'sample.bed')
+
+    refgenome = os.path.join(data_path, 'sample_genome.fa')
+
+    dna = Bioseq.create_from_refgenome('dna', refgenome=refgenome,
+                                    storage='ndarray',
+                                    roi=bed_file,
+                                    binsize=200, stepsize=200,
+                                    order=1)
+
+    props = _data_props(dna)
+    assert 'dna' in props
+    assert props['dna']['shape'] == (200, 1, 4)
+
+    with pytest.raises(Exception):
+        _data_props((0,))
+
+
+def test_split_train_test():
+    data_path = pkg_resources.resource_filename('janggu', 'resources/')
+    bed_file = os.path.join(data_path, 'sample.bed')
+
+    refgenome = os.path.join(data_path, 'sample_genome.fa')
+
+    dna = Bioseq.create_from_refgenome('dna', refgenome=refgenome,
+                                       storage='ndarray',
+                                       roi=bed_file,
+                                       binsize=200, stepsize=200,
+                                       order=1, store_whole_genome=True)
+
+    traindna, testdna = split_train_test(dna, holdout_chroms='chr2')
+
+    assert len(traindna) == 50
+    assert len(testdna) == 50
+    assert len(dna) == len(traindna) + len(testdna)
+
+    traindna, testdna = split_train_test([dna, dna], holdout_chroms='chr2')
+
+    assert len(traindna[0]) == 50
+    assert len(testdna[0]) == 50
+    assert len(dna) == len(traindna[0]) + len(testdna[0])
+
+
+def test_subset_include_chrname_test():
+    data_path = pkg_resources.resource_filename('janggu', 'resources/')
+    bed_file = os.path.join(data_path, 'sample.bed')
+
+    refgenome = os.path.join(data_path, 'sample_genome.fa')
+
+    dna = Bioseq.create_from_refgenome('dna', refgenome=refgenome,
+                                       storage='ndarray',
+                                       roi=bed_file,
+                                       binsize=200, stepsize=200,
+                                       order=1, store_whole_genome=True)
+
+    subdna = subset(dna, include_regions='chr2')
+
+    assert len(subdna) == 50
+
+
+def test_subset_exclude_chrname_test():
+    data_path = pkg_resources.resource_filename('janggu', 'resources/')
+    bed_file = os.path.join(data_path, 'sample.bed')
+
+    refgenome = os.path.join(data_path, 'sample_genome.fa')
+
+    dna = Bioseq.create_from_refgenome('dna', refgenome=refgenome,
+                                       storage='ndarray',
+                                       roi=bed_file,
+                                       binsize=200, stepsize=200,
+                                       order=1, store_whole_genome=True)
+
+    subdna = subset(dna, exclude_regions='chr2')
+
+    assert len(subdna) == 50
+
+
+def test_view_bed_test():
+    data_path = pkg_resources.resource_filename('janggu', 'resources/')
+    bed_file = os.path.join(data_path, 'sample.bed')
+    bedsub_file = os.path.join(data_path, 'scored_sample.bed')
+
+    refgenome = os.path.join(data_path, 'sample_genome.fa')
+
+    dna = Bioseq.create_from_refgenome('dna', refgenome=refgenome,
+                                       storage='ndarray',
+                                       roi=bed_file,
+                                       binsize=200, stepsize=200,
+                                       order=1, store_whole_genome=True)
+
+    subdna = view(dna, use_regions=bedsub_file)
+
+    assert len(subdna) == 4