"""Tests for the dataset helpers exposed by ``janggu.data``."""
import os

import matplotlib
import pkg_resources
import pytest

from janggu.data import Bioseq
from janggu.data import split_train_test
from janggu.data import subset
from janggu.data import view
from janggu.data.data import _data_props

matplotlib.use('AGG')


def _resource(filename):
    """Return the path of *filename* inside janggu's packaged resources."""
    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    return os.path.join(data_path, filename)


def _sample_dna(**kwargs):
    """Build the 'dna' Bioseq dataset shared by the tests in this module.

    The dataset is created from ``sample_genome.fa`` restricted to the
    regions in ``sample.bed`` with ``binsize=200``, ``stepsize=200`` and
    ``order=1``, stored as an ndarray.

    Parameters
    ----------
    **kwargs
        Additional keyword arguments forwarded unchanged to
        ``Bioseq.create_from_refgenome`` (e.g. ``store_whole_genome=True``).
    """
    return Bioseq.create_from_refgenome('dna',
                                        refgenome=_resource('sample_genome.fa'),
                                        storage='ndarray',
                                        roi=_resource('sample.bed'),
                                        binsize=200, stepsize=200,
                                        order=1, **kwargs)


def test_dna_props_extraction(tmpdir, monkeypatch):
    """``_data_props`` reports the dataset shape and rejects other input."""
    # monkeypatch restores the variable on teardown, so the temporary output
    # directory does not leak into tests that run afterwards.
    monkeypatch.setenv('JANGGU_OUTPUT', tmpdir.strpath)

    dna = _sample_dna()

    props = _data_props(dna)
    assert 'dna' in props
    assert props['dna']['shape'] == (200, 1, 4)

    # A plain tuple is not an accepted dataset.
    with pytest.raises(Exception):
        _data_props((0,))


def test_split_train_test():
    """Holding out 'chr2' splits the 100 sample regions 50/50."""
    dna = _sample_dna(store_whole_genome=True)

    # Single dataset.
    traindna, testdna = split_train_test(dna, holdout_chroms='chr2')

    assert len(traindna) == 50
    assert len(testdna) == 50
    assert len(dna) == len(traindna) + len(testdna)

    # A list of datasets is split element-wise.
    traindna, testdna = split_train_test([dna, dna], holdout_chroms='chr2')

    assert len(traindna[0]) == 50
    assert len(testdna[0]) == 50
    assert len(dna) == len(traindna[0]) + len(testdna[0])


def test_subset_include_chrname_test():
    """``subset`` with ``include_regions='chr2'`` yields 50 regions."""
    dna = _sample_dna(store_whole_genome=True)

    subdna = subset(dna, include_regions='chr2')

    assert len(subdna) == 50


def test_subset_exclude_chrname_test():
    """``subset`` with ``exclude_regions='chr2'`` yields 50 regions."""
    dna = _sample_dna(store_whole_genome=True)

    subdna = subset(dna, exclude_regions='chr2')

    assert len(subdna) == 50


def test_view_bed_test():
    """``view`` restricted to ``scored_sample.bed`` yields 4 regions."""
    bedsub_file = _resource('scored_sample.bed')

    dna = _sample_dna(store_whole_genome=True)

    subdna = view(dna, use_regions=bedsub_file)

    assert len(subdna) == 4