--- a +++ b/tests/test_gindexer.py @@ -0,0 +1,129 @@ +import os + +import matplotlib +import numpy as np +import pkg_resources +import pytest +import pandas as pd + +from janggu.data import GenomicIndexer + +matplotlib.use('AGG') + +def test_gindexer_short_interval(): + data_path = pkg_resources.resource_filename('janggu', 'resources/') + + + gi = GenomicIndexer.create_from_file(os.path.join(data_path, + 'sample_equalsize.bed'), + binsize=200, stepsize=200) + assert len(gi) == 4 + gi = GenomicIndexer.create_from_file(os.path.join(data_path, + 'sample_equalsize.bed'), + binsize=180, stepsize=20) + assert len(gi) == 8 + gi = GenomicIndexer.create_from_file(os.path.join(data_path, + 'sample_equalsize.bed'), + binsize=210, stepsize=20, zero_padding=False) + assert len(gi) == 0 + + gi = GenomicIndexer.create_from_file(os.path.join(data_path, + 'sample_equalsize.bed'), + binsize=210, stepsize=20, zero_padding=True) + assert len(gi) == 4 + + +def test_gindexer_short_interval_with_dataframe(): + data_path = pkg_resources.resource_filename('janggu', 'resources/') + df = pd.read_csv(os.path.join(data_path, 'sample_equalsize.bed'), + sep='\t', header=None, names=['chrom', 'start', 'end']) + + gi = GenomicIndexer.create_from_file(df, + binsize=200, stepsize=200) + assert len(gi) == 4 + gi = GenomicIndexer.create_from_file(df, + binsize=180, stepsize=20) + assert len(gi) == 8 + gi = GenomicIndexer.create_from_file(df, + binsize=210, stepsize=20, + zero_padding=False) + assert len(gi) == 0 + + gi = GenomicIndexer.create_from_file(df, + binsize=210, stepsize=20, + zero_padding=True) + assert len(gi) == 4 + + +def test_gindexer_errors(): + data_path = pkg_resources.resource_filename('janggu', 'resources/') + + with pytest.raises(ValueError): + GenomicIndexer.create_from_file(os.path.join(data_path, + 'sample.bed'), + binsize=0, stepsize=50) + + with pytest.raises(ValueError): + GenomicIndexer.create_from_file(os.path.join(data_path, + 'sample.bed'), + binsize=10, stepsize=0) + with pytest.raises(ValueError): + # due to flank < 0 + GenomicIndexer.create_from_file(os.path.join(data_path, 'sample.bed'), + binsize=200, stepsize=50, flank=-1) + # due to unequal intervals + gi=GenomicIndexer.create_from_file(os.path.join(data_path, 'scores.bed'), + binsize=None, stepsize=None, flank=0) + #print(len(gi)) + #for reg in gi: + # print(reg) + GenomicIndexer.create_from_file(os.path.join(data_path, 'scores.bed'), + binsize=200, stepsize=200, flank=0) + +def test_gindexer_merged(): + data_path = pkg_resources.resource_filename('janggu', 'resources/') + + gi = GenomicIndexer.create_from_file( + os.path.join(data_path, 'sample.bed'), binsize=200, stepsize=200) + np.testing.assert_equal(len(gi), 100) + gi2 = gi.filter_by_region(include='chr1') + gi3 = gi.filter_by_region(include='chr10') + gi4 = gi.filter_by_region(exclude='chr2') + gi5 = gi.filter_by_region(exclude='chr10') + + + np.testing.assert_equal(len(gi2), 50) + + np.testing.assert_equal(len(gi3), 0) + np.testing.assert_equal(len(gi4), 50) + np.testing.assert_equal(len(gi5), 100) + + +def test_gindexer_merged_variable_length_ranges(): + data_path = pkg_resources.resource_filename('janggu', 'resources/') + + # with fixed size + gi = GenomicIndexer.create_from_file( + os.path.join(data_path, 'sample.bed'), binsize=3000, stepsize=3000, + zero_padding=False) + np.testing.assert_equal(len(gi), 6) + + iv = gi[0] + np.testing.assert_equal((iv.chrom, iv.start, iv.end, iv.strand), + ('chr1', 15000, 18000, '+')) + iv = gi[-1] + np.testing.assert_equal((iv.chrom, iv.start, iv.end, iv.strand), + ('chr2', 21000, 24000, '-')) + + # with variable size regions + gi = GenomicIndexer.create_from_file( + os.path.join(data_path, 'sample.bed'), binsize=3000, + stepsize=3000, zero_padding=True) + np.testing.assert_equal(len(gi), 8) + + iv = gi[0] + np.testing.assert_equal((iv.chrom, iv.start, iv.end, iv.strand), + ('chr1', 15000, 18000, '+')) + iv = gi[-1] + np.testing.assert_equal((iv.chrom, iv.start, iv.end, iv.strand), + ('chr2', 24000, 25000, '-'))