"""Tests for the coverage datasets of ``janggu.data`` (tests/test_coverage.py)."""
import os
from itertools import product

import matplotlib
matplotlib.use('AGG')  # pylint: disable=

import numpy as np
import pandas
import pkg_resources
import pytest
from pybedtools import BedTool

from janggu.data import Bioseq
from janggu.data import Cover
from janggu.data import Transpose
from janggu.data import GenomicIndexer
from janggu.data import plotGenomeTrack
from janggu.data import LineTrack
from janggu.data import SeqTrack
from janggu.data import HeatTrack


def test_channel_last_first():
    """Check that ``Transpose`` swaps the position and channel axes of a Cover."""
    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(data_path, "sample.bed")
    bwfile_ = os.path.join(data_path, "sample.bw")

    def load_cover():
        # Both datasets are built from exactly the same arguments.
        return Cover.create_from_bigwig(
            'test',
            bigwigfiles=bwfile_,
            resolution=1,
            binsize=200,
            roi=bed_file,
            store_whole_genome=True,
            storage='ndarray')

    cover1 = load_cover()
    assert cover1.shape == (100, 200, 1, 1)
    assert cover1[0].shape == (1, 200, 1, 1)

    cover = Transpose(load_cover(), axis=(0, 3, 2, 1))
    assert cover.shape == (100, 1, 1, 200)
    assert cover[0].shape == (1, 1, 1, 200)

    # Transposing back must reproduce the untransposed data.
    np.testing.assert_equal(cover1[0], np.transpose(cover[0], (0, 3, 2, 1)))


def test_cover_roi_binsize_padding(tmpdir):
    """Check window counts and shapes for ROIs that do not tile evenly.

    The ends of the first two ROI rows are extended by 12 and 111 bp.
    All loaders are expected to produce 68 windows with the default
    ``zero_padding`` and 66 windows with ``zero_padding=False``.
    Invalid binsize/resolution combinations must raise ``ValueError``.
    """
    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    bed_columns = ['chrom', 'start', 'end', 'name', 'score', 'strand']

    bed_file = os.path.join(data_path, 'sample_equalsize.bed')
    print(pandas.read_csv(bed_file, sep='\t', header=None,
                          names=bed_columns))

    roi_file = os.path.join(data_path, "sample.bed")
    roi = pandas.read_csv(roi_file, sep='\t', header=None,
                          names=bed_columns)

    # Use .loc instead of the chained ``roi.end.iloc[i] += ...``: chained
    # assignment does not modify ``roi`` when pandas copy-on-write is active.
    roi.loc[0, 'end'] += 12
    roi.loc[1, 'end'] += 111
    print(roi)

    with pytest.raises(ValueError):
        # error due to binsize not being a multiple of resolution
        Cover.create_from_bed('test',
                              bedfiles=bed_file,
                              roi=roi, binsize=30,
                              stepsize=30,
                              store_whole_genome=True,
                              cache=False, resolution=7)

    # interval starts must align with resolution intervals
    rroi = roi.copy()
    rroi['start'] += 1
    with pytest.raises(ValueError):
        Cover.create_from_bed('test',
                              bedfiles=bed_file,
                              roi=rroi, binsize=30,
                              stepsize=30,
                              store_whole_genome=True,
                              cache=False, resolution=30)

    swg_options = [True, False]
    storage_grid = list(product(swg_options, ['ndarray', 'sparse']))

    def check_windows(cov, nwindows, nbins):
        """Assert length and shape, then iterate once over the dataset."""
        assert len(cov) == nwindows
        assert cov.shape == (nwindows, nbins, 1, 1)
        for _ in cov:
            pass

    def bed_cover(resolution, swg, **kwargs):
        return Cover.create_from_bed('test',
                                     bedfiles=bed_file,
                                     roi=roi, binsize=300,
                                     stepsize=300,
                                     store_whole_genome=swg,
                                     cache=False, resolution=resolution,
                                     **kwargs)

    for swg, store in storage_grid:
        check_windows(bed_cover(10, swg, storage=store), 68, 30)

    # ``storage`` is left at the library default here.
    for swg in swg_options:
        check_windows(bed_cover(3, swg), 68, 100)

    for swg, store in storage_grid:
        check_windows(bed_cover(3, swg, storage=store), 68, 100)

    for swg, store in storage_grid:
        check_windows(bed_cover(100, swg, storage=store), 68, 3)

    for swg, store in storage_grid:
        check_windows(
            bed_cover(100, swg, storage=store, zero_padding=False), 66, 3)

    bwfile_ = os.path.join(data_path, "sample.bw")

    for swg, store in storage_grid:
        cover = Cover.create_from_bigwig(
            'test',
            bigwigfiles=bwfile_,
            resolution=100,
            binsize=300,
            roi=roi,
            storage=store,
            store_whole_genome=swg)
        check_windows(cover, 68, 3)

    for swg, store in storage_grid:
        cover = Cover.create_from_bigwig(
            'test',
            bigwigfiles=bwfile_,
            resolution=100,
            binsize=300,
            roi=roi, zero_padding=False,
            storage=store,
            store_whole_genome=swg)
        check_windows(cover, 66, 3)

    bamfile_ = os.path.join(data_path, "sample.bam")

    for swg, store in storage_grid:
        cover = Cover.create_from_bam(
            'test',
            bamfile_,
            resolution=100,
            binsize=300,
            roi=roi,
            stranded=False,
            storage=store,
            store_whole_genome=swg)
        check_windows(cover, 68, 3)

    for swg, store in storage_grid:
        cover = Cover.create_from_bam(
            'test',
            bamfile_,
            resolution=100,
            binsize=300,
            roi=roi, zero_padding=False,
            stranded=False,
            storage=store,
            store_whole_genome=swg)
        check_windows(cover, 66, 3)


def test_cover_export_bigwig(tmpdir):
    """Export a Cover to bigWig and check that re-loading reproduces it."""
    path = tmpdir.strpath
    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(data_path, "sample.bed")
    bwfile_ = os.path.join(data_path, "sample.bw")

    for resolution, storage in product([1, 50], [True, False]):
        print('resolution=', resolution)
        print('store_whole_genome', storage)
        cover = Cover.create_from_bigwig(
            'test',
            bigwigfiles=bwfile_,
            resolution=resolution,
            binsize=200,
            roi=bed_file,
            store_whole_genome=storage,
            storage='ndarray')

        cover.export_to_bigwig(output_dir=path)

        # The exported file is looked up as <path>/<name>.<condition>.bigwig.
        exported = '{path}/{name}.{sample}.bigwig'.format(
            path=path, name=cover.name,
            sample=cover.conditions[0])
        cov2 = Cover.create_from_bigwig(
            'test',
            bigwigfiles=exported,
            resolution=resolution,
            binsize=200,
            roi=bed_file,
            store_whole_genome=storage,
            storage='ndarray')

        assert cover.shape == (100, 200 // resolution, 1, 1)
        assert cover.shape == cov2.shape
        np.testing.assert_allclose(cover[:].sum(), 1044.0 / resolution)
        np.testing.assert_allclose(cov2[:].sum(), 1044.0 / resolution)


def _assert_interval_access(cover, reso, shift, storage, nchannels,
                            check_flanked=False):
    """Check that integer and genomic-interval indexing of ``cover`` agree.

    Parameters
    ----------
    cover
        Dataset under test; indexed by integer, by the entries of its
        ``gindexer`` and by ``(chrom, start, end, strand)`` tuples.
    reso : int
        Resolution the dataset was created with.
    shift : int
        Number of bins by which the queried interval is moved
        (0 skips the shifted comparison).
    storage : bool
        ``store_whole_genome`` flag of the dataset; only used for logging.
    nchannels : int
        Size of the third axis of the interval query result.
    check_flanked : bool
        If True, also compare against an interval extended by one base
        on each side and trimmed again.
    """
    for i in range(len(cover)):
        print('storage :', storage, '/ resolution :', reso,
              '/ shift :', shift)

        interval = cover.gindexer[i]
        # Interval queries are compared against the binned values
        # repeated ``resolution`` times along the position axis.
        expanded = np.repeat(cover[i], cover.garray.resolution, axis=1)
        np.testing.assert_equal(expanded, cover[interval])

        chrom, start, end, strand = (interval.chrom, interval.start,
                                     interval.end, interval.strand)
        np.testing.assert_equal(expanded, cover[chrom, start, end, strand])

        if check_flanked:
            np.testing.assert_equal(
                cover[chrom, start, end, strand],
                cover[chrom, start - 1, end + 1, strand][:, 1:-1, :, :])

        if shift == 0:
            continue

        start += shift * reso
        end += shift * reso
        shifted = cover[chrom, start, end, strand]

        # Drop the part of the shifted query that lies outside window i.
        # The side to trim depends on the strand.
        if strand != '-':
            gicov = shifted[:, :(-shift * reso), :, :]
            expected = cover[i][:, shift:, :, :]
        else:
            gicov = shifted[:, (shift * reso):, :, :]
            expected = cover[i][:, :-shift, :, :]

        # Keep the first base of every bin to get back to bin resolution.
        binned = gicov.reshape(
            (1, gicov.shape[1] // reso, reso, nchannels, 1))[:, :, 0, :, :]
        np.testing.assert_equal(expected, binned)


def test_bam_genomic_interval_access():
    """Check interval-based access for BAM-derived coverage."""
    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(data_path, "sample.bed")
    bamfile_ = os.path.join(data_path, "sample.bam")

    for reso, shift, storage in product([1, 50], [0, 1], [True, False]):
        cover = Cover.create_from_bam(
            'test',
            bamfiles=bamfile_,
            roi=bed_file,
            flank=0,
            storage='ndarray',
            store_whole_genome=storage,
            resolution=reso)

        _assert_interval_access(cover, reso, shift, storage,
                                nchannels=2, check_flanked=True)


def test_bigwig_genomic_interval_access():
    """Check interval-based access for bigWig-derived coverage."""
    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(data_path, "sample.bed")
    bwfile_ = os.path.join(data_path, "sample.bw")

    for reso, shift, storage in product([1, 50], [0, 1], [True, False]):
        cover = Cover.create_from_bigwig(
            'test',
            bigwigfiles=bwfile_,
            roi=bed_file,
            flank=0,
            storage='ndarray',
            store_whole_genome=storage,
            resolution=reso)

        _assert_interval_access(cover, reso, shift, storage, nchannels=1)


def test_bed_genomic_interval_access():
    """Check interval-based access for BED-derived coverage."""
    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(data_path, "sample.bed")
    # The same BED file serves as both the signal and the ROI.
    signal_file = os.path.join(data_path, "sample.bed")

    for reso, shift, storage in product([1, 50], [0, 1], [True, False]):
        cover = Cover.create_from_bed(
            'test',
            bedfiles=signal_file,
            roi=bed_file,
            flank=0,
            storage='ndarray',
            store_whole_genome=storage,
            resolution=reso)

        _assert_interval_access(cover, reso, shift, storage, nchannels=1)


def test_bam_inferred_binsize():
    """Create BAM coverage without an explicit binsize and check its shape."""
    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(data_path, "positive.bed")
    bamfile_ = os.path.join(data_path, "sample.bam")

    cover = Cover.create_from_bam(
        'test',
        bamfiles=bamfile_,
        roi=bed_file,
        flank=0,
        storage='ndarray')
    assert len(cover) == 25
    assert cover.shape == (25, 200, 2, 1)


def test_bigwig_inferred_binsize():
    """Create bigWig coverage without an explicit binsize and check its shape."""
    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(data_path, "positive.bed")
    bwfile_ = os.path.join(data_path, "sample.bw")

    cover = Cover.create_from_bigwig(
        'test',
        bigwigfiles=bwfile_,
        resolution=1,
        roi=bed_file,
        storage='ndarray')
    assert len(cover) == 25
    assert cover.shape == (25, 200, 1, 1)


def test_bed_unsync_roi_targets():
    """Load BED targets that are shifted relative to the ROI.

    Several resolution / store_whole_genome / minoverlap combinations are
    created from ``positive_shift.bed`` over the ROI ``positive.bed`` and
    checked for length, shape and total signal.
    """
    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(data_path, "positive.bed")
    bed_shift_file = os.path.join(data_path, "positive_shift.bed")

    everything = slice(None)
    scenarios = [
        # (extra keyword arguments, bins per region, index summed, total)
        ({'resolution': None}, 1, everything, 1),
        ({'resolution': 50}, 4, everything, 1),
        ({'resolution': 50, 'store_whole_genome': True}, 4, everything, 1),
        ({'resolution': 1, 'store_whole_genome': False}, 200, 0, 49),
        ({'resolution': 1, 'store_whole_genome': True},
         200, everything, 49),
        ({'resolution': 1, 'store_whole_genome': True, 'minoverlap': .5},
         200, everything, 0),
    ]

    for options, nbins, key, total in scenarios:
        cover = Cover.create_from_bed(
            'test',
            bedfiles=bed_shift_file,
            roi=bed_file,
            storage='ndarray',
            **options)
        assert len(cover) == 25
        assert cover.shape == (25, nbins, 1, 1)
        assert cover[key].sum() == total

    # check bed file loading without roi
    cover_ = Cover.create_from_bed(
        'test',
        bedfiles=bed_shift_file,
        roi=None,
        resolution=1,
        store_whole_genome=True,
        storage='ndarray', minoverlap=.5)

    cover_.gindexer = cover.gindexer
    # NOTE(review): these assertions inspect ``cover`` (the last dataset
    # built with an ROI), not the ROI-less ``cover_`` -- confirm the intent.
    assert len(cover) == 25
    assert cover.shape == (25, 200, 1, 1)
    assert cover[:].sum() == 0


def test_bed_inferred_binsize():
    """Create BED coverage without an explicit binsize and check its shape."""
    data_path = pkg_resources.resource_filename('janggu', 'resources/')

    regions = os.path.join(data_path, "positive.bed")
    cover = Cover.create_from_bed(
        'test',
        bedfiles=regions,
        roi=regions,
        resolution=1,
        storage='ndarray')
    assert len(cover) == 25
    assert cover.shape == (25, 200, 1, 1)

    regions = os.path.join(data_path, "positive_gap.bed")
    cover = Cover.create_from_bed(
        'test',
        bedfiles=regions,
        roi=regions,
        resolution=1,
        store_whole_genome=True,
        storage='ndarray')
    assert len(cover) == 2
    assert cover.shape == (2, 50, 1, 1)


def test_bed_overreaching_ends_whole_genome():
    """Check flanked windows with whole-genome storage (flank of 20)."""
    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(data_path, "bed_test.bed")
    flank = 20

    for store in ('ndarray', 'sparse'):
        print(store)
        cover = Cover.create_from_bed(
            'test',
            bedfiles=bed_file,
            roi=bed_file,
            binsize=2,
            flank=flank,
            resolution=1,
            store_whole_genome=True,
            storage=store)
        assert len(cover) == 9
        assert cover.shape == (9, 2 + 2 * flank, 1, 1)
        np.testing.assert_equal(cover[0].sum(), 18)
        np.testing.assert_equal(cover[:].sum(), 9 * 18)


def test_bed_overreaching_ends_part_genome():
    """Check flanked windows with ROI-only storage (flank of 2)."""
    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(data_path, "bed_test.bed")
    flank = 2

    for store in ('ndarray', 'sparse'):
        print(store)
        cover = Cover.create_from_bed(
            'test',
            bedfiles=bed_file,
            roi=bed_file,
            binsize=2,
            flank=flank,
            resolution=1,
            store_whole_genome=False,
            storage=store)
        assert len(cover) == 9
        assert cover.shape == (9, 2 + 2 * flank, 1, 1)
        np.testing.assert_equal(cover[0].sum(), 4)
        np.testing.assert_equal(cover[:].sum(), 6 * 7 + 8)


def test_bed_store_whole_genome_option():
    """BED coverage must not depend on the ``store_whole_genome`` flag."""
    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(data_path, "positive_shift.bed")

    whole, partial = [
        Cover.create_from_bed(
            name,
            bedfiles=bed_file,
            roi=bed_file,
            store_whole_genome=flag,
            storage='ndarray')
        for name, flag in (('test', True), ('test2', False))
    ]

    assert len(whole) == 1
    assert len(partial) == len(whole)
    assert whole.shape == (1, 49, 1, 1)
    assert whole.shape == partial.shape
    all_ones = np.ones(whole.shape)
    np.testing.assert_equal(whole[:], all_ones)
    np.testing.assert_equal(partial[:], np.ones(whole.shape))


def test_bed_store_whole_genome_option_dataframe(tmpdir):
    """Repeat the BED whole-genome check for different ROI containers.

    The ROI is passed as a pandas DataFrame, a ``BedTool`` and a list of
    intervals.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(data_path, "sample.bed")

    def compare(regions, whole_genome_extra, show_first):
        whole = Cover.create_from_bed(
            'test',
            bedfiles=bed_file,
            roi=regions,
            binsize=200, stepsize=200,
            store_whole_genome=True,
            storage='ndarray',
            **whole_genome_extra)
        partial = Cover.create_from_bed(
            'test2',
            bedfiles=bed_file,
            roi=regions,
            binsize=200, stepsize=200,
            store_whole_genome=False,
            cache=True,
            storage='ndarray')

        if show_first:
            print(whole.gindexer[0])
        assert len(whole) == 100
        assert len(partial) == len(whole)
        assert whole.shape == (100, 200, 1, 1)
        assert whole.shape == partial.shape
        np.testing.assert_equal(whole[:], partial[:])
        np.testing.assert_equal(whole[:], np.ones(whole.shape))

    # as pd.dataframe
    roi = pandas.read_csv(bed_file,
                          sep='\t', header=None,
                          names=['chrom', 'start', 'end',
                                 'name', 'score', 'strand'])
    print(roi.head())
    compare(roi, {'cache': False}, True)

    # as bedtool
    roi = BedTool(bed_file)
    print(roi)
    compare(roi, {}, False)

    # as interval list
    roi = [iv for iv in BedTool(bed_file)]
    print(roi)
    compare(roi, {}, False)


def test_bigwig_store_whole_genome_option():
    """bigWig coverage must not depend on the ``store_whole_genome`` flag."""
    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(data_path, "sample.bed")
    bwfile_ = os.path.join(data_path, "sample.bw")

    shared = dict(bigwigfiles=bwfile_, roi=bed_file,
                  binsize=200, stepsize=200, storage='ndarray')

    cover1 = Cover.create_from_bigwig('test', store_whole_genome=True,
                                      **shared)
    cover2 = Cover.create_from_bigwig('test2', store_whole_genome=False,
                                      **shared)
    cover3 = Cover.create_from_bigwig('test3', store_whole_genome=False,
                                      nan_to_num=False, **shared)

    assert len(cover1) == 100
    assert len(cover2) == len(cover1)
    assert cover1.shape == (100, 200, 1, 1)
    assert cover1.shape == cover2.shape
    np.testing.assert_equal(cover1[:], cover2[:])
    assert cover1[:].sum() == 1044.0
    assert cover3[:].sum() == 1044.0


def test_bigwig_store_whole_genome_option_dataframe(tmpdir):
    """Repeat the bigWig whole-genome check for different ROI containers."""
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(data_path, "sample.bed")
    bwfile_ = os.path.join(data_path, "sample.bw")

    def compare(regions):
        shared = dict(bigwigfiles=bwfile_, roi=regions,
                      binsize=200, stepsize=200, storage='ndarray')
        cover1 = Cover.create_from_bigwig(
            'test', store_whole_genome=True, **shared)
        cover2 = Cover.create_from_bigwig(
            'test2', store_whole_genome=False, cache=True, **shared)
        cover3 = Cover.create_from_bigwig(
            'test3', store_whole_genome=False, nan_to_num=False, **shared)

        assert len(cover1) == 100
        assert len(cover2) == len(cover1)
        assert cover1.shape == (100, 200, 1, 1)
        assert cover1.shape == cover2.shape
        np.testing.assert_equal(cover1[:], cover2[:])
        assert cover1[:].sum() == 1044.0
        assert cover3[:].sum() == 1044.0

    # as dataframe
    compare(pandas.read_csv(bed_file,
                            sep='\t', header=None,
                            names=['chrom', 'start', 'end',
                                   'name', 'score', 'strand']))

    # as bedtool
    bedtool = BedTool(bed_file)
    compare(bedtool)

    # as list of intervals
    compare([iv for iv in bedtool])


def test_bam_store_whole_genome_option_dataframe(tmpdir):
    """Repeat the BAM whole-genome check for different ROI containers."""
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(data_path, "sample.bed")
    bamfile_ = os.path.join(data_path, "sample.bam")

    def compare(regions):
        shared = dict(bamfiles=bamfile_, roi=regions,
                      binsize=200, stepsize=200, storage='ndarray')
        cover1 = Cover.create_from_bam(
            'test', store_whole_genome=True, **shared)
        cover2 = Cover.create_from_bam(
            'test2', store_whole_genome=False, cache=True, **shared)

        assert len(cover1) == 100
        assert len(cover2) == len(cover1)
        assert cover1.shape == (100, 200, 2, 1)
        assert cover1.shape == cover2.shape
        np.testing.assert_equal(cover1[:], cover2[:])
        assert cover1[:].sum() == 29.

    # as dataframe
    compare(pandas.read_csv(bed_file,
                            sep='\t', header=None,
                            names=['chrom', 'start', 'end',
                                   'name', 'score', 'strand']))

    # as bedtool
    bedtool = BedTool(bed_file)
    compare(bedtool)

    # as list of intervals
    compare([iv for iv in bedtool])


def test_bam_store_whole_genome_option():
    """BAM coverage must not depend on the ``store_whole_genome`` flag."""
    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(data_path, "sample.bed")
    bamfile_ = os.path.join(data_path, "sample.bam")

    shared = dict(bamfiles=bamfile_, roi=bed_file,
                  binsize=200, stepsize=200, storage='ndarray')

    cover1 = Cover.create_from_bam('test', store_whole_genome=True,
                                   **shared)
    cover2 = Cover.create_from_bam('test2', store_whole_genome=False,
                                   **shared)

    assert len(cover1) == 100
    assert len(cover2) == len(cover1)
    assert cover1.shape == (100, 200, 2, 1)
    assert cover1.shape == cover2.shape
    np.testing.assert_equal(cover1[:], cover2[:])
    assert cover1[:].sum() == 29.


def test_cover_from_bam_sanity():
    """Exercise valid, invalid and deprecated arguments of ``create_from_bam``."""
    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(data_path, "sample.bed")
    bamfile_ = os.path.join(data_path, "sample.bam")

    cover = Cover.create_from_bam(
        'test',
        bamfiles=bamfile_,
        roi=bed_file,
        binsize=200, stepsize=200,
        flank=0,
        storage='ndarray')
    _ = cover[0]

    with pytest.raises(IndexError):
        # a float is not a valid index
        _ = cover[1.2]

    cov2 = Cover.create_from_bam(
        'test',
        bamfiles=bamfile_,
        storage='ndarray',
        store_whole_genome=True)

    assert len(cover.gindexer) == len(cover.garray.handle['data'])
    assert len(cov2.garray.handle) != len(cover.garray.handle['data'])

    # Each entry is (name, keyword arguments) of a call that must fail.
    rejected = [
        # name must be a string
        (1.2, dict(bamfiles=bamfile_, binsize=1, stepsize=1)),
        # empty bamfiles string (together with a negative flank)
        ('test', dict(bamfiles="", binsize=1, stepsize=1, flank=-1)),
        # empty bamfiles list (together with a negative flank)
        ('test', dict(bamfiles=[], binsize=1, stepsize=1, flank=-1)),
        # negative flank
        ('test', dict(bamfiles=bamfile_, binsize=1, stepsize=1, flank=-1)),
        # negative stepsize
        ('test', dict(bamfiles=bamfile_, binsize=1, stepsize=-1, flank=0)),
        # negative binsize
        ('test', dict(bamfiles=bamfile_, binsize=-1, stepsize=1, flank=0)),
    ]
    for name, kwargs in rejected:
        with pytest.raises(Exception):
            Cover.create_from_bam(name, roi=bed_file, storage='ndarray',
                                  **kwargs)

    # These keyword arguments are expected to emit a FutureWarning.
    for deprecated in ({'overwrite': True}, {'datatags': ['asdf']}):
        with pytest.warns(FutureWarning):
            Cover.create_from_bam(
                'test',
                bamfiles=bamfile_,
                roi=bed_file,
                binsize=200, stepsize=200,
                storage='ndarray',
                **deprecated)


def test_cover_from_bigwig_sanity():
    """Exercise valid, invalid and deprecated arguments of ``create_from_bigwig``."""
    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(data_path, "sample.bed")
    bwfile_ = os.path.join(data_path, "sample.bw")

    windowed = dict(bigwigfiles=bwfile_, roi=bed_file,
                    binsize=200, stepsize=50, resolution=50,
                    flank=0, storage='ndarray')

    cover = Cover.create_from_bigwig('test', **windowed)
    _ = cover[0]
    assert len(cover.gindexer) == 394
    assert len(cover.garray.handle['data']) == 394

    cover = Cover.create_from_bigwig('test', store_whole_genome=True,
                                     **windowed)
    _ = cover[0]
    assert len(cover.gindexer) == 394
    assert len(cover.garray.handle) == 2

    cov2 = Cover.create_from_bigwig(
        'test',
        bigwigfiles=bwfile_,
        resolution=7,
        storage='ndarray',
        store_whole_genome=True)

    assert len(cov2.garray.handle) == 2
    assert cov2['chr1', 100, 200].shape == (1, 100, 1, 1)

    # cov2 was created without an ROI; both accesses are expected to fail.
    with pytest.raises(Exception):
        _ = cov2.shape
    with pytest.raises(Exception):
        _ = cov2[0]

    # Each entry is (name, keyword arguments) of a call that must fail.
    rejected = [
        # name must be a string
        (1.2, dict(binsize=1, stepsize=1)),
        # negative flank
        ('test', dict(binsize=1, stepsize=1, flank=-1)),
        # negative stepsize
        ('test', dict(binsize=1, stepsize=-1, flank=0)),
        # negative binsize
        ('test', dict(binsize=-1, stepsize=1, flank=0)),
    ]
    for name, kwargs in rejected:
        with pytest.raises(Exception):
            Cover.create_from_bigwig(name, bigwigfiles=bwfile_,
                                     roi=bed_file, storage='ndarray',
                                     **kwargs)

    # These keyword arguments are expected to emit a FutureWarning.
    for deprecated in ({'flank': 0, 'overwrite': True},
                       {'datatags': ['asdf']}):
        with pytest.warns(FutureWarning):
            Cover.create_from_bigwig(
                'test',
                bigwigfiles=bwfile_,
                roi=bed_file,
                binsize=200, stepsize=200,
                storage='ndarray',
                **deprecated)


def test_cover_from_bed_sanity():
    """Exercise valid, invalid and deprecated arguments of ``create_from_bed``."""
    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(data_path, 'sample.bed')
    scored_bed = os.path.join(data_path, "scored_sample.bed")

    windowed = dict(bedfiles=scored_bed, roi=bed_file,
                    binsize=200, stepsize=50, resolution=50,
                    storage='ndarray')

    cover = Cover.create_from_bed('test', flank=0, **windowed)
    _ = cover[0]
    Cover.create_from_bed('test', **windowed)

    csvfile = os.path.join(data_path, 'ctcf_sample.csv')

    # Each entry is (name, keyword arguments) of a call that must fail.
    rejected = [
        # name must be a string
        (1.2, dict(bedfiles=scored_bed, binsize=1, stepsize=1)),
        # negative flank
        ('test', dict(bedfiles=scored_bed, binsize=1, stepsize=1,
                      flank=-1)),
        # negative stepsize
        ('test', dict(bedfiles=scored_bed, binsize=1, stepsize=-1,
                      flank=0)),
        # negative binsize
        ('test', dict(bedfiles=scored_bed, binsize=-1, stepsize=1,
                      flank=0)),
        # must be a bed file
        ('test', dict(bedfiles=csvfile, binsize=1, stepsize=1)),
    ]
    for name, kwargs in rejected:
        with pytest.raises(Exception):
            Cover.create_from_bed(name, roi=bed_file, storage='ndarray',
                                  **kwargs)

    # These keyword arguments are expected to emit a FutureWarning.
    for deprecated in ({'flank': 0, 'overwrite': True},
                       {'datatags': ['asdf']}):
        with pytest.warns(FutureWarning):
            Cover.create_from_bed(
                'test',
                bedfiles=scored_bed,
                roi=bed_file,
                binsize=200, stepsize=200,
                storage='ndarray',
                **deprecated)
def test_cover_bam_unstranded():
    """Check the signal of selected regions of an unstranded BAM coverage."""
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    bam_path = os.path.join(resources, "sample.bam")
    sizes_path = os.path.join(resources, 'sample.chrom.sizes')
    roi_path = os.path.join(resources, "sample.bed")

    # Mapping of chromosome name to its length.
    chrom_sizes = pandas.read_csv(sizes_path, sep='\t',
                                  names=['chr', 'length'],
                                  index_col='chr').to_dict()['length']

    cover = Cover.create_from_bam(
        "yeast_I_II_III.bam",
        bamfiles=bam_path,
        roi=roi_path,
        binsize=200, stepsize=200,
        genomesize=chrom_sizes,
        stranded=False)

    np.testing.assert_equal(len(cover), 100)
    np.testing.assert_equal(cover.shape, (100, 200, 1, 1))

    def hits(region):
        """Return the summed signal and the axis-1 indices equal to one."""
        window = cover[region]
        return window.sum(), np.where(window == 1)[1]

    # Region 4: only the first matching position is compared.
    total, positions = hits(4)
    np.testing.assert_equal(total, 1.)
    np.testing.assert_equal(positions[0], 179)  # pos

    # (region, expected sum, expected positions)
    expectations = (
        (13, 2., [162, 178]),
        (52, 2., [9, 89]),
        (96, 1., [25]),
    )
    for region, expected_total, expected_positions in expectations:
        total, positions = hits(region)
        np.testing.assert_equal(total, expected_total)
        np.testing.assert_equal(positions, np.asarray(expected_positions))
test_cover_bam(tmpdir): + os.environ['JANGGU_OUTPUT'] = tmpdir.strpath + data_path = pkg_resources.resource_filename('janggu', 'resources/') + bamfile_ = os.path.join(data_path, "sample.bam") + gsfile_ = os.path.join(data_path, 'sample.chrom.sizes') + + content = pandas.read_csv(gsfile_, sep='\t', names=['chr', 'length'], + index_col='chr') + + gsize = content.to_dict()['length'] + + bed_file = os.path.join(data_path, "sample.bed") + + for store in ['ndarray', 'hdf5', 'sparse']: + # base pair binsize + cover = Cover.create_from_bam( + "yeast_I_II_III.bam", + bamfiles=bamfile_, + roi=bed_file, + binsize=200, stepsize=200, + genomesize=gsize, + storage=store, cache=True) + + np.testing.assert_equal(len(cover), 100) + np.testing.assert_equal(cover.shape, (100, 200, 2, 1)) + + # the region is read relative to the forward strand + # read on the reverse strand + val = np.where(cover[4] == 1) + np.testing.assert_equal(cover[4].sum(), 1.) + np.testing.assert_equal(val[1][0], 179) # pos + np.testing.assert_equal(val[2][0], 1) # strand + + # two reads on the forward strand + val = np.where(cover[13] == 1) + np.testing.assert_equal(cover[13].sum(), 2.) + np.testing.assert_equal(val[1], np.asarray([162, 178])) # pos + np.testing.assert_equal(val[2], np.asarray([0, 0])) # strand + + # the region is read relative to the reverse strand + # for index 50 + # read on the reverse strand + val = np.where(cover[52] == 1) + np.testing.assert_equal(cover[52].sum(), 2.) + np.testing.assert_equal(val[1], np.asarray([9, 89])) # pos + np.testing.assert_equal(val[2], np.asarray([0, 0])) # strand + + # two reads on the forward strand + val = np.where(cover[96] == 1) + np.testing.assert_equal(cover[96].sum(), 1.) 
def test_load_bam_resolution10(tmpdir):
    """Check stranded BAM coverage loaded with ``resolution=10``.

    Every storage backend is combined with both settings of
    ``store_whole_genome``; the same expectations apply to each combination.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    bam_path = os.path.join(resources, "sample.bam")
    sizes_path = os.path.join(resources, 'sample.chrom.sizes')
    roi_path = os.path.join(resources, "sample.bed")

    # Mapping of chromosome name to its length.
    chrom_sizes = pandas.read_csv(sizes_path, sep='\t',
                                  names=['chr', 'length'],
                                  index_col='chr').to_dict()['length']

    # (region, expected sum, expected positions, expected strands)
    expectations = (
        (13, 2., [16, 17], [0, 0]),
        (52, 2., [0, 8], [0, 0]),
        (96, 1., [2], [1]),
    )

    for store in ['ndarray', 'hdf5', 'sparse']:
        for store_genome in [True, False]:
            cover = Cover.create_from_bam(
                "yeast_I_II_III.bam",
                bamfiles=bam_path,
                roi=roi_path,
                binsize=200, stepsize=200,
                genomesize=chrom_sizes,
                resolution=10,
                store_whole_genome=store_genome,
                storage=store, cache=True)

            np.testing.assert_equal(len(cover), 100)
            np.testing.assert_equal(cover.shape, (100, 20, 2, 1))

            # Region 4: only the first match is compared.
            window = cover[4]
            found = np.where(window == 1)
            np.testing.assert_equal(window.sum(), 1.)
            np.testing.assert_equal(found[1][0], 17)  # pos
            np.testing.assert_equal(found[2][0], 1)  # strand

            for region, total, positions, strands in expectations:
                window = cover[region]
                found = np.where(window == 1)
                np.testing.assert_equal(window.sum(), total)
                np.testing.assert_equal(found[1], np.asarray(positions))
                np.testing.assert_equal(found[2], np.asarray(strands))
(100, 200, 1, 1)) + + # there is one read in the region + np.testing.assert_allclose(cover[4].sum(), 36.) + np.testing.assert_allclose(cover[52].sum(), 2*36.) + + cover = Cover.create_from_bigwig( + "cov", + bigwigfiles=bwfile_, + roi=bed_file, + binsize=200, stepsize=200, + genomesize=gsize, + store_whole_genome=False, cache=True) + + np.testing.assert_equal(len(cover), 100) + np.testing.assert_equal(cover.shape, (100, 200, 1, 1)) + + # there is one read in the region + np.testing.assert_allclose(cover[4].sum(), 36.) + np.testing.assert_allclose(cover[52].sum(), 2*36.) + +def test_load_cover_bigwig_resolution1(tmpdir): + os.environ['JANGGU_OUTPUT'] = tmpdir.strpath + data_path = pkg_resources.resource_filename('janggu', 'resources/') + + bwfile_ = os.path.join(data_path, "sample.bw") + + bed_file = os.path.join(data_path, "sample.bed") + + for store in ['ndarray', 'hdf5', 'sparse']: + # base pair binsize + print(store) + cover = Cover.create_from_bigwig( + "cov", + bigwigfiles=bwfile_, + roi=bed_file, + binsize=200, stepsize=200, + resolution=1, + storage=store, cache=True) + + np.testing.assert_equal(len(cover), 100) + np.testing.assert_equal(cover.shape, (100, 200, 1, 1)) + + # there is one read in the region 4 + np.testing.assert_allclose(cover[4].sum(), 36) + np.testing.assert_equal(cover[4][0, :, 0, 0], + np.asarray( + [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., + 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., + 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., + 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., + 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., + 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., + 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., + 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., + 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., + 1., 1., 1., 1., 1., 
def test_load_cover_bigwig_resolutionNone(tmpdir):
    """Compare bigwig coverage at ``resolution=1`` and ``resolution=None``.

    With ``collapser='sum'`` the ``resolution=None`` track must have a single
    bin per region whose value equals the per-region sum of the base-pair
    resolution track.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    resources = pkg_resources.resource_filename('janggu', 'resources/')

    # Keyword arguments shared by both tracks.
    common = dict(bigwigfiles=os.path.join(resources, "sample.bw"),
                  roi=os.path.join(resources, "sample.bed"),
                  binsize=200, stepsize=200,
                  cache=True)

    for store in ['ndarray', 'hdf5', 'sparse']:
        print(store)
        per_base = Cover.create_from_bigwig(
            "cov", resolution=1, storage=store, **common)
        collapsed = Cover.create_from_bigwig(
            "cov", resolution=None, storage=store, collapser='sum', **common)

        np.testing.assert_equal(len(collapsed), 100)
        np.testing.assert_equal(collapsed.shape, (100, 1, 1, 1))

        np.testing.assert_equal(per_base[:].sum(axis=1),
                                collapsed[:].sum(axis=1))
def test_load_cover_bed_categorical():
    """Check ``Cover.create_from_bed`` with ``mode='categorical'``.

    A list of two bed files must raise ``ValueError``; a single file is
    loaded at three resolutions for the ndarray and sparse backends.
    """
    bed_file = pkg_resources.resource_filename('janggu', 'resources/sample.bed')
    score_file = pkg_resources.resource_filename('janggu',
                                                 'resources/scored_sample.bed')

    def load(name, **extra):
        # All successful calls share the input files, tiling and mode.
        return Cover.create_from_bed(name,
                                     bedfiles=score_file,
                                     roi=bed_file,
                                     binsize=200, stepsize=200,
                                     mode='categorical',
                                     **extra)

    with pytest.raises(ValueError):
        # Only one bed file allowed.
        Cover.create_from_bed(
            "cov",
            bedfiles=[score_file] * 2,
            roi=bed_file,
            binsize=200, stepsize=200,
            resolution=200,
            mode='categorical')

    # Expected value of region 4 when it is reduced to a single bin.
    one_hot = [[[[0., 0., 0., 1.]]]]

    for store in ['ndarray', 'sparse']:
        print(store)

        cover = load("cov", resolution=200, storage=store)
        np.testing.assert_equal(len(cover), 100)
        np.testing.assert_equal(cover.shape, (100, 1, 1, 4))
        np.testing.assert_equal(cover[0].sum(), 0)
        np.testing.assert_equal(cover[4].sum(), 1)
        np.testing.assert_equal(cover[4], one_hot)

        cover = load("cov50", resolution=50, storage=store)
        np.testing.assert_equal(len(cover), 100)
        np.testing.assert_equal(cover.shape, (100, 4, 1, 4))
        np.testing.assert_equal(cover[0].sum(), 0)
        np.testing.assert_equal(cover[4].sum(), 4*1)

        cover = load("cov50", resolution=None, storage=store, collapser='max')
        np.testing.assert_equal(len(cover), 100)
        np.testing.assert_equal(cover.shape, (100, 1, 1, 4))
        np.testing.assert_equal(cover[0].sum(), 0)
        np.testing.assert_equal(cover[4].sum(), 1)
        np.testing.assert_equal(cover[4], one_hot)
+ cover = Cover.create_from_bed( + "cov", + bedfiles=[score_file] * 2, + roi=bed_file, + binsize=200, stepsize=200, + resolution=200, + mode='score_category') + + for store in ['ndarray', 'sparse']: + print(store) + cover = Cover.create_from_bed( + "cov", + bedfiles=score_file, + roi=bed_file, + binsize=200, stepsize=200, + resolution=200, + storage=store, + mode='score_category') + + assert cover.conditions == ['1', '2', '4', '5'] + np.testing.assert_equal(len(cover), 100) + np.testing.assert_equal(cover.shape, (100, 1, 1, 4)) + np.testing.assert_equal(cover[0].sum(), 0) + np.testing.assert_equal(cover[4].sum(), 1) + np.testing.assert_equal(cover[4], [[[[0., 0., 0., 1.]]]]) + + cover = Cover.create_from_bed( + "cov", + bedfiles=score_file, + roi=bed_file, + binsize=200, stepsize=200, + resolution=200, + conditions=['1', '2', '4', '5'], + storage=store, + mode='score_category') + + assert cover.conditions == ['1', '2', '4', '5'] + np.testing.assert_equal(len(cover), 100) + np.testing.assert_equal(cover.shape, (100, 1, 1, 4)) + np.testing.assert_equal(cover[0].sum(), 0) + np.testing.assert_equal(cover[4].sum(), 1) + np.testing.assert_equal(cover[4], [[[[0., 0., 0., 1.]]]]) + + cover = Cover.create_from_bed( + "cov50", + bedfiles=score_file, + roi=bed_file, + binsize=200, stepsize=200, + resolution=50, + storage=store, + mode='score_category') + + assert cover.conditions == ['1', '2', '4', '5'] + np.testing.assert_equal(len(cover), 100) + np.testing.assert_equal(cover.shape, (100, 4, 1, 4)) + np.testing.assert_equal(cover[0].sum(), 0) + np.testing.assert_equal(cover[4].sum(), 4*1) + + cover = Cover.create_from_bed( + "cov50", + bedfiles=score_file, + roi=bed_file, + resolution=None, + binsize=200, stepsize=200, + storage=store, + collapser='max', + mode='score_category') + + assert cover.conditions == ['1', '2', '4', '5'] + np.testing.assert_equal(len(cover), 100) + np.testing.assert_equal(cover.shape, (100, 1, 1, 4)) + np.testing.assert_equal(cover[0].sum(), 0) + 
def test_load_cover_bedgraph():
    """Check ``Cover.create_from_bed`` with ``mode='bedgraph'``.

    The bedgraph is loaded at three resolutions for the ndarray and sparse
    backends; region 0 must be empty and region 4 must carry a score of 0.5
    per bin.
    """
    bed_file = pkg_resources.resource_filename('janggu', 'resources/sample.bed')
    score_file = pkg_resources.resource_filename('janggu',
                                                 'resources/sample.bedgraph')

    def load(name, **extra):
        # All calls share the input files, tiling and mode.
        return Cover.create_from_bed(name,
                                     bedfiles=score_file,
                                     roi=bed_file,
                                     binsize=200, stepsize=200,
                                     mode='bedgraph',
                                     **extra)

    for store in ['ndarray', 'sparse']:
        print(store)

        cover = load("cov", resolution=200, storage=store)
        np.testing.assert_equal(len(cover), 100)
        np.testing.assert_equal(cover.shape, (100, 1, 1, 1))
        np.testing.assert_equal(cover[0].sum(), 0)
        np.testing.assert_equal(cover[4].sum(), .5)
        np.testing.assert_equal(cover[4], [[[[.5]]]])

        cover = load("cov50", resolution=50, storage=store)
        np.testing.assert_equal(len(cover), 100)
        np.testing.assert_equal(cover.shape, (100, 4, 1, 1))
        np.testing.assert_equal(cover[0].sum(), 0)
        np.testing.assert_equal(cover[4].sum(), 4*.5)

        cover = load("cov50", resolution=None, storage=store, collapser='max')
        np.testing.assert_equal(len(cover), 100)
        np.testing.assert_equal(cover.shape, (100, 1, 1, 1))
        np.testing.assert_equal(cover[0].sum(), 0)
        np.testing.assert_equal(cover[4].sum(), .5)
        np.testing.assert_equal(cover[4], [[[[.5]]]])
+ cover = Cover.create_from_bed( + "cov", + bedfiles=[score_file] * 2, + roi=bed_file, + binsize=200, stepsize=200, + resolution=200, + mode='name_category') + + for store in ['ndarray', 'sparse']: + print(store) + cover = Cover.create_from_bed( + "cov", + bedfiles=score_file, + roi=bed_file, + binsize=200, stepsize=200, + resolution=200, + storage=store, + mode='name_category') + + assert cover.conditions == ['state1', 'state2'] + np.testing.assert_equal(len(cover), 100) + np.testing.assert_equal(cover.shape, (100, 1, 1, 2)) + np.testing.assert_equal(cover[0].sum(), 0) + np.testing.assert_equal(cover[4].sum(), 1) + np.testing.assert_equal(cover[3], [[[[1., 0.]]]]) + np.testing.assert_equal(cover[4], [[[[0., 1.]]]]) + + cover = Cover.create_from_bed( + "cov", + bedfiles=score_file, + roi=bed_file, + binsize=200, stepsize=200, + resolution=200, + conditions=['state1', 'state2'], + storage=store, + mode='name_category') + + assert cover.conditions == ['state1', 'state2'] + np.testing.assert_equal(len(cover), 100) + np.testing.assert_equal(cover.shape, (100, 1, 1, 2)) + np.testing.assert_equal(cover[0].sum(), 0) + np.testing.assert_equal(cover[4].sum(), 1) + np.testing.assert_equal(cover[3], [[[[1., 0.]]]]) + np.testing.assert_equal(cover[4], [[[[0., 1.]]]]) + + cover = Cover.create_from_bed( + "cov50", + bedfiles=score_file, + roi=bed_file, + binsize=200, stepsize=200, + resolution=50, + storage=store, + mode='name_category') + + assert cover.conditions == ['state1', 'state2'] + np.testing.assert_equal(len(cover), 100) + np.testing.assert_equal(cover.shape, (100, 4, 1, 2)) + np.testing.assert_equal(cover[0].sum(), 0) + np.testing.assert_equal(cover[4].sum(), 4*1) + + cover = Cover.create_from_bed( + "cov50", + bedfiles=score_file, + roi=bed_file, + resolution=None, + binsize=200, stepsize=200, + storage=store, + collapser='max', + mode='name_category') + + assert cover.conditions == ['state1', 'state2'] + np.testing.assert_equal(len(cover), 100) + 
def test_filter_by_region():
    """Check ``GenomicIndexer.filter_by_region`` on 2 bp tiles.

    The indexer built from ``bed_test.bed`` is expected to hold nine tiles
    on chr1 from 0 to 18. Each query lists the tiles it must return.
    """
    roi_file = pkg_resources.resource_filename('janggu',
                                               'resources/bed_test.bed')

    roi = GenomicIndexer.create_from_file(regions=roi_file, binsize=2, stepsize=2)
    np.testing.assert_equal(len(roi), 9)

    first, last = roi[0], roi[-1]
    np.testing.assert_equal((first.chrom, first.start, first.end), ('chr1', 0, 2))
    np.testing.assert_equal((last.chrom, last.start, last.end), ('chr1', 16, 18))

    def coordinates(indexer):
        """Return the (chrom, start, end) triple of every interval."""
        return [(indexer[i].chrom, indexer[i].start, indexer[i].end)
                for i in range(len(indexer))]

    # A query spanning the whole range returns the intervals unchanged.
    everything = roi.filter_by_region(include='chr1', start=0, end=18)
    for position in range(len(everything)):
        np.testing.assert_equal(everything[position], roi[position])

    # (query start, query end, expected (start, end) tiles)
    queries = (
        (5, 10, [(4, 6), (6, 8), (8, 10)]),
        (5, 11, [(4, 6), (6, 8), (8, 10), (10, 12)]),
        (6, 10, [(6, 8), (8, 10)]),
        (6, 11, [(6, 8), (8, 10), (10, 12)]),
        (20, 30, []),
    )
    for start, end, tiles in queries:
        subset = roi.filter_by_region(include='chr1', start=start, end=end)
        np.testing.assert_equal(len(subset), len(tiles))
        np.testing.assert_equal(coordinates(subset),
                                [('chr1',) + tile for tile in tiles])
def test_padding_value_nan():
    """Check ``padding_value=np.nan`` of ``Cover.create_from_array``.

    A 20 bp window on ``pseudo1`` is queried; it must be NaN everywhere
    except at five offsets that hold the zeros supplied through ``array``.
    The same result is required with and without ``store_whole_genome``.
    """
    variantsfile = pkg_resources.resource_filename('janggu', 'resources/pseudo_snps.vcf')
    gindexer = GenomicIndexer.create_from_file(variantsfile, None, None)
    array = np.zeros((len(gindexer), 3))

    # Expected profile of the window 650-670: NaN padding with zeros at
    # offsets 6, 8, 10, 11 and 12.
    expected = np.full(20, np.nan)
    expected[[6, 8, 10, 11, 12]] = 0.

    for whole_genome in (True, False):
        snpcov = Cover.create_from_array('snps', array,
                                         gindexer,
                                         store_whole_genome=whole_genome,
                                         padding_value=np.nan)

        assert snpcov.shape == (6, 1, 1, 3)

        np.testing.assert_equal(snpcov['pseudo1', 650, 670][0, :, 0, 0],
                                expected)
def test_fulltilebigwig():
    """Compare whole-genome and ROI-only bigwig loading.

    ``sample_fulltile.bed`` is used as ROI. For each scenario the track
    loaded with ``store_whole_genome=True`` must have the expected shape and
    be identical to the one loaded with ``store_whole_genome=False``.
    """
    # pkg_resources, os and Cover come from the module-level imports; the
    # former function-scope re-imports were redundant.
    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    roi = os.path.join(data_path, "sample_fulltile.bed")
    bwfile = os.path.join(data_path, "sample.bw")

    # (extra keyword arguments, expected dataset shape)
    scenarios = (
        ({}, (2, 30000, 1, 1)),
        ({'binsize': 200}, (300, 200, 1, 1)),
        ({'binsize': 200, 'flank': 150}, (300, 500, 1, 1)),
    )

    for options, shape in scenarios:
        cover1 = Cover.create_from_bigwig(
            'test',
            bigwigfiles=bwfile,
            roi=roi,
            store_whole_genome=True,
            **options)
        cover2 = Cover.create_from_bigwig(
            'test2',
            bigwigfiles=bwfile,
            roi=roi,
            store_whole_genome=False,
            **options)

        # The first axis of the shape is the number of regions.
        assert len(cover1) == shape[0]
        assert len(cover2) == len(cover1)
        assert cover1.shape == shape
        assert cover1.shape == cover2.shape
        np.testing.assert_equal(cover1[:], cover2[:])
def test_fulltilebam():
    """Compare whole-genome and ROI-only BAM loading.

    ``sample_fulltile.bed`` is used as ROI and the reads are counted
    unstranded. For each scenario the track loaded with
    ``store_whole_genome=True`` must have the expected shape and be identical
    to the one loaded with ``store_whole_genome=False``.
    """
    # pkg_resources, os and Cover come from the module-level imports; the
    # former function-scope re-imports were redundant.
    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    roi = os.path.join(data_path, "sample_fulltile.bed")
    bamfile = os.path.join(data_path, "sample.bam")

    # (extra keyword arguments, expected dataset shape)
    scenarios = (
        ({}, (2, 30000, 1, 1)),
        ({'binsize': 200}, (300, 200, 1, 1)),
        ({'binsize': 200, 'flank': 150}, (300, 500, 1, 1)),
    )

    for options, shape in scenarios:
        cover1 = Cover.create_from_bam(
            'test',
            bamfiles=bamfile,
            roi=roi,
            stranded=False,
            store_whole_genome=True,
            **options)
        cover2 = Cover.create_from_bam(
            'test2',
            bamfiles=bamfile,
            roi=roi,
            stranded=False,
            store_whole_genome=False,
            **options)

        # The first axis of the shape is the number of regions.
        assert len(cover1) == shape[0]
        assert len(cover2) == len(cover1)
        assert cover1.shape == shape
        assert cover1.shape == cover2.shape
        np.testing.assert_equal(cover1[:], cover2[:])
'test', + bamfiles=bwfile, + roi=roi, binsize=200, + stranded=False, + store_whole_genome=True) + cover2 = Cover.create_from_bam( + 'test2', + bamfiles=bwfile, + roi=roi, binsize=200, + stranded=False, + store_whole_genome=False) + + assert len(cover1) == 450 + assert len(cover2) == len(cover1) + assert cover1.shape == (450, 200, 1, 1) + assert cover1.shape == cover2.shape + np.testing.assert_equal(cover1[:], cover2[:]) + + cover1 = Cover.create_from_bam( + 'test', + bamfiles=bwfile, + roi=roi, binsize=200, + flank=150, + stranded=False, + store_whole_genome=True) + cover2 = Cover.create_from_bam( + 'test2', + bamfiles=bwfile, + roi=roi, binsize=200, + flank=150, + stranded=False, + store_whole_genome=False) + + assert len(cover1) == 450 + assert len(cover2) == len(cover1) + assert cover1.shape == (450, 500, 1, 1) + assert cover1.shape == cover2.shape + np.testing.assert_equal(cover1[:], cover2[:]) + + +def test_fulltilebed(): + + import pkg_resources + import os + from janggu.data import Cover + + data_path = pkg_resources.resource_filename('janggu', 'resources/') + roi = os.path.join(data_path, "sample_fulltile.bed") + bwfile = os.path.join(data_path, "sample.bed") + + cover1 = Cover.create_from_bed( + 'test', + bedfiles=bwfile, + roi=roi, + store_whole_genome=True) + cover2 = Cover.create_from_bed( + 'test2', + bedfiles=bwfile, + roi=roi, + store_whole_genome=False) + + assert len(cover1) == 2 + assert len(cover2) == len(cover1) + assert cover1.shape == (2, 30000, 1, 1) + assert cover1.shape == cover2.shape + np.testing.assert_equal(cover1[:], cover2[:]) + + cover1 = Cover.create_from_bed( + 'test', + bedfiles=bwfile, + roi=roi, binsize=200, + store_whole_genome=True) + cover2 = Cover.create_from_bed( + 'test2', + bedfiles=bwfile, + roi=roi, binsize=200, + store_whole_genome=False) + + assert len(cover1) == 300 + assert len(cover2) == len(cover1) + assert cover1.shape == (300, 200, 1, 1) + assert cover1.shape == cover2.shape + np.testing.assert_equal(cover1[:], 
cover2[:]) + + cover1 = Cover.create_from_bed( + 'test', + bedfiles=bwfile, + roi=roi, binsize=200, + flank=150, + store_whole_genome=True) + cover2 = Cover.create_from_bed( + 'test2', + bedfiles=bwfile, + roi=roi, binsize=200, + flank=150, + store_whole_genome=False) + + assert len(cover1) == 300 + assert len(cover2) == len(cover1) + assert cover1.shape == (300, 500, 1, 1) + assert cover1.shape == cover2.shape + np.testing.assert_equal(cover1[:], cover2[:]) + +def test_fulltilebed2(): + + import pkg_resources + import os + from janggu.data import Cover + + data_path = pkg_resources.resource_filename('janggu', 'resources/') + roi = os.path.join(data_path, "sample_fulltile2.bed") + bwfile = os.path.join(data_path, "sample.bed") + + cover1 = Cover.create_from_bed( + 'test', + bedfiles=bwfile, + roi=roi, + store_whole_genome=True) + cover2 = Cover.create_from_bed( + 'test2', + bedfiles=bwfile, + roi=roi, + store_whole_genome=False) + + assert len(cover1) == 3 + assert len(cover2) == len(cover1) + assert cover1.shape == (3, 30000, 1, 1) + assert cover1.shape == cover2.shape + np.testing.assert_equal(cover1[:], cover2[:]) + + cover1 = Cover.create_from_bed( + 'test', + bedfiles=bwfile, + roi=roi, binsize=200, + store_whole_genome=True) + cover2 = Cover.create_from_bed( + 'test2', + bedfiles=bwfile, + roi=roi, binsize=200, + store_whole_genome=False) + + assert len(cover1) == 450 + assert len(cover2) == len(cover1) + assert cover1.shape == (450, 200, 1, 1) + assert cover1.shape == cover2.shape + np.testing.assert_equal(cover1[:], cover2[:]) + + cover1 = Cover.create_from_bed( + 'test', + bedfiles=bwfile, + roi=roi, binsize=200, + flank=150, + store_whole_genome=True) + cover2 = Cover.create_from_bed( + 'test2', + bedfiles=bwfile, + roi=roi, binsize=200, + flank=150, + store_whole_genome=False) + + assert len(cover1) == 450 + assert len(cover2) == len(cover1) + assert cover1.shape == (450, 500, 1, 1) + assert cover1.shape == cover2.shape + 
np.testing.assert_equal(cover1[:], cover2[:]) +