Diff of /tests/test_gindexer.py [000000] .. [d7cf27]

Switch to unified view

a b/tests/test_gindexer.py
1
import os
2
3
import matplotlib
4
import numpy as np
5
import pkg_resources
6
import pytest
7
import pandas as pd
8
9
from janggu.data import GenomicIndexer
10
11
# Select matplotlib's 'AGG' backend when this test module is imported.
matplotlib.use('AGG')
12
13
def test_gindexer_short_interval():
    """Check region counts created from ``sample_equalsize.bed``.

    The BED file is loaded through ``GenomicIndexer.create_from_file`` with
    several ``binsize`` / ``stepsize`` / ``zero_padding`` combinations and
    the resulting number of regions is compared with the expected count.
    """
    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(data_path, 'sample_equalsize.bed')

    # (keyword arguments, expected number of regions)
    cases = [
        ({'binsize': 200, 'stepsize': 200}, 4),
        ({'binsize': 180, 'stepsize': 20}, 8),
        # With binsize=210 no region is produced unless zero_padding is on
        # (presumably the intervals are shorter than 210 bp -- confirm
        # against the BED file).
        ({'binsize': 210, 'stepsize': 20, 'zero_padding': False}, 0),
        ({'binsize': 210, 'stepsize': 20, 'zero_padding': True}, 4),
    ]

    for kwargs, expected in cases:
        gi = GenomicIndexer.create_from_file(bed_file, **kwargs)
        assert len(gi) == expected, kwargs
34
35
36
def test_gindexer_short_interval_with_dataframe():
    """Check region counts when the regions are supplied as a DataFrame.

    ``sample_equalsize.bed`` is read into a pandas DataFrame with the
    columns ``chrom``, ``start`` and ``end``; that DataFrame is handed to
    ``GenomicIndexer.create_from_file`` in place of a file name, using the
    same parameter combinations and expected counts as the file-based test.
    """
    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    df = pd.read_csv(os.path.join(data_path, 'sample_equalsize.bed'),
                     sep='\t', header=None, names=['chrom', 'start', 'end'])

    # (keyword arguments, expected number of regions)
    cases = [
        ({'binsize': 200, 'stepsize': 200}, 4),
        ({'binsize': 180, 'stepsize': 20}, 8),
        # With binsize=210 no region is produced unless zero_padding is on.
        ({'binsize': 210, 'stepsize': 20, 'zero_padding': False}, 0),
        ({'binsize': 210, 'stepsize': 20, 'zero_padding': True}, 4),
    ]

    for kwargs, expected in cases:
        gi = GenomicIndexer.create_from_file(df, **kwargs)
        assert len(gi) == expected, kwargs
56
57
58
def test_gindexer_errors():
    """Check argument validation of ``GenomicIndexer.create_from_file``.

    A ``ValueError`` is expected for ``binsize=0``, for ``stepsize=0`` and
    for a negative ``flank``.  Afterwards ``scores.bed`` is loaded twice
    outside of any ``pytest.raises`` block, so those two calls must complete
    without raising for the test to pass.
    """
    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    sample_bed = os.path.join(data_path, 'sample.bed')
    scores_bed = os.path.join(data_path, 'scores.bed')

    with pytest.raises(ValueError):
        GenomicIndexer.create_from_file(sample_bed, binsize=0, stepsize=50)

    with pytest.raises(ValueError):
        GenomicIndexer.create_from_file(sample_bed, binsize=10, stepsize=0)

    with pytest.raises(ValueError):
        # due to flank < 0
        GenomicIndexer.create_from_file(sample_bed,
                                        binsize=200, stepsize=50, flank=-1)

    # scores.bed (presumably holding intervals of unequal length -- confirm
    # against the file) must load without an exception, both when the bin
    # and step sizes are left unspecified and when they are given.
    GenomicIndexer.create_from_file(scores_bed,
                                    binsize=None, stepsize=None, flank=0)
    GenomicIndexer.create_from_file(scores_bed,
                                    binsize=200, stepsize=200, flank=0)
82
83
def test_gindexer_merged():
    """Check ``filter_by_region`` with ``include`` and ``exclude`` arguments.

    ``sample.bed`` is binned with ``binsize=200`` / ``stepsize=200`` and the
    resulting indexer is filtered by chromosome name; the size of every
    filtered indexer is compared with the expected number of regions.
    """
    data_path = pkg_resources.resource_filename('janggu', 'resources/')

    gi = GenomicIndexer.create_from_file(
        os.path.join(data_path, 'sample.bed'), binsize=200, stepsize=200)
    np.testing.assert_equal(len(gi), 100)

    only_chr1 = gi.filter_by_region(include='chr1')
    only_chr10 = gi.filter_by_region(include='chr10')
    without_chr2 = gi.filter_by_region(exclude='chr2')
    without_chr10 = gi.filter_by_region(exclude='chr10')

    np.testing.assert_equal(len(only_chr1), 50)

    # No region is selected by 'chr10': including it yields an empty
    # indexer, excluding it leaves all 100 regions in place.
    np.testing.assert_equal(len(only_chr10), 0)
    np.testing.assert_equal(len(without_chr2), 50)
    np.testing.assert_equal(len(without_chr10), 100)
100
101
102
def test_gindexer_merged_variable_length_ranges():
    """Check the regions from ``sample.bed`` with and without zero padding.

    With ``binsize=3000`` / ``stepsize=3000`` the indexer is expected to
    hold 6 regions when ``zero_padding=False`` and 8 regions when
    ``zero_padding=True``.  In both cases the first and the last region are
    compared field by field (chrom, start, end, strand).
    """
    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(data_path, 'sample.bed')

    def fields(iv):
        """Return the (chrom, start, end, strand) tuple of an interval."""
        return (iv.chrom, iv.start, iv.end, iv.strand)

    # with fixed size
    gi = GenomicIndexer.create_from_file(
        bed_file, binsize=3000, stepsize=3000, zero_padding=False)
    np.testing.assert_equal(len(gi), 6)
    np.testing.assert_equal(fields(gi[0]), ('chr1', 15000, 18000, '+'))
    np.testing.assert_equal(fields(gi[-1]), ('chr2', 21000, 24000, '-'))

    # with variable size regions: the last region expected here spans only
    # 24000-25000, i.e. it is shorter than the requested binsize.
    gi = GenomicIndexer.create_from_file(
        bed_file, binsize=3000, stepsize=3000, zero_padding=True)
    np.testing.assert_equal(len(gi), 8)
    np.testing.assert_equal(fields(gi[0]), ('chr1', 15000, 18000, '+'))
    np.testing.assert_equal(fields(gi[-1]), ('chr2', 24000, 25000, '-'))