Diff of /tests/test_data.py [000000] .. [d7cf27]

Switch to unified view

a b/tests/test_data.py
1
import os
2
3
import matplotlib
4
import pkg_resources
5
import pytest
6
7
from janggu.data import Bioseq
8
from janggu.data import split_train_test
9
from janggu.data import subset
10
from janggu.data import view
11
from janggu.data.data import _data_props
12
13
# Select the 'AGG' matplotlib backend for this test module.
# NOTE(review): presumably chosen so tests run without a display -- confirm.
matplotlib.use('AGG')
14
15
16
def test_dna_props_extraction(tmpdir):
    """Check that ``_data_props`` reports the name and shape of a dataset.

    A ``Bioseq`` dataset named ``'dna'`` is created from the sample genome
    and sample ROI file found in the ``janggu`` package resources, and the
    properties extracted from it are verified. Passing a plain tuple instead
    of a dataset is expected to raise.

    Parameters
    ----------
    tmpdir
        pytest fixture; its path is used as ``JANGGU_OUTPUT`` for the
        duration of the test.
    """
    # Remember the previous value so that redirecting JANGGU_OUTPUT to the
    # temporary directory does not leak into tests that run afterwards.
    previous_output = os.environ.get('JANGGU_OUTPUT')
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    try:
        data_path = pkg_resources.resource_filename('janggu', 'resources/')
        bed_file = os.path.join(data_path, 'sample.bed')

        refgenome = os.path.join(data_path, 'sample_genome.fa')

        dna = Bioseq.create_from_refgenome('dna', refgenome=refgenome,
                                           storage='ndarray',
                                           roi=bed_file,
                                           binsize=200, stepsize=200,
                                           order=1)

        props = _data_props(dna)
        assert 'dna' in props
        assert props['dna']['shape'] == (200, 1, 4)

        # Anything that is not a dataset must be rejected.
        with pytest.raises(Exception):
            _data_props((0,))
    finally:
        # Restore the environment exactly as it was before the test.
        if previous_output is None:
            os.environ.pop('JANGGU_OUTPUT', None)
        else:
            os.environ['JANGGU_OUTPUT'] = previous_output
35
36
37
def test_split_train_test():
    """Hold out 'chr2' and verify the sizes of the train and test parts.

    ``split_train_test`` is exercised twice: once with a single dataset and
    once with a list containing the same dataset two times.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    roi_path = os.path.join(resources, 'sample.bed')
    genome_path = os.path.join(resources, 'sample_genome.fa')

    dna = Bioseq.create_from_refgenome(
        'dna',
        refgenome=genome_path,
        storage='ndarray',
        roi=roi_path,
        binsize=200,
        stepsize=200,
        order=1,
        store_whole_genome=True)

    # Case 1: a single dataset is split.
    train_part, test_part = split_train_test(dna, holdout_chroms='chr2')

    assert len(train_part) == 50
    assert len(test_part) == 50
    assert len(dna) == len(train_part) + len(test_part)

    # Case 2: a list of datasets is split; inspect the first entry of each.
    train_parts, test_parts = split_train_test([dna, dna],
                                               holdout_chroms='chr2')

    assert len(train_parts[0]) == 50
    assert len(test_parts[0]) == 50
    assert len(dna) == len(train_parts[0]) + len(test_parts[0])
60
61
62
def test_subset_include_chrname_test():
    """Verify that ``subset`` with ``include_regions='chr2'`` yields 50 items."""
    resource_dir = pkg_resources.resource_filename('janggu', 'resources/')
    roi = os.path.join(resource_dir, 'sample.bed')
    genome = os.path.join(resource_dir, 'sample_genome.fa')

    # Dataset built from the packaged sample genome and sample ROI.
    dna = Bioseq.create_from_refgenome(
        'dna',
        refgenome=genome,
        storage='ndarray',
        roi=roi,
        binsize=200,
        stepsize=200,
        order=1,
        store_whole_genome=True)

    included = subset(dna, include_regions='chr2')

    assert len(included) == 50
77
78
79
def test_subset_exclude_chrname_test():
    """Verify that ``subset`` with ``exclude_regions='chr2'`` yields 50 items."""
    base_dir = pkg_resources.resource_filename('janggu', 'resources/')
    genome_fasta = os.path.join(base_dir, 'sample_genome.fa')
    regions_bed = os.path.join(base_dir, 'sample.bed')

    # Dataset built from the packaged sample genome and sample ROI.
    dna = Bioseq.create_from_refgenome(
        'dna',
        refgenome=genome_fasta,
        storage='ndarray',
        roi=regions_bed,
        binsize=200,
        stepsize=200,
        order=1,
        store_whole_genome=True)

    remaining = subset(dna, exclude_regions='chr2')

    assert len(remaining) == 50
94
95
96
def test_view_bed_test():
    """Verify that ``view`` restricted to 'scored_sample.bed' yields 4 items."""
    resource_root = pkg_resources.resource_filename('janggu', 'resources/')

    def resource(name):
        # Build the path of a file below the package resource directory.
        return os.path.join(resource_root, name)

    full_roi = resource('sample.bed')
    sub_roi = resource('scored_sample.bed')
    genome = resource('sample_genome.fa')

    dna = Bioseq.create_from_refgenome(
        'dna',
        refgenome=genome,
        storage='ndarray',
        roi=full_roi,
        binsize=200,
        stepsize=200,
        order=1,
        store_whole_genome=True)

    viewed = view(dna, use_regions=sub_roi)

    assert len(viewed) == 4