Switch to unified view

a b/tests/test_genomicarray.py
1
import os
2
3
import numpy as np
4
import pytest
5
from pybedtools import Interval
6
7
from janggu.data import GenomicIndexer
8
from janggu.data import create_genomic_array
9
from janggu.data.genomicarray import get_collapser
10
from janggu.data.genomicarray import get_normalizer
11
12
13
def test_get_collapser():
14
    with pytest.raises(Exception):
15
        # this collapser is not available
16
        get_collapser('blabla')
17
18
19
def test_get_normalizer():
20
    with pytest.raises(Exception):
21
        # this normalizer is not available
22
        get_normalizer('blabla')
23
24
25
def test_invalid_storage():
26
    with pytest.raises(Exception):
27
        ga = create_genomic_array(GenomicIndexer.create_from_genomesize({'chr10': 300}), stranded=True,
28
                                  typecode='int8',
29
                                  storage='storgae', resolution=1, cache=False)
30
31
def test_resolution_negative():
32
    with pytest.raises(Exception):
33
        ga = create_genomic_array(GenomicIndexer.create_from_genomesize({'chr10': 300}), stranded=True,
34
                                  typecode='int8',
35
                                  storage='ndarray', cache=False,
36
                                  resolution=-1)
37
38
39
def test_hdf5_no_cache():
40
41
    with pytest.raises(Exception):
42
        # cache must be True
43
        ga = create_genomic_array(GenomicIndexer.create_from_genomesize({'chr10': 300}),
44
                                  stranded=True, typecode='int8',
45
                                  storage='hdf5', cache=None)
46
47
48
def test_invalid_access():
49
50
    ga = create_genomic_array(GenomicIndexer.create_from_genomesize({'chr10': 300}), stranded=False,
51
                              typecode='int8',
52
                              storage='ndarray')
53
54
    with pytest.raises(Exception):
55
        # access only via genomic interval
56
        ga[1]
57
58
    with pytest.raises(Exception):
59
        # access only via genomic interval and condition
60
        ga[1] = 1
61
62
    ga = create_genomic_array(GenomicIndexer.create_from_genomesize({'chr10': 300}), stranded=False,
63
                              typecode='int8',
64
                              storage='sparse')
65
66
    with pytest.raises(Exception):
67
        # access only via genomic interval
68
        ga[1]
69
70
    with pytest.raises(Exception):
71
        # access only via genomic interval and condition
72
        ga[1] = 1
73
74
75
def test_bwga_instance_unstranded_taged(tmpdir):
76
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
77
    iv = Interval('chr10', 100, 120, strand='.')
78
    ga = create_genomic_array(GenomicIndexer.create_from_genomesize({'chr10': 300}), stranded=False, typecode='int8',
79
                              storage='ndarray', datatags='test_bwga_instance_unstranded')
80
81
    with pytest.raises(Exception):
82
        # access only via genomic interval
83
        ga[1]
84
85
    with pytest.raises(Exception):
86
        # access only via genomic interval and condition
87
        ga[1] = 1
88
89
    np.testing.assert_equal(ga[iv].shape, (20, 1, 1))
90
    np.testing.assert_equal(ga[iv], np.zeros((20, 1, 1)))
91
92
    ga[iv, 0] = np.ones((20,1))
93
    np.testing.assert_equal(ga[iv], np.ones((20, 1, 1)))
94
    np.testing.assert_equal(ga[iv].sum(), 20)
95
    iv = Interval('chr10', 0, 300, strand='.')
96
    np.testing.assert_equal(ga[iv].sum(), 20)
97
98
99
def test_bwga_instance_unstranded(tmpdir):
100
    iv = Interval('chr10', 100, 120, strand='.')
101
    ga = create_genomic_array(GenomicIndexer.create_from_genomesize({'chr10': 300}), stranded=False, typecode='int8',
102
                              storage='ndarray', cache=False)
103
    np.testing.assert_equal(ga[iv].shape, (20, 1, 1))
104
    np.testing.assert_equal(ga[iv], np.zeros((20, 1, 1)))
105
106
    ga[iv, 0] = np.ones((20,1))
107
    np.testing.assert_equal(ga[iv], np.ones((20, 1, 1)))
108
    np.testing.assert_equal(ga[iv].sum(), 20)
109
    iv = Interval('chr10', 0, 300, strand='.')
110
    np.testing.assert_equal(ga[iv].sum(), 20)
111
112
113
def test_bwga_instance_stranded(tmpdir):
114
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
115
    iv = Interval('chr10', 100, 120, strand='+')
116
    ga = create_genomic_array(GenomicIndexer.create_from_genomesize({'chr10': 300}), stranded=True, typecode='int8',
117
                              storage='ndarray')
118
    np.testing.assert_equal(ga[iv].shape, (20, 2, 1))
119
    np.testing.assert_equal(ga[iv], np.zeros((20, 2, 1)))
120
121
    x = np.zeros((20, 2, 1))
122
    x[:, :1, :] = 1
123
    ga[iv, 0] = x[:,:,0]
124
    np.testing.assert_equal(ga[iv], x)
125
    np.testing.assert_equal(ga[iv].sum(), 20)
126
    iv = Interval('chr10', 0, 300)
127
    np.testing.assert_equal(ga[iv].sum(), 20)
128
129
130
def test_bwga_instance_stranded_notcached(tmpdir):
131
132
    iv = Interval('chr10', 100, 120, strand='+')
133
    ga = create_genomic_array(GenomicIndexer.create_from_genomesize({'chr10': 300}), stranded=True, typecode='int8',
134
                              storage='ndarray', cache=False)
135
    np.testing.assert_equal(ga[iv].shape, (20, 2, 1))
136
    np.testing.assert_equal(ga[iv], np.zeros((20, 2, 1)))
137
138
    x = np.zeros((20, 2, 1))
139
    x[:, :1, :] = 1
140
    ga[iv, 0] = x[:,:,0]
141
    np.testing.assert_equal(ga[iv], x)
142
    np.testing.assert_equal(ga[iv].sum(), 20)
143
    iv = Interval('chr10', 0, 300)
144
    np.testing.assert_equal(ga[iv].sum(), 20)
145
146
147
def test_zscore_normalization(tmpdir):
148
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
149
150
    def loading(garray):
151
        garray[Interval('chr1', 0, 150), 0] = np.repeat(1, 150).reshape(-1,1)
152
        garray[Interval('chr2', 0, 300), 0] = np.repeat(-1, 300).reshape(-1,1)
153
        return garray
154
155
    for store in ['ndarray', 'hdf5']:
156
        ga = create_genomic_array(GenomicIndexer.create_from_genomesize({'chr1': 150, 'chr2': 300}),
157
                                  stranded=False, typecode='float32',
158
                                  storage=store, cache=True, loader=loading,
159
                                  normalizer=['zscore'])
160
        np.testing.assert_allclose(ga.weighted_mean(), np.asarray([0.0]),
161
                                   rtol=1e-5, atol=1e-5)
162
        np.testing.assert_allclose(ga.weighted_sd(), np.asarray([1.]),
163
                                   rtol=1e-5, atol=1e-5)
164
        np.testing.assert_allclose(ga[Interval('chr1', 100, 101)],
165
                                   np.asarray([[[1.412641340027806]]]),
166
                                   rtol=1e-5, atol=1e-5)
167
        np.testing.assert_allclose(ga[Interval('chr2', 100, 101)],
168
                                   np.asarray([[[-0.706320670013903]]]),
169
                                   rtol=1e-5, atol=1e-5)
170
171
172
def test_logzscore_normalization(tmpdir):
173
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
174
175
    def loading(garray):
176
        garray[Interval('chr1', 0, 150), 0] = np.repeat(10, 150).reshape(-1, 1)
177
        garray[Interval('chr2', 0, 300), 0] = np.repeat(100, 300).reshape(-1,1)
178
        return garray
179
180
    from janggu.data.genomicarray import LogTransform, ZScore
181
    ga = create_genomic_array(GenomicIndexer.create_from_genomesize({'chr1': 150, 'chr2': 300}),
182
                          stranded=False, typecode='float32',
183
                          storage='ndarray', cache=None, loader=loading)
184
    ga = create_genomic_array(GenomicIndexer.create_from_genomesize({'chr1': 150, 'chr2': 300}),
185
                          stranded=False, typecode='float32',
186
                          storage='ndarray', cache=None, loader=loading,
187
                          normalizer=[LogTransform()])
188
    ga = create_genomic_array(GenomicIndexer.create_from_genomesize({'chr1': 150, 'chr2': 300}),
189
                          stranded=False, typecode='float32',
190
                          storage='ndarray', cache=None, loader=loading,
191
                          normalizer=[ZScore()])
192
    ga = create_genomic_array(GenomicIndexer.create_from_genomesize({'chr1': 150, 'chr2': 300}),
193
                          stranded=False, typecode='float32',
194
                          storage='ndarray', cache=None, loader=loading,
195
                          normalizer=[LogTransform(), ZScore()])
196
    ga = create_genomic_array(GenomicIndexer.create_from_genomesize({'chr1': 150, 'chr2': 300}),
197
                          stranded=False, typecode='float32',
198
                          storage='ndarray', cache=None, loader=loading,
199
                          normalizer=['zscorelog'])
200
    for store in ['ndarray', 'hdf5']:
201
        ga = create_genomic_array(GenomicIndexer.create_from_genomesize({'chr1': 150, 'chr2': 300}),
202
                                  stranded=False, typecode='float32',
203
                                  storage=store, cache="cache_file", loader=loading,
204
                                  normalizer=['zscorelog'])
205
        np.testing.assert_allclose(ga.weighted_mean(), np.asarray([0.0]),
206
                                   rtol=1e-5, atol=1e-5)
207
        np.testing.assert_allclose(ga.weighted_sd(), np.asarray([1.]),
208
                                   rtol=1e-5, atol=1e-5)
209
        np.testing.assert_allclose(ga[Interval('chr1', 100, 101)],
210
                                   np.asarray([[[-1.412641340027806]]]),
211
                                   rtol=1e-5, atol=1e-5)
212
        np.testing.assert_allclose(ga[Interval('chr2', 100, 101)],
213
                                   np.asarray([[[0.706320670013903]]]),
214
                                   rtol=1e-5, atol=1e-5)
215
216
217
def test_perctrim(tmpdir):
218
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
219
220
    def loading(garray):
221
        garray[Interval('chr1', 0, 150), 0] = np.random.normal(loc=10, size=150).reshape(-1, 1)
222
        garray[Interval('chr2', 0, 300), 0] = np.random.normal(loc=100, size=300).reshape(-1, 1)
223
        return garray
224
225
    for store in ['ndarray', 'hdf5']:
226
        ga = create_genomic_array(GenomicIndexer.create_from_genomesize({'chr1': 150, 'chr2': 300}),
227
                              stranded=False, typecode='float32',
228
                              storage=store, cache="cache_file", loader=loading,
229
                              normalizer=['binsizenorm', 'perctrim'])
230
231
232
def test_tmp_normalization(tmpdir):
233
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
234
235
    def loading(garray):
236
        garray[Interval('chr1', 0, 150), 0] = np.repeat(10, 150).reshape(-1, 1)
237
        garray[Interval('chr2', 0, 300), 0] = np.repeat(1, 300).reshape(-1, 1)
238
        return garray
239
240
    for store in ['ndarray', 'hdf5']:
241
        ga = create_genomic_array(GenomicIndexer.create_from_genomesize({'chr1': 150, 'chr2': 300}), stranded=False, typecode='float32',
242
                                  storage=store, cache="cache_file", resolution=50, loader=loading,
243
                                  collapser='sum',
244
                                  normalizer=['tpm'])
245
        np.testing.assert_allclose(ga[Interval('chr1', 100, 101)], np.asarray([[[10 * 1000/50 * 1e6/(720.)]]]))
246
        np.testing.assert_allclose(ga[Interval('chr2', 100, 101)], np.asarray([[[1 * 1000/50 * 1e6/(720.)]]]))
247
248
249
def test_check_resolution_collapse_compatibility(tmpdir):
250
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
251
252
    def loading(garray):
253
        garray[Interval('chr1', 0, 150), 0] = np.repeat(10, 150).reshape(-1, 1)
254
        garray[Interval('chr2', 0, 300), 0] = np.repeat(1, 300).reshape(-1, 1)
255
        return garray
256
257
    with pytest.raises(Exception):
258
        # Error because resolution=50 but no collapser defined
259
        ga = create_genomic_array(GenomicIndexer.create_from_genomesize({'chr1': 150, 'chr2': 300}), stranded=False, typecode='float32',
260
                                  storage="ndarray", cache=None, resolution=50, loader=loading,
261
                                  collapser=None,
262
                                  normalizer=['tpm'])
263
264
    with pytest.raises(Exception):
265
        # Error because resolution=None but no collapser defined
266
        ga = create_genomic_array(GenomicIndexer.create_from_genomesize({'chr1': 150, 'chr2': 300}), stranded=False, typecode='float32',
267
                                  storage="ndarray", cache=None, resolution=None, loader=loading,
268
                                  collapser=None,
269
                                  normalizer=['tpm'])
270
271
    ga = create_genomic_array(GenomicIndexer.create_from_file([Interval('chr1', 0, 150), Interval('chr2', 0, 150), Interval('chr2', 150, 300)], binsize=150, stepsize=None, ),
272
                              stranded=False, typecode='float32',
273
                              storage="ndarray", cache=None, resolution=1, loader=loading)
274
    ga = create_genomic_array(GenomicIndexer.create_from_file([Interval('chr1', 0, 150), Interval('chr2', 0, 300)], binsize=None, stepsize=None, collapse=True),
275
                              stranded=False, typecode='float32',
276
                              storage="ndarray", cache='test', resolution=None, loader=loading,
277
                              store_whole_genome=None,
278
                              collapser='sum')
279
    ga = create_genomic_array(GenomicIndexer.create_from_file([Interval('chr1', 0, 150), Interval('chr2', 0, 300)], binsize=None, stepsize=None, collapse=True),
280
                              stranded=False, typecode='float32',
281
                              storage="ndarray", cache=None, resolution=None, loader=loading,
282
                              collapser='sum',
283
                              normalizer=['tpm'])