"""Tests for janggu's evaluation utilities (Scorer, exporters, dimension checks)."""
import json
2
import os
3
4
import numpy
5
import pandas
6
import pkg_resources
7
import pyBigWig
8
import pytest
9
from keras import Input
10
from keras import Model
11
from keras.layers import Dense
12
from keras.layers import Flatten
13
from pybedtools import BedTool
14
15
from janggu import Janggu
16
from janggu import inputlayer
17
from janggu import outputconv
18
from janggu import outputdense
19
from janggu.data import Array
20
from janggu.data import GenomicIndexer
21
from janggu.evaluation import Scorer
22
from janggu.evaluation import _dimension_match
23
from janggu.utils import ExportBed
24
from janggu.utils import ExportBigwig
25
from janggu.utils import ExportClustermap
26
from janggu.utils import ExportScorePlot
27
from janggu.utils import ExportTsne
28
from janggu.utils import ExportTsv
29
30
31
def test_input_dims():
    """_dimension_match on input layers: name and shape must both agree."""
    data = Array('testa', numpy.zeros((10, 10, 1)))
    xin = Input((10, 1), name='testy')
    out = Dense(1)(xin)
    m = Model(xin, out)

    # False due to mismatch of names
    assert not _dimension_match(m, data, 'input_layers')

    xin = Input((20, 10, 1), name='testa')
    out = Dense(1)(xin)
    m = Model(xin, out)

    # False due to mismatch of dims
    assert not _dimension_match(m, data, 'input_layers')
    # more input datasets supplied than inputs to models
    assert not _dimension_match(m, [data, data], 'input_layers')

    xin = Input((10, 1), name='testa')
    out = Dense(1)(xin)
    m = Model(xin, out)

    # True: name and dims both match
    assert _dimension_match(m, data, 'input_layers')
def test_output_dims():
    """_dimension_match on output layers; None labels always match."""
    data = Array('testa', numpy.zeros((10, 10, 1)))
    label = Array('testy', numpy.zeros((10, 1)))
    xin = Input(data.shape, name='asdf')
    out = Flatten()(xin)
    out = Dense(1)(out)
    m = Model(xin, out)

    # False due to mismatch of names
    assert not _dimension_match(m, label, 'output_layers')

    xin = Input(data.shape, name='testa')
    out = Flatten()(xin)
    out = Dense(2, name='testy')(out)
    m = Model(xin, out)

    # False due to mismatch of dims
    assert not _dimension_match(m, label, 'output_layers')

    xin = Input(data.shape, name='testa')
    out = Flatten()(xin)
    out = Dense(1, name='testy')(out)
    m = Model(xin, out)

    # True: name and dims both match
    assert _dimension_match(m, label, 'output_layers')

    # None labels are treated as a trivial match
    assert _dimension_match(m, None, 'output_layers')
def get_janggu(inputs, outputs):
    """Build and compile a small dense-output Janggu model for the given data.

    Also asserts that the model has not yet been persisted to its storage path.
    """
    @inputlayer
    @outputdense('sigmoid')
    def _model(inputs, inp, oup, params):
        return inputs, inputs[0]

    bwm = Janggu.create(_model,
                        inputs=inputs,
                        outputs=outputs,
                        name='nptest')
    bwm.compile(optimizer='adadelta', loss='binary_crossentropy')

    storage = bwm._storage_path(bwm.name, outputdir=bwm.outputdir)
    assert not os.path.exists(storage)
    return bwm
def get_janggu_conv(inputs, outputs):
    """Build and compile a small conv-output Janggu model for the given data.

    Also asserts that the model has not yet been persisted to its storage path.
    """
    @inputlayer
    @outputconv('sigmoid')
    def _model(inputs, inp, oup, params):
        return inputs, inputs[0]

    bwm = Janggu.create(_model,
                        inputs=inputs,
                        outputs=outputs,
                        name='nptest')

    bwm.compile(optimizer='adadelta', loss='binary_crossentropy')

    storage = bwm._storage_path(bwm.name, outputdir=bwm.outputdir)
    assert not os.path.exists(storage)
    return bwm
def test_output_score_by_name(tmpdir):
    """Built-in scorers named by string produce their export files.

    A custom Scorer with immediate_export=False must not export until
    export() is called explicitly; unknown scorer names raise ValueError.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    inputs = Array("x", numpy.random.random((100, 10)))
    outputs = Array('y', numpy.random.randint(2, size=(100, 1)),
                    conditions=['random'])

    bwm = get_janggu(inputs, outputs)

    dummy_eval = Scorer('score', lambda y_true, y_pred: 0.15, immediate_export=False)

    bwm.evaluate(inputs, outputs, callbacks=['auc', 'roc', 'prc',
                                             'auprc', 'auroc',
                                             'cor', 'mae', 'mse',
                                             'var_explained', dummy_eval])

    assert os.path.exists(os.path.join(tmpdir.strpath, "evaluation", bwm.name, "auc.tsv"))
    assert os.path.exists(os.path.join(tmpdir.strpath, "evaluation", bwm.name, "prc.png"))
    assert os.path.exists(os.path.join(tmpdir.strpath, "evaluation", bwm.name, "roc.png"))
    assert os.path.exists(os.path.join(tmpdir.strpath, "evaluation", bwm.name, "cor.tsv"))
    assert os.path.exists(os.path.join(tmpdir.strpath, "evaluation", bwm.name, "mae.tsv"))
    assert os.path.exists(os.path.join(tmpdir.strpath, "evaluation", bwm.name, "mse.tsv"))
    assert os.path.exists(os.path.join(tmpdir.strpath, "evaluation", bwm.name, "var_explained.tsv"))
    assert os.path.exists(os.path.join(tmpdir.strpath, "evaluation", bwm.name, "auprc.tsv"))
    # deferred export: no json yet
    assert not os.path.exists(os.path.join(tmpdir.strpath, "evaluation", bwm.name, "score.json"))

    dummy_eval.export(os.path.join(tmpdir.strpath, dummy_eval.subdir), bwm.name)
    assert os.path.exists(os.path.join(tmpdir.strpath, "evaluation", bwm.name, "score.json"))

    with pytest.raises(ValueError):
        bwm.evaluate(inputs, outputs, callbacks=['adsf'])
def test_output_json_score(tmpdir):
    """Scorer without a score function raises; with one, a valid JSON is written."""
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    inputs = Array("x", numpy.random.random((100, 10)))
    outputs = Array('y', numpy.random.randint(2, size=(100, 1)),
                    conditions=['random'])

    bwm = get_janggu(inputs, outputs)

    # check exception if no scoring function is provided
    dummy_eval = Scorer('score')

    with pytest.raises(ValueError):
        bwm.evaluate(inputs, outputs, callbacks=[dummy_eval])

    dummy_eval = Scorer('score', lambda y_true, y_pred: 0.15)

    bwm.evaluate(inputs, outputs, callbacks=[dummy_eval])

    # check correctness of json
    with open(os.path.join(tmpdir.strpath, "evaluation", bwm.name,
                           "score.json"), 'r') as f:
        content = json.load(f)
        # now nptest was evaluated
        assert 'random' in content
def test_output_tsv_score(tmpdir):
    """ExportTsv writes the scalar score into a readable TSV."""
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    inputs = Array("x", numpy.random.random((100, 10)))
    outputs = Array('y', numpy.random.randint(2, size=(100, 1)),
                    conditions=['random'])

    bwm = get_janggu(inputs, outputs)

    dummy_eval = Scorer('score', lambda y_true, y_pred: 0.15, exporter=ExportTsv())

    bwm.evaluate(inputs, outputs, callbacks=[dummy_eval])

    assert pandas.read_csv(os.path.join(tmpdir.strpath, "evaluation", bwm.name,
                                        "score.tsv"),
                           sep='\t', header=[0]).iloc[0, 0] == 0.15
def test_output_export_score_plot(tmpdir):
    """ExportScorePlot produces a figure, honoring figsize/labels/fform options."""
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    inputs = Array("x", numpy.random.random((100, 10)))
    outputs = Array('y', numpy.random.randint(2, size=(100, 1)),
                    conditions=['random'])

    bwm = get_janggu(inputs, outputs)

    # default exporter -> png output
    dummy_eval = Scorer('score',
                        lambda y_true, y_pred:
                        ([0., 0.5, 0.5, 1.],
                         [0.5, 0.5, 1., 1.],
                         [0.8, 0.4, 0.35, 0.1]),
                        exporter=ExportScorePlot())

    bwm.evaluate(inputs, outputs, callbacks=[dummy_eval])

    # customized exporter -> eps output
    dummy_eval = Scorer('score',
                        lambda y_true, y_pred:
                        ([0., 0.5, 0.5, 1.],
                         [0.5, 0.5, 1., 1.],
                         [0.8, 0.4, 0.35, 0.1]),
                        exporter=ExportScorePlot(figsize=(10, 12),
                                                 xlabel='FPR',
                                                 ylabel='TPR',
                                                 fform='eps'))

    bwm.evaluate(inputs, outputs, callbacks=[dummy_eval])

    # check if plot was produced
    assert os.path.exists(os.path.join(tmpdir.strpath,
                                       "evaluation", bwm.name, "score.png"))
    assert os.path.exists(os.path.join(tmpdir.strpath,
                                       "evaluation", bwm.name, "score.eps"))
def test_output_export_clustermap(tmpdir):
    """ExportClustermap renders hidden-layer predictions, in png and eps."""
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    inputs = Array("x", numpy.random.random((100, 10)))
    outputs = Array('y', numpy.random.randint(2, size=(100, 1)),
                    conditions=['random'])

    @inputlayer
    @outputdense('sigmoid')
    def _model(inputs, inp, oup, params):
        with inputs.use('x') as layer:
            outputs = Dense(3, name='hidden')(layer)
        return inputs, outputs

    bwm = Janggu.create(_model,
                        inputs=inputs,
                        outputs=outputs,
                        name='nptest')

    bwm.compile(optimizer='adadelta', loss='binary_crossentropy')

    dummy_eval = Scorer('cluster', exporter=ExportClustermap())

    bwm.predict(inputs, layername='hidden',
                callbacks=[dummy_eval])

    dummy_eval = Scorer('cluster', exporter=ExportClustermap(fform='eps',
                                                             annot={'annot': [1]*50 + [0]*50}))
    bwm.predict(inputs, layername='hidden',
                callbacks=[dummy_eval])

    # check if plot was produced
    assert os.path.exists(os.path.join(tmpdir.strpath,
                                       "evaluation", bwm.name, 'hidden',
                                       "cluster.png"))
    assert os.path.exists(os.path.join(tmpdir.strpath,
                                       "evaluation", bwm.name, 'hidden',
                                       "cluster.eps"))
@pytest.mark.filterwarnings("ignore:the matrix")
def test_output_export_tsne(tmpdir):
    """ExportTsne embeds hidden-layer predictions and saves png/eps figures."""
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    inputs = Array("x", numpy.random.random((100, 10)))
    outputs = Array('y', numpy.random.randint(2, size=(100, 1)),
                    conditions=['random'])

    @inputlayer
    @outputdense('sigmoid')
    def _model(inputs, inp, oup, params):
        with inputs.use('x') as layer:
            outputs = Dense(3, name='hidden')(layer)
        return inputs, outputs

    bwm = Janggu.create(_model,
                        inputs=inputs,
                        outputs=outputs,
                        name='nptest')

    bwm.compile(optimizer='adadelta', loss='binary_crossentropy')

    dummy_eval = Scorer('tsne', exporter=ExportTsne())

    bwm.predict(inputs, layername='hidden',
                callbacks=[dummy_eval])

    dummy_eval = Scorer('tsne', exporter=ExportTsne(fform='eps',
                                                    annot={'annot': [1]*50 + [0]*50},
                                                    figsize=(10, 10)))
    bwm.predict(inputs, layername='hidden',
                callbacks=[dummy_eval])

    # check if plot was produced
    assert os.path.exists(os.path.join(tmpdir.strpath,
                                       "evaluation", bwm.name, 'hidden',
                                       "tsne.png"))
    assert os.path.exists(os.path.join(tmpdir.strpath,
                                       "evaluation", bwm.name, 'hidden',
                                       "tsne.eps"))
def test_output_bed_loss_resolution_equal_stepsize(tmpdir):
    """ExportBed with resolution == stepsize writes one BED region per bin."""
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    # resolution == stepsize: 7 samples -> 7 regions per condition
    inputs = Array("x", numpy.random.random((7, 1, 1, 10)))
    outputs = Array('y', numpy.random.random((7, 1, 1, 4)),
                    conditions=['c1', 'c2', 'c3', 'c4'])

    bwm = get_janggu_conv(inputs, outputs)
    data_path = pkg_resources.resource_filename('janggu',
                                                'resources/10regions.bed')

    gi = GenomicIndexer.create_from_file(data_path,
                                         binsize=200,
                                         stepsize=200)

    dummy_eval = Scorer('loss', lambda t, p: [0.1] * len(t),
                        exporter=ExportBed(gindexer=gi, resolution=200))

    bwm.evaluate(inputs, outputs, callbacks=[dummy_eval])

    file_ = os.path.join(tmpdir.strpath, 'evaluation', bwm.name,
                         'loss.{}.bed')

    for cond in ['c1', 'c2', 'c3', 'c4']:
        assert os.path.exists(file_.format(cond))

    bed = BedTool(file_.format('c1'))

    nreg = 0
    for reg in bed:
        numpy.testing.assert_equal(float(reg.score), 0.1)
        nreg += 1

    assert nreg == 7, 'There should be 7 regions in the bed file.'
def test_output_bed_loss_resolution_unequal_stepsize(tmpdir):
    """ExportBed with resolution < stepsize splits each bin into sub-regions."""
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    # resolution (50) < stepsize (200): each of the 7 bins yields 4 regions
    inputs = Array("x", numpy.random.random((7, 4, 1, 10)))
    outputs = Array('y', numpy.random.random((7, 4, 1, 4)),
                    conditions=['c1', 'c2', 'c3', 'c4'])

    bwm = get_janggu(inputs, outputs)
    data_path = pkg_resources.resource_filename('janggu',
                                                'resources/10regions.bed')

    gi = GenomicIndexer.create_from_file(data_path,
                                         binsize=200,
                                         stepsize=200)

    dummy_eval = Scorer('loss', lambda t, p: [0.1] * len(t),
                        exporter=ExportBed(gindexer=gi, resolution=50))

    bwm.evaluate(inputs, outputs, callbacks=[dummy_eval])

    file_ = os.path.join(tmpdir.strpath, 'evaluation', bwm.name,
                         'loss.{}.bed')

    for cond in ['c1', 'c2', 'c3', 'c4']:
        assert os.path.exists(file_.format(cond))

    bed = BedTool(file_.format('c1'))

    nreg = 0
    for reg in bed:
        numpy.testing.assert_equal(float(reg.score), 0.1)
        nreg += 1

    assert nreg == 28, 'There should be 28 regions in the bed file.'
def test_output_bed_predict_resolution_equal_stepsize(tmpdir):
    """Prediction export to BED with resolution == stepsize (conv output)."""
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    # resolution == stepsize: 7 samples -> 7 regions per condition
    inputs = Array("x", numpy.random.random((7, 1, 1, 10)))
    outputs = Array('y', numpy.random.random((7, 1, 1, 4)),
                    conditions=['c1', 'c2', 'c3', 'c4'])

    bwm = get_janggu_conv(inputs, outputs)
    data_path = pkg_resources.resource_filename('janggu',
                                                'resources/10regions.bed')

    gi = GenomicIndexer.create_from_file(data_path,
                                         binsize=200,
                                         stepsize=200)

    dummy_eval = Scorer('pred', lambda p: [0.1] * len(p),
                        exporter=ExportBed(gindexer=gi, resolution=200),
                        conditions=['c1', 'c2', 'c3', 'c4'])

    bwm.predict(inputs, callbacks=[dummy_eval])

    file_ = os.path.join(tmpdir.strpath, 'evaluation', bwm.name,
                         'pred.{}.bed')

    for cond in ['c1', 'c2', 'c3', 'c4']:
        assert os.path.exists(file_.format(cond))

    bed = BedTool(file_.format('c1'))

    nreg = 0
    for reg in bed:
        numpy.testing.assert_equal(float(reg.score), 0.1)
        nreg += 1

    assert nreg == 7, 'There should be 7 regions in the bed file.'
def test_output_bed_predict_denseout(tmpdir):
    """Prediction export to BED from a dense (non-spatial) output layer."""
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    inputs = Array("x", numpy.random.random((7, 10)))
    outputs = Array('y', numpy.random.random((7, 4)),
                    conditions=['c1', 'c2', 'c3', 'c4'])

    bwm = get_janggu(inputs, outputs)
    data_path = pkg_resources.resource_filename('janggu',
                                                'resources/10regions.bed')

    gi = GenomicIndexer.create_from_file(data_path,
                                         binsize=200,
                                         stepsize=200)

    dummy_eval = Scorer('pred', lambda p: [0.1] * len(p),
                        exporter=ExportBed(gindexer=gi, resolution=200),
                        conditions=['c1', 'c2', 'c3', 'c4'])

    bwm.predict(inputs, callbacks=[dummy_eval])

    file_ = os.path.join(tmpdir.strpath, 'evaluation', bwm.name,
                         'pred.{}.bed')

    for cond in ['c1', 'c2', 'c3', 'c4']:
        assert os.path.exists(file_.format(cond))

    bed = BedTool(file_.format('c1'))

    nreg = 0
    for reg in bed:
        numpy.testing.assert_equal(float(reg.score), 0.1)
        nreg += 1

    assert nreg == 7, 'There should be 7 regions in the bed file.'
def test_output_bed_predict_resolution_unequal_stepsize(tmpdir):
    """Prediction export to BED with resolution < stepsize (4 sub-bins each)."""
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    # resolution (50) < stepsize (200): each of the 7 bins yields 4 regions
    inputs = Array("x", numpy.random.random((7, 4, 1, 10)))
    outputs = Array('y', numpy.random.random((7, 4, 1, 4)),
                    conditions=['c1', 'c2', 'c3', 'c4'])

    bwm = get_janggu(inputs, outputs)
    data_path = pkg_resources.resource_filename('janggu',
                                                'resources/10regions.bed')

    gi = GenomicIndexer.create_from_file(data_path,
                                         binsize=200,
                                         stepsize=200)

    dummy_eval = Scorer('pred', lambda p: [0.1] * len(p),
                        exporter=ExportBed(gindexer=gi, resolution=50),
                        conditions=['c1', 'c2', 'c3', 'c4'])

    bwm.predict(inputs, callbacks=[dummy_eval])

    file_ = os.path.join(tmpdir.strpath, 'evaluation', bwm.name,
                         'pred.{}.bed')

    for cond in ['c1', 'c2', 'c3', 'c4']:
        assert os.path.exists(file_.format(cond))

    bed = BedTool(file_.format('c1'))

    nreg = 0
    for reg in bed:
        numpy.testing.assert_equal(float(reg.score), 0.1)
        nreg += 1

    assert nreg == 28, 'There should be 28 regions in the bed file.'
def test_output_bigwig_predict_denseout(tmpdir):
    """Prediction export to bigWig from a dense output; values average to 0.1."""
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    inputs = Array("x", numpy.random.random((7, 10)))
    outputs = Array('y', numpy.random.random((7, 4)),
                    conditions=['c1', 'c2', 'c3', 'c4'])

    bwm = get_janggu(inputs, outputs)
    data_path = pkg_resources.resource_filename('janggu',
                                                'resources/10regions.bed')

    gi = GenomicIndexer.create_from_file(data_path,
                                         binsize=200,
                                         stepsize=200)

    dummy_eval = Scorer('pred', lambda p: [0.1] * len(p),
                        exporter=ExportBigwig(gindexer=gi),
                        conditions=['c1', 'c2', 'c3', 'c4'])

    bwm.predict(inputs, callbacks=[dummy_eval])

    file_ = os.path.join(tmpdir.strpath, 'evaluation', bwm.name,
                         'pred.{}.bigwig')

    for cond in ['c1', 'c2', 'c3', 'c4']:
        assert os.path.exists(file_.format(cond))

    bw = pyBigWig.open(file_.format('c1'))

    co = bw.values('chr1', 600, 2000)

    numpy.testing.assert_allclose(numpy.mean(co), 0.1, rtol=1e-5)
def test_output_bigwig_predict_convout(tmpdir):
    """Prediction export to bigWig from a conv output; values average to 0.2."""
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    inputs = Array("x", numpy.random.random((7, 4, 1, 10)))
    outputs = Array('y', numpy.random.random((7, 4, 1, 4)),
                    conditions=['c1', 'c2', 'c3', 'c4'])

    bwm = get_janggu_conv(inputs, outputs)
    data_path = pkg_resources.resource_filename('janggu',
                                                'resources/10regions.bed')

    gi = GenomicIndexer.create_from_file(data_path,
                                         binsize=200,
                                         stepsize=200)

    dummy_eval = Scorer('pred', lambda p: [0.2] * len(p),
                        exporter=ExportBigwig(gindexer=gi),
                        conditions=['c1', 'c2', 'c3', 'c4'])

    bwm.predict(inputs, callbacks=[dummy_eval])

    file_ = os.path.join(tmpdir.strpath, 'evaluation', bwm.name,
                         'pred.{}.bigwig')

    for cond in ['c1', 'c2', 'c3', 'c4']:
        assert os.path.exists(file_.format(cond))

    bw = pyBigWig.open(file_.format('c1'))

    co = bw.values('chr1', 600, 2000)

    numpy.testing.assert_allclose(numpy.mean(co), 0.2, rtol=1e-5)
def test_output_bigwig_loss_resolution_equal_stepsize(tmpdir):
    """Loss export to bigWig with stepsize == binsize; values average to 0.2."""
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    inputs = Array("x", numpy.random.random((7, 4, 1, 10)))
    outputs = Array('y', numpy.random.random((7, 4, 1, 4)),
                    conditions=['c1', 'c2', 'c3', 'c4'])

    bwm = get_janggu(inputs, outputs)
    data_path = pkg_resources.resource_filename('janggu',
                                                'resources/10regions.bed')

    gi = GenomicIndexer.create_from_file(data_path,
                                         binsize=200,
                                         stepsize=200)

    dummy_eval = Scorer('loss', lambda t, p: [0.2] * len(t),
                        exporter=ExportBigwig(gindexer=gi))

    bwm.evaluate(inputs, outputs, callbacks=[dummy_eval])

    file_ = os.path.join(tmpdir.strpath, 'evaluation', bwm.name,
                         'loss.{}.bigwig')

    for cond in ['c1', 'c2', 'c3', 'c4']:
        assert os.path.exists(file_.format(cond))

    bw = pyBigWig.open(file_.format('c1'))

    co = bw.values('chr1', 600, 2000)

    numpy.testing.assert_allclose(numpy.mean(co), 0.2, rtol=1e-5)
def test_output_bigwig_loss_resolution_unequal_stepsize(tmpdir):
    """Loss export to bigWig with stepsize (50) < binsize (200).

    The covered interval is shorter by binsize - stepsize at the end,
    hence the 2000-150 query bound.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    inputs = Array("x", numpy.random.random((7, 4, 1, 10)))
    outputs = Array('y', numpy.random.random((7, 4, 1, 4)),
                    conditions=['c1', 'c2', 'c3', 'c4'])

    bwm = get_janggu(inputs, outputs)
    data_path = pkg_resources.resource_filename('janggu',
                                                'resources/10regions.bed')

    gi = GenomicIndexer.create_from_file(data_path,
                                         binsize=200,
                                         stepsize=50)

    dummy_eval = Scorer('loss', lambda t, p: [0.2] * len(t),
                        exporter=ExportBigwig(gindexer=gi))

    bwm.evaluate(inputs, outputs, callbacks=[dummy_eval])

    file_ = os.path.join(tmpdir.strpath, 'evaluation', bwm.name,
                         'loss.{}.bigwig')

    for cond in ['c1', 'c2', 'c3', 'c4']:
        assert os.path.exists(file_.format(cond))

    bw = pyBigWig.open(file_.format('c1'))

    co = bw.values('chr1', 600, 2000-150)

    numpy.testing.assert_allclose(numpy.mean(co), 0.2, rtol=1e-5)
def test_output_tsv_score_across_conditions(tmpdir):
    """TSV export per condition (one column each) vs. across conditions (one 'across' column)."""
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    inputs = Array("x", numpy.random.random((100, 10)))
    outputs = Array('y', numpy.random.randint(2, size=(100, 2)),
                    conditions=['c1', 'c2'])

    bwm = get_janggu(inputs, outputs)

    dummy_eval = Scorer('score', lambda y_true, y_pred: 0.15,
                        exporter=ExportTsv())
    dummy_evalacross = Scorer('scoreacross',
                              lambda y_true, y_pred: 0.15,
                              exporter=ExportTsv(),
                              percondition=False)

    bwm.evaluate(inputs, outputs, callbacks=[dummy_eval, dummy_evalacross])

    # percondition=True: one column per condition
    assert pandas.read_csv(os.path.join(tmpdir.strpath, "evaluation", bwm.name,
                                        "score.tsv"),
                           sep='\t', header=[0]).shape == (1, 2)
    # percondition=False: a single 'across' column
    val = pandas.read_csv(os.path.join(tmpdir.strpath, "evaluation", bwm.name,
                                       "scoreacross.tsv"),
                          sep='\t', header=[0])
    assert val['across'][0] == .15
    assert val.shape == (1, 1)