a b/tests/test_janggo.py
1
import os
2
3
import h5py
4
import matplotlib
5
import numpy as np
6
import pandas as pd
7
import pkg_resources
8
import pytest
9
from keras.layers import Average
10
from keras.layers import Concatenate
11
from keras.layers import Conv2D
12
from keras.layers import Dense
13
from keras.layers import Flatten
14
from keras.layers import Input
15
from keras.layers import Maximum
16
from keras.layers import MaxPooling2D
17
from keras import Model
18
19
from janggu import Janggu
20
from janggu import input_attribution
21
from janggu import inputlayer
22
from janggu import model_from_json
23
from janggu import model_from_yaml
24
from janggu import outputconv
25
from janggu import outputdense
26
from janggu import predict_variant_effect
27
from janggu.data import Array
28
from janggu.data import Bioseq
29
from janggu.data import Cover
30
from janggu.data import GenomicIndexer
31
from janggu.data import ReduceDim
32
from janggu.data.data import JangguSequence
33
from janggu.layers import Complement
34
from janggu.layers import DnaConv2D
35
from janggu.layers import LocalAveragePooling2D
36
from janggu.layers import Reverse
37
38
# Select the 'AGG' matplotlib backend right after the imports.
# NOTE(review): presumably chosen so the tests can run without a display - confirm.
matplotlib.use('AGG')
39
40
41
@pytest.mark.filterwarnings("ignore:inspect")
def test_localaveragepooling2D(tmpdir):
    """Exercise LocalAveragePooling2D with a window of 3 on two small inputs.

    First, an input that is constant along the second axis must come back
    unchanged apart from being two positions shorter. Second, an input of
    length three must collapse to a single position holding the per-channel
    mean.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    # channel c is filled with the constant c + 1 at all ten positions
    constant_in = np.ones((1, 10, 1, 3)) + np.arange(3)

    in_layer = Input((10, 1, 3))
    pooled_layer = LocalAveragePooling2D(3)(in_layer)
    model = Janggu(in_layer, pooled_layer)

    pooled = model.predict(constant_in)
    np.testing.assert_equal(pooled, constant_in[:, :8])

    # position p of channel c holds p + c, i.e. (0, 1, 2) and (1, 2, 3)
    ramp_in = np.arange(3, dtype=float).reshape(1, 3, 1, 1) + np.arange(2)

    in_layer = Input((3, 1, 2))
    pooled_layer = LocalAveragePooling2D(3)(in_layer)
    model = Janggu(in_layer, pooled_layer)

    pooled = model.predict(ramp_in)
    np.testing.assert_equal(pooled.shape, (1, 1, 1, 2))
    for channel, expected_mean in enumerate((1, 2)):
        np.testing.assert_equal(pooled[0, 0, 0, channel], expected_mean)
72
73
74
@pytest.mark.filterwarnings("ignore:inspect")
@pytest.mark.filterwarnings("ignore:The truth value")
def test_janggu_generate_name(tmpdir):
    """Create a model without passing a name, save it and call create_by_name on it."""
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    def _cnn_model(inputs, inp, oup, params):
        # the incoming ``inputs`` argument is ignored; a fresh input layer is built
        in_layer = Input((10, 1))
        flat = Flatten()(in_layer)
        dense_out = Dense(params[0])(flat)
        return in_layer, dense_out

    model = Janggu.create(_cnn_model, modelparams=(2,))
    model.compile(optimizer='adadelta', loss='binary_crossentropy')

    model_file = model._storage_path(model.name, outputdir=model.outputdir)

    model.save()
    model.summary()

    # saving must have produced the storage file
    assert os.path.exists(model_file)

    Janggu.create_by_name(model.name)
96
97
98
def test_dnaconv():
    """Compare DnaConv2D with a manual two-strand computation.

    The DnaConv2D output must match the element-wise maximum of (a) its
    forward layer applied to the input and (b) the same layer applied to the
    reverse-complemented input with the result reversed again.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    roi_file = os.path.join(resources, 'sample.bed')
    genome_file = os.path.join(resources, 'sample_genome.fa')

    dna = Bioseq.create_from_refgenome('dna', refgenome=genome_file,
                                       storage='ndarray',
                                       roi=roi_file, order=1)

    xin = Input(dna.shape[1:])
    both_out = DnaConv2D(Conv2D(30, (21, 1), activation='relu'))(xin)
    both_model = Model(xin, both_out)
    res_both = both_model.predict(dna[0])[0, 0, 0, :]

    # the convolution layer wrapped by DnaConv2D, reused below
    shared_conv = both_model.layers[1].forward_layer

    # forward strand only
    forward_out = shared_conv(xin)
    forward_model = Model(xin, forward_out)
    res_forward = forward_model.predict(dna[0])[0, 0, 0, :]

    # reverse-complemented input, output reversed back
    revcomp_in = Reverse()(Complement()(xin))
    reverse_out = shared_conv(revcomp_in)
    reverse_out = Reverse()(reverse_out)
    reverse_model = Model(xin, reverse_out)
    res_reverse = reverse_model.predict(dna[0])[0, 0, 0, :]

    expected = np.maximum(res_reverse, res_forward)
    np.testing.assert_allclose(res_both, expected, rtol=1e-4)
127
128
129
def test_dnaconv2():
    """Check that DnaConv2D wraps a Conv2D layer that was already called.

    The convolution layer is applied to the input before it is handed to
    DnaConv2D; afterwards the wrapped forward layer must carry the same
    kernel and the original layer must still own exactly two weights.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    roi_file = os.path.join(resources, 'sample.bed')
    genome_file = os.path.join(resources, 'sample_genome.fa')

    dna = Bioseq.create_from_refgenome('dna', refgenome=genome_file,
                                       storage='ndarray',
                                       roi=roi_file, order=1)

    xin = Input(dna.shape[1:])
    conv = Conv2D(30, (21, 1), activation='relu')

    # call the layer once so that it exists before being wrapped
    conv(xin)

    wrapped_out = DnaConv2D(conv)(xin)
    model = Model(xin, wrapped_out)
    # the prediction itself is not inspected; it only has to run
    _ = model.predict(dna[0])[0, 0, 0, :]

    wrapped_kernel = model.layers[1].forward_layer.get_weights()[0]
    np.testing.assert_allclose(conv.get_weights()[0], wrapped_kernel)
    assert len(conv.weights) == 2
152
153
154
@pytest.mark.filterwarnings("ignore:The truth value")
def test_janggu_instance_dense(tmpdir):
    """Test Janggu creation by shape and name with a dense output layer.

    Templates that address the input layers in an invalid way, as well as a
    non-string model name, must raise. Valid templates must be creatable,
    convertible via ``model_from_json``/``model_from_yaml``, and the saved
    model must be usable with ``Janggu.create_by_name``.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(data_path, 'sample.bed')
    csvfile = os.path.join(data_path, 'sample.csv')
    refgenome = os.path.join(data_path, 'sample_genome.fa')

    dna = Bioseq.create_from_refgenome('dna', refgenome=refgenome,
                                       storage='ndarray',
                                       roi=bed_file, order=1)

    df = pd.read_csv(csvfile, header=None)
    ctcf = Array('ctcf', df.values, conditions=['peaks'])

    def _create(template, name='dna_ctcf_HepG2-cnn'):
        # every model in this test shares parameters, inputs and outputs
        return Janggu.create(template, modelparams=(2,),
                             inputs=dna,
                             outputs=ctcf,
                             name=name)

    @inputlayer
    @outputdense('sigmoid')
    def _cnn_model(inputs, inp, oup, params):
        layer = inputs['.']
        layer = Complement()(layer)
        layer = Reverse()(layer)
        layer = Flatten()(layer)
        output = Dense(params[0])(layer)
        return inputs, output

    with pytest.raises(Exception):
        # due to: no input named '.' is defined
        _create(_cnn_model)

    @inputlayer
    @outputdense('sigmoid')
    def _cnn_model(inputs, inp, oup, params):
        layer = inputs[list()]
        layer = Complement()(layer)
        layer = Reverse()(layer)
        layer = Flatten()(layer)
        output = Dense(params[0])(layer)
        return inputs, output

    with pytest.raises(Exception):
        # due to: wrong type used for indexing
        _create(_cnn_model)

    @inputlayer
    @outputdense('sigmoid')
    def _cnn_model(inputs, inp, oup, params):
        layer = inputs()[0]
        layer = Complement()(layer)
        layer = Reverse()(layer)
        layer = Flatten()(layer)
        output = Dense(params[0])(layer)
        return inputs, output

    with pytest.raises(Exception):
        # the model name must be a string
        _create(_cnn_model, name=12342134)

    # test with given model name
    bwm = _create(_cnn_model)
    # create the same model a second time under the same explicit name
    # (the original comment mentioned an auto-generated name, but a name
    # has always been passed here)
    bwm = _create(_cnn_model)

    # address the input layer by position
    @inputlayer
    @outputdense('sigmoid')
    def _cnn_model(inputs, inp, oup, params):
        layer = inputs[0]
        layer = Complement()(layer)
        layer = Reverse()(layer)
        layer = Flatten()(layer)
        output = Dense(params[0])(layer)
        return inputs, output

    bwm = _create(_cnn_model)

    # address the input layer by dataset name
    @inputlayer
    @outputdense('sigmoid')
    def _cnn_model(inputs, inp, oup, params):
        layer = inputs['dna']
        layer = Complement()(layer)
        layer = Reverse()(layer)
        layer = Flatten()(layer)
        output = Dense(params[0])(layer)
        return inputs, output

    bwm = _create(_cnn_model)

    # the conversions only have to succeed; their results are not inspected
    model_from_json(bwm.kerasmodel.to_json())
    model_from_yaml(bwm.kerasmodel.to_yaml())

    bwm.compile(optimizer='adadelta', loss='binary_crossentropy')
    storage = bwm._storage_path(bwm.name, outputdir=tmpdir.strpath)

    bwm.save()
    bwm.summary()

    assert os.path.exists(storage)

    Janggu.create_by_name('dna_ctcf_HepG2-cnn')
273
274
275
@pytest.mark.filterwarnings("ignore:The truth value")
def test_janggu_influence_genomic(tmpdir):
    """Check ``input_attribution`` for inputs addressed by genomic coordinates.

    The attribution of the first region must equal the attribution of the
    same region extended by one base on each side once that extra base is
    trimmed off again. The comparison is done twice: with the region given
    as chrom/start/end and with the region given as ``idx=0``.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(data_path, 'sample.bed')
    csvfile = os.path.join(data_path, 'sample.csv')
    refgenome = os.path.join(data_path, 'sample_genome.fa')

    dna = Bioseq.create_from_refgenome('dna', refgenome=refgenome,
                                       storage='ndarray',
                                       binsize=50,
                                       roi=bed_file, order=1)

    df = pd.read_csv(csvfile, header=None)
    ctcf = Array('ctcf', df.values, conditions=['peaks'])

    @inputlayer
    @outputdense('sigmoid')
    def _cnn_model(inputs, inp, oup, params):
        layer = inputs['dna']
        layer = Flatten()(layer)
        output = Dense(params[0])(layer)
        return inputs, output

    model = Janggu.create(_cnn_model, modelparams=(2,),
                          inputs=dna,
                          outputs=ctcf,
                          name='dna_ctcf_HepG2-cnn')

    model.compile(optimizer='adadelta', loss='binary_crossentropy')

    iv = dna.gindexer[0]
    chrom, start, end = iv.chrom, iv.start, iv.end

    # region given by its exact coordinates ...
    influence = input_attribution(model, dna, chrom=chrom, start=start, end=end)

    # ... versus the region extended by one base on both sides
    influence2 = input_attribution(model, dna, chrom=chrom,
                                   start=start - 1, end=end + 1)
    np.testing.assert_equal(influence[0][:], influence2[0][:][:, 1:-1])

    # the same, but now using the index directly
    influence = input_attribution(model, dna, idx=0)

    influence2 = input_attribution(model, dna, chrom=chrom,
                                   start=start - 1, end=end + 1)
    np.testing.assert_equal(influence[0][:], influence2[0][:][:, 1:-1])
325
326
327
@pytest.mark.filterwarnings("ignore:The truth value")
def test_janggu_influence_fasta(tmpdir):
    """Check ``input_attribution`` for sequences loaded from a fasta file.

    Querying the first sequence by chrom/start/end must give the same
    attribution as querying it with ``idx=0``.
    """
    # Point janggu at the per-test temporary directory, as the other tests in
    # this module do; previously the ``tmpdir`` fixture was requested but
    # never used here.
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    filename = os.path.join(data_path, 'sample.fa')

    dna = Bioseq.create_from_seq('dna', fastafile=filename,
                                 order=1, cache=False)

    @inputlayer
    def _cnn_model(inputs, inp, oup, params):
        layer = inputs['dna']
        layer = Flatten()(layer)
        output = Dense(params[0])(layer)
        output = Dense(1, activation='sigmoid')(output)
        return inputs, output

    # NOTE: the model is not compiled in this test before attribution is run
    model = Janggu.create(_cnn_model, modelparams=(2,),
                          inputs=dna,
                          name='dna_ctcf_HepG2-cnn')

    iv = dna.gindexer[0]
    chrom, start, end = iv.chrom, iv.start, iv.end
    influence = input_attribution(model, dna, chrom=chrom, start=start, end=end)

    influence2 = input_attribution(model, dna, idx=0)
    np.testing.assert_equal(influence[0][:], influence2[0][:])
361
362
363
@pytest.mark.filterwarnings("ignore:The truth value")
def test_janggu_variant_prediction(tmpdir):
    """Run ``Janggu.predict_variant_effect`` for sequence orders 1, 2 and 3.

    For each order the call must write ``scores.hdf5`` and ``snps.bed.gz``
    into the output folder, and the stored ``diffscore`` values must be
    non-zero at chr2:59-60 and zero at chr2:54-55.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    output_folder = os.environ['JANGGU_OUTPUT']

    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    refgenome = os.path.join(data_path, 'sample_genome.fa')
    vcffile = os.path.join(data_path, 'sample.vcf')

    scores_file = os.path.join(output_folder, 'scores.hdf5')
    snps_file = os.path.join(output_folder, 'snps.bed.gz')

    def _cnn_model(inputs, inp, oup, params):
        # the input length shrinks with the order: 50 - order + 1 positions
        inputs = Input((50 - params['order'] + 1, 1, pow(4, params['order'])))
        layer = Flatten()(inputs)
        layer = Dense(params['hiddenunits'])(layer)
        output = Dense(4, activation='sigmoid')(layer)
        return inputs, output

    for order in [1, 2, 3]:
        dna = Bioseq.create_from_refgenome('dna', refgenome=refgenome,
                                           storage='ndarray',
                                           binsize=50,
                                           store_whole_genome=True,
                                           order=order)

        model = Janggu.create(_cnn_model,
                              modelparams={'hiddenunits': 2, 'order': order},
                              name='dna_ctcf_HepG2-cnn')

        model.predict_variant_effect(dna, vcffile,
                                     conditions=['m' + str(i) for i in range(4)],
                                     output_folder=output_folder)
        assert os.path.exists(scores_file)
        assert os.path.exists(snps_file)

        gindexer = GenomicIndexer.create_from_file(snps_file, None, None)

        # the context manager closes the file even when an assertion fails
        with h5py.File(scores_file, 'r') as f:
            cov = Cover.create_from_array('snps', f['diffscore'],
                                          gindexer,
                                          store_whole_genome=True)

            # diagnostic output, shown by pytest when the test fails
            print(cov['chr2', 55, 65].shape)
            print(cov['chr2', 55, 65])

            assert np.abs(cov['chr2', 59, 60]).sum() > 0.0
            assert np.abs(cov['chr2', 54, 55]).sum() == 0.0
409
410
411
@pytest.mark.filterwarnings("ignore:The truth value")
def test_janggu_variant_prediction_from_refgenome(tmpdir):
    """Run the module-level ``predict_variant_effect`` for orders 1, 2 and 3.

    Here the keras model and the path of the reference genome are passed
    directly. For each order the call must write ``scores.hdf5`` and
    ``snps.bed.gz`` into the output folder, and the stored ``diffscore``
    values must be non-zero at chr2:59-60 and zero at chr2:54-55.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    output_folder = os.environ['JANGGU_OUTPUT']

    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    refgenome = os.path.join(data_path, 'sample_genome.fa')
    vcffile = os.path.join(data_path, 'sample.vcf')

    scores_file = os.path.join(output_folder, 'scores.hdf5')
    snps_file = os.path.join(output_folder, 'snps.bed.gz')

    def _cnn_model(inputs, inp, oup, params):
        # the input length shrinks with the order: 50 - order + 1 positions
        inputs = Input((50 - params['order'] + 1, 1, pow(4, params['order'])))
        layer = Flatten()(inputs)
        layer = Dense(params['hiddenunits'])(layer)
        output = Dense(4, activation='sigmoid')(layer)
        return inputs, output

    for order in [1, 2, 3]:
        model = Janggu.create(_cnn_model,
                              modelparams={'hiddenunits': 2, 'order': order},
                              name='dna_ctcf_HepG2-cnn')

        predict_variant_effect(model.kerasmodel,
                               refgenome,
                               vcffile,
                               conditions=['m' + str(i) for i in range(4)],
                               output_folder=output_folder,
                               order=order)
        assert os.path.exists(scores_file)
        assert os.path.exists(snps_file)

        gindexer = GenomicIndexer.create_from_file(snps_file, None, None)

        # the context manager closes the file even when an assertion fails
        with h5py.File(scores_file, 'r') as f:
            cov = Cover.create_from_array('snps', f['diffscore'],
                                          gindexer,
                                          store_whole_genome=True)

            # diagnostic output, shown by pytest when the test fails
            print(cov['chr2', 55, 65].shape)
            print(cov['chr2', 55, 65])

            assert np.abs(cov['chr2', 59, 60]).sum() > 0.0
            assert np.abs(cov['chr2', 54, 55]).sum() == 0.0
455
456
457
@pytest.mark.filterwarnings("ignore:The truth value")
def test_janggu_instance_conv(tmpdir):
    """Test Janggu creation by shape and name with a convolutional output layer.

    The model maps a Bioseq input onto a Cover output through ``outputconv``;
    it is compiled, saved, and then looked up again with
    ``Janggu.create_by_name``.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(data_path, 'sample.bed')
    posfile = os.path.join(data_path, 'scored_sample.bed')
    refgenome = os.path.join(data_path, 'sample_genome.fa')

    dna = Bioseq.create_from_refgenome('dna', refgenome=refgenome,
                                       storage='ndarray',
                                       roi=bed_file, order=1,
                                       binsize=200,
                                       stepsize=50)

    # The coverage is built with both settings of ``store_whole_genome``;
    # the model below uses the last one (store_whole_genome=True).
    for whole_genome in (False, True):
        ctcf = Cover.create_from_bed(
            "positives",
            bedfiles=posfile,
            roi=bed_file,
            binsize=200, stepsize=50,
            resolution=50,
            store_whole_genome=whole_genome,
            flank=0,
            collapser=None,
            storage='ndarray')

    @inputlayer
    @outputconv('sigmoid')
    def _cnn_model(inputs, inp, oup, params):
        with inputs.use('dna') as inlayer:
            layer = inlayer
        layer = Complement()(layer)
        layer = Reverse()(layer)
        return inputs, layer

    bwm = Janggu.create(_cnn_model, modelparams=(2,),
                        inputs=dna,
                        outputs=ctcf,
                        name='dna_ctcf_HepG2-cnn')

    bwm.compile(optimizer='adadelta', loss='binary_crossentropy')
    storage = bwm._storage_path(bwm.name, outputdir=tmpdir.strpath)

    bwm.save()
    bwm.summary()

    assert os.path.exists(storage)

    Janggu.create_by_name('dna_ctcf_HepG2-cnn')
519
520
521
@pytest.mark.filterwarnings("ignore:The truth value")
def test_janggu_use_dnaconv_none(tmpdir):
    """Compare ``DnaConv2D(merge_mode=None)`` with an explicit two-strand model.

    The reference model applies one shared Conv2D to the input and to its
    reverse complement and returns both outputs unmerged. After copying the
    weights from the DnaConv2D layer, both models must predict the same
    values. Finally the DnaConv2D model is saved and looked up by name.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(data_path, 'sample.bed')
    refgenome = os.path.join(data_path, 'sample_genome.fa')

    dna = Bioseq.create_from_refgenome('dna', refgenome=refgenome,
                                       storage='ndarray',
                                       roi=bed_file, order=1)

    @inputlayer
    def _cnn_model1(inputs, inp, oup, params):
        with inputs.use('dna') as inlayer:
            layer = inlayer
            layer = DnaConv2D(Conv2D(5, (3, 1), name='fconv1'),
                              merge_mode=None, name='bothstrands')(layer)
        return inputs, layer

    bwm1 = Janggu.create(_cnn_model1, modelparams=(2,),
                         inputs=dna,
                         name='dna_ctcf_HepG2-cnn1')

    p1 = bwm1.predict(dna[1:2])
    w = bwm1.kerasmodel.get_layer('bothstrands').get_weights()

    @inputlayer
    def _cnn_model2(inputs, inp, oup, params):
        with inputs.use('dna') as inlayer:
            layer = inlayer
            conv = Conv2D(5, (3, 1), name='singlestrand')
            fl = conv(layer)
            rl = Reverse()(conv(Complement()(Reverse()(inlayer))))
        return inputs, [fl, rl]

    bwm2 = Janggu.create(_cnn_model2, modelparams=(2,),
                         inputs=dna,
                         name='dna_ctcf_HepG2-cnn2')

    # give the reference convolution the weights of the DnaConv2D layer
    bwm2.kerasmodel.get_layer('singlestrand').set_weights(w)

    p2 = bwm2.predict(dna[1:2])
    np.testing.assert_allclose(p1, p2, rtol=1e-4, atol=1e-3)

    bwm1.compile(optimizer='adadelta', loss='binary_crossentropy')
    storage = bwm1._storage_path(bwm1.name, outputdir=tmpdir.strpath)

    bwm1.save()
    bwm1.summary()

    assert os.path.exists(storage)

    Janggu.create_by_name('dna_ctcf_HepG2-cnn1')
578
579
580
@pytest.mark.filterwarnings("ignore:The truth value")
def test_janggu_use_dnaconv_concat(tmpdir):
    """Compare ``DnaConv2D(merge_mode='concat')`` with an explicit model.

    The reference model applies one shared Conv2D to the input and to its
    reverse complement and merges both outputs with ``Concatenate``. After
    copying the weights from the DnaConv2D layer, both models must predict
    the same values. Finally the DnaConv2D model is saved and looked up by
    name.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(data_path, 'sample.bed')
    refgenome = os.path.join(data_path, 'sample_genome.fa')

    dna = Bioseq.create_from_refgenome('dna', refgenome=refgenome,
                                       storage='ndarray',
                                       roi=bed_file, order=1)

    @inputlayer
    def _cnn_model1(inputs, inp, oup, params):
        with inputs.use('dna') as inlayer:
            layer = inlayer
            layer = DnaConv2D(Conv2D(5, (3, 1), name='fconv1'),
                              merge_mode='concat', name='bothstrands')(layer)
        return inputs, layer

    bwm1 = Janggu.create(_cnn_model1, modelparams=(2,),
                         inputs=dna,
                         name='dna_ctcf_HepG2-cnn1')

    p1 = bwm1.predict(dna[1:2])
    w = bwm1.kerasmodel.get_layer('bothstrands').get_weights()

    @inputlayer
    def _cnn_model2(inputs, inp, oup, params):
        with inputs.use('dna') as inlayer:
            layer = inlayer
            conv = Conv2D(5, (3, 1), name='singlestrand')
            fl = conv(layer)
            rl = Reverse()(conv(Complement()(Reverse()(inlayer))))
            layer = Concatenate()([fl, rl])
        return inputs, layer

    bwm2 = Janggu.create(_cnn_model2, modelparams=(2,),
                         inputs=dna,
                         name='dna_ctcf_HepG2-cnn2')

    # give the reference convolution the weights of the DnaConv2D layer
    bwm2.kerasmodel.get_layer('singlestrand').set_weights(w)

    p2 = bwm2.predict(dna[1:2])
    np.testing.assert_allclose(p1, p2, rtol=1e-4, atol=1e-3)

    bwm1.compile(optimizer='adadelta', loss='binary_crossentropy')
    storage = bwm1._storage_path(bwm1.name, outputdir=tmpdir.strpath)

    bwm1.save()
    bwm1.summary()

    assert os.path.exists(storage)

    Janggu.create_by_name('dna_ctcf_HepG2-cnn1')
638
639
640
@pytest.mark.filterwarnings("ignore:The truth value")
def test_janggu_use_dnaconv_ave(tmpdir):
    """Compare ``DnaConv2D(merge_mode='ave')`` with an explicit model.

    The reference model applies one shared Conv2D to the input and to its
    reverse complement and merges both outputs with ``Average``. After
    copying the weights from the DnaConv2D layer, both models must predict
    the same values. Finally the DnaConv2D model is saved and looked up by
    name.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(data_path, 'sample.bed')
    refgenome = os.path.join(data_path, 'sample_genome.fa')

    dna = Bioseq.create_from_refgenome('dna', refgenome=refgenome,
                                       storage='ndarray',
                                       roi=bed_file, order=1)

    @inputlayer
    def _cnn_model1(inputs, inp, oup, params):
        with inputs.use('dna') as inlayer:
            layer = inlayer
            layer = DnaConv2D(Conv2D(5, (3, 1), name='fconv1'),
                              merge_mode='ave', name='bothstrands')(layer)
        return inputs, layer

    bwm1 = Janggu.create(_cnn_model1, modelparams=(2,),
                         inputs=dna,
                         name='dna_ctcf_HepG2-cnn1')

    p1 = bwm1.predict(dna[1:2])
    w = bwm1.kerasmodel.get_layer('bothstrands').get_weights()

    @inputlayer
    def _cnn_model2(inputs, inp, oup, params):
        with inputs.use('dna') as inlayer:
            layer = inlayer
            conv = Conv2D(5, (3, 1), name='singlestrand')
            fl = conv(layer)
            rl = Reverse()(conv(Complement()(Reverse()(inlayer))))
            layer = Average()([fl, rl])
        return inputs, layer

    bwm2 = Janggu.create(_cnn_model2, modelparams=(2,),
                         inputs=dna,
                         name='dna_ctcf_HepG2-cnn2')

    # give the reference convolution the weights of the DnaConv2D layer
    bwm2.kerasmodel.get_layer('singlestrand').set_weights(w)

    p2 = bwm2.predict(dna[1:2])
    np.testing.assert_allclose(p1, p2, rtol=1e-4, atol=1e-3)

    bwm1.compile(optimizer='adadelta', loss='binary_crossentropy')
    storage = bwm1._storage_path(bwm1.name, outputdir=tmpdir.strpath)

    bwm1.save()
    bwm1.summary()

    assert os.path.exists(storage)

    Janggu.create_by_name('dna_ctcf_HepG2-cnn1')
698
699
700
@pytest.mark.filterwarnings("ignore:The truth value")
def test_janggu_use_dnaconv_max(tmpdir):
    """Compare ``DnaConv2D(merge_mode='max')`` with an explicit model.

    The reference model applies one shared Conv2D to the input and to its
    reverse complement and merges both outputs with ``Maximum``. After
    copying the weights from the DnaConv2D layer, both models must predict
    the same values. Finally the DnaConv2D model is saved and looked up by
    name.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(data_path, 'sample.bed')
    refgenome = os.path.join(data_path, 'sample_genome.fa')

    dna = Bioseq.create_from_refgenome('dna', refgenome=refgenome,
                                       storage='ndarray',
                                       roi=bed_file, order=1)

    @inputlayer
    def _cnn_model1(inputs, inp, oup, params):
        with inputs.use('dna') as inlayer:
            layer = inlayer
            layer = DnaConv2D(Conv2D(5, (3, 1), name='fconv1'),
                              merge_mode='max', name='bothstrands')(layer)
        return inputs, layer

    bwm1 = Janggu.create(_cnn_model1, modelparams=(2,),
                         inputs=dna,
                         name='dna_ctcf_HepG2-cnn1')

    p1 = bwm1.predict(dna[1:2])
    w = bwm1.kerasmodel.get_layer('bothstrands').get_weights()

    @inputlayer
    def _cnn_model2(inputs, inp, oup, params):
        with inputs.use('dna') as inlayer:
            layer = inlayer
            conv = Conv2D(5, (3, 1), name='singlestrand')
            fl = conv(layer)
            rl = Reverse()(conv(Complement()(Reverse()(inlayer))))
            layer = Maximum()([fl, rl])
        return inputs, layer

    bwm2 = Janggu.create(_cnn_model2, modelparams=(2,),
                         inputs=dna,
                         name='dna_ctcf_HepG2-cnn2')

    # give the reference convolution the weights of the DnaConv2D layer
    bwm2.kerasmodel.get_layer('singlestrand').set_weights(w)

    p2 = bwm2.predict(dna[1:2])
    np.testing.assert_allclose(p1, p2, rtol=1e-4, atol=1e-3)

    bwm1.compile(optimizer='adadelta', loss='binary_crossentropy')
    storage = bwm1._storage_path(bwm1.name, outputdir=tmpdir.strpath)

    bwm1.save()
    bwm1.summary()

    assert os.path.exists(storage)

    Janggu.create_by_name('dna_ctcf_HepG2-cnn1')
758
759
760
761
@pytest.mark.filterwarnings("ignore:inspect")
def test_janggu_chr2_validation(tmpdir):
    """Fit a small DnaConv2D model while passing ``validation_data=['chr2']``.

    The test only requires that fitting runs through; the return value of
    ``fit`` is not inspected.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    resources = pkg_resources.resource_filename('janggu', 'resources/')
    roi_file = os.path.join(resources, 'sample.bed')
    scored_file = os.path.join(resources, 'scored_sample.bed')
    genome_file = os.path.join(resources, 'sample_genome.fa')

    dna = Bioseq.create_from_refgenome('dna', refgenome=genome_file,
                                       binsize=200, stepsize=50,
                                       roi=roi_file, order=1)

    ctcf = Cover.create_from_bed("positives",
                                 bedfiles=scored_file,
                                 roi=roi_file,
                                 binsize=200, stepsize=50,
                                 resolution=None,
                                 flank=0,
                                 collapser='max',
                                 storage='ndarray')

    @inputlayer
    @outputconv('sigmoid')
    def _cnn_model1(inputs, inp, oup, params):
        with inputs.use('dna') as dna_in:
            strands = DnaConv2D(Conv2D(5, (3, 1), name='fconv1'),
                                merge_mode='max', name='bothstrands')(dna_in)
            pooled = MaxPooling2D((198, 1))(strands)
        return inputs, pooled

    model = Janggu.create(_cnn_model1, modelparams=(2,),
                          inputs=dna, outputs=ctcf,
                          name='dna_ctcf_HepG2-cnn1')

    model.compile(optimizer='adadelta', loss='binary_crossentropy')
    model.fit(dna, ctcf, validation_data=['chr2'])
802
803
804
@pytest.mark.filterwarnings("ignore:inspect")
def test_janggu_bedfile_validation(tmpdir):
    """Fit a small DnaConv2D model while passing a bed file as ``validation_data``.

    The test only requires that fitting runs through; the return value of
    ``fit`` is not inspected.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    resources = pkg_resources.resource_filename('janggu', 'resources/')
    roi_file = os.path.join(resources, 'sample.bed')
    scored_file = os.path.join(resources, 'scored_sample.bed')
    genome_file = os.path.join(resources, 'sample_genome.fa')

    dna = Bioseq.create_from_refgenome('dna', refgenome=genome_file,
                                       binsize=200, stepsize=50,
                                       roi=roi_file, order=1)

    ctcf = Cover.create_from_bed("positives",
                                 bedfiles=scored_file,
                                 roi=roi_file,
                                 binsize=200, stepsize=50,
                                 resolution=None,
                                 flank=0,
                                 collapser='max',
                                 storage='ndarray')

    @inputlayer
    @outputconv('sigmoid')
    def _cnn_model1(inputs, inp, oup, params):
        with inputs.use('dna') as dna_in:
            strands = DnaConv2D(Conv2D(5, (3, 1), name='fconv1'),
                                merge_mode='max', name='bothstrands')(dna_in)
            pooled = MaxPooling2D((198, 1))(strands)
        return inputs, pooled

    model = Janggu.create(_cnn_model1, modelparams=(2,),
                          inputs=dna, outputs=ctcf,
                          name='dna_ctcf_HepG2-cnn1')

    model.compile(optimizer='adadelta', loss='binary_crossentropy')
    # the scored bed file doubles as the validation region specification
    model.fit(dna, ctcf, validation_data=scored_file)
843
844
845
@pytest.mark.filterwarnings("ignore:inspect")
def test_janggu_train_predict_option0(tmpdir):
    """Train, predict and evaluate on dummy data.

    The model is built with ``Janggu.create`` from an ``Array`` input and
    an ``Array`` target that is wrapped in ``ReduceDim``. After fitting,
    the prediction shape is compared against the wrapped target and the
    'auc' evaluation output is checked for the condition name.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    inputs = Array("X", np.random.random((100, 10)))
    # The labels receive an additional axis before being wrapped in
    # ReduceDim with axis=(1,).
    labels = np.random.randint(2, size=(100, 1))[:, None]
    outputs = ReduceDim(Array('y', labels, conditions=['random']),
                        axis=(1,))

    @inputlayer
    @outputdense('sigmoid')
    def test_model(inputs, inp, oup, params):
        return inputs, inputs[0]

    bwm = Janggu.create(test_model,
                        inputs=inputs,
                        outputs=outputs,
                        name='nptest')

    bwm.compile(optimizer='adadelta', loss='binary_crossentropy')

    # The storage path must be absent before fitting and present afterwards.
    storage = bwm._storage_path(bwm.name, outputdir=tmpdir.strpath)
    assert not os.path.exists(storage)

    bwm.fit(inputs, outputs, epochs=2, batch_size=32)

    assert os.path.exists(storage)

    pred = bwm.predict(inputs)
    np.testing.assert_equal(len(pred[:, np.newaxis]), len(inputs))
    np.testing.assert_equal(pred.shape, outputs.shape)

    # test if the condition name is correctly used in the output table
    bwm.evaluate(inputs, outputs, callbacks=['auc'])

    outputauc = os.path.join(tmpdir.strpath, 'evaluation', 'nptest',
                             'auc.tsv')
    assert os.path.exists(outputauc)
    assert pd.read_csv(outputauc).columns[0] == 'random'
887
888
889
@pytest.mark.filterwarnings("ignore:inspect")
def test_janggu_train_predict_option1(tmpdir):
    """Train, predict and evaluate on dummy data.

    The model is built with ``Janggu.create`` from plain ``Array``
    datasets, which are also passed directly to fit, predict and evaluate.
    The 'auc' evaluation output is checked for the condition name.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    inputs = Array("X", np.random.random((100, 10)))
    outputs = Array('y', np.random.randint(2, size=(100, 1)),
                    conditions=['random'])

    @inputlayer
    @outputdense('sigmoid')
    def test_model(inputs, inp, oup, params):
        return inputs, inputs[0]

    bwm = Janggu.create(test_model,
                        inputs=inputs,
                        outputs=outputs,
                        name='nptest')

    bwm.compile(optimizer='adadelta', loss='binary_crossentropy')

    # The storage path must be absent before fitting and present afterwards.
    storage = bwm._storage_path(bwm.name, outputdir=tmpdir.strpath)
    assert not os.path.exists(storage)

    bwm.fit(inputs, outputs, epochs=2, batch_size=32)

    assert os.path.exists(storage)

    pred = bwm.predict(inputs)
    np.testing.assert_equal(len(pred[:, np.newaxis]), len(inputs))
    np.testing.assert_equal(pred.shape, outputs.shape)

    # test if the condition name is correctly used in the output table
    bwm.evaluate(inputs, outputs, callbacks=['auc'])

    outputauc = os.path.join(tmpdir.strpath, 'evaluation', 'nptest',
                             'auc.tsv')
    assert os.path.exists(outputauc)
    assert pd.read_csv(outputauc).columns[0] == 'random'
931
932
933
@pytest.mark.filterwarnings("ignore:inspect")
def test_janggu_train_predict_option2(tmpdir):
    """Train, predict and evaluate on dummy data.

    create: NO (the Janggu model is instantiated directly from keras layers)
    Input args: list(Dataset)
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    inputs = Array("x", np.random.random((100, 10)))
    outputs = Array('y', np.random.randint(2, size=(100, 1)),
                    conditions=['random'])

    def _model():
        # Layer names 'x' and 'y' are the same as the Array names above.
        inputs = Input((10,), name='x')
        output = Dense(1, activation='sigmoid', name='y')(inputs)
        model = Janggu(inputs=inputs, outputs=output, name='test')
        model.compile(optimizer='adadelta', loss='binary_crossentropy',
                      metrics=['accuracy'])
        return model

    bwm = _model()

    # The storage path must be absent before fitting and present afterwards.
    storage = bwm._storage_path(bwm.name, outputdir=tmpdir.strpath)
    assert not os.path.exists(storage)

    bwm.fit([inputs], [outputs], epochs=2, batch_size=32)

    assert os.path.exists(storage)

    pred = bwm.predict([inputs])
    np.testing.assert_equal(len(pred[:, np.newaxis]), len(inputs))
    np.testing.assert_equal(pred.shape, outputs.shape)
    bwm.evaluate([inputs], [outputs])
967
968
969
@pytest.mark.filterwarnings("ignore:inspect")
def test_janggu_train_predict_option3(tmpdir):
    """Fit, predict and evaluate with numpy arrays wrapped in lists.

    The Janggu model is instantiated directly (no ``Janggu.create``) and
    every call receives ``[np.ndarray]`` arguments. Each of fit, predict
    and evaluate is invoked twice.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    x_data = np.random.random((100, 10))
    y_data = np.random.randint(2, size=(100, 1))

    # Build and compile the single-layer model in place.
    layer_in = Input((10,), name='x')
    layer_out = Dense(1, activation='sigmoid')(layer_in)
    model = Janggu(inputs=layer_in, outputs=layer_out, name='test')
    model.compile(optimizer='adadelta', loss='binary_crossentropy',
                  metrics=['accuracy'])

    storage_dir = model._storage_path(model.name, outputdir=tmpdir.strpath)
    assert not os.path.exists(storage_dir)

    # Fitting is run twice with identical arguments.
    for _ in range(2):
        model.fit([x_data], [y_data], epochs=2, batch_size=32)
    assert os.path.exists(storage_dir)

    # Predict once without and once with an explicit batch size; only the
    # first result is inspected.
    predictions = model.predict([x_data])
    model.predict([x_data], batch_size=32)
    np.testing.assert_equal(len(predictions[:, np.newaxis]), len(x_data))
    np.testing.assert_equal(predictions.shape, y_data.shape)

    model.evaluate([x_data], [y_data])
    model.evaluate([x_data], [y_data], batch_size=32)
1010
1011
1012
@pytest.mark.filterwarnings("ignore:inspect")
def test_janggu_train_predict_option4(tmpdir):
    """Fit, predict and evaluate with bare numpy arrays.

    The Janggu model is instantiated directly (no ``Janggu.create``) and
    the arrays are passed without list or Dataset wrappers. Each of fit,
    predict and evaluate is invoked twice.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    x_data = np.random.random((100, 10))
    y_data = np.random.randint(2, size=(100, 1))

    def _build(path):
        # `path` is accepted but not used while building the model.
        layer_in = Input((10,), name='x')
        layer_out = Dense(1, activation='sigmoid')(layer_in)
        net = Janggu(inputs=layer_in, outputs=layer_out, name='test')
        net.compile(optimizer='adadelta', loss='binary_crossentropy',
                    metrics=['accuracy'])
        return net

    model = _build(tmpdir.strpath)

    storage_dir = model._storage_path(model.name, outputdir=tmpdir.strpath)
    assert not os.path.exists(storage_dir)

    # This used to not work with normal numpy arrays,
    # but now the numpy arrays are matched automatically
    # with the layer names. Fitting is run twice.
    for _ in range(2):
        model.fit(x_data, y_data, epochs=2, batch_size=32)

    assert os.path.exists(storage_dir)

    # Only the first prediction is inspected.
    predictions = model.predict(x_data)
    model.predict(x_data, batch_size=32)
    np.testing.assert_equal(len(predictions[:, np.newaxis]), len(x_data))
    np.testing.assert_equal(predictions.shape, y_data.shape)

    model.evaluate(x_data, y_data)
    model.evaluate(x_data, y_data, batch_size=32)
1056
1057
1058
@pytest.mark.filterwarnings("ignore:inspect")
def test_janggu_train_predict_option5(tmpdir):
    """Fit, predict and evaluate with lists of Array datasets.

    The Janggu model is instantiated directly (no ``Janggu.create``) and
    all three calls are made with ``use_multiprocessing=False``.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    features = Array("x", np.random.random((100, 10)))
    labels = Array('y', np.random.randint(2, size=(100, 1)),
                   conditions=['random'])

    def _build():
        # Layer names 'x' and 'y' are the same as the Array names above.
        layer_in = Input((10,), name='x')
        layer_out = Dense(1, name='y', activation='sigmoid')(layer_in)
        net = Janggu(inputs=layer_in, outputs=layer_out, name='test_model')
        net.compile(optimizer='adadelta', loss='binary_crossentropy',
                    metrics=['accuracy'])
        return net

    model = _build()

    storage_dir = model._storage_path(model.name, outputdir=tmpdir.strpath)
    assert not os.path.exists(storage_dir)

    single_process = {'use_multiprocessing': False}

    model.fit([features], [labels], epochs=2, batch_size=32,
              **single_process)
    assert os.path.exists(storage_dir)

    predictions = model.predict([features], **single_process)
    np.testing.assert_equal(len(predictions[:, np.newaxis]), len(features))
    np.testing.assert_equal(predictions.shape, labels.shape)

    model.evaluate([features], [labels], **single_process)
1095
1096
1097
@pytest.mark.filterwarnings("ignore:inspect")
def test_janggu_train_predict_option6(tmpdir):
    """Fit, predict and evaluate a ``Janggu.create`` model on Array data.

    The Array datasets are passed directly (not wrapped in lists) and all
    three calls are made with ``use_multiprocessing=False``.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    features = Array("x", np.random.random((100, 10)))
    labels = Array('y', np.random.randint(2, size=(100, 1)),
                   conditions=['random'])

    @inputlayer
    @outputdense('sigmoid')
    def _model(inputs, inp, oup, params):
        return inputs, inputs[0]

    model = Janggu.create(_model, inputs=features, outputs=labels,
                          name='nptest')
    model.compile(optimizer='adadelta', loss='binary_crossentropy')

    storage_dir = model._storage_path(model.name, outputdir=tmpdir.strpath)
    assert not os.path.exists(storage_dir)

    single_process = {'use_multiprocessing': False}

    model.fit(features, labels, epochs=2, batch_size=32, **single_process)
    assert os.path.exists(storage_dir)

    predictions = model.predict(features, **single_process)
    np.testing.assert_equal(len(predictions[:, np.newaxis]), len(features))
    np.testing.assert_equal(predictions.shape, labels.shape)

    model.evaluate(features, labels, **single_process)
1136
1137
1138
@pytest.mark.filterwarnings("ignore:inspect")
def test_janggu_train_predict_option7(tmpdir):
    """Fit with a validation set and without an explicit batch size.

    The model comes from ``Janggu.create`` on Array datasets; the training
    data doubles as ``validation_data``. All calls are made with
    ``use_multiprocessing=False``.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    features = Array("x", np.random.random((100, 10)))
    labels = Array('y', np.random.randint(2, size=(100, 1)),
                   conditions=['random'])

    @inputlayer
    @outputdense('sigmoid')
    def _model(inputs, inp, oup, params):
        return inputs, inputs[0]

    model = Janggu.create(_model, inputs=features, outputs=labels,
                          name='nptest')
    model.compile(optimizer='adadelta', loss='binary_crossentropy')

    storage_dir = model._storage_path(model.name, outputdir=tmpdir.strpath)

    # Diagnostic output about where the model is expected to be stored.
    diagnostics = (
        ('storage', storage_dir),
        ('env', os.environ['JANGGU_OUTPUT']),
        ('name', model.name),
        ('outputdir', model.outputdir),
    )
    for label, value in diagnostics:
        print(label, value)

    assert not os.path.exists(storage_dir)

    single_process = {'use_multiprocessing': False}

    model.fit(features, labels, epochs=2,
              validation_data=(features, labels), **single_process)
    assert os.path.exists(storage_dir)

    predictions = model.predict(features, **single_process)
    np.testing.assert_equal(len(predictions[:, np.newaxis]), len(features))
    np.testing.assert_equal(predictions.shape, labels.shape)

    model.evaluate(features, labels, **single_process)
1184
1185
@pytest.mark.filterwarnings("ignore:inspect")
def test_sequence_config():
    """Check JangguSequence length and first-batch shapes.

    Three configurations are exercised, all with batch_size=10 on 100
    samples: built from the ``.data`` attributes of the Arrays, built from
    the Arrays themselves (both with as_dict=False, batches indexed by
    position), and built from the Arrays with as_dict=True (batches indexed
    by the names 'x' and 'y').
    """
    inputs = Array("x", np.random.random((100, 10)))
    outputs = Array('y', np.random.randint(2, size=(100, 1)),
                    conditions=['random'])

    # (sequence inputs, sequence outputs, as_dict flag, input key, output key)
    configurations = (
        (inputs.data, outputs.data, False, 0, 0),
        (inputs, outputs, False, 0, 0),
        (inputs, outputs, True, 'x', 'y'),
    )

    for seq_in, seq_out, as_dict, in_key, out_key in configurations:
        jseq = JangguSequence(seq_in, seq_out, batch_size=10,
                              as_dict=as_dict)
        assert len(jseq) == 10
        # Only the first batch is inspected.
        for x, y, _ in jseq:
            assert x[in_key].shape == (10, 10)
            assert y[out_key].shape == (10, 1)
            break
1219
1220
1221
@pytest.mark.filterwarnings("ignore:inspect")
def test_janggu_train_predict_sequence(tmpdir):
    """Fit, predict and evaluate by passing a JangguSequence.

    The sequence is built from name-keyed dicts of Array datasets with
    batch_size=10 and serves as training data, validation data, prediction
    input and evaluation input. The model comes from ``Janggu.create``.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    x_array = Array("x", np.random.random((100, 10)))
    y_array = Array('y', np.random.randint(2, size=(100, 1)),
                    conditions=['random'])
    inputs = {'x': x_array}
    outputs = {'y': y_array}

    jseq = JangguSequence(inputs, outputs, batch_size=10)

    @inputlayer
    @outputdense('sigmoid')
    def _model(inputs, inp, oup, params):
        return inputs, inputs[0]

    # The model is created from the datasets held by the sequence.
    bwm = Janggu.create(_model,
                        inputs=jseq.inputs['x'],
                        outputs=jseq.outputs['y'],
                        name='nptest')
    bwm.compile(optimizer='adadelta', loss='binary_crossentropy')

    storage = bwm._storage_path(bwm.name, outputdir=tmpdir.strpath)

    # Diagnostic output about where the model is expected to be stored.
    diagnostics = (
        ('storage', storage),
        ('env', os.environ['JANGGU_OUTPUT']),
        ('name', bwm.name),
        ('outputdir', bwm.outputdir),
    )
    for label, value in diagnostics:
        print(label, value)

    assert not os.path.exists(storage)

    single_process = {'use_multiprocessing': False}

    bwm.fit(jseq, epochs=2, validation_data=jseq, **single_process)
    assert os.path.exists(storage)

    pred = bwm.predict(jseq, **single_process)
    np.testing.assert_equal(len(pred[:, np.newaxis]), len(inputs['x']))
    np.testing.assert_equal(pred.shape, outputs['y'].shape)

    bwm.evaluate(jseq, **single_process)