Diff of /tests/test_janggo.py [000000] .. [d7cf27]

Switch to side-by-side view

--- a
+++ b/tests/test_janggo.py
@@ -0,0 +1,1266 @@
+import os
+
+import h5py
+import matplotlib
+import numpy as np
+import pandas as pd
+import pkg_resources
+import pytest
+from keras.layers import Average
+from keras.layers import Concatenate
+from keras.layers import Conv2D
+from keras.layers import Dense
+from keras.layers import Flatten
+from keras.layers import Input
+from keras.layers import Maximum
+from keras.layers import MaxPooling2D
+from keras import Model
+
+from janggu import Janggu
+from janggu import input_attribution
+from janggu import inputlayer
+from janggu import model_from_json
+from janggu import model_from_yaml
+from janggu import outputconv
+from janggu import outputdense
+from janggu import predict_variant_effect
+from janggu.data import Array
+from janggu.data import Bioseq
+from janggu.data import Cover
+from janggu.data import GenomicIndexer
+from janggu.data import ReduceDim
+from janggu.data.data import JangguSequence
+from janggu.layers import Complement
+from janggu.layers import DnaConv2D
+from janggu.layers import LocalAveragePooling2D
+from janggu.layers import Reverse
+
+matplotlib.use('AGG')
+
+
+@pytest.mark.filterwarnings("ignore:inspect")
+def test_localaveragepooling2D(tmpdir):
+    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
+    # some test data
+    testin = np.ones((1, 10, 1, 3))
+    testin[:, :, :, 1] += 1
+    testin[:, :, :, 2] += 2
+
+    # test local average pooling
+    lin = Input((10, 1, 3))
+    out = LocalAveragePooling2D(3)(lin)
+    m = Janggu(lin, out)
+
+    testout = m.predict(testin)
+    np.testing.assert_equal(testout, testin[:, :8, :, :])
+
+    # more tests
+    testin = np.ones((1, 3, 1, 2))
+    testin[:, 0, :, :] = 0
+    testin[:, 2, :, :] = 2
+    testin[:, :, :, 1] += 1
+
+    # test local average pooling
+    lin = Input((3, 1, 2))
+    out = LocalAveragePooling2D(3)(lin)
+    m = Janggu(lin, out)
+
+    testout = m.predict(testin)
+    np.testing.assert_equal(testout.shape, (1, 1, 1, 2))
+    np.testing.assert_equal(testout[0, 0, 0, 0], 1)
+    np.testing.assert_equal(testout[0, 0, 0, 1], 2)
+
+
+@pytest.mark.filterwarnings("ignore:inspect")
+@pytest.mark.filterwarnings("ignore:The truth value")
+def test_janggu_generate_name(tmpdir):
+    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
+
+    def _cnn_model(inputs, inp, oup, params):
+        inputs = Input((10, 1))
+        layer = Flatten()(inputs)
+        output = Dense(params[0])(layer)
+        return inputs, output
+
+    bwm = Janggu.create(_cnn_model, modelparams=(2,))
+    bwm.compile(optimizer='adadelta', loss='binary_crossentropy')
+
+    storage = bwm._storage_path(bwm.name, outputdir=bwm.outputdir)
+
+    bwm.save()
+    bwm.summary()
+
+    assert os.path.exists(storage)
+
+    Janggu.create_by_name(bwm.name)
+
+
+def test_dnaconv():
+    data_path = pkg_resources.resource_filename('janggu', 'resources/')
+    bed_file = os.path.join(data_path, 'sample.bed')
+
+    refgenome = os.path.join(data_path, 'sample_genome.fa')
+
+    dna = Bioseq.create_from_refgenome('dna', refgenome=refgenome,
+                                    storage='ndarray',
+                                    roi=bed_file, order=1)
+
+    xin = Input(dna.shape[1:])
+    l1 = DnaConv2D(Conv2D(30, (21, 1), activation='relu'))(xin)
+    m1 = Model(xin, l1)
+    res1 =m1.predict(dna[0])[0,0,0,:]
+
+    clayer = m1.layers[1].forward_layer
+    # forward only
+    l1 = clayer(xin)
+    m2 = Model(xin, l1)
+    res2 = m2.predict(dna[0])[0,0, 0,:]
+
+    rxin = Reverse()(Complement()(xin))
+    l1 = clayer(rxin)
+    l1 = Reverse()(l1)
+    m3 = Model(xin, l1)
+    res3 = m3.predict(dna[0])[0,0, 0,:]
+
+    res4 = np.maximum(res3,res2)
+    np.testing.assert_allclose(res1, res4, rtol=1e-4)
+
+
+def test_dnaconv2():
+    # this checks if DnaConv2D layer is instantiated correctly if
+    # the conv2d layer has been instantiated beforehand.
+    data_path = pkg_resources.resource_filename('janggu', 'resources/')
+    bed_file = os.path.join(data_path, 'sample.bed')
+
+    refgenome = os.path.join(data_path, 'sample_genome.fa')
+
+    dna = Bioseq.create_from_refgenome('dna', refgenome=refgenome,
+                                    storage='ndarray',
+                                    roi=bed_file, order=1)
+
+    xin = Input(dna.shape[1:])
+    clayer = Conv2D(30, (21, 1), activation='relu')
+
+    clayer(xin)
+
+    l1 = DnaConv2D(clayer)(xin)
+    m1 = Model(xin, l1)
+    res1 =m1.predict(dna[0])[0,0,0,:]
+
+    np.testing.assert_allclose(clayer.get_weights()[0], m1.layers[1].forward_layer.get_weights()[0])
+    assert len(clayer.weights) == 2
+
+
+@pytest.mark.filterwarnings("ignore:The truth value")
+def test_janggu_instance_dense(tmpdir):
+    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
+    """Test Janggu creation by shape and name. """
+    data_path = pkg_resources.resource_filename('janggu', 'resources/')
+    bed_file = os.path.join(data_path, 'sample.bed')
+
+    csvfile = os.path.join(data_path, 'sample.csv')
+
+    refgenome = os.path.join(data_path, 'sample_genome.fa')
+
+    dna = Bioseq.create_from_refgenome('dna', refgenome=refgenome,
+                                    storage='ndarray',
+                                    roi=bed_file, order=1)
+
+    df = pd.read_csv(csvfile, header=None)
+    ctcf = Array('ctcf', df.values, conditions=['peaks'])
+
+    @inputlayer
+    @outputdense('sigmoid')
+    def _cnn_model(inputs, inp, oup, params):
+        layer = inputs['.']
+        layer = Complement()(layer)
+        layer = Reverse()(layer)
+        layer = Flatten()(layer)
+        output = Dense(params[0])(layer)
+        return inputs, output
+
+    with pytest.raises(Exception):
+        # due to No input name . defined
+        bwm = Janggu.create(_cnn_model, modelparams=(2,),
+                            inputs=dna,
+                            outputs=ctcf,
+                            name='dna_ctcf_HepG2-cnn')
+
+    @inputlayer
+    @outputdense('sigmoid')
+    def _cnn_model(inputs, inp, oup, params):
+        layer = inputs[list()]
+        layer = Complement()(layer)
+        layer = Reverse()(layer)
+        layer = Flatten()(layer)
+        output = Dense(params[0])(layer)
+        return inputs, output
+
+    with pytest.raises(Exception):
+        # due to Wrong type for indexing
+        bwm = Janggu.create(_cnn_model, modelparams=(2,),
+                            inputs=dna,
+                            outputs=ctcf,
+                            name='dna_ctcf_HepG2-cnn')
+
+    @inputlayer
+    @outputdense('sigmoid')
+    def _cnn_model(inputs, inp, oup, params):
+        layer = inputs()[0]
+        layer = Complement()(layer)
+        layer = Reverse()(layer)
+        layer = Flatten()(layer)
+        output = Dense(params[0])(layer)
+        return inputs, output
+
+    with pytest.raises(Exception):
+        # name with must be string
+        bwm = Janggu.create(_cnn_model, modelparams=(2,),
+                            inputs=dna,
+                            outputs=ctcf,
+                            name=12342134)
+
+    # test with given model name
+    bwm = Janggu.create(_cnn_model, modelparams=(2,),
+                        inputs=dna,
+                        outputs=ctcf,
+                        name='dna_ctcf_HepG2-cnn')
+    # test with auto. generated modelname.
+    bwm = Janggu.create(_cnn_model, modelparams=(2,),
+                        inputs=dna,
+                        outputs=ctcf,
+                        name='dna_ctcf_HepG2-cnn')
+
+    @inputlayer
+    @outputdense('sigmoid')
+    def _cnn_model(inputs, inp, oup, params):
+        layer = inputs[0]
+        layer = Complement()(layer)
+        layer = Reverse()(layer)
+        layer = Flatten()(layer)
+        output = Dense(params[0])(layer)
+        return inputs, output
+    bwm = Janggu.create(_cnn_model, modelparams=(2,),
+                        inputs=dna,
+                        outputs=ctcf,
+                        name='dna_ctcf_HepG2-cnn')
+
+    @inputlayer
+    @outputdense('sigmoid')
+    def _cnn_model(inputs, inp, oup, params):
+        layer = inputs['dna']
+        layer = Complement()(layer)
+        layer = Reverse()(layer)
+        layer = Flatten()(layer)
+        output = Dense(params[0])(layer)
+        return inputs, output
+    bwm = Janggu.create(_cnn_model, modelparams=(2,),
+                        inputs=dna,
+                        outputs=ctcf,
+                        name='dna_ctcf_HepG2-cnn')
+    kbwm2 = model_from_json(bwm.kerasmodel.to_json())
+    kbwm3 = model_from_yaml(bwm.kerasmodel.to_yaml())
+
+    bwm.compile(optimizer='adadelta', loss='binary_crossentropy')
+    storage = bwm._storage_path(bwm.name, outputdir=tmpdir.strpath)
+
+    bwm.save()
+    bwm.summary()
+
+    assert os.path.exists(storage)
+
+    Janggu.create_by_name('dna_ctcf_HepG2-cnn')
+
+
+@pytest.mark.filterwarnings("ignore:The truth value")
+def test_janggu_influence_genomic(tmpdir):
+    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
+    """Test Janggu creation by shape and name. """
+    data_path = pkg_resources.resource_filename('janggu', 'resources/')
+    bed_file = os.path.join(data_path, 'sample.bed')
+
+    csvfile = os.path.join(data_path, 'sample.csv')
+
+    refgenome = os.path.join(data_path, 'sample_genome.fa')
+
+    dna = Bioseq.create_from_refgenome('dna', refgenome=refgenome,
+                                       storage='ndarray',
+                                       binsize=50,
+                                       roi=bed_file, order=1)
+
+    df = pd.read_csv(csvfile, header=None)
+    ctcf = Array('ctcf', df.values, conditions=['peaks'])
+
+    @inputlayer
+    @outputdense('sigmoid')
+    def _cnn_model(inputs, inp, oup, params):
+        layer = inputs['dna']
+        layer = Flatten()(layer)
+        output = Dense(params[0])(layer)
+        return inputs, output
+    model = Janggu.create(_cnn_model, modelparams=(2,),
+                          inputs=dna,
+                          outputs=ctcf,
+                          name='dna_ctcf_HepG2-cnn')
+
+    model.compile(optimizer='adadelta', loss='binary_crossentropy')
+
+    # check with some simple offset
+    iv = dna.gindexer[0]
+    chrom, start, end = iv.chrom, iv.start, iv.end
+    influence = input_attribution(model, dna, chrom=chrom, start=start, end=end)
+
+    # check with an odd offset
+
+    influence2 = input_attribution(model, dna, chrom=chrom, start=start-1, end=end+1)
+    np.testing.assert_equal(influence[0][:], influence2[0][:][:,1:-1])
+
+    # the same, but now using the index directly
+    influence = input_attribution(model, dna, idx=0)
+
+    # check with an odd offset
+
+    influence2 = input_attribution(model, dna, chrom=chrom, start=start-1, end=end+1)
+    np.testing.assert_equal(influence[0][:], influence2[0][:][:,1:-1])
+
+
+@pytest.mark.filterwarnings("ignore:The truth value")
+def test_janggu_influence_fasta(tmpdir):
+
+    data_path = pkg_resources.resource_filename('janggu', 'resources/')
+
+    order = 1
+    filename = os.path.join(data_path, 'sample.fa')
+
+    data = Bioseq.create_from_seq('dna', fastafile=filename,
+                                 order=order, cache=False)
+
+    dna = data
+
+    @inputlayer
+    def _cnn_model(inputs, inp, oup, params):
+        layer = inputs['dna']
+        layer = Flatten()(layer)
+        output = Dense(params[0])(layer)
+        output = Dense(1, activation='sigmoid')(output)
+        return inputs, output
+
+    model = Janggu.create(_cnn_model, modelparams=(2,),
+                          inputs=data,
+                          name='dna_ctcf_HepG2-cnn')
+
+    #model.compile(optimizer='adadelta', loss='binary_crossentropy')
+
+    # check with some nice offset
+    iv = dna.gindexer[0]
+    chrom, start, end = iv.chrom, iv.start, iv.end
+    influence = input_attribution(model, dna, chrom=chrom, start=start, end=end)
+
+    influence2 = input_attribution(model, dna, idx=0)
+    np.testing.assert_equal(influence[0][:], influence2[0][:])
+
+
+@pytest.mark.filterwarnings("ignore:The truth value")
+def test_janggu_variant_prediction(tmpdir):
+    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
+    """Test Janggu creation by shape and name. """
+    data_path = pkg_resources.resource_filename('janggu', 'resources/')
+
+    for order in [1, 2, 3]:
+        refgenome = os.path.join(data_path, 'sample_genome.fa')
+        vcffile = os.path.join(data_path, 'sample.vcf')
+
+        dna = Bioseq.create_from_refgenome('dna', refgenome=refgenome,
+                                           storage='ndarray',
+                                           binsize=50,
+                                           store_whole_genome=True,
+                                           order=order)
+
+        def _cnn_model(inputs, inp, oup, params):
+            inputs = Input((50 - params['order'] + 1, 1, pow(4, params['order'])))
+            layer = Flatten()(inputs)
+            layer = Dense(params['hiddenunits'])(layer)
+            output = Dense(4, activation='sigmoid')(layer)
+            return inputs, output
+
+        model = Janggu.create(_cnn_model, modelparams={'hiddenunits':2, 'order':order},
+                              name='dna_ctcf_HepG2-cnn')
+
+        model.predict_variant_effect(dna, vcffile, conditions=['m'+str(i) for i in range(4)],
+                                     output_folder=os.path.join(os.environ['JANGGU_OUTPUT']))
+        assert os.path.exists(os.path.join(os.environ['JANGGU_OUTPUT'], 'scores.hdf5'))
+        assert os.path.exists(os.path.join(os.environ['JANGGU_OUTPUT'], 'snps.bed.gz'))
+
+        f = h5py.File(os.path.join(os.environ['JANGGU_OUTPUT'], 'scores.hdf5'), 'r')
+
+        gindexer = GenomicIndexer.create_from_file(os.path.join(os.environ['JANGGU_OUTPUT'],
+                                                                'snps.bed.gz'), None, None)
+
+        cov = Cover.create_from_array('snps', f['diffscore'],
+                                      gindexer,
+                                      store_whole_genome=True)
+
+        print(cov['chr2', 55, 65].shape)
+        print(cov['chr2', 55, 65])
+
+        assert np.abs(cov['chr2', 59, 60]).sum() > 0.0
+        assert np.abs(cov['chr2', 54, 55]).sum() == 0.0
+        f.close()
+
+
+@pytest.mark.filterwarnings("ignore:The truth value")
+def test_janggu_variant_prediction_from_refgenome(tmpdir):
+    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
+    """Test Janggu creation by shape and name. """
+    data_path = pkg_resources.resource_filename('janggu', 'resources/')
+
+    for order in [1, 2, 3]:
+        refgenome = os.path.join(data_path, 'sample_genome.fa')
+        vcffile = os.path.join(data_path, 'sample.vcf')
+
+        def _cnn_model(inputs, inp, oup, params):
+            inputs = Input((50 - params['order'] + 1, 1, pow(4, params['order'])))
+            layer = Flatten()(inputs)
+            layer = Dense(params['hiddenunits'])(layer)
+            output = Dense(4, activation='sigmoid')(layer)
+            return inputs, output
+
+        model = Janggu.create(_cnn_model, modelparams={'hiddenunits':2, 'order':order},
+                              name='dna_ctcf_HepG2-cnn')
+
+        predict_variant_effect(model.kerasmodel,
+                               refgenome,
+                               vcffile,
+                               conditions=['m'+str(i) for i in range(4)],
+                               output_folder=os.path.join(os.environ['JANGGU_OUTPUT']),
+                               order=order)
+        assert os.path.exists(os.path.join(os.environ['JANGGU_OUTPUT'], 'scores.hdf5'))
+        assert os.path.exists(os.path.join(os.environ['JANGGU_OUTPUT'], 'snps.bed.gz'))
+
+        f = h5py.File(os.path.join(os.environ['JANGGU_OUTPUT'], 'scores.hdf5'), 'r')
+
+        gindexer = GenomicIndexer.create_from_file(os.path.join(os.environ['JANGGU_OUTPUT'],
+                                                                'snps.bed.gz'), None, None)
+
+        cov = Cover.create_from_array('snps', f['diffscore'],
+                                      gindexer,
+                                      store_whole_genome=True)
+
+        print(cov['chr2', 55, 65].shape)
+        print(cov['chr2', 55, 65])
+
+        assert np.abs(cov['chr2', 59, 60]).sum() > 0.0
+        assert np.abs(cov['chr2', 54, 55]).sum() == 0.0
+        f.close()
+
+
+@pytest.mark.filterwarnings("ignore:The truth value")
+def test_janggu_instance_conv(tmpdir):
+    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
+    """Test Janggu creation by shape and name. """
+    data_path = pkg_resources.resource_filename('janggu', 'resources/')
+    bed_file = os.path.join(data_path, 'sample.bed')
+
+    posfile = os.path.join(data_path, 'scored_sample.bed')
+
+    refgenome = os.path.join(data_path, 'sample_genome.fa')
+
+    dna = Bioseq.create_from_refgenome('dna', refgenome=refgenome,
+                                       storage='ndarray',
+                                       roi=bed_file, order=1,
+                                       binsize=200,
+                                       stepsize=50)
+
+    ctcf = Cover.create_from_bed(
+        "positives",
+        bedfiles=posfile,
+        roi=bed_file,
+        binsize=200, stepsize=50,
+        resolution=50,
+        store_whole_genome=False,
+        flank=0,
+        collapser=None,
+        storage='ndarray')
+
+    ctcf = Cover.create_from_bed(
+        "positives",
+        bedfiles=posfile,
+        roi=bed_file,
+        binsize=200, stepsize=50,
+        resolution=50,
+        store_whole_genome=True,
+        flank=0,
+        collapser=None,
+        storage='ndarray')
+
+    @inputlayer
+    @outputconv('sigmoid')
+    def _cnn_model(inputs, inp, oup, params):
+        with inputs.use('dna') as inlayer:
+            layer = inlayer
+        layer = Complement()(layer)
+        layer = Reverse()(layer)
+        return inputs, layer
+
+    bwm = Janggu.create(_cnn_model, modelparams=(2,),
+                        inputs=dna,
+                        outputs=ctcf,
+                        name='dna_ctcf_HepG2-cnn')
+
+    bwm.compile(optimizer='adadelta', loss='binary_crossentropy')
+    storage = bwm._storage_path(bwm.name, outputdir=tmpdir.strpath)
+
+    bwm.save()
+    bwm.summary()
+
+    assert os.path.exists(storage)
+
+    Janggu.create_by_name('dna_ctcf_HepG2-cnn')
+
+
+@pytest.mark.filterwarnings("ignore:The truth value")
+def test_janggu_use_dnaconv_none(tmpdir):
+    os.environ['JANGGU_OUTPUT']=tmpdir.strpath
+
+    data_path = pkg_resources.resource_filename('janggu', 'resources/')
+    bed_file = os.path.join(data_path, 'sample.bed')
+
+    posfile = os.path.join(data_path, 'scored_sample.bed')
+
+    refgenome = os.path.join(data_path, 'sample_genome.fa')
+
+    dna = Bioseq.create_from_refgenome('dna', refgenome=refgenome,
+                                    storage='ndarray',
+                                    roi=bed_file, order=1)
+
+    @inputlayer
+    def _cnn_model1(inputs, inp, oup, params):
+        with inputs.use('dna') as inlayer:
+            layer = inlayer
+            layer = DnaConv2D(Conv2D(5, (3, 1), name='fconv1'),
+                              merge_mode=None, name='bothstrands')(layer)
+        return inputs, layer
+
+    bwm1 = Janggu.create(_cnn_model1, modelparams=(2,),
+                        inputs=dna,
+                        name='dna_ctcf_HepG2-cnn1')
+
+    p1 = bwm1.predict(dna[1:2])
+    w = bwm1.kerasmodel.get_layer('bothstrands').get_weights()
+
+    @inputlayer
+    def _cnn_model2(inputs, inp, oup, params):
+        with inputs.use('dna') as inlayer:
+            layer = inlayer
+            conv = Conv2D(5, (3, 1), name='singlestrand')
+            fl = conv(layer)
+            rl = Reverse()(conv(Complement()(Reverse()(inlayer))))
+        return inputs, [fl, rl]
+
+    bwm2 = Janggu.create(_cnn_model2, modelparams=(2,),
+                        inputs=dna,
+                        name='dna_ctcf_HepG2-cnn2')
+
+    bwm2.kerasmodel.get_layer('singlestrand').set_weights(w)
+
+    p2 = bwm2.predict(dna[1:2])
+    np.testing.assert_allclose(p1, p2, rtol=1e-4, atol=1e-3)
+
+    bwm1.compile(optimizer='adadelta', loss='binary_crossentropy')
+    storage = bwm1._storage_path(bwm1.name, outputdir=tmpdir.strpath)
+
+    bwm1.save()
+    bwm1.summary()
+
+    assert os.path.exists(storage)
+
+    Janggu.create_by_name('dna_ctcf_HepG2-cnn1')
+
+
+@pytest.mark.filterwarnings("ignore:The truth value")
+def test_janggu_use_dnaconv_concat(tmpdir):
+    os.environ['JANGGU_OUTPUT']=tmpdir.strpath
+
+    data_path = pkg_resources.resource_filename('janggu', 'resources/')
+    bed_file = os.path.join(data_path, 'sample.bed')
+
+    posfile = os.path.join(data_path, 'positive.bed')
+
+    refgenome = os.path.join(data_path, 'sample_genome.fa')
+
+    dna = Bioseq.create_from_refgenome('dna', refgenome=refgenome,
+                                    storage='ndarray',
+                                    roi=bed_file, order=1)
+
+    @inputlayer
+    def _cnn_model1(inputs, inp, oup, params):
+        with inputs.use('dna') as inlayer:
+            layer = inlayer
+            layer = DnaConv2D(Conv2D(5, (3, 1), name='fconv1'),
+                              merge_mode='concat', name='bothstrands')(layer)
+        return inputs, layer
+
+    bwm1 = Janggu.create(_cnn_model1, modelparams=(2,),
+                        inputs=dna,
+                        name='dna_ctcf_HepG2-cnn1')
+
+    p1 = bwm1.predict(dna[1:2])
+    w = bwm1.kerasmodel.get_layer('bothstrands').get_weights()
+
+    @inputlayer
+    def _cnn_model2(inputs, inp, oup, params):
+        with inputs.use('dna') as inlayer:
+            layer = inlayer
+            conv = Conv2D(5, (3, 1), name='singlestrand')
+            fl = conv(layer)
+            rl = Reverse()(conv(Complement()(Reverse()(inlayer))))
+            layer = Concatenate()([fl, rl])
+        return inputs, layer
+
+    bwm2 = Janggu.create(_cnn_model2, modelparams=(2,),
+                        inputs=dna,
+                        name='dna_ctcf_HepG2-cnn2')
+
+    bwm2.kerasmodel.get_layer('singlestrand').set_weights(w)
+
+    p2 = bwm2.predict(dna[1:2])
+    np.testing.assert_allclose(p1, p2, rtol=1e-4, atol=1e-3)
+
+    bwm1.compile(optimizer='adadelta', loss='binary_crossentropy')
+    storage = bwm1._storage_path(bwm1.name, outputdir=tmpdir.strpath)
+
+    bwm1.save()
+    bwm1.summary()
+
+    assert os.path.exists(storage)
+
+    Janggu.create_by_name('dna_ctcf_HepG2-cnn1')
+
+
+@pytest.mark.filterwarnings("ignore:The truth value")
+def test_janggu_use_dnaconv_ave(tmpdir):
+    os.environ['JANGGU_OUTPUT']=tmpdir.strpath
+
+    data_path = pkg_resources.resource_filename('janggu', 'resources/')
+    bed_file = os.path.join(data_path, 'sample.bed')
+
+    posfile = os.path.join(data_path, 'positive.bed')
+
+    refgenome = os.path.join(data_path, 'sample_genome.fa')
+
+    dna = Bioseq.create_from_refgenome('dna', refgenome=refgenome,
+                                    storage='ndarray',
+                                    roi=bed_file, order=1)
+
+    @inputlayer
+    def _cnn_model1(inputs, inp, oup, params):
+        with inputs.use('dna') as inlayer:
+            layer = inlayer
+            layer = DnaConv2D(Conv2D(5, (3, 1), name='fconv1'),
+                              merge_mode='ave', name='bothstrands')(layer)
+        return inputs, layer
+
+    bwm1 = Janggu.create(_cnn_model1, modelparams=(2,),
+                        inputs=dna,
+                        name='dna_ctcf_HepG2-cnn1')
+
+    p1 = bwm1.predict(dna[1:2])
+    w = bwm1.kerasmodel.get_layer('bothstrands').get_weights()
+
+    @inputlayer
+    def _cnn_model2(inputs, inp, oup, params):
+        with inputs.use('dna') as inlayer:
+            layer = inlayer
+            conv = Conv2D(5, (3, 1), name='singlestrand')
+            fl = conv(layer)
+            rl = Reverse()(conv(Complement()(Reverse()(inlayer))))
+            layer = Average()([fl, rl])
+        return inputs, layer
+
+    bwm2 = Janggu.create(_cnn_model2, modelparams=(2,),
+                        inputs=dna,
+                        name='dna_ctcf_HepG2-cnn2')
+
+    bwm2.kerasmodel.get_layer('singlestrand').set_weights(w)
+
+    p2 = bwm2.predict(dna[1:2])
+    np.testing.assert_allclose(p1, p2, rtol=1e-4, atol=1e-3)
+
+    bwm1.compile(optimizer='adadelta', loss='binary_crossentropy')
+    storage = bwm1._storage_path(bwm1.name, outputdir=tmpdir.strpath)
+
+    bwm1.save()
+    bwm1.summary()
+
+    assert os.path.exists(storage)
+
+    Janggu.create_by_name('dna_ctcf_HepG2-cnn1')
+
+
+@pytest.mark.filterwarnings("ignore:The truth value")
+def test_janggu_use_dnaconv_max(tmpdir):
+    os.environ['JANGGU_OUTPUT']=tmpdir.strpath
+
+    data_path = pkg_resources.resource_filename('janggu', 'resources/')
+    bed_file = os.path.join(data_path, 'sample.bed')
+
+    posfile = os.path.join(data_path, 'positive.bed')
+
+    refgenome = os.path.join(data_path, 'sample_genome.fa')
+
+    dna = Bioseq.create_from_refgenome('dna', refgenome=refgenome,
+                                    storage='ndarray',
+                                    roi=bed_file, order=1)
+
+    @inputlayer
+    def _cnn_model1(inputs, inp, oup, params):
+        with inputs.use('dna') as inlayer:
+            layer = inlayer
+            layer = DnaConv2D(Conv2D(5, (3, 1), name='fconv1'),
+                              merge_mode='max', name='bothstrands')(layer)
+        return inputs, layer
+
+    bwm1 = Janggu.create(_cnn_model1, modelparams=(2,),
+                        inputs=dna,
+                        name='dna_ctcf_HepG2-cnn1')
+
+    p1 = bwm1.predict(dna[1:2])
+    w = bwm1.kerasmodel.get_layer('bothstrands').get_weights()
+
+    @inputlayer
+    def _cnn_model2(inputs, inp, oup, params):
+        with inputs.use('dna') as inlayer:
+            layer = inlayer
+            conv = Conv2D(5, (3, 1), name='singlestrand')
+            fl = conv(layer)
+            rl = Reverse()(conv(Complement()(Reverse()(inlayer))))
+            layer = Maximum()([fl, rl])
+        return inputs, layer
+
+    bwm2 = Janggu.create(_cnn_model2, modelparams=(2,),
+                        inputs=dna,
+                        name='dna_ctcf_HepG2-cnn2')
+
+    bwm2.kerasmodel.get_layer('singlestrand').set_weights(w)
+
+    p2 = bwm2.predict(dna[1:2])
+    np.testing.assert_allclose(p1, p2, rtol=1e-4, atol=1e-3)
+
+    bwm1.compile(optimizer='adadelta', loss='binary_crossentropy')
+    storage = bwm1._storage_path(bwm1.name, outputdir=tmpdir.strpath)
+
+    bwm1.save()
+    bwm1.summary()
+
+    assert os.path.exists(storage)
+
+    Janggu.create_by_name('dna_ctcf_HepG2-cnn1')
+
+
+
+@pytest.mark.filterwarnings("ignore:inspect")
+def test_janggu_chr2_validation(tmpdir):
+    os.environ['JANGGU_OUTPUT']=tmpdir.strpath
+
+    data_path = pkg_resources.resource_filename('janggu', 'resources/')
+    bed_file = os.path.join(data_path, 'sample.bed')
+
+    posfile = os.path.join(data_path, 'scored_sample.bed')
+
+    refgenome = os.path.join(data_path, 'sample_genome.fa')
+
+    dna = Bioseq.create_from_refgenome('dna', refgenome=refgenome,
+                                    binsize=200, stepsize=50,
+                                    roi=bed_file, order=1)
+
+    ctcf = Cover.create_from_bed(
+        "positives",
+        bedfiles=posfile,
+        roi=bed_file,
+        binsize=200, stepsize=50,
+        resolution=None,
+        flank=0,
+        collapser='max',
+        storage='ndarray')
+
+    @inputlayer
+    @outputconv('sigmoid')
+    def _cnn_model1(inputs, inp, oup, params):
+        with inputs.use('dna') as inlayer:
+            layer = inlayer
+            layer = DnaConv2D(Conv2D(5, (3, 1), name='fconv1'),
+                              merge_mode='max', name='bothstrands')(layer)
+            layer = MaxPooling2D((198, 1))(layer)
+        return inputs, layer
+
+    bwm1 = Janggu.create(_cnn_model1, modelparams=(2,),
+                        inputs=dna, outputs=ctcf,
+                        name='dna_ctcf_HepG2-cnn1')
+
+    bwm1.compile(optimizer='adadelta', loss='binary_crossentropy')
+    p1 = bwm1.fit(dna, ctcf, validation_data=['chr2'])
+
+
+@pytest.mark.filterwarnings("ignore:inspect")
+def test_janggu_bedfile_validation(tmpdir):
+    os.environ['JANGGU_OUTPUT']=tmpdir.strpath
+
+    data_path = pkg_resources.resource_filename('janggu', 'resources/')
+    bed_file = os.path.join(data_path, 'sample.bed')
+    posfile = os.path.join(data_path, 'scored_sample.bed')
+    refgenome = os.path.join(data_path, 'sample_genome.fa')
+
+    dna = Bioseq.create_from_refgenome('dna', refgenome=refgenome,
+                                       binsize=200, stepsize=50,
+                                       roi=bed_file, order=1)
+
+    ctcf = Cover.create_from_bed(
+        "positives",
+        bedfiles=posfile,
+        roi=bed_file,
+        binsize=200, stepsize=50,
+        resolution=None,
+        flank=0,
+        collapser='max',
+        storage='ndarray')
+
+    @inputlayer
+    @outputconv('sigmoid')
+    def _cnn_model1(inputs, inp, oup, params):
+        with inputs.use('dna') as inlayer:
+            layer = inlayer
+            layer = DnaConv2D(Conv2D(5, (3, 1), name='fconv1'),
+                              merge_mode='max', name='bothstrands')(layer)
+            layer = MaxPooling2D((198, 1))(layer)
+        return inputs, layer
+
+    bwm1 = Janggu.create(_cnn_model1, modelparams=(2,),
+                        inputs=dna, outputs=ctcf,
+                        name='dna_ctcf_HepG2-cnn1')
+
+    bwm1.compile(optimizer='adadelta', loss='binary_crossentropy')
+    p1 = bwm1.fit(dna, ctcf, validation_data=posfile)
+
+
+@pytest.mark.filterwarnings("ignore:inspect")
+def test_janggu_train_predict_option0(tmpdir):
+    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
+    """Train, predict and evaluate on dummy data.
+
+    create: by_shape
+    Input args: Dataset
+    """
+
+    inputs = Array("X", np.random.random((100, 10)))
+    outputs = ReduceDim(Array('y', np.random.randint(2, size=(100, 1))[:,None],
+                    conditions=['random']), axis=(1,))
+
+    @inputlayer
+    @outputdense('sigmoid')
+    def test_model(inputs, inp, oup, params):
+        return inputs, inputs[0]
+
+    bwm = Janggu.create(test_model,
+                        inputs=inputs,
+                        outputs=outputs,
+                        name='nptest')
+
+    bwm.compile(optimizer='adadelta', loss='binary_crossentropy')
+
+    storage = bwm._storage_path(bwm.name, outputdir=tmpdir.strpath)
+    assert not os.path.exists(storage)
+
+    bwm.fit(inputs, outputs, epochs=2, batch_size=32)
+
+    assert os.path.exists(storage)
+
+    pred = bwm.predict(inputs)
+    np.testing.assert_equal(len(pred[:, np.newaxis]), len(inputs))
+    np.testing.assert_equal(pred.shape, outputs.shape)
+
+    # test if the condition name is correctly used in the output table
+    bwm.evaluate(inputs, outputs, callbacks=['auc'])
+
+    outputauc = os.path.join(tmpdir.strpath, 'evaluation', 'nptest', 'auc.tsv')
+    assert os.path.exists(outputauc)
+    assert pd.read_csv(outputauc).columns[0] == 'random'
+
+
+@pytest.mark.filterwarnings("ignore:inspect")
+def test_janggu_train_predict_option1(tmpdir):
+    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
+    """Train, predict and evaluate on dummy data.
+
+    create: by_shape
+    Input args: Dataset
+    """
+
+    inputs = Array("X", np.random.random((100, 10)))
+    outputs = Array('y', np.random.randint(2, size=(100, 1)),
+                    conditions=['random'])
+
+    @inputlayer
+    @outputdense('sigmoid')
+    def test_model(inputs, inp, oup, params):
+        return inputs, inputs[0]
+
+    bwm = Janggu.create(test_model,
+                        inputs=inputs,
+                        outputs=outputs,
+                        name='nptest')
+
+    bwm.compile(optimizer='adadelta', loss='binary_crossentropy')
+
+    storage = bwm._storage_path(bwm.name, outputdir=tmpdir.strpath)
+    assert not os.path.exists(storage)
+
+    bwm.fit(inputs, outputs, epochs=2, batch_size=32)
+
+    assert os.path.exists(storage)
+
+    pred = bwm.predict(inputs)
+    np.testing.assert_equal(len(pred[:, np.newaxis]), len(inputs))
+    np.testing.assert_equal(pred.shape, outputs.shape)
+
+    # test if the condition name is correctly used in the output table
+    bwm.evaluate(inputs, outputs, callbacks=['auc'])
+
+    outputauc = os.path.join(tmpdir.strpath, 'evaluation', 'nptest', 'auc.tsv')
+    assert os.path.exists(outputauc)
+    assert pd.read_csv(outputauc).columns[0] == 'random'
+
+
+@pytest.mark.filterwarnings("ignore:inspect")
+def test_janggu_train_predict_option2(tmpdir):
+    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
+    """Train, predict and evaluate on dummy data.
+
+    create: NO
+    Input args: list(Dataset)
+    """
+
+    inputs = Array("x", np.random.random((100, 10)))
+    outputs = Array('y', np.random.randint(2, size=(100, 1)),
+                    conditions=['random'])
+
+    def _model():
+        inputs = Input((10,), name='x')
+        output = Dense(1, activation='sigmoid', name='y')(inputs)
+        model = Janggu(inputs=inputs, outputs=output, name='test')
+        model.compile(optimizer='adadelta', loss='binary_crossentropy',
+                      metrics=['accuracy'])
+        return model
+
+    bwm = _model()
+
+    storage = bwm._storage_path(bwm.name, outputdir=tmpdir.strpath)
+    assert not os.path.exists(storage)
+
+    bwm.fit([inputs], [outputs], epochs=2, batch_size=32)
+
+    assert os.path.exists(storage)
+
+    pred = bwm.predict([inputs])
+    np.testing.assert_equal(len(pred[:, np.newaxis]), len(inputs))
+    np.testing.assert_equal(pred.shape, outputs.shape)
+    bwm.evaluate([inputs], [outputs])
+
+
+@pytest.mark.filterwarnings("ignore:inspect")
+def test_janggu_train_predict_option3(tmpdir):
+    """Train, predict and evaluate on dummy data.
+
+    Only works without generators and without evaluators.
+
+    create: NO
+    Input args: list(np.array)
+    """
+
+    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
+
+    inputs = np.random.random((100, 10))
+    outputs = np.random.randint(2, size=(100, 1))
+
+    def _model():
+        inputs = Input((10,), name='x')
+        output = Dense(1, activation='sigmoid')(inputs)
+        model = Janggu(inputs=inputs, outputs=output, name='test')
+        model.compile(optimizer='adadelta', loss='binary_crossentropy',
+                      metrics=['accuracy'])
+        return model
+
+    bwm = _model()
+
+    storage = bwm._storage_path(bwm.name, outputdir=tmpdir.strpath)
+    assert not os.path.exists(storage)
+
+    bwm.fit([inputs], [outputs], epochs=2, batch_size=32)
+
+    bwm.fit([inputs], [outputs], epochs=2, batch_size=32)
+    assert os.path.exists(storage)
+
+    pred = bwm.predict([inputs])
+
+    bwm.predict([inputs], batch_size=32)
+    np.testing.assert_equal(len(pred[:, np.newaxis]), len(inputs))
+    np.testing.assert_equal(pred.shape, outputs.shape)
+    bwm.evaluate([inputs], [outputs])
+
+    bwm.evaluate([inputs], [outputs], batch_size=32)
+
+
+@pytest.mark.filterwarnings("ignore:inspect")
+def test_janggu_train_predict_option4(tmpdir):
+    """Train, predict and evaluate on dummy data.
+
+    Only works without generators and without evaluators.
+
+    create: NO
+    Input args: np.array
+    """
+    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
+
+    inputs = np.random.random((100, 10))
+    outputs = np.random.randint(2, size=(100, 1))
+
+    def _model(path):
+        inputs = Input((10,), name='x')
+        output = Dense(1, activation='sigmoid')(inputs)
+        model = Janggu(inputs=inputs, outputs=output, name='test')
+        model.compile(optimizer='adadelta', loss='binary_crossentropy',
+                      metrics=['accuracy'])
+        return model
+
+    bwm = _model(tmpdir.strpath)
+
+    storage = bwm._storage_path(bwm.name, outputdir=tmpdir.strpath)
+    assert not os.path.exists(storage)
+
+    bwm.fit(inputs, outputs, epochs=2, batch_size=32)
+
+    # This used to not work with normal numpy arrays,
+    # but now the numpy arrays are matched automatically
+    # with the layer names.
+    bwm.fit(inputs, outputs, epochs=2, batch_size=32)
+
+    assert os.path.exists(storage)
+
+    pred = bwm.predict(inputs)
+
+    bwm.predict(inputs, batch_size=32)
+    np.testing.assert_equal(len(pred[:, np.newaxis]), len(inputs))
+    np.testing.assert_equal(pred.shape, outputs.shape)
+    bwm.evaluate(inputs, outputs)
+
+    bwm.evaluate(inputs, outputs, batch_size=32)
+
+
+@pytest.mark.filterwarnings("ignore:inspect")
+def test_janggu_train_predict_option5(tmpdir):
+    """Train, predict and evaluate on dummy data.
+
+    create: NO
+    Input args: list(Dataset)
+    """
+
+    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
+    inputs = Array("x", np.random.random((100, 10)))
+    outputs = Array('y', np.random.randint(2, size=(100, 1)),
+                    conditions=['random'])
+
+    def _model():
+        inputs = Input((10,), name='x')
+        output = Dense(1, name='y', activation='sigmoid')(inputs)
+        model = Janggu(inputs=inputs, outputs=output, name='test_model')
+        model.compile(optimizer='adadelta', loss='binary_crossentropy',
+                      metrics=['accuracy'])
+        return model
+
+    bwm = _model()
+
+    storage = bwm._storage_path(bwm.name, outputdir=tmpdir.strpath)
+    assert not os.path.exists(storage)
+
+    bwm.fit([inputs], [outputs], epochs=2, batch_size=32,
+            use_multiprocessing=False)
+
+    assert os.path.exists(storage)
+
+    pred = bwm.predict([inputs],
+                       use_multiprocessing=False)
+    np.testing.assert_equal(len(pred[:, np.newaxis]), len(inputs))
+    np.testing.assert_equal(pred.shape, outputs.shape)
+    bwm.evaluate([inputs], [outputs],
+                 use_multiprocessing=False)
+
+
+@pytest.mark.filterwarnings("ignore:inspect")
+def test_janggu_train_predict_option6(tmpdir):
+    """Train, predict and evaluate on dummy data.
+
+    create: YES
+    Input args: Dataset
+    """
+    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
+
+    inputs = Array("x", np.random.random((100, 10)))
+    outputs = Array('y', np.random.randint(2, size=(100, 1)),
+                    conditions=['random'])
+
+    @inputlayer
+    @outputdense('sigmoid')
+    def _model(inputs, inp, oup, params):
+        return inputs, inputs[0]
+
+    bwm = Janggu.create(_model,
+                        inputs=inputs,
+                        outputs=outputs,
+                        name='nptest')
+
+    bwm.compile(optimizer='adadelta', loss='binary_crossentropy')
+
+    storage = bwm._storage_path(bwm.name, outputdir=tmpdir.strpath)
+    assert not os.path.exists(storage)
+
+    bwm.fit(inputs, outputs, epochs=2, batch_size=32,
+            use_multiprocessing=False)
+
+    assert os.path.exists(storage)
+
+    pred = bwm.predict(inputs,
+                       use_multiprocessing=False)
+    np.testing.assert_equal(len(pred[:, np.newaxis]), len(inputs))
+    np.testing.assert_equal(pred.shape, outputs.shape)
+    bwm.evaluate(inputs, outputs,
+                 use_multiprocessing=False)
+
+
+@pytest.mark.filterwarnings("ignore:inspect")
+def test_janggu_train_predict_option7(tmpdir):
+    """Train, predict and evaluate on dummy data.
+
+    create: YES
+    Input args: Dataset
+    validation_set: YES
+    batch_size: None
+    """
+    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
+
+    inputs = Array("x", np.random.random((100, 10)))
+    outputs = Array('y', np.random.randint(2, size=(100, 1)),
+                    conditions=['random'])
+
+    @inputlayer
+    @outputdense('sigmoid')
+    def _model(inputs, inp, oup, params):
+        return inputs, inputs[0]
+
+    bwm = Janggu.create(_model,
+                        inputs=inputs,
+                        outputs=outputs,
+                        name='nptest')
+
+    bwm.compile(optimizer='adadelta', loss='binary_crossentropy')
+
+    storage = bwm._storage_path(bwm.name, outputdir=tmpdir.strpath)
+    print('storage', storage)
+    print('env', os.environ['JANGGU_OUTPUT'])
+    print('name', bwm.name)
+    print('outputdir', bwm.outputdir)
+    assert not os.path.exists(storage)
+
+    bwm.fit(inputs, outputs, epochs=2,
+            validation_data=(inputs, outputs),
+            use_multiprocessing=False)
+
+    assert os.path.exists(storage)
+
+    pred = bwm.predict(inputs,
+                       use_multiprocessing=False)
+    np.testing.assert_equal(len(pred[:, np.newaxis]), len(inputs))
+    np.testing.assert_equal(pred.shape, outputs.shape)
+    bwm.evaluate(inputs, outputs,
+                 use_multiprocessing=False)
+
+@pytest.mark.filterwarnings("ignore:inspect")
+def test_sequence_config():
+    """Train, predict and evaluate on dummy data.
+
+    create: YES
+    Input args: Dataset
+    validation_set: YES
+    batch_size: None
+    """
+
+    inputs = Array("x", np.random.random((100, 10)))
+    outputs = Array('y', np.random.randint(2, size=(100, 1)),
+                    conditions=['random'])
+
+    jseq = JangguSequence(inputs.data, outputs.data, batch_size=10, as_dict=False)
+    assert len(jseq) == 10
+    for x, y, _ in jseq:
+        assert x[0].shape == (10, 10)
+        assert y[0].shape == (10, 1)
+        break
+
+    jseq = JangguSequence(inputs, outputs, batch_size=10, as_dict=False)
+    assert len(jseq) == 10
+    for x, y, _ in jseq:
+        assert x[0].shape == (10, 10)
+        assert y[0].shape == (10, 1)
+        break
+
+    jseq = JangguSequence(inputs, outputs, batch_size=10, as_dict=True)
+    assert len(jseq) == 10
+    for x, y, _ in jseq:
+        assert x['x'].shape == (10, 10)
+        assert y['y'].shape == (10, 1)
+        break
+
+
+@pytest.mark.filterwarnings("ignore:inspect")
+def test_janggu_train_predict_sequence(tmpdir):
+    """Train, predict and evaluate on dummy data.
+
+    create: YES
+    Input args: Dataset
+    validation_set: YES
+    batch_size: None
+    """
+    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
+
+    inputs = {'x': Array("x", np.random.random((100, 10)))}
+    outputs = {'y': Array('y', np.random.randint(2, size=(100, 1)),
+                    conditions=['random'])}
+
+    jseq = JangguSequence(inputs, outputs, batch_size=10)
+
+    @inputlayer
+    @outputdense('sigmoid')
+    def _model(inputs, inp, oup, params):
+        return inputs, inputs[0]
+
+    bwm = Janggu.create(_model,
+                        inputs=jseq.inputs['x'],
+                        outputs=jseq.outputs['y'],
+                        name='nptest')
+
+    bwm.compile(optimizer='adadelta', loss='binary_crossentropy')
+
+    storage = bwm._storage_path(bwm.name, outputdir=tmpdir.strpath)
+    print('storage', storage)
+    print('env', os.environ['JANGGU_OUTPUT'])
+    print('name', bwm.name)
+    print('outputdir', bwm.outputdir)
+    assert not os.path.exists(storage)
+
+    bwm.fit(jseq, epochs=2,
+            validation_data=jseq,
+            use_multiprocessing=False)
+
+    assert os.path.exists(storage)
+
+    pred = bwm.predict(jseq, use_multiprocessing=False)
+    np.testing.assert_equal(len(pred[:, np.newaxis]), len(inputs['x']))
+    np.testing.assert_equal(pred.shape, outputs['y'].shape)
+    bwm.evaluate(jseq, use_multiprocessing=False)