--- a
+++ b/src/examples/classify_bedregions_hdf5.py
@@ -0,0 +1,159 @@
+import argparse
+import os
+
+import numpy as np
+from keras import backend as K
+from keras.layers import Conv2D
+from keras.layers import GlobalAveragePooling2D
+from keras.layers import Maximum
+from pkg_resources import resource_filename
+from sklearn.metrics import average_precision_score
+from sklearn.metrics import precision_recall_curve
+from sklearn.metrics import roc_auc_score
+from sklearn.metrics import roc_curve
+
+from janggu import Janggu
+from janggu import Scorer
+from janggu import inputlayer
+from janggu import outputdense
+from janggu.data import Bioseq
+from janggu.data import Cover
+from janggu.data import ReduceDim
+from janggu.layers import Complement
+from janggu.layers import DnaConv2D
+from janggu.layers import Reverse
+from janggu.utils import ExportClustermap
+from janggu.utils import ExportJson
+from janggu.utils import ExportScorePlot
+from janggu.utils import ExportTsne
+from janggu.utils import ExportTsv
+
+np.random.seed(1234)
+
+
+# Fetch parser arguments
+PARSER = argparse.ArgumentParser(description='Command description.')
+PARSER.add_argument('-path', dest='path',
+                    default='tf_results',
+                    help="Output directory for the examples.")
+PARSER.add_argument('-order', dest='order', type=int,
+                    default=1,
+                    help="One-hot order.")
+
+args = PARSER.parse_args()
+
+os.environ['JANGGU_OUTPUT'] = args.path
+
+# load the dataset
+# The pseudo genome represents just a concatenation of all sequences
+# in sample.fa and sample2.fa. Therefore, the results should be almost
+# identically to the models obtained from classify_fasta.py.
+REFGENOME = resource_filename('janggu', 'resources/pseudo_genome.fa')
+# ROI contains regions spanning positive and negative examples
+ROI_TRAIN_FILE = resource_filename('janggu', 'resources/roi_train.bed')
+ROI_TEST_FILE = resource_filename('janggu', 'resources/roi_test.bed')
+
+# PEAK_FILE only contains positive examples
+PEAK_FILE = resource_filename('janggu', 'resources/scores.bed')
+
+# Training input and labels are purely defined genomic coordinates
+# applying a random state internally randomizes the dataset
+# in this case, shuffling of the mini-batches during training
+# becomes obsolete.
+# Importantly, the same state needs to be used for all datasets
+# to enforce synchronized datasets.
+DNA = Bioseq.create_from_refgenome('dna', refgenome=REFGENOME,
+                                   roi=ROI_TRAIN_FILE,
+                                   binsize=200,
+                                   order=args.order,
+                                   storage='hdf5',
+                                   cache=True,
+                                   store_whole_genome=False,
+                                   random_state=43)
+
+LABELS = Cover.create_from_bed('peaks', roi=ROI_TRAIN_FILE,
+                               bedfiles=PEAK_FILE,
+                               binsize=200,
+                               resolution=200,
+                               storage='sparse',
+                               cache=True,
+                               store_whole_genome=True,
+                               random_state=43)
+
+DNA_TEST = Bioseq.create_from_refgenome('dna', refgenome=REFGENOME,
+                                        roi=ROI_TEST_FILE,
+                                        binsize=200,
+                                        order=args.order)
+
+LABELS_TEST = Cover.create_from_bed('peaks',
+                                    bedfiles=PEAK_FILE,
+                                    roi=ROI_TEST_FILE,
+                                    binsize=200,
+                                    resolution=200)
+# Define the model templates
+
+@inputlayer
+@outputdense('sigmoid')
+def double_stranded_model_dnaconv(inputs, inp, oup, params):
+    """ keras model for scanning both DNA strands.
+
+    A more elegant way of scanning both strands for motif occurrences
+    is achieved by the DnaConv2D layer wrapper, which internally
+    performs the convolution operation with the normal kernel weights
+    and the reverse complemented weights.
+    """
+    with inputs.use('dna') as layer:
+        # the name in inputs.use() should be the same as the dataset name.
+        layer = DnaConv2D(Conv2D(params[0], (params[1], 1),
+                                 activation=params[2]))(layer)
+    output = GlobalAveragePooling2D(name='motif')(layer)
+    return inputs, output
+
+modeltemplate = double_stranded_model_dnaconv
+
+K.clear_session()
+
+# create a new model object
+model = Janggu.create(template=modeltemplate,
+                      modelparams=(30, 21, 'relu'),
+                      inputs=DNA,
+                      outputs=ReduceDim(LABELS))
+
+model.compile(optimizer='adadelta', loss='binary_crossentropy',
+              metrics=['acc'])
+
+hist = model.fit(DNA, ReduceDim(LABELS), epochs=100, shuffle=False)
+
+print('#' * 40)
+print('loss: {}, acc: {}'.format(hist.history['loss'][-1],
+                                 hist.history['acc'][-1]))
+print('#' * 40)
+
+
+
+
+
+# clustering plots based on hidden features
+heatmap_eval = Scorer('heatmap', exporter=ExportClustermap(z_score=1.))
+tsne_eval = Scorer('tsne', exporter=ExportTsne())
+
+# do the evaluation on the independent test data
+model.evaluate(DNA_TEST, ReduceDim(LABELS_TEST), datatags=['test'],
+               callbacks=['auc', 'auprc', 'roc', 'prc'])
+
+pred = model.predict(DNA_TEST)
+pred = pred[:, None, None, :]
+cov_pred = Cover.create_from_array('BindingProba', pred, LABELS_TEST.gindexer)
+
+print('Oct4 predictions scores should be greater than Mafk scores:')
+print('Prediction score examples for Oct4')
+for i in range(4):
+    print('{}.: {}'.format(i, cov_pred[i]))
+print('Prediction score examples for Mafk')
+for i in range(1, 5):
+    print('{}.: {}'.format(i, cov_pred[-i]))
+
+model.predict(DNA_TEST, datatags=['test'],
+              callbacks=[heatmap_eval, tsne_eval],
+              layername='motif')
+