Diff of /tests/test_seqmodel.py [000000] .. [d45a3a]

Switch to unified view

a b/tests/test_seqmodel.py
1
"""Test sequence model
2
"""
3
from bpnet.seqmodel import SeqModel
4
from bpnet.heads import ScalarHead, BinaryClassificationHead, ProfileHead
5
import numpy as np
6
import keras.layers as kl
7
8
9
class TopDense:
    """Callable head network used when building the test model.

    Calling an instance on a Keras tensor applies ``GlobalAvgPool1D``
    followed by a single-unit ``Dense`` layer and returns the result.

    Args:
        pool_size: stored on the instance as ``pool_size``. It is not read
            by ``__call__``; it is kept so that existing callers passing
            ``pool_size=...`` continue to work.
    """

    def __init__(self, pool_size=2):
        self.pool_size = pool_size

    def __call__(self, inp):
        """Return ``Dense(1)`` applied to the globally average-pooled ``inp``."""
        pooled = kl.GlobalAvgPool1D()(inp)
        return kl.Dense(1)(pooled)
19
20
21
class TopConv:
    """Callable head network used when building the test model.

    Calling an instance on a Keras tensor applies a ``Conv1D`` layer with
    ``n_output`` filters and a kernel size of 1, and returns the result.

    Args:
        n_output: number of filters of the ``Conv1D`` layer.
    """

    def __init__(self, n_output=2):
        self.n_output = n_output

    def __call__(self, inp):
        """Return ``Conv1D(n_output, 1)`` applied to ``inp``."""
        return kl.Conv1D(self.n_output, 1)(inp)
30
31
32
class BaseNet:
    """Callable model body used when building the test model.

    Calling an instance on a Keras tensor applies one ``Conv1D`` layer with
    16 filters, a kernel size of 3 and ``'same'`` padding, and returns the
    result.

    Args:
        activation: activation passed to the ``Conv1D`` layer.
    """

    def __init__(self, activation='relu'):
        self.activation = activation

    def __call__(self, inp):
        """Return the ``Conv1D`` layer applied to ``inp``."""
        return kl.Conv1D(16,
                         kernel_size=3,
                         activation=self.activation,
                         padding='same')(inp)
42
43
44
def test_interpret_wo_bias():
    """Fit a three-head ``SeqModel`` on random data, then interpret and evaluate it.

    The model has a binary-classification head, a scalar (counts) head and a
    profile head for the single task ``'a'``. Only the profile head is
    configured with ``use_bias=True``; the other two heads use no bias
    input. The test checks that ``contrib_score_all`` returns the
    ``'a/profile/wn'`` score with the same shape as the input sequences and
    that ``evaluate`` reports the ``'avg/counts/mad'`` metric.
    """
    from bpnet.data import NumpyDataset
    from bpnet.metrics import RegressionMetrics, PeakPredictionProfileMetric
    from concise.preprocessing import encodeDNA

    seq = 'ACAGA'
    n_seqs = 100
    seq_len = len(seq)
    n_channels = 2  # matches TopConv(n_output=2) below
    profile_shape = (seq_len, n_channels)

    seqs = encodeDNA([seq] * n_seqs)

    inputs = {"seq": seqs,
              "bias/a/profile": np.random.randn(n_seqs, *profile_shape)}

    targets = {"a/class": np.random.randint(low=0, high=2, size=(n_seqs, 1)).astype(float),
               "a/counts": 1 + np.ceil(np.abs(np.random.randn(n_seqs))),
               "a/profile": 1 + np.ceil(np.abs(np.random.randn(n_seqs, *profile_shape))),
               }

    # Only the profile head receives a bias input.
    m = SeqModel(
        body=BaseNet('relu'),
        heads=[BinaryClassificationHead('{task}/class',
                                        net=TopDense(pool_size=2),
                                        use_bias=False),
               ScalarHead('{task}/counts',
                          loss='mse',
                          metric=RegressionMetrics(),
                          net=TopDense(pool_size=2),
                          use_bias=False),
               ProfileHead('{task}/profile',
                           loss='mse',
                           metric=PeakPredictionProfileMetric(neg_max_threshold=0.05,
                                                              required_min_pos_counts=0),
                           net=TopConv(n_output=n_channels),
                           use_bias=True,
                           # NOTE: the shape currently has to be hard-coded to the sequence length
                           bias_shape=profile_shape),
               ],
        tasks=['a']
    )
    m.model.fit(inputs, targets)

    # NOTE(review): the original repeated the two assertions below verbatim;
    # presumably a second contribution-score key was meant to be checked - confirm.
    o = m.contrib_score_all(seqs)
    assert 'a/profile/wn' in o
    assert o['a/profile/wn'].shape == seqs.shape

    # Wrap the arrays in a NumpyDataset so that the model can be evaluated on them.
    ds = NumpyDataset({"inputs": inputs, "targets": targets})
    o = m.evaluate(ds)
    assert 'avg/counts/mad' in o