[0ce940]: / tests / test_seqmodel.py

Download this file

95 lines (75 with data), 3.1 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
"""Test sequence model
"""
from bpnet.seqmodel import SeqModel
from bpnet.heads import ScalarHead, BinaryClassificationHead, ProfileHead
import numpy as np
import keras.layers as kl
class TopDense:
    """Callable head network: global average pooling followed by Dense(1).

    Instances are passed as the ``net`` argument of the heads built in this
    test module and are invoked with a tensor to build the head's layers.

    Args:
        pool_size: stored on the instance as ``self.pool_size``.  It is not
            read by ``__call__``; it is kept so the constructor signature
            stays unchanged for existing callers.
    """

    def __init__(self, pool_size=2):
        self.pool_size = pool_size

    def __call__(self, inp):
        """Apply ``GlobalAvgPool1D`` and then a single-unit ``Dense`` layer to ``inp``."""
        x = kl.GlobalAvgPool1D()(inp)
        return kl.Dense(1)(x)
class TopConv:
    """Callable head network: a width-1 ``Conv1D`` with ``n_output`` filters.

    Instances are passed as the ``net`` argument of the profile head built in
    this test module and are invoked with a tensor to build the head's layer.

    Args:
        n_output: number of filters of the ``Conv1D`` layer.
    """

    def __init__(self, n_output=2):
        self.n_output = n_output

    def __call__(self, inp):
        """Apply ``Conv1D(self.n_output, 1)`` to ``inp`` and return the result."""
        return kl.Conv1D(self.n_output, 1)(inp)
class BaseNet:
    """Callable model body: one ``Conv1D`` layer with 16 filters.

    Instances are passed as the ``body`` argument of ``SeqModel`` in this
    test module and are invoked with a tensor to build the body's layer.

    Args:
        activation: activation passed through to the ``Conv1D`` layer.
    """

    def __init__(self, activation='relu'):
        self.activation = activation

    def __call__(self, inp):
        """Apply a 16-filter, kernel-size-3, ``'same'``-padded ``Conv1D`` to ``inp``."""
        return kl.Conv1D(16,
                         kernel_size=3,
                         activation=self.activation,
                         padding='same')(inp)
def test_interpret_wo_bias():
    """Fit a small three-head SeqModel, then check interpretation and evaluation.

    A single-task (``'a'``) model with a classification head, a counts head
    and a profile head is fitted on random data.  The test then asserts that
    ``contrib_score_all`` returns an ``'a/profile/wn'`` entry with the same
    shape as the encoded input sequences, and that ``evaluate`` reports an
    ``'avg/counts/mad'`` entry.  Only the profile head is configured with a
    bias input (``use_bias=True``); the other two heads use none.
    """
    from bpnet.data import NumpyDataset
    from bpnet.metrics import RegressionMetrics, PeakPredictionProfileMetric
    from concise.preprocessing import encodeDNA

    # Dimensions shared by the inputs, the targets and the profile-head bias.
    n_seqs = 100
    seq = 'ACAGA'
    seq_len = len(seq)
    n_tracks = 2  # last dimension of the profile target and bias arrays

    seqs = encodeDNA([seq] * n_seqs)
    inputs = {
        "seq": seqs,
        "bias/a/profile": np.random.randn(n_seqs, seq_len, n_tracks),
    }

    # Random targets: 0/1 labels for the class head, positive values for the
    # counts and profile heads.
    targets = {
        "a/class": np.random.randint(low=0, high=2, size=(n_seqs, 1)).astype(float),
        "a/counts": 1 + np.ceil(np.abs(np.random.randn(n_seqs))),
        "a/profile": 1 + np.ceil(np.abs(np.random.randn(n_seqs, seq_len, n_tracks))),
    }

    m = SeqModel(
        body=BaseNet('relu'),
        heads=[
            BinaryClassificationHead('{task}/class',
                                     net=TopDense(pool_size=2),
                                     use_bias=False),
            ScalarHead('{task}/counts',
                       loss='mse',
                       metric=RegressionMetrics(),
                       net=TopDense(pool_size=2),
                       use_bias=False),
            ProfileHead('{task}/profile',
                        loss='mse',
                        metric=PeakPredictionProfileMetric(neg_max_threshold=0.05,
                                                           required_min_pos_counts=0),
                        net=TopConv(n_output=n_tracks),
                        use_bias=True,
                        # NOTE: the shape currently has to be hard-coded to the sequence length
                        bias_shape=(seq_len, n_tracks)),
        ],
        tasks=['a']
    )
    m.model.fit(inputs, targets)

    # Contribution scores must be returned per input position and channel.
    contrib_scores = m.contrib_score_all(seqs)
    assert 'a/profile/wn' in contrib_scores
    assert contrib_scores['a/profile/wn'].shape == seqs.shape

    # Evaluate the model on the same arrays wrapped in a NumpyDataset.
    ds = NumpyDataset({"inputs": inputs, "targets": targets})
    eval_metrics = m.evaluate(ds)
    assert 'avg/counts/mad' in eval_metrics