a b/tests/edgepy/test_dispersion.py
1
import unittest
2
3
import numpy as np
4
5
from inmoose.edgepy import (
6
    DGEList,
7
    dispCoxReid,
8
    maximizeInterpolant,
9
    movingAverageByCol,
10
    systematicSubset,
11
)
12
from inmoose.utils import rnbinom
13
14
15
class test_dispersion(unittest.TestCase):
16
    def setUp(self):
17
        y = np.array(rnbinom(80, size=5, mu=20, seed=42)).reshape((20, 4))
18
        y = np.vstack(([0, 0, 0, 0], [0, 0, 2, 2], y))
19
        self.group = np.array([1, 1, 2, 2])
20
        self.d = DGEList(counts=y, group=self.group, lib_size=np.arange(1001, 1005))
21
22
    def test_maximizeInterpolant(self):
23
        spline_pts = np.array([0, 1, 3, 7])
24
        with self.assertRaisesRegex(
25
            ValueError, expected_regex="y is not a matrix: cannot perform interpolation"
26
        ):
27
            maximizeInterpolant(spline_pts, np.array([1, 2, 3]))
28
        with self.assertRaisesRegex(
29
            ValueError,
30
            expected_regex="number of columns must equal number of spline points",
31
        ):
32
            maximizeInterpolant(spline_pts, np.ones((2, 3)))
33
        with self.assertRaisesRegex(
34
            ValueError, expected_regex="spline points must be unique and sorted"
35
        ):
36
            maximizeInterpolant([3, 2, 1], np.ones((2, 3)))
37
        with self.assertRaisesRegex(
38
            ValueError, expected_regex="spline points must be unique and sorted"
39
        ):
40
            maximizeInterpolant([1, 2, 2], np.ones((2, 3)))
41
42
        ref = [
43
            0.0000000,
44
            5.2951564,
45
            0.0000000,
46
            1.2921216,
47
            1.6173398,
48
            5.3493726,
49
            5.1489371,
50
            1.6854523,
51
            5.0732169,
52
            7.0000000,
53
            0.0000000,
54
            1.3839820,
55
            0.0000000,
56
            0.7054761,
57
            0.0000000,
58
            5.2193349,
59
            1.0046347,
60
            1.6395426,
61
            5.0557785,
62
            4.8235934,
63
            7.0000000,
64
            4.3981600,
65
        ]
66
        interpolation = maximizeInterpolant(spline_pts, self.d.counts)
67
        self.assertTrue(np.allclose(interpolation, ref, atol=1e-6, rtol=0))
68
69
    def test_systematicSubset(self):
70
        res = systematicSubset(3, np.arange(1, 10))
71
        self.assertTrue(np.array_equal(res + 1, [2, 5, 8]))
72
        res = systematicSubset(1000, np.arange(1, 10))
73
        self.assertTrue(np.array_equal(res + 1, [1, 2, 3, 4, 5, 6, 7, 8, 9]))
74
        res = systematicSubset(3, np.array([1, 1, 3, 2, 6, 5, 2, 5, 7]))
75
        self.assertTrue(np.array_equal(res + 1, [2, 3, 5]))
76
77
    def test_dispCoxReid(self):
78
        # TODO also test with varying tolerance
79
        self.assertAlmostEqual(
80
            dispCoxReid(self.d.counts, tol=1e-15), 0.158033594104552, 7
81
        )
82
        self.assertAlmostEqual(
83
            dispCoxReid(self.d.counts, subset=5, tol=1e-15), 0.091028284006027, 7
84
        )
85
86
        with self.assertRaisesRegex(
87
            ValueError, expected_regex="no data rows with required number of counts"
88
        ):
89
            dispCoxReid(self.d.counts, min_row_sum=1000)
90
        with self.assertRaisesRegex(
91
            ValueError,
92
            expected_regex="please give a non-negative interval for the dispersion",
93
        ):
94
            dispCoxReid(self.d.counts, interval=(-1, 4))
95
96
    @unittest.skip("TODO")
97
    def test_dispCoxReidInterpolateTagwise(self):
98
        # TODO
99
        self.assertTrue(False)
100
101
    def test_movingAverageByCol(self):
102
        y = self.d.counts
103
        self.assertTrue(np.array_equal(movingAverageByCol(y, width=1), y))
104
        with self.assertLogs("inmoose", level="WARNING") as logChecker:
105
            res = movingAverageByCol(y, width=23)
106
        self.assertRegex(
107
            logChecker.output[0], "reducing moving average width to x.shape\[0\]"
108
        )
109
        ref = np.array(
110
            [
111
                [13.25000, 17.41667, 17.41667, 17.75000],
112
                [14.84615, 17.76923, 16.61538, 18.61538],
113
                [15.92857, 18.78571, 16.64286, 18.21429],
114
                [16.60000, 18.53333, 17.06667, 18.73333],
115
                [16.81250, 18.93750, 18.37500, 19.93750],
116
                [16.82353, 19.00000, 18.05882, 19.64706],
117
                [16.38889, 20.61111, 19.00000, 19.00000],
118
                [17.68421, 20.63158, 20.21053, 18.89474],
119
                [18.75000, 21.25000, 21.20000, 19.20000],
120
                [18.85714, 21.19048, 21.23810, 19.85714],
121
                [18.77273, 21.09091, 21.31818, 19.77273],
122
                [19.66667, 22.09524, 22.33333, 20.71429],
123
                [20.65000, 23.20000, 23.35000, 21.65000],
124
                [20.42105, 23.42105, 24.00000, 21.57895],
125
                [21.00000, 23.77778, 24.77778, 21.83333],
126
                [21.41176, 23.11765, 24.64706, 23.00000],
127
                [21.81250, 24.12500, 24.43750, 22.62500],
128
                [21.93333, 24.73333, 23.86667, 22.46667],
129
                [23.00000, 24.00000, 23.57143, 22.07143],
130
                [23.61538, 24.76923, 23.92308, 22.53846],
131
                [25.16667, 24.83333, 24.08333, 21.91667],
132
                [24.27273, 25.18182, 25.00000, 21.90909],
133
            ]
134
        )
135
        self.assertTrue(np.allclose(res, ref, atol=0, rtol=1e-6))
136
137
        res = movingAverageByCol(y)
138
        ref = np.array(
139
            [
140
                [8.333333, 6.333333, 4.333333, 8.333333],
141
                [8.750000, 9.000000, 5.750000, 10.500000],
142
                [9.800000, 14.200000, 10.000000, 8.800000],
143
                [12.800000, 15.600000, 15.600000, 14.600000],
144
                [16.800000, 18.600000, 21.800000, 19.200000],
145
                [13.200000, 21.800000, 25.200000, 20.200000],
146
                [14.200000, 21.200000, 27.000000, 20.000000],
147
                [12.400000, 19.000000, 26.000000, 25.600000],
148
                [16.400000, 21.800000, 23.200000, 24.200000],
149
                [15.000000, 23.200000, 19.600000, 23.000000],
150
                [20.400000, 20.600000, 15.400000, 23.200000],
151
                [23.400000, 24.200000, 15.000000, 22.600000],
152
                [27.600000, 22.400000, 15.200000, 21.800000],
153
                [24.600000, 23.200000, 20.000000, 25.000000],
154
                [25.400000, 22.800000, 19.600000, 24.200000],
155
                [20.400000, 28.000000, 25.200000, 20.000000],
156
                [22.600000, 25.800000, 30.200000, 20.800000],
157
                [25.200000, 29.400000, 33.600000, 20.600000],
158
                [25.400000, 28.400000, 30.400000, 19.600000],
159
                [25.400000, 28.200000, 32.400000, 20.200000],
160
                [29.500000, 23.250000, 31.750000, 23.250000],
161
                [25.666667, 24.000000, 28.333333, 25.333333],
162
            ]
163
        )
164
        self.assertTrue(np.allclose(res, ref, atol=0, rtol=1e-7))
165
166
        res = movingAverageByCol(y, full_length=False)
167
        ref = np.array(
168
            [
169
                [9.8, 14.2, 10.0, 8.8],
170
                [12.8, 15.6, 15.6, 14.6],
171
                [16.8, 18.6, 21.8, 19.2],
172
                [13.2, 21.8, 25.2, 20.2],
173
                [14.2, 21.2, 27.0, 20.0],
174
                [12.4, 19.0, 26.0, 25.6],
175
                [16.4, 21.8, 23.2, 24.2],
176
                [15.0, 23.2, 19.6, 23.0],
177
                [20.4, 20.6, 15.4, 23.2],
178
                [23.4, 24.2, 15.0, 22.6],
179
                [27.6, 22.4, 15.2, 21.8],
180
                [24.6, 23.2, 20.0, 25.0],
181
                [25.4, 22.8, 19.6, 24.2],
182
                [20.4, 28.0, 25.2, 20.0],
183
                [22.6, 25.8, 30.2, 20.8],
184
                [25.2, 29.4, 33.6, 20.6],
185
                [25.4, 28.4, 30.4, 19.6],
186
                [25.4, 28.2, 32.4, 20.2],
187
            ]
188
        )
189
        self.assertTrue(np.array_equal(res, ref))
190
191
        res = movingAverageByCol(y, width=22, full_length=False)
192
        ref = np.array([18.77273, 21.09091, 21.31818, 19.77273])
193
        self.assertTrue(np.allclose(res, ref, atol=0, rtol=1e-6))
194
195
    def test_estimateGLMCommonDisp(self):
196
        e = self.d.estimateGLMCommonDisp()
197
        self.assertAlmostEqual(e.common_dispersion, 0.16157151, 5)
198
199
    def test_estimateGLMTagwiseDisp(self):
200
        # first initialize d.common_dispersion
201
        self.d.estimateGLMCommonDisp()
202
        e = self.d.estimateGLMTagwiseDisp()
203
        ref = [
204
            0.1615715,
205
            0.1615715,
206
            0.1391734,
207
            0.1337411,
208
            0.3107981,
209
            0.1923997,
210
            0.1378647,
211
            0.1995537,
212
            0.1175669,
213
            0.2083242,
214
            0.1458728,
215
            0.1251945,
216
            0.1974399,
217
            0.1553991,
218
            0.1252869,
219
            0.1348553,
220
            0.1209722,
221
            0.2864446,
222
            0.1629382,
223
            0.1191966,
224
            0.1243685,
225
            0.1157305,
226
        ]
227
        self.assertTrue(np.allclose(e.tagwise_dispersion, ref, atol=1e-6, rtol=0))
228
229
230
if __name__ == "__main__":
231
    unittest.main()