|
a |
|
b/tests/edgepy/test_dispersion.py |
|
|
1 |
import unittest |
|
|
2 |
|
|
|
3 |
import numpy as np |
|
|
4 |
|
|
|
5 |
from inmoose.edgepy import ( |
|
|
6 |
DGEList, |
|
|
7 |
dispCoxReid, |
|
|
8 |
maximizeInterpolant, |
|
|
9 |
movingAverageByCol, |
|
|
10 |
systematicSubset, |
|
|
11 |
) |
|
|
12 |
from inmoose.utils import rnbinom |
|
|
13 |
|
|
|
14 |
|
|
|
15 |
class test_dispersion(unittest.TestCase): |
|
|
16 |
def setUp(self): |
|
|
17 |
y = np.array(rnbinom(80, size=5, mu=20, seed=42)).reshape((20, 4)) |
|
|
18 |
y = np.vstack(([0, 0, 0, 0], [0, 0, 2, 2], y)) |
|
|
19 |
self.group = np.array([1, 1, 2, 2]) |
|
|
20 |
self.d = DGEList(counts=y, group=self.group, lib_size=np.arange(1001, 1005)) |
|
|
21 |
|
|
|
22 |
def test_maximizeInterpolant(self): |
|
|
23 |
spline_pts = np.array([0, 1, 3, 7]) |
|
|
24 |
with self.assertRaisesRegex( |
|
|
25 |
ValueError, expected_regex="y is not a matrix: cannot perform interpolation" |
|
|
26 |
): |
|
|
27 |
maximizeInterpolant(spline_pts, np.array([1, 2, 3])) |
|
|
28 |
with self.assertRaisesRegex( |
|
|
29 |
ValueError, |
|
|
30 |
expected_regex="number of columns must equal number of spline points", |
|
|
31 |
): |
|
|
32 |
maximizeInterpolant(spline_pts, np.ones((2, 3))) |
|
|
33 |
with self.assertRaisesRegex( |
|
|
34 |
ValueError, expected_regex="spline points must be unique and sorted" |
|
|
35 |
): |
|
|
36 |
maximizeInterpolant([3, 2, 1], np.ones((2, 3))) |
|
|
37 |
with self.assertRaisesRegex( |
|
|
38 |
ValueError, expected_regex="spline points must be unique and sorted" |
|
|
39 |
): |
|
|
40 |
maximizeInterpolant([1, 2, 2], np.ones((2, 3))) |
|
|
41 |
|
|
|
42 |
ref = [ |
|
|
43 |
0.0000000, |
|
|
44 |
5.2951564, |
|
|
45 |
0.0000000, |
|
|
46 |
1.2921216, |
|
|
47 |
1.6173398, |
|
|
48 |
5.3493726, |
|
|
49 |
5.1489371, |
|
|
50 |
1.6854523, |
|
|
51 |
5.0732169, |
|
|
52 |
7.0000000, |
|
|
53 |
0.0000000, |
|
|
54 |
1.3839820, |
|
|
55 |
0.0000000, |
|
|
56 |
0.7054761, |
|
|
57 |
0.0000000, |
|
|
58 |
5.2193349, |
|
|
59 |
1.0046347, |
|
|
60 |
1.6395426, |
|
|
61 |
5.0557785, |
|
|
62 |
4.8235934, |
|
|
63 |
7.0000000, |
|
|
64 |
4.3981600, |
|
|
65 |
] |
|
|
66 |
interpolation = maximizeInterpolant(spline_pts, self.d.counts) |
|
|
67 |
self.assertTrue(np.allclose(interpolation, ref, atol=1e-6, rtol=0)) |
|
|
68 |
|
|
|
69 |
def test_systematicSubset(self): |
|
|
70 |
res = systematicSubset(3, np.arange(1, 10)) |
|
|
71 |
self.assertTrue(np.array_equal(res + 1, [2, 5, 8])) |
|
|
72 |
res = systematicSubset(1000, np.arange(1, 10)) |
|
|
73 |
self.assertTrue(np.array_equal(res + 1, [1, 2, 3, 4, 5, 6, 7, 8, 9])) |
|
|
74 |
res = systematicSubset(3, np.array([1, 1, 3, 2, 6, 5, 2, 5, 7])) |
|
|
75 |
self.assertTrue(np.array_equal(res + 1, [2, 3, 5])) |
|
|
76 |
|
|
|
77 |
def test_dispCoxReid(self): |
|
|
78 |
# TODO also test with varying tolerance |
|
|
79 |
self.assertAlmostEqual( |
|
|
80 |
dispCoxReid(self.d.counts, tol=1e-15), 0.158033594104552, 7 |
|
|
81 |
) |
|
|
82 |
self.assertAlmostEqual( |
|
|
83 |
dispCoxReid(self.d.counts, subset=5, tol=1e-15), 0.091028284006027, 7 |
|
|
84 |
) |
|
|
85 |
|
|
|
86 |
with self.assertRaisesRegex( |
|
|
87 |
ValueError, expected_regex="no data rows with required number of counts" |
|
|
88 |
): |
|
|
89 |
dispCoxReid(self.d.counts, min_row_sum=1000) |
|
|
90 |
with self.assertRaisesRegex( |
|
|
91 |
ValueError, |
|
|
92 |
expected_regex="please give a non-negative interval for the dispersion", |
|
|
93 |
): |
|
|
94 |
dispCoxReid(self.d.counts, interval=(-1, 4)) |
|
|
95 |
|
|
|
96 |
@unittest.skip("TODO") |
|
|
97 |
def test_dispCoxReidInterpolateTagwise(self): |
|
|
98 |
# TODO |
|
|
99 |
self.assertTrue(False) |
|
|
100 |
|
|
|
101 |
def test_movingAverageByCol(self): |
|
|
102 |
y = self.d.counts |
|
|
103 |
self.assertTrue(np.array_equal(movingAverageByCol(y, width=1), y)) |
|
|
104 |
with self.assertLogs("inmoose", level="WARNING") as logChecker: |
|
|
105 |
res = movingAverageByCol(y, width=23) |
|
|
106 |
self.assertRegex( |
|
|
107 |
logChecker.output[0], "reducing moving average width to x.shape\[0\]" |
|
|
108 |
) |
|
|
109 |
ref = np.array( |
|
|
110 |
[ |
|
|
111 |
[13.25000, 17.41667, 17.41667, 17.75000], |
|
|
112 |
[14.84615, 17.76923, 16.61538, 18.61538], |
|
|
113 |
[15.92857, 18.78571, 16.64286, 18.21429], |
|
|
114 |
[16.60000, 18.53333, 17.06667, 18.73333], |
|
|
115 |
[16.81250, 18.93750, 18.37500, 19.93750], |
|
|
116 |
[16.82353, 19.00000, 18.05882, 19.64706], |
|
|
117 |
[16.38889, 20.61111, 19.00000, 19.00000], |
|
|
118 |
[17.68421, 20.63158, 20.21053, 18.89474], |
|
|
119 |
[18.75000, 21.25000, 21.20000, 19.20000], |
|
|
120 |
[18.85714, 21.19048, 21.23810, 19.85714], |
|
|
121 |
[18.77273, 21.09091, 21.31818, 19.77273], |
|
|
122 |
[19.66667, 22.09524, 22.33333, 20.71429], |
|
|
123 |
[20.65000, 23.20000, 23.35000, 21.65000], |
|
|
124 |
[20.42105, 23.42105, 24.00000, 21.57895], |
|
|
125 |
[21.00000, 23.77778, 24.77778, 21.83333], |
|
|
126 |
[21.41176, 23.11765, 24.64706, 23.00000], |
|
|
127 |
[21.81250, 24.12500, 24.43750, 22.62500], |
|
|
128 |
[21.93333, 24.73333, 23.86667, 22.46667], |
|
|
129 |
[23.00000, 24.00000, 23.57143, 22.07143], |
|
|
130 |
[23.61538, 24.76923, 23.92308, 22.53846], |
|
|
131 |
[25.16667, 24.83333, 24.08333, 21.91667], |
|
|
132 |
[24.27273, 25.18182, 25.00000, 21.90909], |
|
|
133 |
] |
|
|
134 |
) |
|
|
135 |
self.assertTrue(np.allclose(res, ref, atol=0, rtol=1e-6)) |
|
|
136 |
|
|
|
137 |
res = movingAverageByCol(y) |
|
|
138 |
ref = np.array( |
|
|
139 |
[ |
|
|
140 |
[8.333333, 6.333333, 4.333333, 8.333333], |
|
|
141 |
[8.750000, 9.000000, 5.750000, 10.500000], |
|
|
142 |
[9.800000, 14.200000, 10.000000, 8.800000], |
|
|
143 |
[12.800000, 15.600000, 15.600000, 14.600000], |
|
|
144 |
[16.800000, 18.600000, 21.800000, 19.200000], |
|
|
145 |
[13.200000, 21.800000, 25.200000, 20.200000], |
|
|
146 |
[14.200000, 21.200000, 27.000000, 20.000000], |
|
|
147 |
[12.400000, 19.000000, 26.000000, 25.600000], |
|
|
148 |
[16.400000, 21.800000, 23.200000, 24.200000], |
|
|
149 |
[15.000000, 23.200000, 19.600000, 23.000000], |
|
|
150 |
[20.400000, 20.600000, 15.400000, 23.200000], |
|
|
151 |
[23.400000, 24.200000, 15.000000, 22.600000], |
|
|
152 |
[27.600000, 22.400000, 15.200000, 21.800000], |
|
|
153 |
[24.600000, 23.200000, 20.000000, 25.000000], |
|
|
154 |
[25.400000, 22.800000, 19.600000, 24.200000], |
|
|
155 |
[20.400000, 28.000000, 25.200000, 20.000000], |
|
|
156 |
[22.600000, 25.800000, 30.200000, 20.800000], |
|
|
157 |
[25.200000, 29.400000, 33.600000, 20.600000], |
|
|
158 |
[25.400000, 28.400000, 30.400000, 19.600000], |
|
|
159 |
[25.400000, 28.200000, 32.400000, 20.200000], |
|
|
160 |
[29.500000, 23.250000, 31.750000, 23.250000], |
|
|
161 |
[25.666667, 24.000000, 28.333333, 25.333333], |
|
|
162 |
] |
|
|
163 |
) |
|
|
164 |
self.assertTrue(np.allclose(res, ref, atol=0, rtol=1e-7)) |
|
|
165 |
|
|
|
166 |
res = movingAverageByCol(y, full_length=False) |
|
|
167 |
ref = np.array( |
|
|
168 |
[ |
|
|
169 |
[9.8, 14.2, 10.0, 8.8], |
|
|
170 |
[12.8, 15.6, 15.6, 14.6], |
|
|
171 |
[16.8, 18.6, 21.8, 19.2], |
|
|
172 |
[13.2, 21.8, 25.2, 20.2], |
|
|
173 |
[14.2, 21.2, 27.0, 20.0], |
|
|
174 |
[12.4, 19.0, 26.0, 25.6], |
|
|
175 |
[16.4, 21.8, 23.2, 24.2], |
|
|
176 |
[15.0, 23.2, 19.6, 23.0], |
|
|
177 |
[20.4, 20.6, 15.4, 23.2], |
|
|
178 |
[23.4, 24.2, 15.0, 22.6], |
|
|
179 |
[27.6, 22.4, 15.2, 21.8], |
|
|
180 |
[24.6, 23.2, 20.0, 25.0], |
|
|
181 |
[25.4, 22.8, 19.6, 24.2], |
|
|
182 |
[20.4, 28.0, 25.2, 20.0], |
|
|
183 |
[22.6, 25.8, 30.2, 20.8], |
|
|
184 |
[25.2, 29.4, 33.6, 20.6], |
|
|
185 |
[25.4, 28.4, 30.4, 19.6], |
|
|
186 |
[25.4, 28.2, 32.4, 20.2], |
|
|
187 |
] |
|
|
188 |
) |
|
|
189 |
self.assertTrue(np.array_equal(res, ref)) |
|
|
190 |
|
|
|
191 |
res = movingAverageByCol(y, width=22, full_length=False) |
|
|
192 |
ref = np.array([18.77273, 21.09091, 21.31818, 19.77273]) |
|
|
193 |
self.assertTrue(np.allclose(res, ref, atol=0, rtol=1e-6)) |
|
|
194 |
|
|
|
195 |
def test_estimateGLMCommonDisp(self): |
|
|
196 |
e = self.d.estimateGLMCommonDisp() |
|
|
197 |
self.assertAlmostEqual(e.common_dispersion, 0.16157151, 5) |
|
|
198 |
|
|
|
199 |
def test_estimateGLMTagwiseDisp(self): |
|
|
200 |
# first initialize d.common_dispersion |
|
|
201 |
self.d.estimateGLMCommonDisp() |
|
|
202 |
e = self.d.estimateGLMTagwiseDisp() |
|
|
203 |
ref = [ |
|
|
204 |
0.1615715, |
|
|
205 |
0.1615715, |
|
|
206 |
0.1391734, |
|
|
207 |
0.1337411, |
|
|
208 |
0.3107981, |
|
|
209 |
0.1923997, |
|
|
210 |
0.1378647, |
|
|
211 |
0.1995537, |
|
|
212 |
0.1175669, |
|
|
213 |
0.2083242, |
|
|
214 |
0.1458728, |
|
|
215 |
0.1251945, |
|
|
216 |
0.1974399, |
|
|
217 |
0.1553991, |
|
|
218 |
0.1252869, |
|
|
219 |
0.1348553, |
|
|
220 |
0.1209722, |
|
|
221 |
0.2864446, |
|
|
222 |
0.1629382, |
|
|
223 |
0.1191966, |
|
|
224 |
0.1243685, |
|
|
225 |
0.1157305, |
|
|
226 |
] |
|
|
227 |
self.assertTrue(np.allclose(e.tagwise_dispersion, ref, atol=1e-6, rtol=0)) |
|
|
228 |
|
|
|
229 |
|
|
|
230 |
if __name__ == "__main__": |
|
|
231 |
unittest.main() |