|
a |
|
b/tests/deseq2/test_nbinomWald.py |
|
|
1 |
import unittest |
|
|
2 |
|
|
|
3 |
import numpy as np |
|
|
4 |
import patsy |
|
|
5 |
|
|
|
6 |
from inmoose.deseq2 import ( |
|
|
7 |
DESeq, |
|
|
8 |
estimateBetaPriorVar, |
|
|
9 |
estimateDispersionsGeneEst, |
|
|
10 |
estimateMLEForBetaPriorVar, |
|
|
11 |
makeExampleDESeqDataSet, |
|
|
12 |
nbinomLRT, |
|
|
13 |
nbinomWaldTest, |
|
|
14 |
) |
|
|
15 |
from inmoose.utils import Factor, pt |
|
|
16 |
|
|
|
17 |
|
|
|
18 |
class Test(unittest.TestCase): |
|
|
19 |
def test_nbinomWald_errors(self): |
|
|
20 |
"""test that nbinomWald throws various errors and works with edge cases""" |
|
|
21 |
dds = makeExampleDESeqDataSet(n=100, m=4) |
|
|
22 |
with self.assertRaisesRegex( |
|
|
23 |
ValueError, |
|
|
24 |
expected_regex="testing requires dispersion estimates, first call estimateDispersions()", |
|
|
25 |
): |
|
|
26 |
nbinomWaldTest(dds) |
|
|
27 |
with self.assertRaisesRegex( |
|
|
28 |
ValueError, |
|
|
29 |
expected_regex="testing requires dispersion estimates, first call estimateDispersions()", |
|
|
30 |
): |
|
|
31 |
nbinomLRT(dds) |
|
|
32 |
|
|
|
33 |
dds = dds.estimateSizeFactors() |
|
|
34 |
dds = dds.estimateDispersions() |
|
|
35 |
mm = patsy.dmatrix("~condition", dds.obs) |
|
|
36 |
# mm0 = patsy.dmatrix("~1", dds.obs) |
|
|
37 |
with self.assertRaisesRegex( |
|
|
38 |
ValueError, |
|
|
39 |
expected_regex="user-supplied model matrix with betaPrior=True requires supplying betaPriorVar", |
|
|
40 |
): |
|
|
41 |
nbinomWaldTest(dds, betaPrior=True, modelMatrix=mm) |
|
|
42 |
# TODO |
|
|
43 |
# with self.assertRaisesRegex(ValueError, expected_regex="unused argument (betaPrior = TRUE)"): |
|
|
44 |
# nbinomLRT(dds, betaPrior=True, full=mm, reduced=mm0) |
|
|
45 |
with self.assertRaisesRegex( |
|
|
46 |
ValueError, expected_regex="expanded model matrices require a beta prior" |
|
|
47 |
): |
|
|
48 |
nbinomWaldTest(dds, betaPrior=False, modelMatrixType="expanded") |
|
|
49 |
# with self.assertRaisesRegex(ValueError, expected_regex="unused arguments (betaPrior = FALSE, modelMatrixType = 'expanded')"): |
|
|
50 |
# nbinomLRT(dds, betaPrior=False, modelMatrixType="expanded") |
|
|
51 |
|
|
|
52 |
dds2 = estimateMLEForBetaPriorVar(dds.copy()) |
|
|
53 |
estimateBetaPriorVar(dds2, betaPriorMethod="quantile") |
|
|
54 |
dds = nbinomWaldTest(dds, modelMatrixType="standard") |
|
|
55 |
# TODO |
|
|
56 |
# covarianceMatrix(dds, 1) |
|
|
57 |
|
|
|
58 |
# changing 'df' |
|
|
59 |
dds = makeExampleDESeqDataSet(n=100, m=4) |
|
|
60 |
dds.X[:, :4] = 0 |
|
|
61 |
dds = dds.estimateSizeFactors() |
|
|
62 |
dds = dds.estimateDispersions() |
|
|
63 |
dds = nbinomWaldTest(dds) |
|
|
64 |
dds.results().pvalue[:8] |
|
|
65 |
dds = nbinomWaldTest(dds, useT=True, df=np.ones(100)) |
|
|
66 |
dds.results().pvalue[:8] |
|
|
67 |
|
|
|
68 |
# try nbinom after no fitted dispersions |
|
|
69 |
dds = makeExampleDESeqDataSet(n=100, m=4) |
|
|
70 |
dds = dds.estimateSizeFactors() |
|
|
71 |
dds = estimateDispersionsGeneEst(dds) |
|
|
72 |
dds.var["dispersion"] = dds.var["dispGeneEst"] |
|
|
73 |
dds = nbinomWaldTest(dds) |
|
|
74 |
|
|
|
75 |
def test_nbinomWald_useT(self): |
|
|
76 |
"""test that useT uses proper degrees of freedom""" |
|
|
77 |
dds = makeExampleDESeqDataSet(n=200, m=15, seed=42) |
|
|
78 |
dds.X[:, 100:105] = 0 |
|
|
79 |
dds.obs["condition"] = Factor(np.repeat(["A", "B", "C"], 5)) |
|
|
80 |
dds.design = "~condition" |
|
|
81 |
dds = DESeq(dds, useT=True) |
|
|
82 |
dds = dds.removeResults() |
|
|
83 |
w = np.ones(dds.shape) |
|
|
84 |
w[0, :100] = 0 |
|
|
85 |
w[:4, 0] = 0 |
|
|
86 |
w[5:9, 0] = 0 |
|
|
87 |
w[10:14, 0] = 0 |
|
|
88 |
dds.layers["weights"] = w |
|
|
89 |
dds = DESeq(dds, useT=True) |
|
|
90 |
res = dds.results() |
|
|
91 |
self.assertTrue(np.isnan(res.pvalue.iloc[0])) |
|
|
92 |
self.assertEqual(dds.var["tDegreesFreedom"].iloc[1], 15 - 1 - 3) |
|
|
93 |
self.assertEqual( |
|
|
94 |
res.pvalue.iloc[1], |
|
|
95 |
2 * pt(np.abs(res.stat.iloc[1]), df=15 - 1 - 3, lower_tail=False), |
|
|
96 |
) |
|
|
97 |
|
|
|
98 |
# also lfcThreshold |
|
|
99 |
res = dds.results(lfcThreshold=1, altHypothesis="greaterAbs") |
|
|
100 |
idx = np.nonzero(((res.log2FoldChange > 1) & ~np.isnan(res.pvalue)).values)[0][ |
|
|
101 |
0 |
|
|
102 |
] |
|
|
103 |
self.assertEqual( |
|
|
104 |
res.pvalue.iloc[idx], |
|
|
105 |
2 * pt(res.stat.iloc[idx], df=15 - 1 - 3, lower_tail=False), |
|
|
106 |
) |
|
|
107 |
res = dds.results(lfcThreshold=1, altHypothesis="greater") |
|
|
108 |
idx = np.nonzero(((res.log2FoldChange > 1) & ~np.isnan(res.pvalue)).values)[0][ |
|
|
109 |
0 |
|
|
110 |
] |
|
|
111 |
self.assertEqual( |
|
|
112 |
res.pvalue.iloc[idx], |
|
|
113 |
pt(res.stat.iloc[idx], df=15 - 1 - 3, lower_tail=False), |
|
|
114 |
) |
|
|
115 |
|
|
|
116 |
res = dds.results(lfcThreshold=1, altHypothesis="less") |
|
|
117 |
idx = np.nonzero(((res.log2FoldChange < -1) & ~np.isnan(res.pvalue)).values)[0][ |
|
|
118 |
0 |
|
|
119 |
] |
|
|
120 |
self.assertEqual( |
|
|
121 |
res.pvalue.iloc[idx], |
|
|
122 |
pt(-res.stat.iloc[idx], df=15 - 1 - 3, lower_tail=False), |
|
|
123 |
) |
|
|
124 |
|
|
|
125 |
res = dds.results(lfcThreshold=1, altHypothesis="lessAbs") |
|
|
126 |
idx = np.nonzero( |
|
|
127 |
((np.abs(res.log2FoldChange) < 1) & ~np.isnan(res.pvalue)).values |
|
|
128 |
)[0][0] |
|
|
129 |
self.assertEqual( |
|
|
130 |
res.pvalue.iloc[idx], |
|
|
131 |
pt(res.stat.iloc[idx], df=15 - 1 - 3, lower_tail=False), |
|
|
132 |
) |
|
|
133 |
|
|
|
134 |
# also novel contrasts |
|
|
135 |
res = dds.results(contrast=["condition", "C", "B"]) |
|
|
136 |
self.assertTrue(np.isnan(res.pvalue.iloc[0])) |
|
|
137 |
self.assertTrue(dds.var["tDegreesFreedom"].iloc[1] == 15 - 1 - 3) |
|
|
138 |
self.assertTrue( |
|
|
139 |
res.pvalue.iloc[1] |
|
|
140 |
== 2 * pt(abs(res.stat.iloc[1]), df=15 - 1 - 3, lower_tail=False) |
|
|
141 |
) |