a b/tests/deseq2/test_nbinomWald.py
1
import unittest
2
3
import numpy as np
4
import patsy
5
6
from inmoose.deseq2 import (
7
    DESeq,
8
    estimateBetaPriorVar,
9
    estimateDispersionsGeneEst,
10
    estimateMLEForBetaPriorVar,
11
    makeExampleDESeqDataSet,
12
    nbinomLRT,
13
    nbinomWaldTest,
14
)
15
from inmoose.utils import Factor, pt
16
17
18
class Test(unittest.TestCase):
19
    def test_nbinomWald_errors(self):
20
        """test that nbinomWald throws various errors and works with edge cases"""
21
        dds = makeExampleDESeqDataSet(n=100, m=4)
22
        with self.assertRaisesRegex(
23
            ValueError,
24
            expected_regex="testing requires dispersion estimates, first call estimateDispersions()",
25
        ):
26
            nbinomWaldTest(dds)
27
        with self.assertRaisesRegex(
28
            ValueError,
29
            expected_regex="testing requires dispersion estimates, first call estimateDispersions()",
30
        ):
31
            nbinomLRT(dds)
32
33
        dds = dds.estimateSizeFactors()
34
        dds = dds.estimateDispersions()
35
        mm = patsy.dmatrix("~condition", dds.obs)
36
        # mm0 = patsy.dmatrix("~1", dds.obs)
37
        with self.assertRaisesRegex(
38
            ValueError,
39
            expected_regex="user-supplied model matrix with betaPrior=True requires supplying betaPriorVar",
40
        ):
41
            nbinomWaldTest(dds, betaPrior=True, modelMatrix=mm)
42
        # TODO
43
        # with self.assertRaisesRegex(ValueError, expected_regex="unused argument (betaPrior = TRUE)"):
44
        #    nbinomLRT(dds, betaPrior=True, full=mm, reduced=mm0)
45
        with self.assertRaisesRegex(
46
            ValueError, expected_regex="expanded model matrices require a beta prior"
47
        ):
48
            nbinomWaldTest(dds, betaPrior=False, modelMatrixType="expanded")
49
        # with self.assertRaisesRegex(ValueError, expected_regex="unused arguments (betaPrior = FALSE, modelMatrixType = 'expanded')"):
50
        #    nbinomLRT(dds, betaPrior=False, modelMatrixType="expanded")
51
52
        dds2 = estimateMLEForBetaPriorVar(dds.copy())
53
        estimateBetaPriorVar(dds2, betaPriorMethod="quantile")
54
        dds = nbinomWaldTest(dds, modelMatrixType="standard")
55
        # TODO
56
        # covarianceMatrix(dds, 1)
57
58
        # changing 'df'
59
        dds = makeExampleDESeqDataSet(n=100, m=4)
60
        dds.X[:, :4] = 0
61
        dds = dds.estimateSizeFactors()
62
        dds = dds.estimateDispersions()
63
        dds = nbinomWaldTest(dds)
64
        dds.results().pvalue[:8]
65
        dds = nbinomWaldTest(dds, useT=True, df=np.ones(100))
66
        dds.results().pvalue[:8]
67
68
        # try nbinom after no fitted dispersions
69
        dds = makeExampleDESeqDataSet(n=100, m=4)
70
        dds = dds.estimateSizeFactors()
71
        dds = estimateDispersionsGeneEst(dds)
72
        dds.var["dispersion"] = dds.var["dispGeneEst"]
73
        dds = nbinomWaldTest(dds)
74
75
    def test_nbinomWald_useT(self):
76
        """test that useT uses proper degrees of freedom"""
77
        dds = makeExampleDESeqDataSet(n=200, m=15, seed=42)
78
        dds.X[:, 100:105] = 0
79
        dds.obs["condition"] = Factor(np.repeat(["A", "B", "C"], 5))
80
        dds.design = "~condition"
81
        dds = DESeq(dds, useT=True)
82
        dds = dds.removeResults()
83
        w = np.ones(dds.shape)
84
        w[0, :100] = 0
85
        w[:4, 0] = 0
86
        w[5:9, 0] = 0
87
        w[10:14, 0] = 0
88
        dds.layers["weights"] = w
89
        dds = DESeq(dds, useT=True)
90
        res = dds.results()
91
        self.assertTrue(np.isnan(res.pvalue.iloc[0]))
92
        self.assertEqual(dds.var["tDegreesFreedom"].iloc[1], 15 - 1 - 3)
93
        self.assertEqual(
94
            res.pvalue.iloc[1],
95
            2 * pt(np.abs(res.stat.iloc[1]), df=15 - 1 - 3, lower_tail=False),
96
        )
97
98
        # also lfcThreshold
99
        res = dds.results(lfcThreshold=1, altHypothesis="greaterAbs")
100
        idx = np.nonzero(((res.log2FoldChange > 1) & ~np.isnan(res.pvalue)).values)[0][
101
            0
102
        ]
103
        self.assertEqual(
104
            res.pvalue.iloc[idx],
105
            2 * pt(res.stat.iloc[idx], df=15 - 1 - 3, lower_tail=False),
106
        )
107
        res = dds.results(lfcThreshold=1, altHypothesis="greater")
108
        idx = np.nonzero(((res.log2FoldChange > 1) & ~np.isnan(res.pvalue)).values)[0][
109
            0
110
        ]
111
        self.assertEqual(
112
            res.pvalue.iloc[idx],
113
            pt(res.stat.iloc[idx], df=15 - 1 - 3, lower_tail=False),
114
        )
115
116
        res = dds.results(lfcThreshold=1, altHypothesis="less")
117
        idx = np.nonzero(((res.log2FoldChange < -1) & ~np.isnan(res.pvalue)).values)[0][
118
            0
119
        ]
120
        self.assertEqual(
121
            res.pvalue.iloc[idx],
122
            pt(-res.stat.iloc[idx], df=15 - 1 - 3, lower_tail=False),
123
        )
124
125
        res = dds.results(lfcThreshold=1, altHypothesis="lessAbs")
126
        idx = np.nonzero(
127
            ((np.abs(res.log2FoldChange) < 1) & ~np.isnan(res.pvalue)).values
128
        )[0][0]
129
        self.assertEqual(
130
            res.pvalue.iloc[idx],
131
            pt(res.stat.iloc[idx], df=15 - 1 - 3, lower_tail=False),
132
        )
133
134
        # also novel contrasts
135
        res = dds.results(contrast=["condition", "C", "B"])
136
        self.assertTrue(np.isnan(res.pvalue.iloc[0]))
137
        self.assertTrue(dds.var["tDegreesFreedom"].iloc[1] == 15 - 1 - 3)
138
        self.assertTrue(
139
            res.pvalue.iloc[1]
140
            == 2 * pt(abs(res.stat.iloc[1]), df=15 - 1 - 3, lower_tail=False)
141
        )