Switch to unified view

a b/tests/deseq2/test_weights.py
1
import sys
2
import unittest
3
4
import numpy as np
5
import pandas as pd
6
7
from inmoose.deseq2 import DESeq, makeExampleDESeqDataSet, nbinomWaldTest
8
from inmoose.deseq2.fitNbinomGLMs import fitNbinomGLMsOptim
9
from inmoose.utils import Factor
10
11
12
class Test(unittest.TestCase):
13
    def test_weights(self):
14
        """test that weights work"""
15
        dds = makeExampleDESeqDataSet(n=10, seed=42)
16
        w = np.ones(dds.shape)
17
        w[0, 0] = 0
18
        # check that weight to 0 is like a remove sample
19
        dds = DESeq(dds, quiet=True)
20
        dds2 = dds.copy()
21
        dds2.layers["weights"] = w
22
        dds2 = nbinomWaldTest(dds2)
23
        dds3 = dds.copy()[1:, :]
24
        dds3 = nbinomWaldTest(dds3)
25
26
        # in terms of LFC, SE and deviance
27
        self.assertAlmostEqual(
28
            dds2.results().log2FoldChange.iloc[0],
29
            dds3.results().log2FoldChange.iloc[0],
30
            delta=1e-5,
31
        )
32
        self.assertAlmostEqual(
33
            dds2.results().lfcSE.iloc[0], dds3.results().lfcSE.iloc[0], delta=1e-6
34
        )
35
        self.assertAlmostEqual(
36
            dds2.var["deviance"].iloc[0], dds3.var["deviance"].iloc[0], delta=1e-7
37
        )
38
39
        # check weights working in the optim code
40
        nf = np.repeat(dds.sizeFactors.values[:, None], dds.n_vars, axis=1)
41
        o = fitNbinomGLMsOptim(
42
            obj=dds,
43
            modelMatrix=dds.design,
44
            lambda_=np.repeat(1e-6, 2),
45
            colsForOptim=[0],
46
            colStable=np.repeat(True, dds.n_vars),
47
            normalizationFactors=nf,
48
            alpha_hat=dds.var["dispersion"],
49
            weights=w,
50
            useWeights=True,
51
            betaMatrix=pd.DataFrame(np.zeros((dds.n_vars, 2))),
52
            betaSE=pd.DataFrame(np.zeros((dds.n_vars, 2))),
53
            betaConv=np.repeat(False, dds.n_vars),
54
            beta_mat=np.zeros((dds.n_vars, 2)),
55
            mu=np.zeros(dds.shape),
56
            logLike=np.zeros(dds.n_vars),
57
        )
58
        self.assertAlmostEqual(
59
            dds3.results().log2FoldChange.iloc[0],
60
            o["betaMatrix"].iloc[0, 1],
61
            delta=1e-4,
62
        )
63
64
    def test_weights_with_beta_prior(self):
65
        """test that weights can be used with betaPrior=True"""
66
        dds = makeExampleDESeqDataSet(n=10, seed=42)
67
        w = np.ones(dds.shape)
68
        w[0, 0] = 0
69
        dds.layers["weights"] = w
70
        dds = DESeq(dds, betaPrior=True, quiet=True)
71
72
        # check weights working for intercept only
73
        dds.design = "~1"
74
        dds = DESeq(dds, quiet=True)
75
        dds2 = dds
76
        dds2.layers["weights"] = w
77
        dds2 = nbinomWaldTest(dds2)
78
        dds3 = dds[1:, :]
79
        dds3 = nbinomWaldTest(dds3)
80
81
        self.assertEqual(
82
            dds2.results().log2FoldChange.iloc[0], dds3.results().log2FoldChange.iloc[0]
83
        )
84
        self.assertEqual(dds2.results().lfcSE.iloc[0], dds3.results().lfcSE.iloc[0])
85
        self.assertEqual(dds2.var["deviance"].iloc[0], dds3.var["deviance"].iloc[0])
86
87
    def test_weights_downweight_outliers(self):
88
        """test that weights downweight outlier in dispersion estimation"""
89
        dds = makeExampleDESeqDataSet(n=10, seed=42)
90
        dds.counts()[0, 0] = 100
91
        dds.sizeFactors = np.ones(dds.n_obs)
92
        dds = dds.estimateDispersions()
93
        dds2 = dds.copy()
94
        w = np.ones(dds.shape)
95
        w[0, 0] = 0
96
        dds2.layers["weights"] = w
97
        dds2 = dds2.estimateDispersions()
98
        dds3 = dds.copy()[1:, :]
99
        dds3 = dds3.estimateDispersions()
100
101
        self.assertAlmostEqual(
102
            dds2.var["dispGeneEst"].iloc[0], dds3.var["dispGeneEst"].iloc[0], delta=1e-1
103
        )
104
        # MAP estimates won't be equal because of different dispersion prior widths
105
        self.assertGreater(dds.var["dispMAP"].iloc[0], dds2.var["dispMAP"].iloc[0])
106
107
    def test_weights_warning(self):
108
        """test that weights failing check gives warning, passes them through"""
109
        dds = makeExampleDESeqDataSet(n=10, seed=42)
110
        w = np.ones(dds.shape)
111
        w[0:6, 0] = 0
112
        dds.layers["weights"] = w
113
        with self.assertLogs("inmoose", level="WARNING") as logChecker:
114
            dds = DESeq(dds)
115
        self.assertRegex(
116
            # account for https://github.com/python/cpython/issues/86109
117
            logChecker.output[0]
118
            if sys.version_info >= (3, 10)
119
            else logChecker.output[3],
120
            "for 1 genes, the weights as supplied won't allow parameter estimation",
121
        )
122
        self.assertTrue(dds.var["allZero"].iloc[0])
123
        self.assertTrue(dds.var["weightsFail"].iloc[0])
124
        dds.results()
125
126
    @unittest.skip("not sure what is tested here")
127
    def test_weights_CR(self):
128
        """test that weights with and without CR term included"""
129
        alpha = 0.25
130
131
        def dmr(x):
132
            return alpha
133
134
        dds = makeExampleDESeqDataSet(
135
            n=50, m=100, betaSD=1, interceptMean=10, interceptSD=0.5, dispMeanRel=dmr
136
        )
137
        dds.obs["group"] = Factor(np.repeat(np.arange(50), 2))
138
        dds.design = "~0 + group + condition"
139
        w = np.ones(dds.shape)
140
        o = 35
141
        w[0:o, :] = 1e-6
142
        w[50 : (50 + o), :] = 1e-6
143
        dds.layers["weights"] = w
144
        dds.counts()[1:o, :] = 1
145
        dds.counts()[50 : (50 + o), :] = 1
146
        dds.sizeFactors = 1
147
        dds = dds.estimateDispersions(fitType="mean")
148
        dds.estimateDispersions(fitType="mean", useCR=False)