--- a +++ b/tests/edgepy/test_exactTest.py @@ -0,0 +1,369 @@ +import unittest + +import numpy as np +import pandas as pd + +from inmoose.edgepy import DGEList, binomTest, exactTest, exactTestBetaApprox, topTags +from inmoose.utils import rnbinom + + +class Test(unittest.TestCase): + def setUp(self): + y = np.array(rnbinom(80, size=5, mu=20, seed=42)).reshape((20, 4)) + y = np.vstack(([0, 0, 0, 0], [0, 0, 2, 2], y)) + self.group = np.array([1, 2, 2, 2]) + self.d = DGEList(counts=y, group=self.group) + self.d = self.d.estimateGLMCommonDisp() + + def test_exactTest_doubletail(self): + e = exactTest(self.d, rejection_region="doubletail") + table_ref = pd.DataFrame( + { + "log2FoldChange": [ + 0.00000000, + 3.51533881, + -0.63058208, + 0.41122894, + 0.43984489, + 0.37139369, + 0.13977308, + 1.95460401, + -0.01979911, + 2.17873153, + -1.01346677, + 0.37844670, + -0.93741820, + -0.68416066, + -0.41992156, + 0.60973257, + -0.22824454, + 1.57918440, + -0.77088316, + -0.40296414, + 0.11776420, + 0.08922781, + ], + "lfcSE": [ + 4.879406, + 4.343170, + 0.748468, + 0.831304, + 0.786678, + 0.780016, + 0.755259, + 0.868254, + 0.786410, + 0.934109, + 0.730749, + 0.796331, + 0.731355, + 0.735686, + 0.741946, + 0.750018, + 0.776667, + 0.830651, + 0.717838, + 0.716268, + 0.751390, + 0.771176, + ], + "logCPM": [ + 12.12019, + 12.70317, + 15.56153, + 15.07428, + 15.52507, + 15.56517, + 15.77696, + 15.83033, + 15.29041, + 15.58364, + 15.78337, + 15.38442, + 15.78799, + 15.76965, + 15.74505, + 16.12701, + 15.31314, + 15.84442, + 16.13715, + 16.30238, + 15.82620, + 15.52722, + ], + "pvalue": [ + 1.00000000, + 0.67966025, + 0.37574431, + 0.77735744, + 0.69008766, + 0.72780471, + 0.99603624, + 0.04941292, + 1.00000000, + 0.02552397, + 0.14116664, + 0.74391410, + 0.16836537, + 0.33562846, + 0.55601398, + 0.52617216, + 0.79259676, + 0.09778055, + 0.25246831, + 0.53871925, + 0.99845052, + 1.00000000, + ], + }, + index=[f"gene{i}" for i in range(22)], + ) + pd.testing.assert_frame_equal(table_ref, e, check_frame_type=False) + + @unittest.skip("R returns NAs, need to find meaningful test data") + def test_exactTest_deviance(self): + e = exactTest(self.d, rejection_region="deviance") + table_ref = pd.DataFrame( + { + "logFC": [ + 0.00000000, + 3.51533881, + -0.63058208, + 0.41122894, + 0.43984489, + 0.37139369, + 0.13977308, + 1.95460401, + -0.01979911, + 2.17873153, + -1.01346677, + 0.37844670, + -0.93741820, + -0.68416066, + -0.41992156, + 0.60973257, + -0.22824454, + 1.57918440, + -0.77088316, + -0.40296414, + 0.11776420, + 0.08922781, + ], + "logCPM": [ + 12.12019, + 12.70317, + 15.56153, + 15.07428, + 15.52507, + 15.56517, + 15.77696, + 15.83033, + 15.29041, + 15.58364, + 15.78337, + 15.38442, + 15.78799, + 15.76965, + 15.74505, + 16.12701, + 15.31314, + 15.84442, + 16.13715, + 16.30238, + 15.82620, + 15.52722, + ], + "PValue": [ + 1.00000, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + ], + } + ) + self.assertTrue(np.allclose(table_ref["logFC"], e["logFC"], atol=1e-6, rtol=0)) + self.assertTrue( + np.allclose(table_ref["logCPM"], e["logCPM"], atol=1e-5, rtol=0) + ) + self.assertTrue( + np.allclose(table_ref["PValue"], e["PValue"], atol=1e-6, rtol=0) + ) + + def test_exactTest_smallp(self): + e = exactTest(self.d, rejection_region="smallp") + table_ref = pd.DataFrame( + { + "log2FoldChange": [ + 0.00000000, + 3.51533881, + -0.63058208, + 0.41122894, + 0.43984489, + 0.37139369, + 0.13977308, + 1.95460401, + -0.01979911, + 2.17873153, + -1.01346677, + 0.37844670, + -0.93741820, + -0.68416066, + -0.41992156, + 0.60973257, + -0.22824454, + 1.57918440, + -0.77088316, + -0.40296414, + 0.11776420, + 0.08922781, + ], + "lfcSE": [ + 4.879406, + 4.343170, + 0.748468, + 0.831304, + 0.786678, + 0.780016, + 0.755259, + 0.868254, + 0.786410, + 0.934109, + 0.730749, + 0.796331, + 0.731355, + 0.735686, + 0.741946, + 0.750018, + 0.776667, + 0.830651, + 0.717838, + 0.716268, + 0.751390, + 0.771176, + ], + "logCPM": [ + 12.12019, + 12.70317, + 15.56153, + 15.07428, + 15.52507, + 15.56517, + 15.77696, + 15.83033, + 15.29041, + 15.58364, + 15.78337, + 15.38442, + 15.78799, + 15.76965, + 15.74505, + 16.12701, + 15.31314, + 15.84442, + 16.13715, + 16.30238, + 15.82620, + 15.52722, + ], + "pvalue": [ + 1.00000000, + 0.03903554, + 0.03903554, + 0.03903554, + 0.03903554, + 0.03903554, + 0.03903554, + 0.03903554, + 0.03903554, + 0.03903554, + 0.03903554, + 0.03903554, + 0.03903554, + 0.03903554, + 0.03903554, + 0.03903554, + 0.03903554, + 0.03903554, + 0.03903554, + 0.03903554, + 0.03903554, + 0.03903554, + ], + }, + index=[f"gene{i}" for i in range(22)], + ) + pd.testing.assert_frame_equal(table_ref, e, check_frame_type=False) + + def test_exactTestBetaApprox(self): + pref = [6.988452e-145, 5.981632e-124] + pval = exactTestBetaApprox( + np.array([[1000, 1200], [1300, 1400]]), + np.array([[2000, 2200], [2300, 2400]]), + ) + self.assertTrue(np.allclose(pref, pval, atol=1e-6, rtol=0)) + + def test_binomTest(self): + pval = binomTest(self.d.counts.iloc[:, 0], self.d.counts.iloc[:, 1]) + pref = [ + 1.000000e00, + 1.000000e00, + 2.275075e-01, + 3.385877e-01, + 9.734914e-03, + 5.510056e-02, + 2.413184e-01, + 6.708009e-05, + 7.109008e-01, + 1.204722e-03, + 2.277012e-02, + 3.097011e-01, + 4.480458e-02, + 8.989952e-01, + 4.168068e-02, + 7.667345e-01, + 1.000000e00, + 8.761834e-07, + 3.168496e-03, + 2.395773e-01, + 6.406607e-01, + 1.000000e00, + ] + self.assertTrue(np.allclose(pval, pref, atol=1e-6, rtol=0)) + + def test_topTags(self): + t = topTags(exactTest(self.d)) + self.assertTrue( + np.array_equal( + t.table.index, + [ + "gene9", + "gene7", + "gene17", + "gene10", + "gene12", + "gene18", + "gene13", + "gene2", + "gene15", + "gene19", + ], + ) + )