inmoose / Git / [ea0fd6] /tests/limma/test

Models:
AlyssaS/
inmoose
Downloads: 1
[ea0fd6]: / tests / limma / test_lmFit.py
History
Download this file
875 lines (846 with data), 27.7 kB

import unittest

import numpy as np
import pandas as pd
from patsy import dmatrix
from scipy.stats import norm

from inmoose.diffexp import DEResults
from inmoose.limma import (
    MArrayLM,
    contrasts_fit,
    eBayes,
    lmFit,
    makeContrasts,
    nonEstimable,
    topTable,
)
from inmoose.utils import Factor


class Test(unittest.TestCase):
    def setUp(self):
        y = norm.rvs(size=(10, 6), scale=0.3, random_state=42)
        y[0, :2] += 2
        self.y = pd.DataFrame(
            y,
            index=[f"gene{i}" for i in range(10)],
            columns=[f"sample{i}" for i in range(6)],
        )
        group = Factor([1, 1, 1, 2, 2, 2])  # noqa: F841
        self.design = dmatrix("0+group")
        self.contrast_matrix = pd.DataFrame(
            {"First3": [1, 0], "Last3": [0, 1], "Last3-First3": [-1, 1]},
            index=["group[1]", "group[2]"],
        )

        condition = Factor([1, 2, 1, 2, 1, 2])  # noqa: F841
        self.design_covariates = dmatrix("0+group+condition")
        self.contrast_covariates = makeContrasts(
            "group[1]-group[2]", self.design_covariates
        )

    def test_nonEstimable(self):
        d = dmatrix("x", data={"x": ["A", "B"]})
        self.assertEqual(nonEstimable(d), None)
        d = dmatrix("x+y", data={"x": ["A", "B"], "y": ["B", "A"]})
        self.assertEqual(nonEstimable(d), ["x[T.B]"])

    def test_lmFit(self):
        with self.assertRaisesRegex(
            ValueError, expected_regex="the correlation must be set"
        ):
            lmFit(self.y, self.design, ndups=2)

        # fit = lmFit(self.y, self.design, ndups=2, correlation=0.9)
        # coef_ref = np.array(
        #    [
        #        [1.144165577, 0.03340760],
        #        [-0.212545740, -0.14219338],
        #        [-0.017368967, -0.09863220],
        #        [-0.248713327, 0.04916677],
        #        [0.001386212, 0.02736341],
        #    ]
        # )
        # self.assertTrue(np.allclose(fit.coefficients, coef_ref))

        fit = lmFit(self.y, self.design)
        coef_ref = pd.DataFrame(
            {
                "group[1]": [
                    1.4339472,
                    0.1877173,
                    -0.3396236,
                    -0.0854679,
                    -0.1584454,
                    0.1237074,
                    -0.3078993,
                    -0.1895274,
                    -0.1095338,
                    0.1123062,
                ],
                "group[2]": [
                    0.10547395,
                    -0.03865874,
                    -0.12608713,
                    -0.15829963,
                    -0.05166344,
                    -0.14560097,
                    0.11066961,
                    -0.01233607,
                    -0.04503280,
                    0.09975962,
                ],
            },
            index=self.y.index,
        )

        pd.testing.assert_frame_equal(fit.coefficients, coef_ref, rtol=1e-6)

        self.assertTrue(np.array_equal(fit.df_residual, [4, 4, 4, 4, 4, 4, 4, 4, 4, 4]))

        sigma_ref = pd.Series(
            [
                0.7919070,
                0.2512174,
                0.2908815,
                0.3666233,
                0.1706735,
                0.3631793,
                0.2461618,
                0.2569999,
                0.2941130,
                0.2615085,
            ],
            index=self.y.index,
        )
        pd.testing.assert_series_equal(fit.sigma, sigma_ref)

        cov_ref = pd.DataFrame(
            {"group[1]": [0.3333333, 0.0], "group[2]": [0.0, 0.3333333]},
            index=["group[1]", "group[2]"],
        )
        pd.testing.assert_frame_equal(fit.cov_coefficients, cov_ref)

        self.assertTrue(np.allclose(fit.stdev_unscaled, 0.5773503))
        self.assertTrue(np.array_equal(fit.stdev_unscaled.index, self.y.index))
        self.assertTrue(
            np.array_equal(fit.stdev_unscaled.columns, fit.coefficients.columns)
        )

        Amean_ref = pd.Series(
            [
                0.76971056,
                0.07452928,
                -0.23285536,
                -0.12188377,
                -0.10505440,
                -0.01094676,
                -0.09861482,
                -0.10093174,
                -0.07728329,
                0.10603292,
            ],
            index=self.y.index,
        )
        pd.testing.assert_series_equal(fit.Amean, Amean_ref)

        fit = lmFit(self.y, self.design_covariates)
        coef_ref = pd.DataFrame(
            {
                "group[1]": [
                    1.25887380,
                    0.15199740,
                    -0.32547401,
                    0.04372743,
                    -0.23850795,
                    0.11380454,
                    -0.21018122,
                    -0.24579036,
                    -0.04420018,
                    0.07044161,
                ],
                "group[2]": [
                    -0.24467280,
                    -0.11009858,
                    -0.09778799,
                    0.10009102,
                    -0.21178860,
                    -0.16540676,
                    0.30610568,
                    -0.12486200,
                    0.08563443,
                    0.01603041,
                ],
                "condition[T.2]": [
                    0.52522013,
                    0.10715975,
                    -0.04244871,
                    -0.38758597,
                    0.24018774,
                    0.02970869,
                    -0.29315411,
                    0.16878888,
                    -0.19600084,
                    0.12559381,
                ],
            },
            index=self.y.index,
        )
        pd.testing.assert_frame_equal(fit.coefficients, coef_ref)

        std_ref = pd.DataFrame(
            {
                "group[1]": [
                    0.6454972,
                    0.6454972,
                    0.6454972,
                    0.6454972,
                    0.6454972,
                    0.6454972,
                    0.6454972,
                    0.6454972,
                    0.6454972,
                    0.6454972,
                ],
                "group[2]": [
                    0.8164966,
                    0.8164966,
                    0.8164966,
                    0.8164966,
                    0.8164966,
                    0.8164966,
                    0.8164966,
                    0.8164966,
                    0.8164966,
                    0.8164966,
                ],
                "condition[T.2]": [
                    0.8660254,
                    0.8660254,
                    0.8660254,
                    0.8660254,
                    0.8660254,
                    0.8660254,
                    0.8660254,
                    0.8660254,
                    0.8660254,
                    0.8660254,
                ],
            },
            index=self.y.index,
        )
        pd.testing.assert_frame_equal(std_ref, fit.stdev_unscaled)

    def test_ebayes(self):
        fit = lmFit(self.y, self.design)
        fit2 = eBayes(fit)

        pd.testing.assert_frame_equal(fit2.coefficients, fit.coefficients)
        pd.testing.assert_series_equal(fit2.sigma, fit.sigma)
        pd.testing.assert_frame_equal(fit2.stdev_unscaled, fit.stdev_unscaled)
        pd.testing.assert_series_equal(fit2.Amean, fit.Amean)

        self.assertTrue(np.abs(fit2.df_prior - 71047.76) / 71047.76 < 1e-3)
        self.assertAlmostEqual(fit2.s2_prior, 0.1198481)
        self.assertTrue(np.allclose(fit2.var_prior, [36.68822975, 0.08343894]))
        pd.testing.assert_series_equal(
            fit2.s2_post,
            pd.Series(
                [
                    0.1198767,
                    0.1198449,
                    0.1198461,
                    0.1198489,
                    0.1198430,
                    0.1198488,
                    0.1198448,
                    0.1198451,
                    0.1198462,
                    0.1198452,
                ],
                index=self.y.index,
            ),
        )
        pd.testing.assert_frame_equal(
            fit2.t,
            pd.DataFrame(
                {
                    "group[1]": [
                        7.1734231,
                        0.9391936,
                        -1.6992077,
                        -0.4276087,
                        -0.7927456,
                        0.6189273,
                        -1.5404929,
                        -0.9482493,
                        -0.5480202,
                        0.5618936,
                    ],
                    "group[2]": [
                        0.52764098,
                        -0.19341874,
                        -0.63084025,
                        -0.79199683,
                        -0.25848634,
                        -0.72846394,
                        0.55370627,
                        -0.06172022,
                        -0.22530838,
                        0.49912011,
                    ],
                },
                index=self.y.index,
            ),
        )
        pd.testing.assert_frame_equal(
            fit2.p_value,
            pd.DataFrame(
                {
                    "group[1]": [
                        1.076797e-08,
                        3.532684e-01,
                        9.704589e-02,
                        6.712294e-01,
                        4.326031e-01,
                        5.394734e-01,
                        1.313148e-01,
                        3.486966e-01,
                        5.867236e-01,
                        5.773237e-01,
                    ],
                    "group[2]": [
                        0.6006634,
                        0.8476099,
                        0.5317331,
                        0.4330345,
                        0.7973571,
                        0.4705731,
                        0.5828621,
                        0.9510930,
                        0.8228865,
                        0.6204289,
                    ],
                },
                index=self.y.index,
            ),
        )
        pd.testing.assert_frame_equal(
            fit2.lods,
            pd.DataFrame(
                {
                    "group[1]": [
                        9.767247,
                        -6.507090,
                        -5.534718,
                        -6.857523,
                        -6.633502,
                        -6.756554,
                        -5.779612,
                        -6.498600,
                        -6.798220,
                        -6.790460,
                    ],
                    "group[2]": [
                        -4.678431,
                        -4.702983,
                        -4.666348,
                        -4.643355,
                        -4.699973,
                        -4.653013,
                        -4.675576,
                        -4.706428,
                        -4.701616,
                        -4.681400,
                    ],
                },
                index=self.y.index,
            ),
        )
        self.assertTrue(
            np.allclose(
                fit2.F,
                [
                    25.8682018,
                    0.4597478,
                    1.6426331,
                    0.4050541,
                    0.3476304,
                    0.4568653,
                    1.3398546,
                    0.4514930,
                    0.1755450,
                    0.2824226,
                ],
            )
        )
        F_p_value_ref = [
            5.883976e-12,
            6.314448e-01,
            1.934773e-01,
            6.669423e-01,
            7.063611e-01,
            6.332675e-01,
            2.618904e-01,
            6.366787e-01,
            8.390000e-01,
            7.539558e-01,
        ]
        self.assertTrue(np.allclose(fit2.F_p_value, F_p_value_ref, rtol=1e-4))

    def test_contrasts_fit(self):
        fit = MArrayLM(
            np.arange(20, dtype=float).reshape((10, 2)),
            np.arange(10, 30, dtype=float).reshape((10, 2)) / 30.0,
            None,
            None,
            None,
        )
        fit.coefficients = pd.DataFrame(
            fit.coefficients, index=self.y.index, columns=self.contrast_matrix.index
        )
        fit.stdev_unscaled = pd.DataFrame(
            fit.stdev_unscaled, index=self.y.index, columns=self.contrast_matrix.index
        )
        fit2 = contrasts_fit(fit, self.contrast_matrix)

        coef_ref = pd.DataFrame(
            {
                "First3": [0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0],
                "Last3": [1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0],
                "Last3-First3": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            },
            index=self.y.index,
        )
        pd.testing.assert_frame_equal(coef_ref, fit2.coefficients)
        std_ref = pd.DataFrame(
            {
                "First3": fit.stdev_unscaled.iloc[:, 0],
                "Last3": fit.stdev_unscaled.iloc[:, 1],
                "Last3-First3": [
                    0.4955356,
                    0.5897269,
                    0.6839428,
                    0.7781745,
                    0.8724168,
                    0.9666667,
                    1.0609220,
                    1.1551816,
                    1.2494443,
                    1.3437096,
                ],
            },
            index=self.y.index,
        )
        pd.testing.assert_frame_equal(std_ref, fit2.stdev_unscaled)
        cov_ref = pd.DataFrame(
            {
                "First3": [0.4377778, 0.0, -0.4377778],
                "Last3": [0.0, 0.4811111, 0.4811111],
                "Last3-First3": [-0.4377778, 0.4811111, 0.9188889],
            },
            index=["First3", "Last3", "Last3-First3"],
        )
        pd.testing.assert_frame_equal(cov_ref, fit2.cov_coefficients)

        fit.coefficients.iloc[0, 0] = np.nan
        fit3 = contrasts_fit(fit, self.contrast_matrix)
        coef_ref = pd.DataFrame(
            {
                "First3": [np.nan, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0],
                "Last3": [1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0],
                "Last3-First3": [np.nan, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            },
            index=self.y.index,
        )
        # NB: np.allclose instead of pd.testing.assert_frame_equal here to also
        # check columns with nan values
        self.assertTrue(np.allclose(coef_ref, fit3.coefficients, equal_nan=True))
        self.assertTrue(np.array_equal(coef_ref.index, fit3.coefficients.index))
        self.assertTrue(np.array_equal(coef_ref.columns, fit3.coefficients.columns))

        fit.coefficients.iloc[0, 0] = 10
        fit.cov_coefficients = np.cov(fit.coefficients.T)
        fit4 = contrasts_fit(fit, self.contrast_matrix)
        cov_ref = pd.DataFrame(
            {
                "First3": [26.66667, 26.66667, 4.586534e-15],
                "Last3": [26.66667, 36.66667, 10.0],
                "Last3-First3": [4.586534e-15, 10.0, 10.0],
            },
            index=["First3", "Last3", "Last3-First3"],
        )
        pd.testing.assert_frame_equal(cov_ref, fit4.cov_coefficients)

        fit_covariates = lmFit(self.y, self.design_covariates)
        fit5 = contrasts_fit(fit_covariates, self.contrast_covariates)
        coef_ref = pd.DataFrame(
            {
                "group[1]-group[2]": [
                    1.50354659,
                    0.26209598,
                    -0.22768602,
                    -0.05636359,
                    -0.02671935,
                    0.27921130,
                    -0.51628690,
                    -0.12092836,
                    -0.12983461,
                    0.05441120,
                ]
            },
            index=self.y.index,
        )
        pd.testing.assert_frame_equal(coef_ref, fit5.coefficients)
        std_ref = pd.DataFrame(
            {
                "group[1]-group[2]": [
                    0.8660254,
                    0.8660254,
                    0.8660254,
                    0.8660254,
                    0.8660254,
                    0.8660254,
                    0.8660254,
                    0.8660254,
                    0.8660254,
                    0.8660254,
                ]
            },
            index=self.y.index,
        )
        pd.testing.assert_frame_equal(std_ref, fit5.stdev_unscaled)

    def test_topTable(self):
        fit = lmFit(self.y, self.design)
        fit2 = eBayes(fit)
        tt = topTable(fit2)
        tt_ref = pd.DataFrame(
            {
                "group[1]": [
                    1.4339472,
                    -0.3396236,
                    -0.3078993,
                    0.1877173,
                    0.1237074,
                    -0.1895274,
                    -0.0854679,
                    -0.1584454,
                    0.1123062,
                    -0.1095338,
                ],
                "group[2]": [
                    0.10547395,
                    -0.12608713,
                    0.11066961,
                    -0.03865874,
                    -0.14560097,
                    -0.01233607,
                    -0.15829963,
                    -0.05166344,
                    0.09975962,
                    -0.04503280,
                ],
                "AveExpr": [
                    0.76971056,
                    -0.23285536,
                    -0.09861482,
                    0.07452928,
                    -0.01094676,
                    -0.10093174,
                    -0.12188377,
                    -0.10505440,
                    0.10603292,
                    -0.07728329,
                ],
                "F": [
                    25.8682018,
                    1.6426331,
                    1.3398546,
                    0.4597478,
                    0.4568653,
                    0.4514930,
                    0.4050541,
                    0.3476304,
                    0.2824226,
                    0.1755450,
                ],
                "pvalue": [
                    5.883976e-12,
                    1.934773e-01,
                    2.618904e-01,
                    6.314448e-01,
                    6.332675e-01,
                    6.366787e-01,
                    6.669423e-01,
                    7.063611e-01,
                    7.539558e-01,
                    8.390000e-01,
                ],
                "adj_P_Val": [
                    5.883976e-11,
                    8.377287e-01,
                    8.377287e-01,
                    8.377287e-01,
                    8.377287e-01,
                    8.377287e-01,
                    8.377287e-01,
                    8.377287e-01,
                    8.377287e-01,
                    8.390000e-01,
                ],
            },
            index=[
                "gene0",
                "gene2",
                "gene6",
                "gene1",
                "gene5",
                "gene7",
                "gene3",
                "gene4",
                "gene9",
                "gene8",
            ],
        )
        pd.testing.assert_frame_equal(tt, tt_ref, rtol=1e-4)

        fit_con = eBayes(contrasts_fit(fit, self.contrast_matrix))
        tt = topTable(
            fit_con, sort_by="logFC", number=np.inf, coef="Last3-First3", confint=0.9
        )
        tt_ref = pd.DataFrame(
            {
                "log2FoldChange": [
                    -1.32847322,
                    0.41856886,
                    -0.26930840,
                    -0.22637606,
                    0.21353645,
                    0.17719132,
                    0.10678193,
                    -0.07283173,
                    0.06450099,
                    -0.01254659,
                ],
                "lfcSE": [
                    0.816496580927726,
                    0.816496580927726,
                    0.816496580927726,
                    0.816496580927726,
                    0.816496580927726,
                    0.816496580927726,
                    0.816496580927726,
                    0.816496580927726,
                    0.816496580927726,
                    0.816496580927726,
                ],
                "CI_L": [
                    -1.80449340,
                    -0.05738799,
                    -0.74527323,
                    -0.70233319,
                    -0.26242309,
                    -0.29876614,
                    -0.36917141,
                    -0.54879684,
                    -0.41145876,
                    -0.48850432,
                ],
                "CI_R": [
                    -0.8524530,
                    0.8945257,
                    0.2066564,
                    0.2495811,
                    0.6894960,
                    0.6531488,
                    0.5827353,
                    0.4031334,
                    0.5404607,
                    0.4634111,
                ],
                "AveExpr": [
                    0.76971056,
                    -0.09861482,
                    -0.01094676,
                    0.07452928,
                    -0.23285536,
                    -0.10093174,
                    -0.10505440,
                    -0.12188377,
                    -0.07728329,
                    0.10603292,
                ],
                "stat": [
                    -4.69927758,
                    1.48082246,
                    -0.95274945,
                    -0.80087790,
                    0.75544985,
                    0.62687070,
                    0.37777834,
                    -0.25766129,
                    0.22819170,
                    -0.04438754,
                ],
                "pvalue": [
                    3.070827e-05,
                    1.464885e-01,
                    3.464392e-01,
                    4.279339e-01,
                    4.544048e-01,
                    5.343058e-01,
                    7.075926e-01,
                    7.979893e-01,
                    8.206597e-01,
                    9.648163e-01,
                ],
                "adj_P_Val": [
                    0.0003070827,
                    0.7324425460,
                    0.8905096522,
                    0.8905096522,
                    0.8905096522,
                    0.8905096522,
                    0.9118441666,
                    0.9118441666,
                    0.9118441666,
                    0.9648163452,
                ],
                "B": [
                    2.248778,
                    -5.456045,
                    -6.076312,
                    -6.207346,
                    -6.242278,
                    -6.330372,
                    -6.455078,
                    -6.493272,
                    -6.500445,
                    -6.525565,
                ],
            },
            index=[
                "gene0",
                "gene6",
                "gene5",
                "gene1",
                "gene2",
                "gene7",
                "gene4",
                "gene3",
                "gene8",
                "gene9",
            ],
        )
        self.assertTrue(isinstance(tt, DEResults))
        pd.testing.assert_frame_equal(tt, tt_ref)

        fit_covariates = lmFit(self.y, self.design_covariates)
        fit_con = eBayes(contrasts_fit(fit_covariates, self.contrast_covariates))
        tt = topTable(
            fit_con,
            sort_by="logFC",
            number=np.inf,
            coef="group[1]-group[2]",
            confint=0.9,
        )
        tt_ref = pd.DataFrame(
            {
                "log2FoldChange": [
                    1.50354659,
                    -0.51628690,
                    0.27921130,
                    0.26209598,
                    -0.22768602,
                    -0.12983461,
                    -0.12092836,
                    -0.05636359,
                    0.05441120,
                    -0.02671935,
                ],
                "lfcSE": [
                    0.8660254037844387,
                    0.8660254037844387,
                    0.8660254037844387,
                    0.8660254037844387,
                    0.8660254037844387,
                    0.8660254037844387,
                    0.8660254037844387,
                    0.8660254037844387,
                    0.8660254037844387,
                    0.8660254037844387,
                ],
                "CI_L": [
                    0.8648869,
                    -1.0243969,
                    -0.2572811,
                    -0.2539541,
                    -0.7508157,
                    -0.6500231,
                    -0.6361908,
                    -0.5795862,
                    -0.4627464,
                    -0.5283325,
                ],
                "CI_R": [
                    2.14220625,
                    -0.00817689,
                    0.81570373,
                    0.77814600,
                    0.29544369,
                    0.39035389,
                    0.39433411,
                    0.46685905,
                    0.57156875,
                    0.47489385,
                ],
                "AveExpr": [
                    0.76971056,
                    -0.09861482,
                    -0.01094676,
                    0.07452928,
                    -0.23285536,
                    -0.07728329,
                    -0.10093174,
                    -0.12188377,
                    0.10603292,
                    -0.10505440,
                ],
                "stat": [
                    3.99964506,
                    -1.72626474,
                    0.88418560,
                    0.86286448,
                    -0.73943674,
                    -0.42403702,
                    -0.39872522,
                    -0.18301479,
                    0.17874729,
                    -0.09049633,
                ],
                "pvalue": [
                    0.0003984447,
                    0.0949021452,
                    0.3838419907,
                    0.3952665823,
                    0.4655642755,
                    0.6746565581,
                    0.6930059814,
                    0.8560553406,
                    0.8593745912,
                    0.9285127413,
                ],
                "adj_P_Val": [
                    0.003984447,
                    0.474510726,
                    0.928512741,
                    0.928512741,
                    0.928512741,
                    0.928512741,
                    0.928512741,
                    0.928512741,
                    0.928512741,
                    0.928512741,
                ],
                "B": [
                    -0.004229386,
                    -4.932498541,
                    -5.967152704,
                    -5.985354042,
                    -6.082385044,
                    -6.264205210,
                    -6.274594085,
                    -6.337355229,
                    -6.338129087,
                    -6.350044879,
                ],
            },
            index=[
                "gene0",
                "gene6",
                "gene5",
                "gene1",
                "gene2",
                "gene8",
                "gene7",
                "gene3",
                "gene9",
                "gene4",
            ],
        )
        self.assertTrue(isinstance(tt, DEResults))
        pd.testing.assert_frame_equal(tt, tt_ref, rtol=1e-3)