--- a
+++ b/tests/pycombat/test_pycombatseq.py
@@ -0,0 +1,263 @@
+import unittest
+
+import numpy as np
+import pandas as pd
+from anndata import AnnData
+
+from inmoose.pycombat import pycombat_seq
+from inmoose.utils import rnbinom
+
+
+class test_pycombatseq(unittest.TestCase):
+    def setUp(self):
+        y = np.array(rnbinom(100, size=5, mu=20, seed=42)).reshape((20, 5))
+        self.y = np.vstack(([0, 0, 0, 0, 0], [0, 0, 2, 2, 2], y))
+        self.batch = np.array([1, 1, 2, 2, 2])
+
+    def test_pycombat_seq(self):
+        ref = np.array(
+            [
+                [0, 0, 0, 0, 0],
+                [0, 0, 2, 2, 2],
+                [21, 14, 14, 26, 14],
+                [24, 13, 15, 13, 28],
+                [29, 5, 13, 3, 30],
+                [30, 20, 16, 33, 26],
+                [12, 36, 31, 26, 8],
+                [10, 20, 17, 8, 24],
+                [18, 31, 34, 24, 18],
+                [23, 14, 22, 15, 19],
+                [32, 15, 11, 32, 33],
+                [27, 15, 15, 30, 17],
+                [25, 28, 20, 25, 36],
+                [28, 14, 26, 15, 18],
+                [12, 47, 36, 6, 42],
+                [23, 39, 15, 44, 34],
+                [31, 20, 26, 24, 26],
+                [29, 17, 21, 26, 18],
+                [37, 20, 14, 37, 33],
+                [21, 19, 13, 28, 21],
+                [23, 14, 16, 14, 25],
+                [13, 33, 16, 45, 11],
+            ]
+        )
+        res = pycombat_seq(self.y, self.batch)
+        self.assertTrue(np.array_equal(res, ref))
+
+        ref = np.array(
+            [
+                [0, 0, 0, 0, 0],
+                [0, 0, 2, 2, 2],
+                [19, 14, 14, 29, 13],
+                [22, 13, 15, 13, 30],
+                [34, 2, 15, 8, 25],
+                [24, 18, 17, 41, 24],
+                [11, 43, 24, 25, 12],
+                [14, 21, 16, 7, 23],
+                [31, 39, 29, 19, 10],
+                [28, 18, 18, 12, 17],
+                [17, 12, 11, 45, 47],
+                [24, 14, 16, 33, 17],
+                [24, 26, 20, 22, 42],
+                [34, 18, 21, 13, 16],
+                [6, 67, 33, 10, 35],
+                [19, 31, 20, 51, 33],
+                [34, 23, 23, 21, 24],
+                [30, 18, 19, 26, 16],
+                [32, 17, 19, 38, 32],
+                [17, 16, 14, 36, 20],
+                [23, 14, 15, 13, 25],
+                [8, 30, 18, 49, 15],
+            ]
+        )
+        res = pycombat_seq(self.y, self.batch, covar_mod=[1, 1, 1, 2, 2])
+        self.assertTrue(np.array_equal(res, ref))
+
+        # test with reference batch
+        res = pycombat_seq(self.y, self.batch, ref_batch=1)
+        ref_col = self.batch == 1
+        non_ref_col = self.batch != 1
+        # make sure that reference batch counts have not been adjusted
+        self.assertTrue(np.array_equal(res[:, ref_col], self.y[:, ref_col]))
+        # make sure that batch effects have still been corrected
+        self.assertFalse(np.array_equal(res[:, non_ref_col], self.y[:, non_ref_col]))
+
+        # also test with non-integer batch ids
+        res2 = pycombat_seq(self.y, ["a", "a", "b", "b", "b"], ref_batch="a")
+        self.assertTrue(np.array_equal(res, res2))
+
+        # test raise error for incorect counts format
+        with self.assertRaisesRegex(
+            ValueError,
+            expected_regex="counts must be a pandas DataFrame or a numpy nd array",
+        ):
+            pycombat_seq(
+                [
+                    [0, 0, 0, 0, 0],
+                    [0, 0, 2, 2, 2],
+                    [19, 14, 14, 29, 13],
+                    [22, 13, 15, 13, 30],
+                    [34, 2, 15, 8, 25],
+                    [24, 18, 17, 41, 24],
+                    [11, 43, 24, 25, 12],
+                    [14, 21, 16, 7, 23],
+                    [31, 39, 29, 19, 10],
+                    [28, 18, 18, 12, 17],
+                    [17, 12, 11, 45, 47],
+                    [24, 14, 16, 33, 17],
+                    [24, 26, 20, 22, 42],
+                    [34, 18, 21, 13, 16],
+                    [6, 67, 33, 10, 35],
+                    [19, 31, 20, 51, 33],
+                    [34, 23, 23, 21, 24],
+                    [30, 18, 19, 26, 16],
+                    [32, 17, 19, 38, 32],
+                    [17, 16, 14, 36, 20],
+                    [23, 14, 15, 13, 25],
+                    [8, 30, 18, 49, 15],
+                ],
+                ["a", "b", "b", "b", "b"],
+            )
+
+        # test raise error for single sample batch
+        with self.assertRaisesRegex(
+            ValueError, expected_regex="Batches a contain a single sample"
+        ):
+            pycombat_seq(self.y, ["a", "b", "b", "b", "b"])
+
+        # test with incomplete group
+        with self.assertRaisesRegex(
+            ValueError,
+            expected_regex="2 values are missing in covariates cov_0. Correct your covariates or use the cov_missing_value parameters",
+        ):
+            pycombat_seq(self.y, self.batch, covar_mod=["a", np.nan, "a", np.nan, "a"])
+
+        ref = pycombat_seq(self.y, self.batch, covar_mod=["a", "b", "a", "c", "a"])
+        with self.assertLogs("inmoose", level="WARNING") as logChecker:
+            res = pycombat_seq(
+                self.y,
+                self.batch,
+                covar_mod=[1, 2, 1, np.nan, 1],
+                na_cov_action="fill",
+            )
+        self.assertRegex(
+            logChecker.output[0],
+            r"1 missing covariates in covar_mod. Creating a distinct covariate per batch for the missing values. You may want to double check your covariates.",
+        )
+        self.assertTrue(np.array_equal(res, ref))
+
+        with self.assertLogs("inmoose", level="WARNING") as logChecker:
+            res = pycombat_seq(
+                self.y,
+                self.batch,
+                covar_mod=[1, 2, 1, np.nan, 1],
+                na_cov_action="remove",
+            )
+        self.assertRegex(
+            logChecker.output[0],
+            r"1 samples with missing covariates in covar_mod. They are removed from the data. You may want to double check your covariates.",
+        )
+
+        # with count as dataframe
+        # check remove option warning message
+        with self.assertLogs("inmoose", level="WARNING") as logChecker:
+            res = pycombat_seq(
+                pd.DataFrame(
+                    self.y,
+                    columns=["sample1", "sample2", "sample3", "sample4", "sample5"],
+                ),
+                self.batch,
+                covar_mod=pd.DataFrame(
+                    [1, 2, 1, np.nan, 1],
+                    columns=["test"],
+                    index=["sample1", "sample2", "sample3", "sample4", "sample5"],
+                ),
+                na_cov_action="remove",
+            )
+        self.assertRegex(
+            logChecker.output[0],
+            r"1 samples with missing covariates in covar_mod. They are removed from the data. You may want to double check your covariates.",
+        )
+
+        # check remove option results
+        res2 = pycombat_seq(
+            pd.DataFrame(
+                self.y[:, [0, 1, 2, 4]],
+                columns=["sample1", "sample2", "sample3", "sample5"],
+            ),
+            np.array([1, 1, 2, 2]),
+            covar_mod=pd.DataFrame(
+                [1, 2, 1, 1],
+                columns=["test"],
+                index=["sample1", "sample2", "sample3", "sample5"],
+            ),
+        )
+        pd.testing.assert_frame_equal(res, res2)
+
+        # check confounded covariates
+        with self.assertRaisesRegex(
+            ValueError,
+            expected_regex=r"Covariates test are confounded with the batches. Please review your covariates before proceeding with batch effect correction.",
+        ):
+            pycombat_seq(
+                self.y,
+                self.batch,
+                covar_mod=pd.DataFrame(
+                    [2, 2, 1, 1, 1],
+                    columns=["test"],
+                ),
+            )
+
+        with self.assertRaisesRegex(
+            ValueError,
+            expected_regex=r"Covariates cov_0 are confounded with the batches. Please review your covariates before proceeding with batch effect correction.",
+        ):
+            pycombat_seq(
+                self.y,
+                self.batch,
+                covar_mod=pd.DataFrame(
+                    [2, 2, 1, 1, 1],
+                ),
+            )
+
+        with self.assertLogs("inmoose", level="WARNING") as logChecker:
+            res = pycombat_seq(
+                self.y,
+                self.batch,
+                covar_mod=pd.DataFrame(["a", "b", "a", np.nan, "a"], columns=["test"]),
+                na_cov_action="remove",
+            )
+        self.assertRegex(
+            logChecker.output[0],
+            r"1 samples with missing covariates in covar_mod. They are removed from the data. You may want to double check your covariates.",
+        )
+
+        ref_y = np.delete(self.y, (3), axis=1)
+        ref_batch = np.array([1, 1, 2, 2])
+        ref = pycombat_seq(ref_y, ref_batch, covar_mod=[1, 2, 1, 1])
+        self.assertTrue(np.array_equal(res, ref))
+
+        # test error/warning message for data type of covariates
+        with self.assertRaisesRegex(
+            ValueError,
+            expected_regex=r"Cannot create new categories for numerical covariates cov_0. Please fix the NA in those covariates manually.",
+        ):
+            pycombat_seq(
+                self.y,
+                self.batch,
+                covar_mod=[1, 2.9, 1, 1, np.nan],
+                na_cov_action="fill",
+            )
+
+    def test_pycombat_anndata(self):
+        ad = AnnData(
+            self.y.T,
+            obs=pd.DataFrame({"batch": self.batch}),
+        )
+        res = pycombat_seq(ad, batch="batch")
+        ref = pycombat_seq(self.y, self.batch)
+        self.assertTrue(np.allclose(res.X.T, ref))
+        with self.assertRaisesRegex(
+            ValueError, 'the batch column "foo" must appear in'
+        ):
+            pycombat_seq(ad, batch="foo")