--- a
+++ b/inmoose/deseq2/replicates.py
@@ -0,0 +1,105 @@
+# -----------------------------------------------------------------------------
+# Copyright (C) 2013-2022 Michael I. Love, Constantin Ahlmann-Eltze
+# Copyright (C) 2023 Maximilien Colange
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+# -----------------------------------------------------------------------------
+
+# This file is based on the file 'R/core.R' of the Bioconductor DESeq2 package
+# (version 3.16).
+
+
+import numpy as np
+
+from ..utils import Factor
+
+
+def collapseReplicates(obj, groupby, run, renameCols=True):
+    """
+    Collapse technical replicates in an AnnData or DESeqDataSet
+
+    Collapse the samples (rows) in `obj` by summing within levels
+    of a grouping factor `groupby`. The purpose of this function
+    if to sum up read counts from technical replicates to create an object
+    with a single row of read counts for each sample.
+    Note: by "technical replicates", we mean multiple sequencing runs of the same
+    library, in contrast to "biological replicates" in which multiple
+    libraries are prepared from separate biological units.
+    Optionally renames the columns of returned object with the levels of the
+    grouping factor.
+
+    Arguments
+    ---------
+    obj : AnnData or DESeqDataSet
+    groupby : Factor
+        a grouping factor, as long as the rows of obj
+    run : ??, optional
+        the names of each unique row in obj
+        if provided, a new row "runsCollapsed" will be added to obj.obs
+        which pastes together the names of `run`
+    renameCols : bool, optional
+        whether to rename the rows of the returned object using the levels
+        of the grouping factor
+
+    Returns
+    -------
+    AnnData or DESeqDataSet
+        an object with as many rows as levels in `groupby`.
+        This object has count data which is summed from the various rows which
+        are grouped together, and its .obs is subset using the first row
+        for each group in `groupby`.
+
+    Examples
+    --------
+    >>> dds = makeExampleDESeqDataSet(m=12)
+    >>> # make data with two technical replicates for three samples
+    >>> dds.obs["sample"] = Factor(np.repeat(np.arange(1,10), [2,1,1,2,1,1,2,1,1]))
+    >>> dds.obs["run"] = [f"run{i}" for i in range(12)]
+    >>> ddsColl = collapseReplicates(dds, dds.obs["sample"], dds.obs["run"])
+    >>> # examine the clinical data and rows names of the collapsed data
+    >>> ddsColl.obs
+    ???
+    >>> ddsColl.index
+    ???
+    >>> # check that the sum of the counts for "sample0" is the same
+    ... # as the counts in the "sample0" row in ddsColl
+    >>> matchFirstLevel = dds.obs["sample"] == dds.obs["sample"].categories[0]
+    >>> np.all(np.sum(dds[matchFirstLevel,:].counts(), axis=0) == ddsColl[0,:].counts())
+    True
+    """
+    if not isinstance(groupby, Factor):
+        groupby = Factor(groupby)
+    groupby = groupby.droplevels()
+    if len(groupby) != obj.n_obs:
+        raise ValueError("groupby should have as many elements as observations in obj")
+
+    countdata = [
+        np.sum(obj.counts()[groupby == c, :], axis=0) for c in groupby.categories
+    ]
+    rowsToKeep = [np.nonzero(groupby == c)[0][0] for c in groupby.categories]
+    collapsed = obj[rowsToKeep, :]
+    collapsed.X = countdata
+    if run is not None:
+        if len(groupby) != len(run):
+            raise ValueError("groupby and run should have the same length")
+        collapsed.obs["runsCollapsed"] = [
+            ",".join([f"{e}" for e in run[groupby == c]]) for c in groupby.categories
+        ]
+
+    if renameCols:
+        collapsed.index = groupby.categories
+
+    assert np.sum(obj.counts()) == np.sum(collapsed.counts())
+
+    return collapsed