inmoose / Git / Diff of /inmoose/edgepy/mglmOneGroup.py

Models:
AlyssaS/
inmoose
Downloads: 1
Diff of /inmoose/edgepy/mglmOneGroup.py [000000] .. [ea0fd6]
Switch to side-by-side view

--- a
+++ b/inmoose/edgepy/mglmOneGroup.py
@@ -0,0 +1,122 @@
+# -----------------------------------------------------------------------------
+# Copyright (C) 2008-2022 Yunshun Chen, Aaron TL Lun, Davis J McCarthy, Matthew E Ritchie, Belinda Phipson, Yifang Hu, Xiaobei Zhou, Mark D Robinson, Gordon K Smyth
+# Copyright (C) 2022-2023 Maximilien Colange
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+# -----------------------------------------------------------------------------
+
+# This file is based on the file 'R/mglmOneGroup.R' of the Bioconductor edgeR package (version 3.38.4).
+
+
+import numpy as np
+
+from ..utils import LOGGER
+from .glm_one_group import fit_one_group
+from .makeCompressedMatrix import (
+    _compressDispersions,
+    _compressOffsets,
+    _compressWeights,
+)
+from .utils import _isAllZero
+
+
+def mglmOneGroup(
+    y,
+    dispersion=0,
+    offset=0,
+    weights=None,
+    coef_start=None,
+    maxit=50,
+    tol=1e-10,
+    verbose=False,
+):
+    """
+    Fit single-group negative-binomial GLMs genewise.
+
+    This is a low-level work-horse used by higher-level functions, especially
+    :func:`glmFit`. Most users will not need to call this function directly.
+
+    This function fits a negative binomial GLM to each row of :code:`y`. The
+    row-wise GLMs all have the same design matrix but possibly different
+    dispersions, offsets and weights. It is low-level in that it
+    operates on atomic objects (matrices and vectors).
+
+    This function fits an intercept only model to each response vector. In other
+    words, it treats all the libraries as belonging to one group. It implements
+    Fisher scoring with a score-statistic stopping criterion for each gene.  It
+    treats the dispersion parameter of the negative binomial distribution as a
+    known input.  Excellent starting values are available for the null model so
+    this function seldom has any problems with convergence. It is used by other
+    functions to compute the overall abundance for each gene.
+
+    Arguments
+    ---------
+    y : array_like
+        matrix of negative binomial counts. Rows for genes and columns for
+        libraries.
+    dispersion : float or array_like
+        scalar or vector giving the dispersion parameter for each GLM. Can be a
+        scalar giving one value for all genes, or a vector of length equal to
+        the number of genes giving genewise dispersions.
+    offset : array_like
+        vector or matrix giving the offset that is to be included in the log
+        linear model predictor. Can be a scalar, a vector of length equal to the
+        number of libraries, or a matrix of the same shape as :code:`y`.
+    weights : matrix, optional
+        vector or matrix of non-negative quantitative weights. Can be a vector
+        of length equal to the number of libraries, or a matrix of the same
+        shape as :code:`y`.
+    coef_start : array_like, optional
+        matrix of starting values for the linear model coefficient. Number of
+        rows should agree with :code:`y` and a single column. This argument does
+        not usually need to be set as the automatic starting values perform well.
+    maxit : int
+        the maximum number of iterations for the Fisher scoring algorithm. The
+        iteration will be stopped when this limit is reached even if the
+        convergence criterion has not been satisfied.
+    tol : float
+        the convergence tolerance. Convergence if judged successful when the
+        step size falls below :code:`tol` in absolute size.
+
+    Returns
+    -------
+    ndarray
+        vector of coefficients
+    """
+    # Check y
+    # TODO check that y is a matrix and numeric
+    _isAllZero(y)
+
+    # Check dispersion
+    dispersion = _compressDispersions(y, dispersion)
+
+    # Check offset
+    offset = _compressOffsets(y, offset=offset)
+
+    # Check starting values
+    if coef_start is None:
+        coef_start = np.nan
+    coef_start = np.full((y.shape[0],), coef_start, dtype="double")
+
+    # Check weights
+    weights = _compressWeights(y, weights)
+
+    # Fisher scoring iteration
+    output = fit_one_group(y, offset, dispersion, weights, maxit, tol, coef_start)
+
+    # Convergence achieved for all tags?
+    if np.count_nonzero(output[1]) > 0:
+        LOGGER.debug(f"max iterations exceeded for {np.count_nonzero(output[1])} tags")
+
+    return output[0]