[ea0fd6]: / inmoose / edgepy / mglmOneGroup.py

Download this file

123 lines (101 with data), 4.9 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# -----------------------------------------------------------------------------
# Copyright (C) 2008-2022 Yunshun Chen, Aaron TL Lun, Davis J McCarthy, Matthew E Ritchie, Belinda Phipson, Yifang Hu, Xiaobei Zhou, Mark D Robinson, Gordon K Smyth
# Copyright (C) 2022-2023 Maximilien Colange
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# -----------------------------------------------------------------------------
# This file is based on the file 'R/mglmOneGroup.R' of the Bioconductor edgeR package (version 3.38.4).
import numpy as np
from ..utils import LOGGER
from .glm_one_group import fit_one_group
from .makeCompressedMatrix import (
_compressDispersions,
_compressOffsets,
_compressWeights,
)
from .utils import _isAllZero
def mglmOneGroup(
y,
dispersion=0,
offset=0,
weights=None,
coef_start=None,
maxit=50,
tol=1e-10,
verbose=False,
):
"""
Fit single-group negative-binomial GLMs genewise.
This is a low-level work-horse used by higher-level functions, especially
:func:`glmFit`. Most users will not need to call this function directly.
This function fits a negative binomial GLM to each row of :code:`y`. The
row-wise GLMs all have the same design matrix but possibly different
dispersions, offsets and weights. It is low-level in that it
operates on atomic objects (matrices and vectors).
This function fits an intercept only model to each response vector. In other
words, it treats all the libraries as belonging to one group. It implements
Fisher scoring with a score-statistic stopping criterion for each gene. It
treats the dispersion parameter of the negative binomial distribution as a
known input. Excellent starting values are available for the null model so
this function seldom has any problems with convergence. It is used by other
functions to compute the overall abundance for each gene.
Arguments
---------
y : array_like
matrix of negative binomial counts. Rows for genes and columns for
libraries.
dispersion : float or array_like
scalar or vector giving the dispersion parameter for each GLM. Can be a
scalar giving one value for all genes, or a vector of length equal to
the number of genes giving genewise dispersions.
offset : array_like
vector or matrix giving the offset that is to be included in the log
linear model predictor. Can be a scalar, a vector of length equal to the
number of libraries, or a matrix of the same shape as :code:`y`.
weights : matrix, optional
vector or matrix of non-negative quantitative weights. Can be a vector
of length equal to the number of libraries, or a matrix of the same
shape as :code:`y`.
coef_start : array_like, optional
matrix of starting values for the linear model coefficient. Number of
rows should agree with :code:`y` and a single column. This argument does
not usually need to be set as the automatic starting values perform well.
maxit : int
the maximum number of iterations for the Fisher scoring algorithm. The
iteration will be stopped when this limit is reached even if the
convergence criterion has not been satisfied.
tol : float
the convergence tolerance. Convergence if judged successful when the
step size falls below :code:`tol` in absolute size.
Returns
-------
ndarray
vector of coefficients
"""
# Check y
# TODO check that y is a matrix and numeric
_isAllZero(y)
# Check dispersion
dispersion = _compressDispersions(y, dispersion)
# Check offset
offset = _compressOffsets(y, offset=offset)
# Check starting values
if coef_start is None:
coef_start = np.nan
coef_start = np.full((y.shape[0],), coef_start, dtype="double")
# Check weights
weights = _compressWeights(y, weights)
# Fisher scoring iteration
output = fit_one_group(y, offset, dispersion, weights, maxit, tol, coef_start)
# Convergence achieved for all tags?
if np.count_nonzero(output[1]) > 0:
LOGGER.debug(f"max iterations exceeded for {np.count_nonzero(output[1])} tags")
return output[0]