inmoose / Git / [ea0fd6] /inmoose/limma/squeezeVar.py

Models:
AlyssaS/
inmoose
Downloads: 1
[ea0fd6]: / inmoose / limma / squeezeVar.py
History
Download this file
151 lines (119 with data), 6.0 kB

# -----------------------------------------------------------------------------
# Copyright (C) 2004-2022 Gordon Smyth, Yifang Hu, Matthew Ritchie, Jeremy Silver, James Wettenhall, Davis McCarthy, Di Wu, Wei Shi, Belinda Phipson, Aaron Lun, Natalie Thorne, Alicia Oshlack, Carolyn de Graaf, Yunshun Chen, Mette Langaas, Egil Ferkingstad, Marcus Davy, Francois Pepin, Dongseok Choi
# Copyright (C) 2024 Maximilien Colange

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
# -----------------------------------------------------------------------------

# This file is based on the file 'R/squeezeVar.R' of the Bioconductor limma package (version 3.55.1).


import numpy as np

from .fitFDist import fitFDist


def squeezeVar(var, df, covariate=None, robust=False, winsor_tail_p=(0.05, 0.1)):
    """
    Squeeze a set of sample variances together by computing empirical Bayes posterior means
    This function implements empirical Bayes algorithms proposed by
    [Smyth2004]_ and [Phipson2016]_.

    A conjugate Bayesian hierarchical model is assumed for a set of sample
    variances. The hyperparameters are estimated by fitting a scaled
    F-distribution to the sample variances. The function returns the posterior
    variances and the estimated hyperparameters.

    Specifically, the sample variances :code:`var` are assumed to follow scaled
    chi-squared distributions, conditional on the true variances, and a scaled
    inverse chi-squared prior is assumed for the true variances.  The scale and
    degrees of freedom of this prior distribution are estimated from the values
    of :code:`var`.

    The effect of this function is to squeeze the variances towards a common
    value, or to a global trend if a :code:`covariate` is provided. The
    squeezed variances have a smaller expected mean square error to the true
    variances than do the sample variances themselves.

    If :code:`covariate` is not :code:`None`, then the scale parameter of the
    prior distribution is assumed to depend on the covariate. If the covariate
    is average log-expression, then the effect is an intensity-dependent trend
    similar to that in [Sartor2006]_.

    :code:`robust=True` implements the robust empirical Bayes procedure of
    [Phipson2016]_ which allows some of the :code:`var` values to be outliers.

    Arguments
    ---------
    var : array_like
        1-D array of independent sample variances
    df : array_like
        1-D array of degrees of freedom for the sample variances
    covariate : ??
        if not :code:`None`, :code:`var_prior` will depend on this numeric
        covariate. Otherwise, :code:`var_prior` is constant.
    robust : bool
        whether the estimation of :code:`df_prior` and :code:`var_prior` be
        robustified against outlier sample variances
    winsor_tail_p : float or pair of floats
        left and right tail proportions of :code:`x` to Winsorize. Only used
        when :code:`robust=True`

    Returns
    -------
    dict
        a dictionnary with keys:

        - :code:`"var_post"`, 1-D array of posterior variances. Same length as
          :code:`var`.
        - :code:`"var_prior"`, location or scale of prior distribution. 1-D
          array of same length as :code:`var` if :code:`covariate` is not
          :code:`None`, otherwise a single value.
        - :code:`"df_prior"`, degrees of freedom of prior distribution. 1-D
          array of same length as :code:`var` if :code:`robust=True`, otherwise
          a single value.
    """
    n = len(var)

    # Degenerate special cases
    if n == 0:
        raise ValueError("var is empty")
    if n == 1:
        return {"var_post": var, "var_prior": var, "df_prior": 0}

    # When df==0, guard against missing or infinite values in var
    df = np.broadcast_to(df, var.shape)
    var[df == 0] = 0

    # Estimate hyperparameters
    if robust:
        raise NotImplementedError("Robust estimation in squeezeVar is not implemented")
    else:
        fit = fitFDist(var, df1=df, covariate=covariate)
        df_prior = fit["df2"]
    if np.isnan(df_prior).any():
        raise RuntimeError("Could not estimate prior df")

    # Posterior variances
    var_post = _squeezeVar(var=var, df=df, var_prior=fit["scale"], df_prior=df_prior)

    return {"var_post": var_post, "var_prior": fit["scale"], "df_prior": df_prior}


def _squeezeVar(var, df, var_prior, df_prior):
    """
    Squeeze posterior variances given hyperparameters

    NAs not allowed in :code:`df_prior`

    Arguments
    ---------
    var : array_like
        1-D array of independent sample variances
    df : array_like
        1-D array of degrees of freedom for the sample variances
    var_prior : array_like
        array of prior variances
    df_prior :
        array of degrees of freedom for the prior variances

    Returns
    -------
    ndarray
        array of posterior variances
    """
    df = np.broadcast_to(df, var.shape)
    var_prior = np.broadcast_to(var_prior, var.shape)
    df_prior = np.broadcast_to(df_prior, var.shape)

    isfin = np.isfinite(df_prior)
    var_post = np.zeros(var.shape)

    # For infinite df_prior, return var_prior
    var_post[~isfin] = var_prior[~isfin]

    var_post[isfin] = (df[isfin] * var[isfin] + df_prior[isfin] * var_prior[isfin]) / (
        df[isfin] + df_prior[isfin]
    )

    return var_post