Switch to unified view

a b/inmoose/edgepy/exactTestByDeviance.py
1
# -----------------------------------------------------------------------------
2
# Copyright (C) 2008-2022 Yunshun Chen, Aaron TL Lun, Davis J McCarthy, Matthew E Ritchie, Belinda Phipson, Yifang Hu, Xiaobei Zhou, Mark D Robinson, Gordon K Smyth
3
# Copyright (C) 2024 Maximilien Colange
4
5
# This program is free software: you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License as published by
7
# the Free Software Foundation, either version 3 of the License, or
8
# (at your option) any later version.
9
10
# This program is distributed in the hope that it will be useful,
11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
# GNU General Public License for more details.
14
15
# You should have received a copy of the GNU General Public License
16
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
17
# -----------------------------------------------------------------------------
18
19
# This file is based on the file 'R/exactTestByDeviance.R' of the Bioconductor edgeR package (version 3.38.4).
20
21
import numpy as np
22
from scipy.stats import nbinom
23
24
from .binomTest import binomTest
25
from .edgepy_cpp import compute_unit_nb_deviance
26
from .exactTestDoubleTail import exactTestDoubleTail
27
28
29
def exactTestByDeviance(y1, y2, dispersion=0.0):
30
    """
31
    Compute genewise *p*-values for differences in the means between two groups of negative-binomially distributed counts.
32
33
    This function uses the deviance goodness of fit statistics to define the
34
    rejection region, and is therefore equivalent to a conditional likelihood
35
    ratio test.
36
37
    See also
38
    --------
39
    exactTest
40
41
    Arguments
42
    ---------
43
    y1 : matrix
44
        matrix of counts for the first of the two experimental groups to be
45
        tested for differences. Rows correspond to genes and columns to
46
        libraries. Libraries are assumed to be equal in size -- *e.g.* adjusted
47
        pseudocounts from the output of :func:`equalizeLibSizes`.
48
    y2 : matrix
49
        matrix of counts for the second of the two experimental groups to be
50
        tested for differences. Rows correspond to genes and columns to
51
        libraries. Libraries are assumed to be equal in size -- *e.g.* adjusted
52
        pseudocounts from the output of :func:`equalizeLibSizes`.
53
    dispersion : array_like of floats
54
        an array of dispersions, either of length one or of length equal to the
55
        number of genes.
56
57
    Returns
58
    -------
59
    ndarray
60
        array of genewise *p*-values, one for each row of :code:`y1` and :code:`y2`
61
    """
62
    y1 = np.asarray(y1)
63
    y2 = np.asarray(y2)
64
    if y1.shape[0] != y2.shape[0]:
65
        raise ValueError("Number of rows of y1 not equal to number of rows of y2")
66
    ntags = y1.shape[0]
67
    if np.isnan(y1).any() or np.isnan(y2).any():
68
        raise ValueError("NAs not allowed")
69
    n1 = y1.shape[1]
70
    n2 = y2.shape[1]
71
72
    if n1 == n2:
73
        return exactTestDoubleTail(y1=y1, y2=y2, dispersion=dispersion)
74
75
    dispersion = np.asarray(dispersion)
76
    sum1 = np.round(y1.sum(axis=1))
77
    sum2 = np.round(y2.sum(axis=1))
78
    if (dispersion == 0).all():
79
        return binomTest(sum1, sum2, p=n1 / (n1 + n2))
80
    if (dispersion == 0).any():
81
        raise ValueError("dispersion must be either all zero or all positive")
82
    dispersion = np.broadcast_to(dispersion, (ntags,))
83
84
    pvals = np.zeros(ntags)
85
    if ntags == 0:
86
        return pvals
87
88
    # Eliminate all zero rows
89
    all_zeros = (sum1 == 0) & (sum2 == 0)
90
    if all_zeros.any():
91
        pvals[~all_zeros] = exactTestByDeviance(
92
            y1=y1[~all_zeros, :],
93
            y2=y2[~all_zeros, :],
94
            dispersion=dispersion[~all_zeros],
95
        )
96
        pvals[all_zeros] = 1
97
        return pvals
98
99
    # The code below was originally written in C++
100
    nlibs = n1 + n2
101
    stotal = sum1 + sum2
102
    mu = stotal / nlibs
103
    mu1 = mu * n1
104
    mu2 = mu * n2
105
    r1 = n1 / dispersion
106
    r2 = n2 / dispersion
107
    p = r1 / (r1 + mu1)
108
109
    # The aim is to sum conditional probabilities for all partitions of the
110
    # total sum with deviances greater than that observed for the current
111
    # partition. We start computing from the extremes in both cases
112
    phi1 = 1 / r1
113
    phi2 = 1 / r2
114
115
    for i in range(ntags):
116
        obsdev = compute_unit_nb_deviance(
117
            sum1[i], mu1[i], phi1[i]
118
        ) + compute_unit_nb_deviance(sum2[i], mu2[i], phi2[i])
119
120
        # Going from the left
121
        for j in range(int(stotal[i]) + 1):
122
            if obsdev <= compute_unit_nb_deviance(
123
                j, mu1[i], phi1[i]
124
            ) + compute_unit_nb_deviance(stotal[i] - j, mu2[i], phi2[i]):
125
                pvals[i] += nbinom.pmf(j, r1[i], p[i]) * nbinom.pmf(
126
                    stotal[i] - j, r2[i], p[i]
127
                )
128
            else:
129
                break
130
131
        # Going from the right, or what's left of it
132
        for k in range(int(stotal[i]) - j + 1):
133
            if obsdev <= compute_unit_nb_deviance(
134
                k, mu2[i], phi2[i]
135
            ) + compute_unit_nb_deviance(stotal[i] - k, mu1[i], phi1[i]):
136
                pvals[i] += nbinom.pmf(k, r2[i], p[i]) * nbinom.pmf(
137
                    stotal[i] - k, r1[i], p[i]
138
                )
139
            else:
140
                break
141
142
    totalr = r1 + r2
143
    pvals /= nbinom.pmf(stotal, totalr, totalr / (totalr + mu1 + mu2))
144
145
    return np.minimum(pvals, 1)