|
a |
|
b/inmoose/edgepy/exactTestByDeviance.py |
|
|
1 |
# ----------------------------------------------------------------------------- |
|
|
2 |
# Copyright (C) 2008-2022 Yunshun Chen, Aaron TL Lun, Davis J McCarthy, Matthew E Ritchie, Belinda Phipson, Yifang Hu, Xiaobei Zhou, Mark D Robinson, Gordon K Smyth |
|
|
3 |
# Copyright (C) 2024 Maximilien Colange |
|
|
4 |
|
|
|
5 |
# This program is free software: you can redistribute it and/or modify |
|
|
6 |
# it under the terms of the GNU General Public License as published by |
|
|
7 |
# the Free Software Foundation, either version 3 of the License, or |
|
|
8 |
# (at your option) any later version. |
|
|
9 |
|
|
|
10 |
# This program is distributed in the hope that it will be useful, |
|
|
11 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
|
12 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
|
13 |
# GNU General Public License for more details. |
|
|
14 |
|
|
|
15 |
# You should have received a copy of the GNU General Public License |
|
|
16 |
# along with this program. If not, see <https://www.gnu.org/licenses/>. |
|
|
17 |
# ----------------------------------------------------------------------------- |
|
|
18 |
|
|
|
19 |
# This file is based on the file 'R/exactTestByDeviance.R' of the Bioconductor edgeR package (version 3.38.4). |
|
|
20 |
|
|
|
21 |
import numpy as np |
|
|
22 |
from scipy.stats import nbinom |
|
|
23 |
|
|
|
24 |
from .binomTest import binomTest |
|
|
25 |
from .edgepy_cpp import compute_unit_nb_deviance |
|
|
26 |
from .exactTestDoubleTail import exactTestDoubleTail |
|
|
27 |
|
|
|
28 |
|
|
|
29 |
def exactTestByDeviance(y1, y2, dispersion=0.0): |
|
|
30 |
""" |
|
|
31 |
Compute genewise *p*-values for differences in the means between two groups of negative-binomially distributed counts. |
|
|
32 |
|
|
|
33 |
This function uses the deviance goodness of fit statistics to define the |
|
|
34 |
rejection region, and is therefore equivalent to a conditional likelihood |
|
|
35 |
ratio test. |
|
|
36 |
|
|
|
37 |
See also |
|
|
38 |
-------- |
|
|
39 |
exactTest |
|
|
40 |
|
|
|
41 |
Arguments |
|
|
42 |
--------- |
|
|
43 |
y1 : matrix |
|
|
44 |
matrix of counts for the first of the two experimental groups to be |
|
|
45 |
tested for differences. Rows correspond to genes and columns to |
|
|
46 |
libraries. Libraries are assumed to be equal in size -- *e.g.* adjusted |
|
|
47 |
pseudocounts from the output of :func:`equalizeLibSizes`. |
|
|
48 |
y2 : matrix |
|
|
49 |
matrix of counts for the second of the two experimental groups to be |
|
|
50 |
tested for differences. Rows correspond to genes and columns to |
|
|
51 |
libraries. Libraries are assumed to be equal in size -- *e.g.* adjusted |
|
|
52 |
pseudocounts from the output of :func:`equalizeLibSizes`. |
|
|
53 |
dispersion : array_like of floats |
|
|
54 |
an array of dispersions, either of length one or of length equal to the |
|
|
55 |
number of genes. |
|
|
56 |
|
|
|
57 |
Returns |
|
|
58 |
------- |
|
|
59 |
ndarray |
|
|
60 |
array of genewise *p*-values, one for each row of :code:`y1` and :code:`y2` |
|
|
61 |
""" |
|
|
62 |
y1 = np.asarray(y1) |
|
|
63 |
y2 = np.asarray(y2) |
|
|
64 |
if y1.shape[0] != y2.shape[0]: |
|
|
65 |
raise ValueError("Number of rows of y1 not equal to number of rows of y2") |
|
|
66 |
ntags = y1.shape[0] |
|
|
67 |
if np.isnan(y1).any() or np.isnan(y2).any(): |
|
|
68 |
raise ValueError("NAs not allowed") |
|
|
69 |
n1 = y1.shape[1] |
|
|
70 |
n2 = y2.shape[1] |
|
|
71 |
|
|
|
72 |
if n1 == n2: |
|
|
73 |
return exactTestDoubleTail(y1=y1, y2=y2, dispersion=dispersion) |
|
|
74 |
|
|
|
75 |
dispersion = np.asarray(dispersion) |
|
|
76 |
sum1 = np.round(y1.sum(axis=1)) |
|
|
77 |
sum2 = np.round(y2.sum(axis=1)) |
|
|
78 |
if (dispersion == 0).all(): |
|
|
79 |
return binomTest(sum1, sum2, p=n1 / (n1 + n2)) |
|
|
80 |
if (dispersion == 0).any(): |
|
|
81 |
raise ValueError("dispersion must be either all zero or all positive") |
|
|
82 |
dispersion = np.broadcast_to(dispersion, (ntags,)) |
|
|
83 |
|
|
|
84 |
pvals = np.zeros(ntags) |
|
|
85 |
if ntags == 0: |
|
|
86 |
return pvals |
|
|
87 |
|
|
|
88 |
# Eliminate all zero rows |
|
|
89 |
all_zeros = (sum1 == 0) & (sum2 == 0) |
|
|
90 |
if all_zeros.any(): |
|
|
91 |
pvals[~all_zeros] = exactTestByDeviance( |
|
|
92 |
y1=y1[~all_zeros, :], |
|
|
93 |
y2=y2[~all_zeros, :], |
|
|
94 |
dispersion=dispersion[~all_zeros], |
|
|
95 |
) |
|
|
96 |
pvals[all_zeros] = 1 |
|
|
97 |
return pvals |
|
|
98 |
|
|
|
99 |
# The code below was originally written in C++ |
|
|
100 |
nlibs = n1 + n2 |
|
|
101 |
stotal = sum1 + sum2 |
|
|
102 |
mu = stotal / nlibs |
|
|
103 |
mu1 = mu * n1 |
|
|
104 |
mu2 = mu * n2 |
|
|
105 |
r1 = n1 / dispersion |
|
|
106 |
r2 = n2 / dispersion |
|
|
107 |
p = r1 / (r1 + mu1) |
|
|
108 |
|
|
|
109 |
# The aim is to sum conditional probabilities for all partitions of the |
|
|
110 |
# total sum with deviances greater than that observed for the current |
|
|
111 |
# partition. We start computing from the extremes in both cases |
|
|
112 |
phi1 = 1 / r1 |
|
|
113 |
phi2 = 1 / r2 |
|
|
114 |
|
|
|
115 |
for i in range(ntags): |
|
|
116 |
obsdev = compute_unit_nb_deviance( |
|
|
117 |
sum1[i], mu1[i], phi1[i] |
|
|
118 |
) + compute_unit_nb_deviance(sum2[i], mu2[i], phi2[i]) |
|
|
119 |
|
|
|
120 |
# Going from the left |
|
|
121 |
for j in range(int(stotal[i]) + 1): |
|
|
122 |
if obsdev <= compute_unit_nb_deviance( |
|
|
123 |
j, mu1[i], phi1[i] |
|
|
124 |
) + compute_unit_nb_deviance(stotal[i] - j, mu2[i], phi2[i]): |
|
|
125 |
pvals[i] += nbinom.pmf(j, r1[i], p[i]) * nbinom.pmf( |
|
|
126 |
stotal[i] - j, r2[i], p[i] |
|
|
127 |
) |
|
|
128 |
else: |
|
|
129 |
break |
|
|
130 |
|
|
|
131 |
# Going from the right, or what's left of it |
|
|
132 |
for k in range(int(stotal[i]) - j + 1): |
|
|
133 |
if obsdev <= compute_unit_nb_deviance( |
|
|
134 |
k, mu2[i], phi2[i] |
|
|
135 |
) + compute_unit_nb_deviance(stotal[i] - k, mu1[i], phi1[i]): |
|
|
136 |
pvals[i] += nbinom.pmf(k, r2[i], p[i]) * nbinom.pmf( |
|
|
137 |
stotal[i] - k, r1[i], p[i] |
|
|
138 |
) |
|
|
139 |
else: |
|
|
140 |
break |
|
|
141 |
|
|
|
142 |
totalr = r1 + r2 |
|
|
143 |
pvals /= nbinom.pmf(stotal, totalr, totalr / (totalr + mu1 + mu2)) |
|
|
144 |
|
|
|
145 |
return np.minimum(pvals, 1) |