[ea0fd6]: / inmoose / utils / splines.py

Download this file

208 lines (179 with data), 8.4 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
# -----------------------------------------------------------------------------
# Copyright (C) 2024 Maximilien Colange
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# -----------------------------------------------------------------------------
# original code was contributed to StackOverflow: https://stackoverflow.com/questions/71550468/does-python-have-an-analogue-to-rs-splinesns
# code was improved to better match the R code
# As per StackOverflow terms of service (see https://stackoverflow.com/help/licensing), original code is licensed under CC BY SA 4.0, which is compatible with GPL3 (see https://creativecommons.org/2015/10/08/cc-by-sa-4-0-now-one-way-compatible-with-gplv3/)
import numpy as np
from scipy.interpolate import splev
from ..utils import LOGGER
def spline_design(knots, x, order, derivs=0):
"""
Evaluate the design matrix for the B-splines defined by :code:`knots` at the values in :code:`x`.
Arguments
---------
knots : array_like
vector of knot positions (which will be sorted increasingly if needed).
x : array_like
vector of values at which to evaluate the B-spline functions or
derivatives. The values in x must be between the “inner” knots
:code:`knots[ord]` and :code:`knots[ length(knots) - (ord-1)]`
order : int
a positive integer giving the order of the spline function. This is the
number of coefficients in each piecewise polynomial segment, thus a
cubic spline has order 4.
derivs : array_like
an integer vector with values between 0 and :code:`ord` - 1,
conceptually recycled to the length of :code:`x`. The derivative of the
given order is evaluated at the :code:`x` positions. Defaults to zero.
Returns
-------
ndarray
a matrix with :code:`len(x)` rows and :code:`len(knots) - ord` columns.
The :code:`i`'th row of the matrix contains the coefficients of the
B-splines (or the indicated derivative of the B-splines) defined by the
knot vector and evaluated at the :code:`i`'th value of :code:`x`. Each
B-spline is defined by a set of :code:`ord` successive knots so the
total number of B-splines is :code:`len(knots) - ord`.
"""
derivs = np.asarray(derivs)
if derivs.ndim == 0:
der = np.repeat(derivs, len(x))
else:
der = np.zeros(len(x), dtype=int)
der[: len(derivs)] = derivs
n_bases = len(knots) - order
res = np.empty((len(x), n_bases), dtype=float)
for i in range(n_bases):
coefs = np.zeros((n_bases,))
coefs[i] = 1
for j in range(len(x)):
res[j, i] = splev(x, (knots, coefs, order - 1), der=der[j])[j]
return res
class ns:
"""
Class storing the B-spline basis matrix for a natural cubic spline and info used to generate it.
Attributes
----------
knots : array_like
breakpoints that define the spline.
include_intercept : bool
whether an intercept is included in the basis
boundary_knots : array_like
boundary points at which to impose the natural boundary conditions and
anchor the B-spline basis.
basis : ndarray
a matrix of dimension :code:`(len(x), df)`, where :code:`df =
len(knots)-1-intercept` if :code:`df` was not supplied.
"""
def __init__(
self, x, df=None, knots=None, boundary_knots=None, include_intercept=False
):
"""
Generate the B-spline basis matrix for a natural cubic spline.
This function intends to provide the same functionality as R splines::ns.
Arguments
---------
x : array_like
the predictor variable
df : int, optional
degrees of freedom. If :code:`knots` is not specified, then the
function chooses :code:`df - 1 - intercept` knots at suitably chosen
quantiles of :code:`x`. If :code:`None`, the number of inner knots is
set to :code:`len(knots)`.
knots : array_like, optional
breakpoints that define the spline. The default is no knots; together
with the natural boundary conditions this results in a basis for linear
regression on :code:`x`. Typical values are the mean or median for one
knot, quantiles for more knots. See also :code:`boundary_knots`.
include_intercept : bool, optional
if :code:`True`, an intercept is included in the basis; default is
:code:`False`.
boundary_knots : array_like, optional
boundary points at which to impose the natural boundary conditions and
anchor the B-spline basis (default the range of the data). If both
:code:`knots` and :code:`boundary_knots` are supplied, the basis
parameters do not depend on :code:`x`. Data can extend beyond
:code:`boundary_knots`.
Returns
-------
ndarray
a matrix of dimension :code:`(len(x), df)`, where :code:`df =
len(knots)-1-intercept` if :code:`df` was not supplied.
"""
self.include_intercept = include_intercept
x = np.asarray(x)
if boundary_knots is None:
boundary_knots = [np.min(x), np.max(x)]
outside = False
else:
boundary_knots = list(np.sort(boundary_knots))
out_left = x < boundary_knots[0]
out_right = x > boundary_knots[1]
outside = out_left | out_right
self.boundary_knots = boundary_knots
if df is not None and knots is None:
nIknots = df - 1 - include_intercept
if nIknots < 0:
nIknots = 0
LOGGER.warning("df was too small, used {1+include_intercept}")
if nIknots > 0:
knots = np.linspace(0, 1, num=nIknots + 2)[1:-1]
knots = np.quantile(x, knots)
else:
nIknots = len(knots)
self.knots = knots
Aknots = np.sort(np.concatenate((boundary_knots * 4, knots)))
if np.any(outside):
basis = np.empty((x.shape[0], nIknots + 4), dtype=float)
if np.any(out_left):
k_pivot = boundary_knots[0]
xl = np.ones((np.sum(out_left), 2))
xl[:, 1] = x[out_left] - k_pivot
tt = spline_design(Aknots, [k_pivot, k_pivot], 4, [0, 1])
basis[out_left, :] = xl @ tt
if np.any(out_right):
k_pivot = boundary_knots[1]
xr = np.ones((np.sum(out_right), 2))
xr[:, 1] = x[out_right] - k_pivot
tt = spline_design(Aknots, [k_pivot, k_pivot], 4, [0, 1])
basis[out_right, :] = xr @ tt
inside = ~outside
if np.any(inside):
basis[inside, :] = spline_design(Aknots, x[inside], 4)
else:
basis = spline_design(Aknots, x, 4)
const = spline_design(Aknots, boundary_knots, 4, [2, 2])
if include_intercept is False:
basis = basis[:, 1:]
const = const[:, 1:]
qr_const = np.linalg.qr(const.T, mode="complete")[0]
self.basis = (qr_const.T @ basis.T).T[:, 2:]
def predict(self, newx):
"""
Evaluate the spline basis at given values
Arguments
---------
newx : ndarray
new predictor variable to regenerate the spline from
Returns
-------
ns
a new natural spline object, evaluated at the given values
"""
return ns(
newx,
knots=self.knots,
boundary_knots=self.boundary_knots,
include_intercept=self.include_intercept,
)