--- a +++ b/inmoose/utils/splines.py @@ -0,0 +1,207 @@ +# ----------------------------------------------------------------------------- +# Copyright (C) 2024 Maximilien Colange + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# ----------------------------------------------------------------------------- + +# original code was contributed to StackOverflow: https://stackoverflow.com/questions/71550468/does-python-have-an-analogue-to-rs-splinesns +# code was improved to better match the R code + +# As per StackOverflow terms of service (see https://stackoverflow.com/help/licensing), original code is licensed under CC BY SA 4.0, which is compatible with GPL3 (see https://creativecommons.org/2015/10/08/cc-by-sa-4-0-now-one-way-compatible-with-gplv3/) + +import numpy as np +from scipy.interpolate import splev + +from ..utils import LOGGER + + +def spline_design(knots, x, order, derivs=0): + """ + Evaluate the design matrix for the B-splines defined by :code:`knots` at the values in :code:`x`. + + Arguments + --------- + knots : array_like + vector of knot positions (which will be sorted increasingly if needed). + x : array_like + vector of values at which to evaluate the B-spline functions or + derivatives. The values in x must be between the “inner” knots + :code:`knots[ord]` and :code:`knots[ length(knots) - (ord-1)]` + order : int + a positive integer giving the order of the spline function. This is the + number of coefficients in each piecewise polynomial segment, thus a + cubic spline has order 4. + derivs : array_like + an integer vector with values between 0 and :code:`ord` - 1, + conceptually recycled to the length of :code:`x`. The derivative of the + given order is evaluated at the :code:`x` positions. Defaults to zero. + + Returns + ------- + ndarray + a matrix with :code:`len(x)` rows and :code:`len(knots) - ord` columns. + The :code:`i`'th row of the matrix contains the coefficients of the + B-splines (or the indicated derivative of the B-splines) defined by the + knot vector and evaluated at the :code:`i`'th value of :code:`x`. Each + B-spline is defined by a set of :code:`ord` successive knots so the + total number of B-splines is :code:`len(knots) - ord`. + """ + derivs = np.asarray(derivs) + if derivs.ndim == 0: + der = np.repeat(derivs, len(x)) + else: + der = np.zeros(len(x), dtype=int) + der[: len(derivs)] = derivs + n_bases = len(knots) - order + res = np.empty((len(x), n_bases), dtype=float) + for i in range(n_bases): + coefs = np.zeros((n_bases,)) + coefs[i] = 1 + for j in range(len(x)): + res[j, i] = splev(x, (knots, coefs, order - 1), der=der[j])[j] + return res + + +class ns: + """ + Class storing the B-spline basis matrix for a natural cubic spline and info used to generate it. + + Attributes + ---------- + knots : array_like + breakpoints that define the spline. + include_intercept : bool + whether an intercept is included in the basis + boundary_knots : array_like + boundary points at which to impose the natural boundary conditions and + anchor the B-spline basis. + basis : ndarray + a matrix of dimension :code:`(len(x), df)`, where :code:`df = + len(knots)-1-intercept` if :code:`df` was not supplied. + """ + + def __init__( + self, x, df=None, knots=None, boundary_knots=None, include_intercept=False + ): + """ + Generate the B-spline basis matrix for a natural cubic spline. + + This function intends to provide the same functionality as R splines::ns. + + Arguments + --------- + x : array_like + the predictor variable + df : int, optional + degrees of freedom. If :code:`knots` is not specified, then the + function chooses :code:`df - 1 - intercept` knots at suitably chosen + quantiles of :code:`x`. If :code:`None`, the number of inner knots is + set to :code:`len(knots)`. + knots : array_like, optional + breakpoints that define the spline. The default is no knots; together + with the natural boundary conditions this results in a basis for linear + regression on :code:`x`. Typical values are the mean or median for one + knot, quantiles for more knots. See also :code:`boundary_knots`. + include_intercept : bool, optional + if :code:`True`, an intercept is included in the basis; default is + :code:`False`. + boundary_knots : array_like, optional + boundary points at which to impose the natural boundary conditions and + anchor the B-spline basis (default the range of the data). If both + :code:`knots` and :code:`boundary_knots` are supplied, the basis + parameters do not depend on :code:`x`. Data can extend beyond + :code:`boundary_knots`. + + Returns + ------- + ndarray + a matrix of dimension :code:`(len(x), df)`, where :code:`df = + len(knots)-1-intercept` if :code:`df` was not supplied. + """ + self.include_intercept = include_intercept + x = np.asarray(x) + if boundary_knots is None: + boundary_knots = [np.min(x), np.max(x)] + outside = False + else: + boundary_knots = list(np.sort(boundary_knots)) + out_left = x < boundary_knots[0] + out_right = x > boundary_knots[1] + outside = out_left | out_right + self.boundary_knots = boundary_knots + + if df is not None and knots is None: + nIknots = df - 1 - include_intercept + if nIknots < 0: + nIknots = 0 + LOGGER.warning("df was too small, used {1+include_intercept}") + + if nIknots > 0: + knots = np.linspace(0, 1, num=nIknots + 2)[1:-1] + knots = np.quantile(x, knots) + else: + nIknots = len(knots) + self.knots = knots + + Aknots = np.sort(np.concatenate((boundary_knots * 4, knots))) + + if np.any(outside): + basis = np.empty((x.shape[0], nIknots + 4), dtype=float) + if np.any(out_left): + k_pivot = boundary_knots[0] + xl = np.ones((np.sum(out_left), 2)) + xl[:, 1] = x[out_left] - k_pivot + tt = spline_design(Aknots, [k_pivot, k_pivot], 4, [0, 1]) + basis[out_left, :] = xl @ tt + if np.any(out_right): + k_pivot = boundary_knots[1] + xr = np.ones((np.sum(out_right), 2)) + xr[:, 1] = x[out_right] - k_pivot + tt = spline_design(Aknots, [k_pivot, k_pivot], 4, [0, 1]) + basis[out_right, :] = xr @ tt + inside = ~outside + if np.any(inside): + basis[inside, :] = spline_design(Aknots, x[inside], 4) + else: + basis = spline_design(Aknots, x, 4) + + const = spline_design(Aknots, boundary_knots, 4, [2, 2]) + + if include_intercept is False: + basis = basis[:, 1:] + const = const[:, 1:] + + qr_const = np.linalg.qr(const.T, mode="complete")[0] + self.basis = (qr_const.T @ basis.T).T[:, 2:] + + def predict(self, newx): + """ + Evaluate the spline basis at given values + + Arguments + --------- + newx : ndarray + new predictor variable to regenerate the spline from + + Returns + ------- + ns + a new natural spline object, evaluated at the given values + """ + return ns( + newx, + knots=self.knots, + boundary_knots=self.boundary_knots, + include_intercept=self.include_intercept, + )