From c096c5c0c9f1e4bbed852faf3749d05299fbd087 Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Mon, 6 Nov 2023 08:47:53 +0100 Subject: [PATCH] Update for numpy 2.0 deprecations (#13103) - Replace `np.trapz` with vendored `trapezoid` from scipy - Replace `np.float_` with `np.float64` --- licenses/3rd_party_licenses.txt | 42 ++++++++++ spacy/scorer.py | 138 +++++++++++++++++++++++++++++++- spacy/tokens/doc.pyi | 4 +- 3 files changed, 180 insertions(+), 4 deletions(-) diff --git a/licenses/3rd_party_licenses.txt b/licenses/3rd_party_licenses.txt index 851e09585..9b037a496 100644 --- a/licenses/3rd_party_licenses.txt +++ b/licenses/3rd_party_licenses.txt @@ -158,3 +158,45 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +SciPy +----- + +* Files: scorer.py + +The implementation of trapezoid() is adapted from SciPy, which is distributed +under the following license: + +New BSD License + +Copyright (c) 2001-2002 Enthought, Inc. 2003-2023, SciPy Developers. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/spacy/scorer.py b/spacy/scorer.py index 48d9f03ab..9ab116deb 100644 --- a/spacy/scorer.py +++ b/spacy/scorer.py @@ -802,6 +802,140 @@ def get_ner_prf(examples: Iterable[Example], **kwargs) -> Dict[str, Any]: } +# The following implementation of trapezoid() is adapted from SciPy, +# which is distributed under the New BSD License. +# Copyright (c) 2001-2002 Enthought, Inc. 2003-2023, SciPy Developers. +# See licenses/3rd_party_licenses.txt +def trapezoid(y, x=None, dx=1.0, axis=-1): + r""" + Integrate along the given axis using the composite trapezoidal rule. + + If `x` is provided, the integration happens in sequence along its + elements - they are not sorted. + + Integrate `y` (`x`) along each 1d slice on the given axis, compute + :math:`\int y(x) dx`. + When `x` is specified, this integrates along the parametric curve, + computing :math:`\int_t y(t) dt = + \int_t y(t) \left.\frac{dx}{dt}\right|_{x=x(t)} dt`. + + Parameters + ---------- + y : array_like + Input array to integrate. + x : array_like, optional + The sample points corresponding to the `y` values. If `x` is None, + the sample points are assumed to be evenly spaced `dx` apart. The + default is None. + dx : scalar, optional + The spacing between sample points when `x` is None. The default is 1. + axis : int, optional + The axis along which to integrate. + + Returns + ------- + trapezoid : float or ndarray + Definite integral of `y` = n-dimensional array as approximated along + a single axis by the trapezoidal rule. If `y` is a 1-dimensional array, + then the result is a float. If `n` is greater than 1, then the result + is an `n`-1 dimensional array. + + See Also + -------- + cumulative_trapezoid, simpson, romb + + Notes + ----- + Image [2]_ illustrates trapezoidal rule -- y-axis locations of points + will be taken from `y` array, by default x-axis distances between + points will be 1.0, alternatively they can be provided with `x` array + or with `dx` scalar. Return value will be equal to combined area under + the red lines. + + References + ---------- + .. [1] Wikipedia page: https://en.wikipedia.org/wiki/Trapezoidal_rule + + .. [2] Illustration image: + https://en.wikipedia.org/wiki/File:Composite_trapezoidal_rule_illustration.png + + Examples + -------- + Use the trapezoidal rule on evenly spaced points: + + >>> import numpy as np + >>> from scipy import integrate + >>> integrate.trapezoid([1, 2, 3]) + 4.0 + + The spacing between sample points can be selected by either the + ``x`` or ``dx`` arguments: + + >>> integrate.trapezoid([1, 2, 3], x=[4, 6, 8]) + 8.0 + >>> integrate.trapezoid([1, 2, 3], dx=2) + 8.0 + + Using a decreasing ``x`` corresponds to integrating in reverse: + + >>> integrate.trapezoid([1, 2, 3], x=[8, 6, 4]) + -8.0 + + More generally ``x`` is used to integrate along a parametric curve. We can + estimate the integral :math:`\int_0^1 x^2 = 1/3` using: + + >>> x = np.linspace(0, 1, num=50) + >>> y = x**2 + >>> integrate.trapezoid(y, x) + 0.33340274885464394 + + Or estimate the area of a circle, noting we repeat the sample which closes + the curve: + + >>> theta = np.linspace(0, 2 * np.pi, num=1000, endpoint=True) + >>> integrate.trapezoid(np.cos(theta), x=np.sin(theta)) + 3.141571941375841 + + ``trapezoid`` can be applied along a specified axis to do multiple + computations in one call: + + >>> a = np.arange(6).reshape(2, 3) + >>> a + array([[0, 1, 2], + [3, 4, 5]]) + >>> integrate.trapezoid(a, axis=0) + array([1.5, 2.5, 3.5]) + >>> integrate.trapezoid(a, axis=1) + array([2., 8.]) + """ + y = np.asanyarray(y) + if x is None: + d = dx + else: + x = np.asanyarray(x) + if x.ndim == 1: + d = np.diff(x) + # reshape to correct shape + shape = [1] * y.ndim + shape[axis] = d.shape[0] + d = d.reshape(shape) + else: + d = np.diff(x, axis=axis) + nd = y.ndim + slice1 = [slice(None)] * nd + slice2 = [slice(None)] * nd + slice1[axis] = slice(1, None) + slice2[axis] = slice(None, -1) + try: + ret = (d * (y[tuple(slice1)] + y[tuple(slice2)]) / 2.0).sum(axis) + except ValueError: + # Operations didn't work, cast to ndarray + d = np.asarray(d) + y = np.asarray(y) + ret = np.add.reduce(d * (y[tuple(slice1)] + y[tuple(slice2)]) / 2.0, axis) + return ret + + # The following implementation of roc_auc_score() is adapted from # scikit-learn, which is distributed under the New BSD License. # Copyright (c) 2007–2019 The scikit-learn developers. @@ -1024,9 +1158,9 @@ def _auc(x, y): else: raise ValueError(Errors.E164.format(x=x)) - area = direction * np.trapz(y, x) + area = direction * trapezoid(y, x) if isinstance(area, np.memmap): - # Reductions such as .sum used internally in np.trapz do not return a + # Reductions such as .sum used internally in trapezoid do not return a # scalar by default for numpy.memmap instances contrary to # regular numpy.ndarray instances. area = area.dtype.type(area) diff --git a/spacy/tokens/doc.pyi b/spacy/tokens/doc.pyi index 55222f8aa..365859d89 100644 --- a/spacy/tokens/doc.pyi +++ b/spacy/tokens/doc.pyi @@ -42,7 +42,7 @@ class Doc: user_hooks: Dict[str, Callable[..., Any]] user_token_hooks: Dict[str, Callable[..., Any]] user_span_hooks: Dict[str, Callable[..., Any]] - tensor: np.ndarray[Any, np.dtype[np.float_]] + tensor: np.ndarray[Any, np.dtype[np.float64]] user_data: Dict[str, Any] has_unknown_spaces: bool _context: Any @@ -166,7 +166,7 @@ class Doc: ) -> Doc: ... def to_array( self, py_attr_ids: Union[int, str, List[Union[int, str]]] - ) -> np.ndarray[Any, np.dtype[np.float_]]: ... + ) -> np.ndarray[Any, np.dtype[np.float64]]: ... @staticmethod def from_docs( docs: List[Doc],