Merge branch 'master' into docs/memory-management

2025-07-14 10:12:22 +03:00 · 2024-10-23 12:07:25 +02:00 · 2024-10-23 12:07:25 +02:00 · c0a6696cba
commit c0a6696cba
parent c3a28e6d34 15fbf5ef36
12 changed files with 82 additions and 30 deletions
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@ -12,7 +12,6 @@ on:
      - "*.md"
      - "*.mdx"
      - "website/**"
      - ".github/workflows/**"
  pull_request:
    types: [opened, synchronize, reopened, edited]
    paths-ignore:
@ -32,7 +31,7 @@ jobs:
      - name: Configure Python version
        uses: actions/setup-python@v4
        with:
-          python-version: "3.7"
+          python-version: "3.10"
      - name: black
        run: |
@ -59,18 +58,7 @@ jobs:
      fail-fast: true
      matrix:
        os: [ubuntu-latest, windows-latest, macos-latest]
-        python_version: ["3.12"]
+        python_version: ["3.9", "3.11", "3.12"]
        include:
          - os: windows-latest
            python_version: "3.7"
          - os: macos-latest
            python_version: "3.8"
          - os: ubuntu-latest
            python_version: "3.9"
          - os: windows-latest
            python_version: "3.10"
          - os: macos-latest
            python_version: "3.11"
    runs-on: ${{ matrix.os }}
@ -159,7 +147,9 @@ jobs:
      - name: "Test assemble CLI"
        run: |
          python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_sm'}; config.to_disk('ner_source_sm.cfg')"
-          PYTHONWARNINGS="error,ignore::DeprecationWarning" python -m spacy assemble ner_source_sm.cfg output_dir
+          python -m spacy assemble ner_source_sm.cfg output_dir
        env:
          PYTHONWARNINGS: "error,ignore::DeprecationWarning" 
        if: matrix.python_version == '3.9'
      - name: "Test assemble CLI vectors warning"
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -35,7 +35,7 @@ so that more people can benefit from it.
 When opening an issue, use a **descriptive title** and include your
 **environment** (operating system, Python version, spaCy version). Our
-[issue template](https://github.com/explosion/spaCy/issues/new) helps you
+[issue templates](https://github.com/explosion/spaCy/issues/new/choose) help you
 remember the most important details to include. If you've discovered a bug, you
 can also submit a [regression test](#fixing-bugs) straight away. When you're
 opening an issue to report the bug, simply refer to your pull request in the
--- a/requirements.txt
+++ b/requirements.txt
@ -3,7 +3,7 @@ spacy-legacy>=3.0.11,<3.1.0
 spacy-loggers>=1.0.0,<2.0.0
 cymem>=2.0.2,<2.1.0
 preshed>=3.0.2,<3.1.0
-thinc>=8.2.2,<8.3.0
+thinc>=8.3.0,<8.4.0
 ml_datasets>=0.2.0,<0.3.0
 murmurhash>=0.28.0,<1.1.0
 wasabi>=0.9.1,<1.2.0
--- a/setup.cfg
+++ b/setup.cfg
@ -17,8 +17,6 @@ classifiers =
    Operating System :: Microsoft :: Windows
    Programming Language :: Cython
    Programming Language :: Python :: 3
    Programming Language :: Python :: 3.7
    Programming Language :: Python :: 3.8
    Programming Language :: Python :: 3.9
    Programming Language :: Python :: 3.10
    Programming Language :: Python :: 3.11
@ -31,7 +29,7 @@ project_urls =
 [options]
 zip_safe = false
 include_package_data = true
-python_requires = >=3.7
+python_requires = >=3.9
 # NOTE: This section is superseded by pyproject.toml and will be removed in
 # spaCy v4
 setup_requires =
@ -116,7 +114,7 @@ cuda12x =
 cuda-autodetect =
    cupy-wheel>=11.0.0,<13.0.0
 apple =
-    thinc-apple-ops>=0.1.0.dev0,<1.0.0
+    thinc-apple-ops>=1.0.0,<2.0.0
 # Language tokenizers with external dependencies
 ja =
    sudachipy>=0.5.2,!=0.6.1
--- a/spacy/about.py
+++ b/spacy/about.py
@ -1,5 +1,5 @@
 # fmt: off
 __title__ = "spacy"
-__version__ = "3.8.0"
+__version__ = "3.8.2"
 __download_url__ = "https://github.com/explosion/spacy-models/releases/download"
 __compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"
--- a/spacy/lang/hr/lemma_lookup_license.txt
+++ b/spacy/lang/hr/lemma_lookup_license.txt
@ -1,5 +1,5 @@
 The list of Croatian lemmas was extracted from the reldi-tagger repository (https://github.com/clarinsi/reldi-tagger).
-Reldi-tagger is licesned under the Apache 2.0 licence.
+Reldi-tagger is licensed under the Apache 2.0 licence.
@InProceedings{ljubesic16-new,
  author = {Nikola Ljubešić and Filip Klubička and Željko Agić and Ivo-Pavao Jazbec},
--- a/spacy/language.py
+++ b/spacy/language.py
@ -9,7 +9,6 @@ from contextlib import ExitStack, contextmanager
 from copy import deepcopy
 from dataclasses import dataclass
 from itertools import chain, cycle
 import numpy
 from pathlib import Path
 from timeit import default_timer as timer
 from typing import (
@ -31,6 +30,7 @@ from typing import (
    overload,
 )
 import numpy
 import srsly
 from cymem.cymem import Pool
 from thinc.api import Config, CupyOps, Optimizer, get_current_ops
@ -2143,7 +2143,9 @@ class Language:
        serializers["tokenizer"] = lambda p: self.tokenizer.to_disk(  # type: ignore[union-attr]
            p, exclude=["vocab"]
        )
-        serializers["meta.json"] = lambda p: srsly.write_json(p, _replace_numpy_floats(self.meta))
+        serializers["meta.json"] = lambda p: srsly.write_json(
            p, _replace_numpy_floats(self.meta)
        )
        serializers["config.cfg"] = lambda p: self.config.to_disk(p)
        for name, proc in self._components:
            if name in exclude:
@ -2257,7 +2259,9 @@ class Language:
        serializers: Dict[str, Callable[[], bytes]] = {}
        serializers["vocab"] = lambda: self.vocab.to_bytes(exclude=exclude)
        serializers["tokenizer"] = lambda: self.tokenizer.to_bytes(exclude=["vocab"])  # type: ignore[union-attr]
-        serializers["meta.json"] = lambda: srsly.json_dumps(_replace_numpy_floats(self.meta))
+        serializers["meta.json"] = lambda: srsly.json_dumps(
            _replace_numpy_floats(self.meta)
        )
        serializers["config.cfg"] = lambda: self.config.to_bytes()
        for name, proc in self._components:
            if name in exclude:
@ -2309,7 +2313,9 @@ class Language:
 def _replace_numpy_floats(meta_dict: dict) -> dict:
-    return convert_recursive(lambda v: isinstance(v, numpy.floating), lambda v: float(v), dict(meta_dict))
+    return convert_recursive(
        lambda v: isinstance(v, numpy.floating), lambda v: float(v), dict(meta_dict)
    )
@dataclass
--- a/spacy/tests/training/test_pretraining.py.disabled
+++ b/spacy/tests/training/test_pretraining.py.disabled
--- a/website/docs/api/large-language-models.mdx
+++ b/website/docs/api/large-language-models.mdx
@ -1597,7 +1597,7 @@ The name of the model to be used has to be passed in via the `name` attribute.
 | Argument | Description                                                                                                                                                           |
 | -------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `name`   | The name of a mdodel supported by LangChain for this API. ~~str~~                                                                                                     |
+| `name`   | The name of a model supported by LangChain for this API. ~~str~~                                                                                                     |
 | `config` | Configuration passed on to the LangChain model. Defaults to `{}`. ~~Dict[Any, Any]~~                                                                                  |
 | `query`  | Function that executes the prompts. If `None`, defaults to `spacy.CallLangChain.v1`. ~~Optional[Callable[["langchain.llms.BaseLLM", Iterable[Any]], Iterable[Any]]]~~ |
--- a/website/docs/usage/rule-based-matching.mdx
+++ b/website/docs/usage/rule-based-matching.mdx
@ -720,7 +720,7 @@ matches = matcher(doc)
 # Serve visualization of sentences containing match with displaCy
 # set manual=True to make displaCy render straight from a dictionary
-# (if you're not running the code within a Jupyer environment, you can
+# (if you're not running the code within a Jupyter environment, you can
 # use displacy.serve instead)
 displacy.render(matched_sents, style="ent", manual=True)
 ```
--- a/website/meta/universe.json
+++ b/website/meta/universe.json
@ -276,6 +276,47 @@
                "ancient Greek"
            ]
        },
        {
            "id": "solipcysme",
            "title": "solipCysme",
            "slogan": "spaCy pipeline for French fiction and first-person point-of-view texts.",
            "description": "__solipCysme__ is a pipeline for the French language, designed for the analysis of fiction and first-person point-of-view texts, with a focus on personal pronouns.",
            "github": "thjbdvlt/solipCysme",
            "code_example": [
                "pip install https://huggingface.co/thjbdvlt/fr_solipcysme/resolve/main/fr_solipcysme-any-py3-none-any.whl",
                "",
                "import spacy",
                "",
                "nlp = spacy.load('fr_solipcysme')",
                "for i in nlp(",
                "'la MACHINE à (b)rouiller le temps s'est peut-être déraillée..?'",
                "):",
                "    print(",
                "        i, ",
                "        i.norm_, ",
                "        i.pos_, ",
                "        i.morph, ",
                "        i.lemma_, ",
                "        i.dep_, ",
                "        i._.tokentype,",
                "        i._.vv_pos,",
                "        i._.vv_morph",
                "    )"
            ],
            "code_language": "python",
            "author": "thjbdvlt",
            "author_links": {
                "github": "thjbdvlt"
            },
            "category": [
                "pipeline",
                "research",
                "models"
            ],
            "tags": [
                "french"
            ]
        },
        {
            "id": "spacy-cleaner",
            "title": "spacy-cleaner",
@ -2587,6 +2628,20 @@
                "courses"
            ]
        },
        {
            "type": "education",
            "id": "spacy-quickstart",
            "title": "spaCy Quickstart",
            "slogan": "Learn spaCy basics quickly by visualizing various Doc objects",
            "description": "In this course, I use the itables Python library inside a Jupyter notebook so that you can visualize the different spaCy document objects. This will provide a solid foundation for people who wish to learn the spaCy NLP library.",
            "url": "https://learnspacy.com/courses/spacy-quickstart/",
            "image": "https://learnspacy.com/wp-content/uploads/2024/09/custom_search_builder_spacy-2048x1202.png",
            "thumb": "https://learnspacy.com/wp-content/uploads/2024/09/learnspacy_logo.png",
            "author": "Aravind Mohanoor",
            "category": [
                "courses"
            ]
        },
        {
            "type": "education",
            "id": "video-spacys-ner-model",
--- a/website/src/styles/landing.module.sass
+++ b/website/src/styles/landing.module.sass
@ -87,6 +87,9 @@
    margin-bottom: 0
    height: 100%
    a, a:hover
        color: inherit
 .banner-content-small
    display: block
    margin-bottom: 0 !important