From a61a1d43cf9573a9a7b6d6199732b5cc921f1135 Mon Sep 17 00:00:00 2001 From: DomHudson <10864294+DomHudson@users.noreply.github.com> Date: Mon, 30 Sep 2024 16:45:50 +0100 Subject: [PATCH 01/29] [Documentation] Replace broken URL in _serialization.mdx (#13641) --- website/docs/usage/101/_serialization.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/docs/usage/101/_serialization.mdx b/website/docs/usage/101/_serialization.mdx index ce34ea6e9..d58c068eb 100644 --- a/website/docs/usage/101/_serialization.mdx +++ b/website/docs/usage/101/_serialization.mdx @@ -4,7 +4,7 @@ progress** – for example, everything that's in your `nlp` object. This means you'll have to translate its contents and structure into a format that can be saved, like a file or a byte string. This process is called serialization. spaCy comes with **built-in serialization methods** and supports the -[Pickle protocol](https://www.diveinto.org/python3/serializing.html#dump). +[Pickle protocol](https://docs.python.org/3/library/pickle.html). > #### What's pickle? > From 3a0aadcf86792e69c4dfa6e9f7932a3d06de4ca0 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 1 Oct 2024 10:16:35 +0200 Subject: [PATCH 02/29] Update spacy[apple] thinc-apple-ops pin for numpy v2 compatibility --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 855b7e683..baa62e792 100644 --- a/setup.cfg +++ b/setup.cfg @@ -116,7 +116,7 @@ cuda12x = cuda-autodetect = cupy-wheel>=11.0.0,<13.0.0 apple = - thinc-apple-ops>=0.1.0.dev0,<1.0.0 + thinc-apple-ops>=1.0.0,<2.0.0 # Language tokenizers with external dependencies ja = sudachipy>=0.5.2,!=0.6.1 From a8837beab7bc04d480bfa86736a693fcb8cc0731 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 1 Oct 2024 12:37:11 +0200 Subject: [PATCH 03/29] Set version to v3.8.1 --- spacy/about.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/about.py b/spacy/about.py index 3ce6b5514..8266773b5 100644 --- a/spacy/about.py +++ b/spacy/about.py @@ -1,5 +1,5 @@ # fmt: off __title__ = "spacy" -__version__ = "3.8.0" +__version__ = "3.8.1" __download_url__ = "https://github.com/explosion/spacy-models/releases/download" __compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json" From 725ccbac391e0c688f1e78dbb76c0a03afc830de Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 1 Oct 2024 12:38:02 +0200 Subject: [PATCH 04/29] Format --- spacy/language.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/spacy/language.py b/spacy/language.py index 57b17943a..e8e64a1fc 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -2143,7 +2143,9 @@ class Language: serializers["tokenizer"] = lambda p: self.tokenizer.to_disk( # type: ignore[union-attr] p, exclude=["vocab"] ) - serializers["meta.json"] = lambda p: srsly.write_json(p, _replace_numpy_floats(self.meta)) + serializers["meta.json"] = lambda p: srsly.write_json( + p, _replace_numpy_floats(self.meta) + ) serializers["config.cfg"] = lambda p: self.config.to_disk(p) for name, proc in self._components: if name in exclude: @@ -2257,7 +2259,9 @@ class Language: serializers: Dict[str, Callable[[], bytes]] = {} serializers["vocab"] = lambda: self.vocab.to_bytes(exclude=exclude) serializers["tokenizer"] = lambda: self.tokenizer.to_bytes(exclude=["vocab"]) # type: ignore[union-attr] - serializers["meta.json"] = lambda: srsly.json_dumps(_replace_numpy_floats(self.meta)) + serializers["meta.json"] = lambda: srsly.json_dumps( + _replace_numpy_floats(self.meta) + ) serializers["config.cfg"] = lambda: self.config.to_bytes() for name, proc in self._components: if name in exclude: @@ -2309,7 +2313,9 @@ class Language: def _replace_numpy_floats(meta_dict: dict) -> dict: - return convert_recursive(lambda v: isinstance(v, numpy.floating), lambda v: float(v), dict(meta_dict)) + return convert_recursive( + lambda v: isinstance(v, numpy.floating), lambda v: float(v), dict(meta_dict) + ) @dataclass From 9c5b61bdffb239c88c98640b6aed2a0d8b797656 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 1 Oct 2024 12:38:51 +0200 Subject: [PATCH 05/29] isort --- spacy/language.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/language.py b/spacy/language.py index e8e64a1fc..93840c922 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -9,7 +9,6 @@ from contextlib import ExitStack, contextmanager from copy import deepcopy from dataclasses import dataclass from itertools import chain, cycle -import numpy from pathlib import Path from timeit import default_timer as timer from typing import ( @@ -31,6 +30,7 @@ from typing import ( overload, ) +import numpy import srsly from cymem.cymem import Pool from thinc.api import Config, CupyOps, Optimizer, get_current_ops From ff81bfb8db8296ca117ff16552e3a47c23705eae Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 1 Oct 2024 13:21:10 +0200 Subject: [PATCH 06/29] Update tests --- .github/workflows/tests.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 0188606c0..b3923a8fe 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -32,7 +32,7 @@ jobs: - name: Configure Python version uses: actions/setup-python@v4 with: - python-version: "3.7" + python-version: "3.10" - name: black run: | @@ -61,14 +61,14 @@ jobs: os: [ubuntu-latest, windows-latest, macos-latest] python_version: ["3.12"] include: - - os: windows-latest - python_version: "3.7" - - os: macos-latest - python_version: "3.8" - - os: ubuntu-latest - python_version: "3.9" - os: windows-latest python_version: "3.10" + - os: macos-latest + python_version: "3.10" + - os: ubuntu-latest + python_version: "3.10" + - os: windows-latest + python_version: "3.11" - os: macos-latest python_version: "3.11" From f0084b91439a924b01ddc782ef6347a5a50ad93f Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 1 Oct 2024 15:28:22 +0200 Subject: [PATCH 07/29] Fix matrix in tests --- .github/workflows/tests.yml | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index b3923a8fe..7d04f400e 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -59,18 +59,7 @@ jobs: fail-fast: true matrix: os: [ubuntu-latest, windows-latest, macos-latest] - python_version: ["3.12"] - include: - - os: windows-latest - python_version: "3.10" - - os: macos-latest - python_version: "3.10" - - os: ubuntu-latest - python_version: "3.10" - - os: windows-latest - python_version: "3.11" - - os: macos-latest - python_version: "3.11" + python_version: ["3.9", "3.10", "3.11", "3.12"] runs-on: ${{ matrix.os }} From 6c038aaae0b95c3536bfd0bf15090b4758ae3766 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 1 Oct 2024 15:32:01 +0200 Subject: [PATCH 08/29] Don't disable tests on workflow changes --- .github/workflows/tests.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 7d04f400e..6f7ff9254 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -12,7 +12,6 @@ on: - "*.md" - "*.mdx" - "website/**" - - ".github/workflows/**" pull_request: types: [opened, synchronize, reopened, edited] paths-ignore: From e1d050517d736b215e7cdcbaccabf6f3f03bc505 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 1 Oct 2024 15:56:18 +0200 Subject: [PATCH 09/29] Fix requirements.txt --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index b61715a8f..c1e4a4508 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,7 @@ spacy-legacy>=3.0.11,<3.1.0 spacy-loggers>=1.0.0,<2.0.0 cymem>=2.0.2,<2.1.0 preshed>=3.0.2,<3.1.0 -thinc>=8.2.2,<8.3.0 +thinc>=8.3.0,<8.4.0 ml_datasets>=0.2.0,<0.3.0 murmurhash>=0.28.0,<1.1.0 wasabi>=0.9.1,<1.2.0 From 924cbc970328af4fcaf0af9ac45bafc4f6d712b3 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 1 Oct 2024 16:08:06 +0200 Subject: [PATCH 10/29] Fix environment variable for test --- .github/workflows/tests.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 6f7ff9254..ee2b191a5 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -147,7 +147,9 @@ jobs: - name: "Test assemble CLI" run: | python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_sm'}; config.to_disk('ner_source_sm.cfg')" - PYTHONWARNINGS="error,ignore::DeprecationWarning" python -m spacy assemble ner_source_sm.cfg output_dir + python -m spacy assemble ner_source_sm.cfg output_dir + env: + PYTHONWARNINGS: "error,ignore::DeprecationWarning" if: matrix.python_version == '3.9' - name: "Test assemble CLI vectors warning" From 0cdcfe56cb4fc878896d04bed9b1b7ad847f8780 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 1 Oct 2024 16:47:24 +0200 Subject: [PATCH 11/29] Set version to v3.8.2 --- spacy/about.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/about.py b/spacy/about.py index 8266773b5..9aabb20eb 100644 --- a/spacy/about.py +++ b/spacy/about.py @@ -1,5 +1,5 @@ # fmt: off __title__ = "spacy" -__version__ = "3.8.1" +__version__ = "3.8.2" __download_url__ = "https://github.com/explosion/spacy-models/releases/download" __compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json" From 63f1b53c1a09f3a77fa327813b24a86442291891 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 1 Oct 2024 16:49:49 +0200 Subject: [PATCH 12/29] Check test failure --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index ee2b191a5..7bb07754a 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -58,7 +58,7 @@ jobs: fail-fast: true matrix: os: [ubuntu-latest, windows-latest, macos-latest] - python_version: ["3.9", "3.10", "3.11", "3.12"] + python_version: ["3.9", "3.11", "3.12"] runs-on: ${{ matrix.os }} From dd47fbb45f2e4121ba2342c4b521118beaec954f Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 1 Oct 2024 22:24:25 +0200 Subject: [PATCH 13/29] Remove 'apple' extra --- .github/workflows/tests.yml | 5 ----- setup.cfg | 2 -- 2 files changed, 7 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 0188606c0..8ee046240 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -177,8 +177,3 @@ jobs: python -m pytest --pyargs spacy -W error if: "!(startsWith(matrix.os, 'macos') && matrix.python_version == '3.11')" - - name: "Run CPU tests with thinc-apple-ops" - run: | - python -m pip install 'spacy[apple]' - python -m pytest --pyargs spacy - if: startsWith(matrix.os, 'macos') && matrix.python_version == '3.11' diff --git a/setup.cfg b/setup.cfg index 855b7e683..7b9bd7b28 100644 --- a/setup.cfg +++ b/setup.cfg @@ -115,8 +115,6 @@ cuda12x = cupy-cuda12x>=11.5.0,<13.0.0 cuda-autodetect = cupy-wheel>=11.0.0,<13.0.0 -apple = - thinc-apple-ops>=0.1.0.dev0,<1.0.0 # Language tokenizers with external dependencies ja = sudachipy>=0.5.2,!=0.6.1 From 29232ad3b5ca5a19858fce1af1dd258fc181c91e Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 1 Oct 2024 22:51:09 +0200 Subject: [PATCH 14/29] Upd tests workflow --- .github/workflows/tests.yml | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 8ee046240..b546fe1b8 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -32,7 +32,7 @@ jobs: - name: Configure Python version uses: actions/setup-python@v4 with: - python-version: "3.7" + python-version: "3.10" - name: black run: | @@ -61,12 +61,8 @@ jobs: os: [ubuntu-latest, windows-latest, macos-latest] python_version: ["3.12"] include: - - os: windows-latest - python_version: "3.7" - - os: macos-latest - python_version: "3.8" - os: ubuntu-latest - python_version: "3.9" + python_version: "3.10" - os: windows-latest python_version: "3.10" - os: macos-latest From 5196366af56dbea18494e69abef93bf67b2f28de Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 1 Oct 2024 22:53:11 +0200 Subject: [PATCH 15/29] Upd tests workflow --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index b546fe1b8..219c2196d 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -59,7 +59,7 @@ jobs: fail-fast: true matrix: os: [ubuntu-latest, windows-latest, macos-latest] - python_version: ["3.12"] + python_version: ["3.10"] include: - os: ubuntu-latest python_version: "3.10" From 77177d0216af47cefed53e1860ebf3c17d87fa8f Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 1 Oct 2024 22:54:12 +0200 Subject: [PATCH 16/29] Upd tests workflow --- .github/workflows/tests.yml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 219c2196d..1ccc04f4f 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -60,13 +60,6 @@ jobs: matrix: os: [ubuntu-latest, windows-latest, macos-latest] python_version: ["3.10"] - include: - - os: ubuntu-latest - python_version: "3.10" - - os: windows-latest - python_version: "3.10" - - os: macos-latest - python_version: "3.11" runs-on: ${{ matrix.os }} From 08705f5a8c3f4edc6fe55275a310e9af7cfd4316 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 1 Oct 2024 22:57:25 +0200 Subject: [PATCH 17/29] Upd tests --- .github/workflows/tests.yml | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 1ccc04f4f..f5896fbec 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -12,7 +12,6 @@ on: - "*.md" - "*.mdx" - "website/**" - - ".github/workflows/**" pull_request: types: [opened, synchronize, reopened, edited] paths-ignore: @@ -58,9 +57,17 @@ jobs: strategy: fail-fast: true matrix: - os: [ubuntu-latest, windows-latest, macos-latest] - python_version: ["3.10"] - + os: [ubuntu-latest, windows-latest, macos-13] + python_version: ["3.12"] + include: + - os: windows-latest + python_version: "3.9" + - os: macos-13 + python_version: "3.10" + - os: ubuntu-latest + python_version: "3.11" + - os: windows-latest + python_version: "3.11" runs-on: ${{ matrix.os }} steps: From 411b70f5f3b1bd5c7f6affbc36b4ddb7eaaed4d6 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 1 Oct 2024 23:46:54 +0200 Subject: [PATCH 18/29] Upd requirements --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index b61715a8f..9f01c215f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,7 @@ spacy-legacy>=3.0.11,<3.1.0 spacy-loggers>=1.0.0,<2.0.0 cymem>=2.0.2,<2.1.0 preshed>=3.0.2,<3.1.0 -thinc>=8.2.2,<8.3.0 +thinc>=9.0.0,<10.0.0 ml_datasets>=0.2.0,<0.3.0 murmurhash>=0.28.0,<1.1.0 wasabi>=0.9.1,<1.2.0 From 5230754986e2d73c46cd409efc1bed9a590a011f Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 1 Oct 2024 23:49:17 +0200 Subject: [PATCH 19/29] Fix thinc dependncy --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2edbadefc..5ee6894de 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ requires = [ "cymem>=2.0.2,<2.1.0", "preshed>=3.0.2,<3.1.0", "murmurhash>=0.28.0,<1.1.0", - "thinc>=8.3.0,<8.4.0", + "thinc>=9.1.0,<10.0.0", "numpy>=2.0.0,<2.1.0; python_version < '3.9'", "numpy>=2.0.0,<2.1.0; python_version >= '3.9'", ] From 628c973db5c096e476d7e2aa8eb07c897a7dcbd0 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 2 Oct 2024 00:49:09 +0200 Subject: [PATCH 20/29] Note minimum python requirement in setup.cfg --- setup.cfg | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/setup.cfg b/setup.cfg index baa62e792..5030729b7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -17,8 +17,6 @@ classifiers = Operating System :: Microsoft :: Windows Programming Language :: Cython Programming Language :: Python :: 3 - Programming Language :: Python :: 3.7 - Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 Programming Language :: Python :: 3.11 @@ -31,7 +29,7 @@ project_urls = [options] zip_safe = false include_package_data = true -python_requires = >=3.7 +python_requires = >=3.9 # NOTE: This section is superseded by pyproject.toml and will be removed in # spaCy v4 setup_requires = From bda4bb018495e87b0289ea383003e1b4a6791f6e Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 2 Oct 2024 01:01:40 +0200 Subject: [PATCH 21/29] Try disabling pretraining tests to probe windows ci failure (#13646) --- .../{test_pretraining.py => test_pretraining.py.disabled} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename spacy/tests/training/{test_pretraining.py => test_pretraining.py.disabled} (100%) diff --git a/spacy/tests/training/test_pretraining.py b/spacy/tests/training/test_pretraining.py.disabled similarity index 100% rename from spacy/tests/training/test_pretraining.py rename to spacy/tests/training/test_pretraining.py.disabled From 10a6f508ab624cacdaa3443ec84284bd236739ab Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Fri, 11 Oct 2024 11:19:10 +0200 Subject: [PATCH 22/29] Fix landing banner links [ci skip] --- website/src/styles/landing.module.sass | 3 +++ 1 file changed, 3 insertions(+) diff --git a/website/src/styles/landing.module.sass b/website/src/styles/landing.module.sass index 5c2a0754b..6a703194c 100644 --- a/website/src/styles/landing.module.sass +++ b/website/src/styles/landing.module.sass @@ -87,6 +87,9 @@ margin-bottom: 0 height: 100% + a, a:hover + color: inherit + .banner-content-small display: block margin-bottom: 0 !important From 52a4cb0d148f80838277bc7435d64f32b81b4a06 Mon Sep 17 00:00:00 2001 From: sam rxh <118240781+samrxh@users.noreply.github.com> Date: Fri, 11 Oct 2024 04:20:34 -0500 Subject: [PATCH 23/29] Fix 'issue template' link in CONTRIBUTING.md (#13587) [ci skip] --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ed75e1fd8..9407881a1 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -35,7 +35,7 @@ so that more people can benefit from it. When opening an issue, use a **descriptive title** and include your **environment** (operating system, Python version, spaCy version). Our -[issue template](https://github.com/explosion/spaCy/issues/new) helps you +[issue templates](https://github.com/explosion/spaCy/issues/new/choose) help you remember the most important details to include. If you've discovered a bug, you can also submit a [regression test](#fixing-bugs) straight away. When you're opening an issue to report the bug, simply refer to your pull request in the From 44d190645307b331649c3081f0a294c6ad0d264d Mon Sep 17 00:00:00 2001 From: aravind-mc Date: Fri, 11 Oct 2024 14:51:57 +0530 Subject: [PATCH 24/29] Update universe.json to add my spaCy online course (#13632) [ci skip] --- website/meta/universe.json | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/website/meta/universe.json b/website/meta/universe.json index 45b3f625c..92829bc56 100644 --- a/website/meta/universe.json +++ b/website/meta/universe.json @@ -2587,6 +2587,20 @@ "courses" ] }, + { + "type": "education", + "id": "spacy-quickstart", + "title": "spaCy Quickstart", + "slogan": "Learn spaCy basics quickly by visualizing various Doc objects", + "description":"In this course, I use the itables Python library inside a Jupyter notebook so that you can visualize the different spaCy document objects. This will provide a solid foundation for people who wish to learn the spaCy NLP library." + "url": "https://learnspacy.com/courses/spacy-quickstart/", + "image": "https://learnspacy.com/wp-content/uploads/2024/09/custom_search_builder_spacy-2048x1202.png", + "thumb": "https://learnspacy.com/wp-content/uploads/2024/09/learnspacy_logo.png", + "author": "Aravind Mohanoor", + "category": [ + "courses" + ] + }, { "type": "education", "id": "video-spacys-ner-model", From 8d2902b0e72d0c6bb644fc6243e95d8487d12574 Mon Sep 17 00:00:00 2001 From: "Andrei (Andrey) Khropov" Date: Fri, 11 Oct 2024 11:23:12 +0200 Subject: [PATCH 25/29] Fix misspelling (#13631) [ci skip] --- spacy/lang/hr/lemma_lookup_license.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spacy/lang/hr/lemma_lookup_license.txt b/spacy/lang/hr/lemma_lookup_license.txt index 04671e404..9cc003a11 100644 --- a/spacy/lang/hr/lemma_lookup_license.txt +++ b/spacy/lang/hr/lemma_lookup_license.txt @@ -1,5 +1,5 @@ The list of Croatian lemmas was extracted from the reldi-tagger repository (https://github.com/clarinsi/reldi-tagger). -Reldi-tagger is licesned under the Apache 2.0 licence. +Reldi-tagger is licensed under the Apache 2.0 licence. @InProceedings{ljubesic16-new, author = {Nikola Ljubešić and Filip Klubička and Željko Agić and Ivo-Pavao Jazbec}, @@ -12,4 +12,4 @@ Reldi-tagger is licesned under the Apache 2.0 licence. publisher = {European Language Resources Association (ELRA)}, address = {Paris, France}, isbn = {978-2-9517408-9-1} - } \ No newline at end of file + } From ae5c3e078dd5326dddf4cbd6cd2f102f56b22267 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Fri, 11 Oct 2024 11:24:42 +0200 Subject: [PATCH 26/29] Fix universe.json [ci skip] --- website/meta/universe.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/meta/universe.json b/website/meta/universe.json index 92829bc56..918a6db01 100644 --- a/website/meta/universe.json +++ b/website/meta/universe.json @@ -2592,7 +2592,7 @@ "id": "spacy-quickstart", "title": "spaCy Quickstart", "slogan": "Learn spaCy basics quickly by visualizing various Doc objects", - "description":"In this course, I use the itables Python library inside a Jupyter notebook so that you can visualize the different spaCy document objects. This will provide a solid foundation for people who wish to learn the spaCy NLP library." + "description": "In this course, I use the itables Python library inside a Jupyter notebook so that you can visualize the different spaCy document objects. This will provide a solid foundation for people who wish to learn the spaCy NLP library.", "url": "https://learnspacy.com/courses/spacy-quickstart/", "image": "https://learnspacy.com/wp-content/uploads/2024/09/custom_search_builder_spacy-2048x1202.png", "thumb": "https://learnspacy.com/wp-content/uploads/2024/09/learnspacy_logo.png", From 0d7e57fc3e57acfeeadf6f4291b2d587dccac247 Mon Sep 17 00:00:00 2001 From: thjbdvlt <109964512+thjbdvlt@users.noreply.github.com> Date: Fri, 11 Oct 2024 11:26:15 +0200 Subject: [PATCH 27/29] universe-pipeline-solipCysme-french (#13627) [ci skip] --- website/meta/universe.json | 41 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/website/meta/universe.json b/website/meta/universe.json index 918a6db01..b35423790 100644 --- a/website/meta/universe.json +++ b/website/meta/universe.json @@ -276,6 +276,47 @@ "ancient Greek" ] }, + { + "id": "solipcysme", + "title": "solipCysme", + "slogan": "spaCy pipeline for french fictions and first person point of view texts.", + "description": "__solipCysme__ is a pipeline for french language, designed for the analysis of fictions and first person point of view texts, with a focus on personal pronouns.", + "github": "thjbdvlt/solipCysme", + "code_example": [ + "pip install https://huggingface.co/thjbdvlt/fr_solipcysme/resolve/main/fr_solipcysme-any-py3-none-any.whl", + "", + "import spacy", + "", + "nlp = spacy.load('fr_solipcysme')", + "for i in nlp(", + "'la MACHINE à (b)rouiller le temps s'est peut-être déraillée..?'", + "):", + " print(", + " i, ", + " i.norm_, ", + " i.pos_, ", + " i.morph, ", + " i.lemma_, ", + " i.dep_, ", + " i._.tokentype,", + " i._.vv_pos,", + " i._.vv_morph", + " )" + ], + "code_language": "python", + "author": "thjbdvlt", + "author_links": { + "github": "thjbdvlt" + }, + "category": [ + "pipeline", + "research", + "models" + ], + "tags": [ + "french" + ] + }, { "id": "spacy-cleaner", "title": "spacy-cleaner", From 1ee9a1905972055d3760adbc9f7667fdeca9a081 Mon Sep 17 00:00:00 2001 From: Sergei Pashakhin Date: Wed, 23 Oct 2024 10:06:36 +0000 Subject: [PATCH 28/29] Fix typo (#13657) [ci skip] --- website/docs/api/large-language-models.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/docs/api/large-language-models.mdx b/website/docs/api/large-language-models.mdx index cefd5c66e..6e2436cc1 100644 --- a/website/docs/api/large-language-models.mdx +++ b/website/docs/api/large-language-models.mdx @@ -1597,7 +1597,7 @@ The name of the model to be used has to be passed in via the `name` attribute. | Argument | Description | | -------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `name` | The name of a mdodel supported by LangChain for this API. ~~str~~ | +| `name` | The name of a model supported by LangChain for this API. ~~str~~ | | `config` | Configuration passed on to the LangChain model. Defaults to `{}`. ~~Dict[Any, Any]~~ | | `query` | Function that executes the prompts. If `None`, defaults to `spacy.CallLangChain.v1`. ~~Optional[Callable[["langchain.llms.BaseLLM", Iterable[Any]], Iterable[Any]]]~~ | From 15fbf5ef36689d925549d96cb9b9469b5140d7bb Mon Sep 17 00:00:00 2001 From: Ikko Eltociear Ashimine Date: Wed, 23 Oct 2024 19:07:01 +0900 Subject: [PATCH 29/29] docs: update rule-based-matching.mdx (#13665) [ci skip] --- website/docs/usage/rule-based-matching.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/docs/usage/rule-based-matching.mdx b/website/docs/usage/rule-based-matching.mdx index e5b98da3a..8632f4573 100644 --- a/website/docs/usage/rule-based-matching.mdx +++ b/website/docs/usage/rule-based-matching.mdx @@ -720,7 +720,7 @@ matches = matcher(doc) # Serve visualization of sentences containing match with displaCy # set manual=True to make displaCy render straight from a dictionary -# (if you're not running the code within a Jupyer environment, you can +# (if you're not running the code within a Jupyter environment, you can # use displacy.serve instead) displacy.render(matched_sents, style="ent", manual=True) ```