From d60997febb1c2710543b46c18e3875317349dcf9 Mon Sep 17 00:00:00 2001 From: Paul O'Leary McCann Date: Thu, 8 Dec 2022 19:45:52 +0900 Subject: [PATCH] Remove old model shortcuts (#11916) * Remove old model shortcuts * Remove error, docs warnings about shortcuts * Fix import in util Accidentally deleted the whole import and not just the old part... * Change universe example to v3 style * Switch ubuntu-latest to ubuntu-20.04 in main tests (#11928) * Switch ubuntu-latest to ubuntu-20.04 in main tests * Only use 20.04 for 3.6 * Update some model loading in Universe * Add v2 tag to neuralcoref * Use the spacy-version feature instead of a v2 tag Co-authored-by: svlandeg --- spacy/cli/download.py | 7 ------- spacy/errors.py | 16 ---------------- spacy/util.py | 4 +--- website/UNIVERSE.md | 2 +- website/docs/usage/models.md | 27 --------------------------- website/meta/universe.json | 20 ++++++++++++-------- 6 files changed, 14 insertions(+), 62 deletions(-) diff --git a/spacy/cli/download.py b/spacy/cli/download.py index 0c9a32b93..4c998a6e0 100644 --- a/spacy/cli/download.py +++ b/spacy/cli/download.py @@ -8,7 +8,6 @@ from ._util import app, Arg, Opt, WHEEL_SUFFIX, SDIST_SUFFIX from .. import about from ..util import is_package, get_minor_version, run_command from ..util import is_prerelease_version -from ..errors import OLD_MODEL_SHORTCUTS @app.command( @@ -61,12 +60,6 @@ def download( version = components[-1] else: model_name = model - if model in OLD_MODEL_SHORTCUTS: - msg.warn( - f"As of spaCy v3.0, shortcuts like '{model}' are deprecated. Please " - f"use the full pipeline package name '{OLD_MODEL_SHORTCUTS[model]}' instead." - ) - model_name = OLD_MODEL_SHORTCUTS[model] compatibility = get_compatibility() version = get_version(model_name, compatibility) diff --git a/spacy/errors.py b/spacy/errors.py index 31230d7a4..9ad7d1292 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -720,13 +720,6 @@ class Errors(metaclass=ErrorsWithCodes): "method in component '{name}'. If you want to use this " "method, make sure it's overwritten on the subclass.") E940 = ("Found NaN values in scores.") - E941 = ("Can't find model '{name}'. It looks like you're trying to load a " - "model from a shortcut, which is obsolete as of spaCy v3.0. To " - "load the model, use its full name instead:\n\n" - "nlp = spacy.load(\"{full}\")\n\nFor more details on the available " - "models, see the models directory: https://spacy.io/models. If you " - "want to create a blank model, use spacy.blank: " - "nlp = spacy.blank(\"{name}\")") E942 = ("Executing `after_{name}` callback failed. Expected the function to " "return an initialized nlp object but got: {value}. Maybe " "you forgot to return the modified object in your function?") @@ -955,15 +948,6 @@ class Errors(metaclass=ErrorsWithCodes): "but got '{received_type}'") -# Deprecated model shortcuts, only used in errors and warnings -OLD_MODEL_SHORTCUTS = { - "en": "en_core_web_sm", "de": "de_core_news_sm", "es": "es_core_news_sm", - "pt": "pt_core_news_sm", "fr": "fr_core_news_sm", "it": "it_core_news_sm", - "nl": "nl_core_news_sm", "el": "el_core_news_sm", "nb": "nb_core_news_sm", - "lt": "lt_core_news_sm", "xx": "xx_ent_wiki_sm" -} - - # fmt: on diff --git a/spacy/util.py b/spacy/util.py index 4bdde1ad1..d674fb9ce 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -40,7 +40,7 @@ except ImportError: from .symbols import ORTH from .compat import cupy, CudaStream, is_windows, importlib_metadata -from .errors import Errors, Warnings, OLD_MODEL_SHORTCUTS +from .errors import Errors, Warnings from . import about if TYPE_CHECKING: @@ -427,8 +427,6 @@ def load_model( return load_model_from_path(Path(name), **kwargs) # type: ignore[arg-type] elif hasattr(name, "exists"): # Path or Path-like to model data return load_model_from_path(name, **kwargs) # type: ignore[arg-type] - if name in OLD_MODEL_SHORTCUTS: - raise IOError(Errors.E941.format(name=name, full=OLD_MODEL_SHORTCUTS[name])) # type: ignore[index] raise IOError(Errors.E050.format(name=name)) diff --git a/website/UNIVERSE.md b/website/UNIVERSE.md index 770bbde13..c3e49ba43 100644 --- a/website/UNIVERSE.md +++ b/website/UNIVERSE.md @@ -51,7 +51,7 @@ markup is correct. "import spacy", "import package_name", "", - "nlp = spacy.load('en')", + "nlp = spacy.load('en_core_web_sm')", "nlp.add_pipe(package_name)" ], "code_language": "python", diff --git a/website/docs/usage/models.md b/website/docs/usage/models.md index 3b1558bd8..03d0d535c 100644 --- a/website/docs/usage/models.md +++ b/website/docs/usage/models.md @@ -342,22 +342,6 @@ The easiest way to download a trained pipeline is via spaCy's [`download`](/api/cli#download) command. It takes care of finding the best-matching package compatible with your spaCy installation. -> #### Important note for v3.0 -> -> Note that as of spaCy v3.0, shortcut links like `en` that create (potentially -> brittle) symlinks in your spaCy installation are **deprecated**. To download -> and load an installed pipeline package, use its full name: -> -> ```diff -> - python -m spacy download en -> + python -m spacy download en_core_web_sm -> ``` -> -> ```diff -> - nlp = spacy.load("en") -> + nlp = spacy.load("en_core_web_sm") -> ``` - ```cli # Download best-matching version of a package for your spaCy installation $ python -m spacy download en_core_web_sm @@ -489,17 +473,6 @@ spacy.cli.download("en_core_web_sm") To load a pipeline package, use [`spacy.load`](/api/top-level#spacy.load) with the package name or a path to the data directory: -> #### Important note for v3.0 -> -> Note that as of spaCy v3.0, shortcut links like `en` that create (potentially -> brittle) symlinks in your spaCy installation are **deprecated**. To download -> and load an installed pipeline package, use its full name: -> -> ```diff -> - python -m spacy download en -> + python -m spacy download en_core_web_sm -> ``` - ```python import spacy nlp = spacy.load("en_core_web_sm") # load package "en_core_web_sm" diff --git a/website/meta/universe.json b/website/meta/universe.json index 97b53e9c5..168a39a5f 100644 --- a/website/meta/universe.json +++ b/website/meta/universe.json @@ -1021,7 +1021,8 @@ "author_links": { "github": "mholtzscher" }, - "category": ["pipeline"] + "category": ["pipeline"], + "spacy_version": 2 }, { "id": "spacy-sentence-segmenter", @@ -1045,7 +1046,7 @@ { "id": "spacy_cld", "title": "spaCy-CLD", - "slogan": "Add language detection to your spaCy pipeline using CLD2", + "slogan": "Add language detection to your spaCy v2 pipeline using CLD2", "description": "spaCy-CLD operates on `Doc` and `Span` spaCy objects. When called on a `Doc` or `Span`, the object is given two attributes: `languages` (a list of up to 3 language codes) and `language_scores` (a dictionary mapping language codes to confidence scores between 0 and 1).\n\nspacy-cld is a little extension that wraps the [PYCLD2](https://github.com/aboSamoor/pycld2) Python library, which in turn wraps the [Compact Language Detector 2](https://github.com/CLD2Owners/cld2) C library originally built at Google for the Chromium project. CLD2 uses character n-grams as features and a Naive Bayes classifier to identify 80+ languages from Unicode text strings (or XML/HTML). It can detect up to 3 different languages in a given document, and reports a confidence score (reported in with each language.", "github": "nickdavidhaynes/spacy-cld", "pip": "spacy_cld", @@ -1065,7 +1066,8 @@ "author_links": { "github": "nickdavidhaynes" }, - "category": ["pipeline"] + "category": ["pipeline"], + "spacy_version": 2 }, { "id": "spacy-iwnlp", @@ -1139,7 +1141,8 @@ "github": "sammous" }, "category": ["pipeline"], - "tags": ["pos", "lemmatizer", "french"] + "tags": ["pos", "lemmatizer", "french"], + "spacy_version": 2 }, { "id": "lemmy", @@ -1333,8 +1336,8 @@ }, { "id": "neuralcoref", - "slogan": "State-of-the-art coreference resolution based on neural nets and spaCy", - "description": "This coreference resolution module is based on the super fast [spaCy](https://spacy.io/) parser and uses the neural net scoring model described in [Deep Reinforcement Learning for Mention-Ranking Coreference Models](http://cs.stanford.edu/people/kevclark/resources/clark-manning-emnlp2016-deep.pdf) by Kevin Clark and Christopher D. Manning, EMNLP 2016. Since ✨Neuralcoref v2.0, you can train the coreference resolution system on your own dataset — e.g., another language than English! — **provided you have an annotated dataset**. Note that to use neuralcoref with spaCy > 2.1.0, you'll have to install neuralcoref from source.", + "slogan": "State-of-the-art coreference resolution based on neural nets and spaCy v2", + "description": "This coreference resolution module is based on the super fast spaCy parser and uses the neural net scoring model described in [Deep Reinforcement Learning for Mention-Ranking Coreference Models](http://cs.stanford.edu/people/kevclark/resources/clark-manning-emnlp2016-deep.pdf) by Kevin Clark and Christopher D. Manning, EMNLP 2016. Since ✨Neuralcoref v2.0, you can train the coreference resolution system on your own dataset — e.g., another language than English! — **provided you have an annotated dataset**. Note that to use neuralcoref with spaCy > 2.1.0, you'll have to install neuralcoref from source, and v3+ is not supported.", "github": "huggingface/neuralcoref", "thumb": "https://i.imgur.com/j6FO9O6.jpg", "code_example": [ @@ -1355,7 +1358,8 @@ "github": "huggingface" }, "category": ["standalone", "conversational", "models"], - "tags": ["coref"] + "tags": ["coref"], + "spacy_version": 2 }, { "id": "neuralcoref-vizualizer", @@ -1431,7 +1435,7 @@ "import spacy", "import explacy", "", - "nlp = spacy.load('en')", + "nlp = spacy.load('en_core_web_sm')", "explacy.print_parse_info(nlp, 'The salad was surprisingly tasty.')" ], "author": "Tyler Neylon",