mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Remove old model shortcuts (#11916)
* Remove old model shortcuts
* Remove error, docs warnings about shortcuts
* Fix import in util

  Accidentally deleted the whole import and not just the old part...
* Change universe example to v3 style
* Switch ubuntu-latest to ubuntu-20.04 in main tests (#11928)
* Switch ubuntu-latest to ubuntu-20.04 in main tests
* Only use 20.04 for 3.6
* Update some model loading in Universe
* Add v2 tag to neuralcoref
* Use the spacy-version feature instead of a v2 tag

Co-authored-by: svlandeg <svlandeg@github.com>
This commit is contained in:
		
							parent
							
								
									6b9af38eeb
								
							
						
					
					
						commit
						d60997febb
					
				| 
						 | 
					@ -8,7 +8,6 @@ from ._util import app, Arg, Opt, WHEEL_SUFFIX, SDIST_SUFFIX
 | 
				
			||||||
from .. import about
 | 
					from .. import about
 | 
				
			||||||
from ..util import is_package, get_minor_version, run_command
 | 
					from ..util import is_package, get_minor_version, run_command
 | 
				
			||||||
from ..util import is_prerelease_version
 | 
					from ..util import is_prerelease_version
 | 
				
			||||||
from ..errors import OLD_MODEL_SHORTCUTS
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@app.command(
 | 
					@app.command(
 | 
				
			||||||
| 
						 | 
					@ -61,12 +60,6 @@ def download(
 | 
				
			||||||
        version = components[-1]
 | 
					        version = components[-1]
 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
        model_name = model
 | 
					        model_name = model
 | 
				
			||||||
        if model in OLD_MODEL_SHORTCUTS:
 | 
					 | 
				
			||||||
            msg.warn(
 | 
					 | 
				
			||||||
                f"As of spaCy v3.0, shortcuts like '{model}' are deprecated. Please "
 | 
					 | 
				
			||||||
                f"use the full pipeline package name '{OLD_MODEL_SHORTCUTS[model]}' instead."
 | 
					 | 
				
			||||||
            )
 | 
					 | 
				
			||||||
            model_name = OLD_MODEL_SHORTCUTS[model]
 | 
					 | 
				
			||||||
        compatibility = get_compatibility()
 | 
					        compatibility = get_compatibility()
 | 
				
			||||||
        version = get_version(model_name, compatibility)
 | 
					        version = get_version(model_name, compatibility)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -720,13 +720,6 @@ class Errors(metaclass=ErrorsWithCodes):
 | 
				
			||||||
            "method in component '{name}'. If you want to use this "
 | 
					            "method in component '{name}'. If you want to use this "
 | 
				
			||||||
            "method, make sure it's overwritten on the subclass.")
 | 
					            "method, make sure it's overwritten on the subclass.")
 | 
				
			||||||
    E940 = ("Found NaN values in scores.")
 | 
					    E940 = ("Found NaN values in scores.")
 | 
				
			||||||
    E941 = ("Can't find model '{name}'. It looks like you're trying to load a "
 | 
					 | 
				
			||||||
            "model from a shortcut, which is obsolete as of spaCy v3.0. To "
 | 
					 | 
				
			||||||
            "load the model, use its full name instead:\n\n"
 | 
					 | 
				
			||||||
            "nlp = spacy.load(\"{full}\")\n\nFor more details on the available "
 | 
					 | 
				
			||||||
            "models, see the models directory: https://spacy.io/models. If you "
 | 
					 | 
				
			||||||
            "want to create a blank model, use spacy.blank: "
 | 
					 | 
				
			||||||
            "nlp = spacy.blank(\"{name}\")")
 | 
					 | 
				
			||||||
    E942 = ("Executing `after_{name}` callback failed. Expected the function to "
 | 
					    E942 = ("Executing `after_{name}` callback failed. Expected the function to "
 | 
				
			||||||
            "return an initialized nlp object but got: {value}. Maybe "
 | 
					            "return an initialized nlp object but got: {value}. Maybe "
 | 
				
			||||||
            "you forgot to return the modified object in your function?")
 | 
					            "you forgot to return the modified object in your function?")
 | 
				
			||||||
| 
						 | 
					@ -955,15 +948,6 @@ class Errors(metaclass=ErrorsWithCodes):
 | 
				
			||||||
             "but got '{received_type}'")
 | 
					             "but got '{received_type}'")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Deprecated model shortcuts, only used in errors and warnings
 | 
					 | 
				
			||||||
OLD_MODEL_SHORTCUTS = {
 | 
					 | 
				
			||||||
    "en": "en_core_web_sm", "de": "de_core_news_sm", "es": "es_core_news_sm",
 | 
					 | 
				
			||||||
    "pt": "pt_core_news_sm", "fr": "fr_core_news_sm", "it": "it_core_news_sm",
 | 
					 | 
				
			||||||
    "nl": "nl_core_news_sm", "el": "el_core_news_sm", "nb": "nb_core_news_sm",
 | 
					 | 
				
			||||||
    "lt": "lt_core_news_sm", "xx": "xx_ent_wiki_sm"
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# fmt: on
 | 
					# fmt: on
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -40,7 +40,7 @@ except ImportError:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from .symbols import ORTH
 | 
					from .symbols import ORTH
 | 
				
			||||||
from .compat import cupy, CudaStream, is_windows, importlib_metadata
 | 
					from .compat import cupy, CudaStream, is_windows, importlib_metadata
 | 
				
			||||||
from .errors import Errors, Warnings, OLD_MODEL_SHORTCUTS
 | 
					from .errors import Errors, Warnings
 | 
				
			||||||
from . import about
 | 
					from . import about
 | 
				
			||||||
 | 
					
 | 
				
			||||||
if TYPE_CHECKING:
 | 
					if TYPE_CHECKING:
 | 
				
			||||||
| 
						 | 
					@ -427,8 +427,6 @@ def load_model(
 | 
				
			||||||
            return load_model_from_path(Path(name), **kwargs)  # type: ignore[arg-type]
 | 
					            return load_model_from_path(Path(name), **kwargs)  # type: ignore[arg-type]
 | 
				
			||||||
    elif hasattr(name, "exists"):  # Path or Path-like to model data
 | 
					    elif hasattr(name, "exists"):  # Path or Path-like to model data
 | 
				
			||||||
        return load_model_from_path(name, **kwargs)  # type: ignore[arg-type]
 | 
					        return load_model_from_path(name, **kwargs)  # type: ignore[arg-type]
 | 
				
			||||||
    if name in OLD_MODEL_SHORTCUTS:
 | 
					 | 
				
			||||||
        raise IOError(Errors.E941.format(name=name, full=OLD_MODEL_SHORTCUTS[name]))  # type: ignore[index]
 | 
					 | 
				
			||||||
    raise IOError(Errors.E050.format(name=name))
 | 
					    raise IOError(Errors.E050.format(name=name))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -51,7 +51,7 @@ markup is correct.
 | 
				
			||||||
        "import spacy",
 | 
					        "import spacy",
 | 
				
			||||||
        "import package_name",
 | 
					        "import package_name",
 | 
				
			||||||
        "",
 | 
					        "",
 | 
				
			||||||
        "nlp = spacy.load('en')",
 | 
					        "nlp = spacy.load('en_core_web_sm')",
 | 
				
			||||||
        "nlp.add_pipe(package_name)"
 | 
					        "nlp.add_pipe(package_name)"
 | 
				
			||||||
    ],
 | 
					    ],
 | 
				
			||||||
    "code_language": "python",
 | 
					    "code_language": "python",
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -342,22 +342,6 @@ The easiest way to download a trained pipeline is via spaCy's
 | 
				
			||||||
[`download`](/api/cli#download) command. It takes care of finding the
 | 
					[`download`](/api/cli#download) command. It takes care of finding the
 | 
				
			||||||
best-matching package compatible with your spaCy installation.
 | 
					best-matching package compatible with your spaCy installation.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
> #### Important note for v3.0
 | 
					 | 
				
			||||||
>
 | 
					 | 
				
			||||||
> Note that as of spaCy v3.0, shortcut links like `en` that create (potentially
 | 
					 | 
				
			||||||
> brittle) symlinks in your spaCy installation are **deprecated**. To download
 | 
					 | 
				
			||||||
> and load an installed pipeline package, use its full name:
 | 
					 | 
				
			||||||
>
 | 
					 | 
				
			||||||
> ```diff
 | 
					 | 
				
			||||||
> - python -m spacy download en
 | 
					 | 
				
			||||||
> + python -m spacy download en_core_web_sm
 | 
					 | 
				
			||||||
> ```
 | 
					 | 
				
			||||||
>
 | 
					 | 
				
			||||||
> ```diff
 | 
					 | 
				
			||||||
> - nlp = spacy.load("en")
 | 
					 | 
				
			||||||
> + nlp = spacy.load("en_core_web_sm")
 | 
					 | 
				
			||||||
> ```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```cli
 | 
					```cli
 | 
				
			||||||
# Download best-matching version of a package for your spaCy installation
 | 
					# Download best-matching version of a package for your spaCy installation
 | 
				
			||||||
$ python -m spacy download en_core_web_sm
 | 
					$ python -m spacy download en_core_web_sm
 | 
				
			||||||
| 
						 | 
					@ -489,17 +473,6 @@ spacy.cli.download("en_core_web_sm")
 | 
				
			||||||
To load a pipeline package, use [`spacy.load`](/api/top-level#spacy.load) with
 | 
					To load a pipeline package, use [`spacy.load`](/api/top-level#spacy.load) with
 | 
				
			||||||
the package name or a path to the data directory:
 | 
					the package name or a path to the data directory:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
> #### Important note for v3.0
 | 
					 | 
				
			||||||
>
 | 
					 | 
				
			||||||
> Note that as of spaCy v3.0, shortcut links like `en` that create (potentially
 | 
					 | 
				
			||||||
> brittle) symlinks in your spaCy installation are **deprecated**. To download
 | 
					 | 
				
			||||||
> and load an installed pipeline package, use its full name:
 | 
					 | 
				
			||||||
>
 | 
					 | 
				
			||||||
> ```diff
 | 
					 | 
				
			||||||
> - python -m spacy download en
 | 
					 | 
				
			||||||
> + python -m spacy download en_core_web_sm
 | 
					 | 
				
			||||||
> ```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```python
 | 
					```python
 | 
				
			||||||
import spacy
 | 
					import spacy
 | 
				
			||||||
nlp = spacy.load("en_core_web_sm")           # load package "en_core_web_sm"
 | 
					nlp = spacy.load("en_core_web_sm")           # load package "en_core_web_sm"
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1021,7 +1021,8 @@
 | 
				
			||||||
            "author_links": {
 | 
					            "author_links": {
 | 
				
			||||||
                "github": "mholtzscher"
 | 
					                "github": "mholtzscher"
 | 
				
			||||||
            },
 | 
					            },
 | 
				
			||||||
            "category": ["pipeline"]
 | 
					            "category": ["pipeline"],
 | 
				
			||||||
 | 
					            "spacy_version": 2
 | 
				
			||||||
        },
 | 
					        },
 | 
				
			||||||
        {
 | 
					        {
 | 
				
			||||||
            "id": "spacy-sentence-segmenter",
 | 
					            "id": "spacy-sentence-segmenter",
 | 
				
			||||||
| 
						 | 
					@ -1045,7 +1046,7 @@
 | 
				
			||||||
        {
 | 
					        {
 | 
				
			||||||
            "id": "spacy_cld",
 | 
					            "id": "spacy_cld",
 | 
				
			||||||
            "title": "spaCy-CLD",
 | 
					            "title": "spaCy-CLD",
 | 
				
			||||||
            "slogan": "Add language detection to your spaCy pipeline using CLD2",
 | 
					            "slogan": "Add language detection to your spaCy v2 pipeline using CLD2",
 | 
				
			||||||
            "description": "spaCy-CLD operates on `Doc` and `Span` spaCy objects. When called on a `Doc` or `Span`, the object is given two attributes: `languages` (a list of up to 3 language codes) and `language_scores` (a dictionary mapping language codes to confidence scores between 0 and 1).\n\nspacy-cld is a little extension that wraps the [PYCLD2](https://github.com/aboSamoor/pycld2) Python library, which in turn wraps the [Compact Language Detector 2](https://github.com/CLD2Owners/cld2) C library originally built at Google for the Chromium project. CLD2 uses character n-grams as features and a Naive Bayes classifier to identify 80+ languages from Unicode text strings (or XML/HTML). It can detect up to 3 different languages in a given document, and reports a confidence score (reported in with each language.",
 | 
					            "description": "spaCy-CLD operates on `Doc` and `Span` spaCy objects. When called on a `Doc` or `Span`, the object is given two attributes: `languages` (a list of up to 3 language codes) and `language_scores` (a dictionary mapping language codes to confidence scores between 0 and 1).\n\nspacy-cld is a little extension that wraps the [PYCLD2](https://github.com/aboSamoor/pycld2) Python library, which in turn wraps the [Compact Language Detector 2](https://github.com/CLD2Owners/cld2) C library originally built at Google for the Chromium project. CLD2 uses character n-grams as features and a Naive Bayes classifier to identify 80+ languages from Unicode text strings (or XML/HTML). It can detect up to 3 different languages in a given document, and reports a confidence score (reported in with each language.",
 | 
				
			||||||
            "github": "nickdavidhaynes/spacy-cld",
 | 
					            "github": "nickdavidhaynes/spacy-cld",
 | 
				
			||||||
            "pip": "spacy_cld",
 | 
					            "pip": "spacy_cld",
 | 
				
			||||||
| 
						 | 
					@ -1065,7 +1066,8 @@
 | 
				
			||||||
            "author_links": {
 | 
					            "author_links": {
 | 
				
			||||||
                "github": "nickdavidhaynes"
 | 
					                "github": "nickdavidhaynes"
 | 
				
			||||||
            },
 | 
					            },
 | 
				
			||||||
            "category": ["pipeline"]
 | 
					            "category": ["pipeline"],
 | 
				
			||||||
 | 
					            "spacy_version": 2
 | 
				
			||||||
        },
 | 
					        },
 | 
				
			||||||
        {
 | 
					        {
 | 
				
			||||||
            "id": "spacy-iwnlp",
 | 
					            "id": "spacy-iwnlp",
 | 
				
			||||||
| 
						 | 
					@ -1139,7 +1141,8 @@
 | 
				
			||||||
                "github": "sammous"
 | 
					                "github": "sammous"
 | 
				
			||||||
            },
 | 
					            },
 | 
				
			||||||
            "category": ["pipeline"],
 | 
					            "category": ["pipeline"],
 | 
				
			||||||
            "tags": ["pos", "lemmatizer", "french"]
 | 
					            "tags": ["pos", "lemmatizer", "french"],
 | 
				
			||||||
 | 
					            "spacy_version": 2
 | 
				
			||||||
        },
 | 
					        },
 | 
				
			||||||
        {
 | 
					        {
 | 
				
			||||||
            "id": "lemmy",
 | 
					            "id": "lemmy",
 | 
				
			||||||
| 
						 | 
					@ -1333,8 +1336,8 @@
 | 
				
			||||||
        },
 | 
					        },
 | 
				
			||||||
        {
 | 
					        {
 | 
				
			||||||
            "id": "neuralcoref",
 | 
					            "id": "neuralcoref",
 | 
				
			||||||
            "slogan": "State-of-the-art coreference resolution based on neural nets and spaCy",
 | 
					            "slogan": "State-of-the-art coreference resolution based on neural nets and spaCy v2",
 | 
				
			||||||
            "description": "This coreference resolution module is based on the super fast [spaCy](https://spacy.io/) parser and uses the neural net scoring model described in [Deep Reinforcement Learning for Mention-Ranking Coreference Models](http://cs.stanford.edu/people/kevclark/resources/clark-manning-emnlp2016-deep.pdf) by Kevin Clark and Christopher D. Manning, EMNLP 2016. Since ✨Neuralcoref v2.0, you can train the coreference resolution system on your own dataset — e.g., another language than English! — **provided you have an annotated dataset**. Note that to use neuralcoref with spaCy > 2.1.0, you'll have to install neuralcoref from source.",
 | 
					            "description": "This coreference resolution module is based on the super fast spaCy parser and uses the neural net scoring model described in [Deep Reinforcement Learning for Mention-Ranking Coreference Models](http://cs.stanford.edu/people/kevclark/resources/clark-manning-emnlp2016-deep.pdf) by Kevin Clark and Christopher D. Manning, EMNLP 2016. Since ✨Neuralcoref v2.0, you can train the coreference resolution system on your own dataset — e.g., another language than English! — **provided you have an annotated dataset**. Note that to use neuralcoref with spaCy > 2.1.0, you'll have to install neuralcoref from source, and v3+ is not supported.",
 | 
				
			||||||
            "github": "huggingface/neuralcoref",
 | 
					            "github": "huggingface/neuralcoref",
 | 
				
			||||||
            "thumb": "https://i.imgur.com/j6FO9O6.jpg",
 | 
					            "thumb": "https://i.imgur.com/j6FO9O6.jpg",
 | 
				
			||||||
            "code_example": [
 | 
					            "code_example": [
 | 
				
			||||||
| 
						 | 
					@ -1355,7 +1358,8 @@
 | 
				
			||||||
                "github": "huggingface"
 | 
					                "github": "huggingface"
 | 
				
			||||||
            },
 | 
					            },
 | 
				
			||||||
            "category": ["standalone", "conversational", "models"],
 | 
					            "category": ["standalone", "conversational", "models"],
 | 
				
			||||||
            "tags": ["coref"]
 | 
					            "tags": ["coref"],
 | 
				
			||||||
 | 
					            "spacy_version": 2
 | 
				
			||||||
        },
 | 
					        },
 | 
				
			||||||
        {
 | 
					        {
 | 
				
			||||||
            "id": "neuralcoref-vizualizer",
 | 
					            "id": "neuralcoref-vizualizer",
 | 
				
			||||||
| 
						 | 
					@ -1431,7 +1435,7 @@
 | 
				
			||||||
                "import spacy",
 | 
					                "import spacy",
 | 
				
			||||||
                "import explacy",
 | 
					                "import explacy",
 | 
				
			||||||
                "",
 | 
					                "",
 | 
				
			||||||
                "nlp = spacy.load('en')",
 | 
					                "nlp = spacy.load('en_core_web_sm')",
 | 
				
			||||||
                "explacy.print_parse_info(nlp, 'The salad was surprisingly tasty.')"
 | 
					                "explacy.print_parse_info(nlp, 'The salad was surprisingly tasty.')"
 | 
				
			||||||
            ],
 | 
					            ],
 | 
				
			||||||
            "author": "Tyler Neylon",
 | 
					            "author": "Tyler Neylon",
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user