mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 16:07:41 +03:00 
			
		
		
		
	Merge branch 'develop' into merge-develop-into-v4
This commit is contained in:
		
						commit
						977b847cce
					
				|  | @ -6,7 +6,6 @@ requires = [ | |||
|     "preshed>=3.0.2,<3.1.0", | ||||
|     "murmurhash>=0.28.0,<1.1.0", | ||||
|     "thinc>=8.1.0,<8.2.0", | ||||
|     "pathy", | ||||
|     "numpy>=1.15.0", | ||||
| ] | ||||
| build-backend = "setuptools.build_meta" | ||||
|  |  | |||
|  | @ -1,5 +1,5 @@ | |||
| # Our libraries | ||||
| spacy-legacy>=3.0.9,<3.1.0 | ||||
| spacy-legacy>=3.0.10,<3.1.0 | ||||
| spacy-loggers>=1.0.0,<2.0.0 | ||||
| cymem>=2.0.2,<2.1.0 | ||||
| preshed>=3.0.2,<3.1.0 | ||||
|  | @ -34,4 +34,5 @@ mypy>=0.910,<0.970; platform_machine!='aarch64' | |||
| types-dataclasses>=0.1.3; python_version < "3.7" | ||||
| types-mock>=0.1.1 | ||||
| types-requests | ||||
| types-setuptools>=57.0.0 | ||||
| black>=22.0,<23.0 | ||||
|  |  | |||
|  | @ -33,7 +33,7 @@ include_package_data = true | |||
| python_requires = >=3.6 | ||||
| install_requires = | ||||
|     # Our libraries | ||||
|     spacy-legacy>=3.0.9,<3.1.0 | ||||
|     spacy-legacy>=3.0.10,<3.1.0 | ||||
|     spacy-loggers>=1.0.0,<2.0.0 | ||||
|     murmurhash>=0.28.0,<1.1.0 | ||||
|     cymem>=2.0.2,<2.1.0 | ||||
|  | @ -42,9 +42,9 @@ install_requires = | |||
|     wasabi>=0.9.1,<1.1.0 | ||||
|     srsly>=2.4.3,<3.0.0 | ||||
|     catalogue>=2.0.6,<2.1.0 | ||||
|     # Third-party dependencies | ||||
|     typer>=0.3.0,<0.5.0 | ||||
|     pathy>=0.3.5 | ||||
|     # Third-party dependencies | ||||
|     tqdm>=4.38.0,<5.0.0 | ||||
|     numpy>=1.15.0 | ||||
|     requests>=2.13.0,<3.0.0 | ||||
|  |  | |||
|  | @ -31,21 +31,21 @@ def load( | |||
|     name: Union[str, Path], | ||||
|     *, | ||||
|     vocab: Union[Vocab, bool] = True, | ||||
|     disable: Iterable[str] = util.SimpleFrozenList(), | ||||
|     enable: Iterable[str] = util.SimpleFrozenList(), | ||||
|     exclude: Iterable[str] = util.SimpleFrozenList(), | ||||
|     disable: Union[str, Iterable[str]] = util.SimpleFrozenList(), | ||||
|     enable: Union[str, Iterable[str]] = util.SimpleFrozenList(), | ||||
|     exclude: Union[str, Iterable[str]] = util.SimpleFrozenList(), | ||||
|     config: Union[Dict[str, Any], Config] = util.SimpleFrozenDict(), | ||||
| ) -> Language: | ||||
|     """Load a spaCy model from an installed package or a local path. | ||||
| 
 | ||||
|     name (str): Package name or model path. | ||||
|     vocab (Vocab): A Vocab object. If True, a vocab is created. | ||||
|     disable (Iterable[str]): Names of pipeline components to disable. Disabled | ||||
|     disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled | ||||
|         pipes will be loaded but they won't be run unless you explicitly | ||||
|         enable them by calling nlp.enable_pipe. | ||||
|     enable (Iterable[str]): Names of pipeline components to enable. All other | ||||
|     enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other | ||||
|         pipes will be disabled (but can be enabled later using nlp.enable_pipe). | ||||
|     exclude (Iterable[str]): Names of pipeline components to exclude. Excluded | ||||
|     exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded | ||||
|         components won't be loaded. | ||||
|     config (Dict[str, Any] / Config): Config overrides as nested dict or dict | ||||
|         keyed by section values in dot notation. | ||||
|  |  | |||
|  | @ -20,7 +20,7 @@ def download_cli( | |||
|     ctx: typer.Context, | ||||
|     model: str = Arg(..., help="Name of pipeline package to download"), | ||||
|     direct: bool = Opt(False, "--direct", "-d", "-D", help="Force direct download of name + version"), | ||||
|     sdist: bool = Opt(False, "--sdist", "-S", help="Download sdist (.tar.gz) archive instead of pre-built binary wheel") | ||||
|     sdist: bool = Opt(False, "--sdist", "-S", help="Download sdist (.tar.gz) archive instead of pre-built binary wheel"), | ||||
|     # fmt: on | ||||
| ): | ||||
|     """ | ||||
|  | @ -36,7 +36,12 @@ def download_cli( | |||
|     download(model, direct, sdist, *ctx.args) | ||||
| 
 | ||||
| 
 | ||||
| def download(model: str, direct: bool = False, sdist: bool = False, *pip_args) -> None: | ||||
| def download( | ||||
|     model: str, | ||||
|     direct: bool = False, | ||||
|     sdist: bool = False, | ||||
|     *pip_args, | ||||
| ) -> None: | ||||
|     if ( | ||||
|         not (is_package("spacy") or is_package("spacy-nightly")) | ||||
|         and "--no-deps" not in pip_args | ||||
|  | @ -50,13 +55,10 @@ def download(model: str, direct: bool = False, sdist: bool = False, *pip_args) - | |||
|             "dependencies, you'll have to install them manually." | ||||
|         ) | ||||
|         pip_args = pip_args + ("--no-deps",) | ||||
|     suffix = SDIST_SUFFIX if sdist else WHEEL_SUFFIX | ||||
|     dl_tpl = "{m}-{v}/{m}-{v}{s}#egg={m}=={v}" | ||||
|     if direct: | ||||
|         components = model.split("-") | ||||
|         model_name = "".join(components[:-1]) | ||||
|         version = components[-1] | ||||
|         download_model(dl_tpl.format(m=model_name, v=version, s=suffix), pip_args) | ||||
|     else: | ||||
|         model_name = model | ||||
|         if model in OLD_MODEL_SHORTCUTS: | ||||
|  | @ -67,13 +69,26 @@ def download(model: str, direct: bool = False, sdist: bool = False, *pip_args) - | |||
|             model_name = OLD_MODEL_SHORTCUTS[model] | ||||
|         compatibility = get_compatibility() | ||||
|         version = get_version(model_name, compatibility) | ||||
|         download_model(dl_tpl.format(m=model_name, v=version, s=suffix), pip_args) | ||||
| 
 | ||||
|     filename = get_model_filename(model_name, version, sdist) | ||||
| 
 | ||||
|     download_model(filename, pip_args) | ||||
|     msg.good( | ||||
|         "Download and installation successful", | ||||
|         f"You can now load the package via spacy.load('{model_name}')", | ||||
|     ) | ||||
| 
 | ||||
| 
 | ||||
| def get_model_filename(model_name: str, version: str, sdist: bool = False) -> str: | ||||
|     dl_tpl = "{m}-{v}/{m}-{v}{s}" | ||||
|     egg_tpl = "#egg={m}=={v}" | ||||
|     suffix = SDIST_SUFFIX if sdist else WHEEL_SUFFIX | ||||
|     filename = dl_tpl.format(m=model_name, v=version, s=suffix) | ||||
|     if sdist: | ||||
|         filename += egg_tpl.format(m=model_name, v=version) | ||||
|     return filename | ||||
| 
 | ||||
| 
 | ||||
| def get_compatibility() -> dict: | ||||
|     if is_prerelease_version(about.__version__): | ||||
|         version: Optional[str] = about.__version__ | ||||
|  | @ -105,6 +120,11 @@ def get_version(model: str, comp: dict) -> str: | |||
|     return comp[model][0] | ||||
| 
 | ||||
| 
 | ||||
| def get_latest_version(model: str) -> str: | ||||
|     comp = get_compatibility() | ||||
|     return get_version(model, comp) | ||||
| 
 | ||||
| 
 | ||||
| def download_model( | ||||
|     filename: str, user_pip_args: Optional[Sequence[str]] = None | ||||
| ) -> None: | ||||
|  |  | |||
|  | @ -1,10 +1,13 @@ | |||
| from typing import Optional, Dict, Any, Union, List | ||||
| import platform | ||||
| import pkg_resources | ||||
| import json | ||||
| from pathlib import Path | ||||
| from wasabi import Printer, MarkdownRenderer | ||||
| import srsly | ||||
| 
 | ||||
| from ._util import app, Arg, Opt, string_to_list | ||||
| from .download import get_model_filename, get_latest_version | ||||
| from .. import util | ||||
| from .. import about | ||||
| 
 | ||||
|  | @ -16,6 +19,7 @@ def info_cli( | |||
|     markdown: bool = Opt(False, "--markdown", "-md", help="Generate Markdown for GitHub issues"), | ||||
|     silent: bool = Opt(False, "--silent", "-s", "-S", help="Don't print anything (just return)"), | ||||
|     exclude: str = Opt("labels", "--exclude", "-e", help="Comma-separated keys to exclude from the print-out"), | ||||
|     url: bool = Opt(False, "--url", "-u", help="Print the URL to download the most recent compatible version of the pipeline"), | ||||
|     # fmt: on | ||||
| ): | ||||
|     """ | ||||
|  | @ -23,10 +27,19 @@ def info_cli( | |||
|     print its meta information. Flag --markdown prints details in Markdown for easy | ||||
|     copy-pasting to GitHub issues. | ||||
| 
 | ||||
|     Flag --url prints only the download URL of the most recent compatible | ||||
|     version of the pipeline. | ||||
| 
 | ||||
|     DOCS: https://spacy.io/api/cli#info | ||||
|     """ | ||||
|     exclude = string_to_list(exclude) | ||||
|     info(model, markdown=markdown, silent=silent, exclude=exclude) | ||||
|     info( | ||||
|         model, | ||||
|         markdown=markdown, | ||||
|         silent=silent, | ||||
|         exclude=exclude, | ||||
|         url=url, | ||||
|     ) | ||||
| 
 | ||||
| 
 | ||||
| def info( | ||||
|  | @ -35,11 +48,20 @@ def info( | |||
|     markdown: bool = False, | ||||
|     silent: bool = True, | ||||
|     exclude: Optional[List[str]] = None, | ||||
|     url: bool = False, | ||||
| ) -> Union[str, dict]: | ||||
|     msg = Printer(no_print=silent, pretty=not silent) | ||||
|     if not exclude: | ||||
|         exclude = [] | ||||
|     if model: | ||||
|     if url: | ||||
|         if model is not None: | ||||
|             title = f"Download info for pipeline '{model}'" | ||||
|             data = info_model_url(model) | ||||
|             print(data["download_url"]) | ||||
|             return data | ||||
|         else: | ||||
|             msg.fail("--url option requires a pipeline name", exits=1) | ||||
|     elif model: | ||||
|         title = f"Info about pipeline '{model}'" | ||||
|         data = info_model(model, silent=silent) | ||||
|     else: | ||||
|  | @ -99,11 +121,43 @@ def info_model(model: str, *, silent: bool = True) -> Dict[str, Any]: | |||
|         meta["source"] = str(model_path.resolve()) | ||||
|     else: | ||||
|         meta["source"] = str(model_path) | ||||
|     download_url = info_installed_model_url(model) | ||||
|     if download_url: | ||||
|         meta["download_url"] = download_url | ||||
|     return { | ||||
|         k: v for k, v in meta.items() if k not in ("accuracy", "performance", "speed") | ||||
|     } | ||||
| 
 | ||||
| 
 | ||||
| def info_installed_model_url(model: str) -> Optional[str]: | ||||
|     """Given a pipeline name, get the download URL if available, otherwise | ||||
|     return None. | ||||
| 
 | ||||
|     This is only available for pipelines installed as modules that have | ||||
|     dist-info available. | ||||
|     """ | ||||
|     try: | ||||
|         dist = pkg_resources.get_distribution(model) | ||||
|         data = json.loads(dist.get_metadata("direct_url.json")) | ||||
|         return data["url"] | ||||
|     except pkg_resources.DistributionNotFound: | ||||
|         # no such package | ||||
|         return None | ||||
|     except Exception: | ||||
|         # something else, like no file or invalid JSON | ||||
|         return None | ||||
| 
 | ||||
| def info_model_url(model: str) -> Dict[str, Any]: | ||||
|     """Return the download URL for the latest version of a pipeline.""" | ||||
|     version = get_latest_version(model) | ||||
| 
 | ||||
|     filename = get_model_filename(model, version) | ||||
|     download_url = about.__download_url__ + "/" + filename | ||||
|     release_tpl = "https://github.com/explosion/spacy-models/releases/tag/{m}-{v}" | ||||
|     release_url = release_tpl.format(m=model, v=version) | ||||
|     return {"download_url": download_url, "release_url": release_url} | ||||
| 
 | ||||
| 
 | ||||
| def get_markdown( | ||||
|     data: Dict[str, Any], | ||||
|     title: Optional[str] = None, | ||||
|  |  | |||
|  | @ -230,8 +230,9 @@ class Errors(metaclass=ErrorsWithCodes): | |||
|             "initialized component.") | ||||
|     E004 = ("Can't set up pipeline component: a factory for '{name}' already " | ||||
|             "exists. Existing factory: {func}. New factory: {new_func}") | ||||
|     E005 = ("Pipeline component '{name}' returned None. If you're using a " | ||||
|             "custom component, maybe you forgot to return the processed Doc?") | ||||
|     E005 = ("Pipeline component '{name}' returned {returned_type} instead of a " | ||||
|             "Doc. If you're using a custom component, maybe you forgot to " | ||||
|             "return the processed Doc?") | ||||
|     E006 = ("Invalid constraints for adding pipeline component. You can only " | ||||
|             "set one of the following: before (component name or index), " | ||||
|             "after (component name or index), first (True) or last (True). " | ||||
|  |  | |||
|  | @ -72,10 +72,10 @@ class CatalanLemmatizer(Lemmatizer): | |||
|                         oov_forms.append(form) | ||||
|         if not forms: | ||||
|             forms.extend(oov_forms) | ||||
|         if not forms and string in lookup_table.keys(): | ||||
|             forms.append(self.lookup_lemmatize(token)[0]) | ||||
| 
 | ||||
|         # use lookups, and fall back to the token itself | ||||
|         if not forms: | ||||
|             forms.append(string) | ||||
|             forms.append(lookup_table.get(string, [string])[0]) | ||||
|         forms = list(dict.fromkeys(forms)) | ||||
|         self.cache[cache_key] = forms | ||||
|         return forms | ||||
|  |  | |||
|  | @ -53,11 +53,16 @@ class FrenchLemmatizer(Lemmatizer): | |||
|         rules = rules_table.get(univ_pos, []) | ||||
|         string = string.lower() | ||||
|         forms = [] | ||||
|         # first try lookup in table based on upos | ||||
|         if string in index: | ||||
|             forms.append(string) | ||||
|             self.cache[cache_key] = forms | ||||
|             return forms | ||||
| 
 | ||||
|         # then add anything in the exceptions table | ||||
|         forms.extend(exceptions.get(string, [])) | ||||
| 
 | ||||
|         # if nothing found yet, use the rules | ||||
|         oov_forms = [] | ||||
|         if not forms: | ||||
|             for old, new in rules: | ||||
|  | @ -69,12 +74,14 @@ class FrenchLemmatizer(Lemmatizer): | |||
|                         forms.append(form) | ||||
|                     else: | ||||
|                         oov_forms.append(form) | ||||
| 
 | ||||
|         # if still nothing, add the oov forms from rules | ||||
|         if not forms: | ||||
|             forms.extend(oov_forms) | ||||
|         if not forms and string in lookup_table.keys(): | ||||
|             forms.append(self.lookup_lemmatize(token)[0]) | ||||
| 
 | ||||
|         # use lookups, which fall back to the token itself | ||||
|         if not forms: | ||||
|             forms.append(string) | ||||
|             forms.append(lookup_table.get(string, [string])[0]) | ||||
|         forms = list(dict.fromkeys(forms)) | ||||
|         self.cache[cache_key] = forms | ||||
|         return forms | ||||
|  |  | |||
							
								
								
									
										18
									
								
								spacy/lang/la/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								spacy/lang/la/__init__.py
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,18 @@ | |||
| from ...language import Language, BaseDefaults | ||||
| from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS | ||||
| from .stop_words import STOP_WORDS | ||||
| from .lex_attrs import LEX_ATTRS | ||||
| 
 | ||||
| 
 | ||||
| class LatinDefaults(BaseDefaults): | ||||
|     tokenizer_exceptions = TOKENIZER_EXCEPTIONS | ||||
|     stop_words = STOP_WORDS | ||||
|     lex_attr_getters = LEX_ATTRS | ||||
| 
 | ||||
| 
 | ||||
| class Latin(Language): | ||||
|     lang = "la" | ||||
|     Defaults = LatinDefaults | ||||
| 
 | ||||
| 
 | ||||
| __all__ = ["Latin"] | ||||
							
								
								
									
										34
									
								
								spacy/lang/la/lex_attrs.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										34
									
								
								spacy/lang/la/lex_attrs.py
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,34 @@ | |||
| from ...attrs import LIKE_NUM | ||||
| import re | ||||
| 
 | ||||
| # cf. Goyvaerts/Levithan 2009; case-insensitive, and allows up to four repeats of C, X and I (e.g. "IIII") | ||||
| roman_numerals_compile = re.compile( | ||||
|     r"(?i)^(?=[MDCLXVI])M*(C[MD]|D?C{0,4})(X[CL]|L?X{0,4})(I[XV]|V?I{0,4})$" | ||||
| ) | ||||
| 
 | ||||
| _num_words = set( | ||||
|     """ | ||||
| unus una unum duo duae tres tria quattuor quinque sex septem octo novem decem | ||||
| """.split() | ||||
| ) | ||||
| 
 | ||||
| _ordinal_words = set( | ||||
|     """ | ||||
| primus prima primum secundus secunda secundum tertius tertia tertium | ||||
| """.split() | ||||
| ) | ||||
| 
 | ||||
| 
 | ||||
| def like_num(text): | ||||
|     if text.isdigit(): | ||||
|         return True | ||||
|     if roman_numerals_compile.match(text): | ||||
|         return True | ||||
|     if text.lower() in _num_words: | ||||
|         return True | ||||
|     if text.lower() in _ordinal_words: | ||||
|         return True | ||||
|     return False | ||||
| 
 | ||||
| 
 | ||||
| LEX_ATTRS = {LIKE_NUM: like_num} | ||||
							
								
								
									
										37
									
								
								spacy/lang/la/stop_words.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								spacy/lang/la/stop_words.py
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,37 @@ | |||
| # Corrected Perseus list, cf. https://wiki.digitalclassicist.org/Stopwords_for_Greek_and_Latin | ||||
| 
 | ||||
| STOP_WORDS = set( | ||||
|     """ | ||||
| ab ac ad adhuc aliqui aliquis an ante apud at atque aut autem  | ||||
| 
 | ||||
| cum cur  | ||||
| 
 | ||||
| de deinde dum  | ||||
| 
 | ||||
| ego enim ergo es est et etiam etsi ex  | ||||
| 
 | ||||
| fio  | ||||
| 
 | ||||
| haud hic  | ||||
| 
 | ||||
| iam idem igitur ille in infra inter interim ipse is ita  | ||||
| 
 | ||||
| magis modo mox  | ||||
| 
 | ||||
| nam ne nec necque neque nisi non nos  | ||||
| 
 | ||||
| o ob  | ||||
| 
 | ||||
| per possum post pro  | ||||
| 
 | ||||
| quae quam quare qui quia quicumque quidem quilibet quis quisnam quisquam quisque quisquis quo quoniam  | ||||
| 
 | ||||
| sed si sic sive sub sui sum super suus  | ||||
| 
 | ||||
| tam tamen trans tu tum  | ||||
| 
 | ||||
| ubi uel uero | ||||
| 
 | ||||
| vel vero | ||||
| """.split() | ||||
| ) | ||||
							
								
								
									
										76
									
								
								spacy/lang/la/tokenizer_exceptions.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										76
									
								
								spacy/lang/la/tokenizer_exceptions.py
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,76 @@ | |||
| from ..tokenizer_exceptions import BASE_EXCEPTIONS | ||||
| from ...symbols import ORTH | ||||
| from ...util import update_exc | ||||
| 
 | ||||
| 
 | ||||
| # TODO: Look into systematically handling u/v spelling variants (e.g. "vobiscum" / "uobiscum") | ||||
| _exc = { | ||||
|     "mecum": [{ORTH: "me"}, {ORTH: "cum"}], | ||||
|     "tecum": [{ORTH: "te"}, {ORTH: "cum"}], | ||||
|     "nobiscum": [{ORTH: "nobis"}, {ORTH: "cum"}], | ||||
|     "vobiscum": [{ORTH: "vobis"}, {ORTH: "cum"}], | ||||
|     "uobiscum": [{ORTH: "uobis"}, {ORTH: "cum"}], | ||||
| } | ||||
| 
 | ||||
| for orth in [ | ||||
|     "A.", | ||||
|     "Agr.", | ||||
|     "Ap.", | ||||
|     "C.", | ||||
|     "Cn.", | ||||
|     "D.", | ||||
|     "F.", | ||||
|     "K.", | ||||
|     "L.", | ||||
|     "M'.", | ||||
|     "M.", | ||||
|     "Mam.", | ||||
|     "N.", | ||||
|     "Oct.", | ||||
|     "Opet.", | ||||
|     "P.", | ||||
|     "Paul.", | ||||
|     "Post.", | ||||
|     "Pro.", | ||||
|     "Q.", | ||||
|     "S.", | ||||
|     "Ser.", | ||||
|     "Sert.", | ||||
|     "Sex.", | ||||
|     "St.", | ||||
|     "Sta.", | ||||
|     "T.", | ||||
|     "Ti.", | ||||
|     "V.", | ||||
|     "Vol.", | ||||
|     "Vop.", | ||||
|     "U.", | ||||
|     "Uol.", | ||||
|     "Uop.", | ||||
|     "Ian.", | ||||
|     "Febr.", | ||||
|     "Mart.", | ||||
|     "Apr.", | ||||
|     "Mai.", | ||||
|     "Iun.", | ||||
|     "Iul.", | ||||
|     "Aug.", | ||||
|     "Sept.", | ||||
|     "Oct.", | ||||
|     "Nov.", | ||||
|     "Nou.", | ||||
|     "Dec.", | ||||
|     "Non.", | ||||
|     "Id.", | ||||
|     "A.D.", | ||||
|     "Coll.", | ||||
|     "Cos.", | ||||
|     "Ord.", | ||||
|     "Pl.", | ||||
|     "S.C.", | ||||
|     "Suff.", | ||||
|     "Trib.", | ||||
| ]: | ||||
|     _exc[orth] = [{ORTH: orth}] | ||||
| 
 | ||||
| TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc) | ||||
|  | @ -1028,8 +1028,8 @@ class Language: | |||
|                 raise ValueError(Errors.E109.format(name=name)) from e | ||||
|             except Exception as e: | ||||
|                 error_handler(name, proc, [doc], e) | ||||
|             if doc is None: | ||||
|                 raise ValueError(Errors.E005.format(name=name)) | ||||
|             if not isinstance(doc, Doc): | ||||
|                 raise ValueError(Errors.E005.format(name=name, returned_type=type(doc))) | ||||
|         return doc | ||||
| 
 | ||||
|     def disable_pipes(self, *names) -> "DisabledPipes": | ||||
|  | @ -1063,7 +1063,7 @@ class Language: | |||
|         """ | ||||
|         if enable is None and disable is None: | ||||
|             raise ValueError(Errors.E991) | ||||
|         if disable is not None and isinstance(disable, str): | ||||
|         if isinstance(disable, str): | ||||
|             disable = [disable] | ||||
|         if enable is not None: | ||||
|             if isinstance(enable, str): | ||||
|  | @ -1698,9 +1698,9 @@ class Language: | |||
|         config: Union[Dict[str, Any], Config] = {}, | ||||
|         *, | ||||
|         vocab: Union[Vocab, bool] = True, | ||||
|         disable: Iterable[str] = SimpleFrozenList(), | ||||
|         enable: Iterable[str] = SimpleFrozenList(), | ||||
|         exclude: Iterable[str] = SimpleFrozenList(), | ||||
|         disable: Union[str, Iterable[str]] = SimpleFrozenList(), | ||||
|         enable: Union[str, Iterable[str]] = SimpleFrozenList(), | ||||
|         exclude: Union[str, Iterable[str]] = SimpleFrozenList(), | ||||
|         meta: Dict[str, Any] = SimpleFrozenDict(), | ||||
|         auto_fill: bool = True, | ||||
|         validate: bool = True, | ||||
|  | @ -1711,12 +1711,12 @@ class Language: | |||
| 
 | ||||
|         config (Dict[str, Any] / Config): The loaded config. | ||||
|         vocab (Vocab): A Vocab object. If True, a vocab is created. | ||||
|         disable (Iterable[str]): Names of pipeline components to disable. | ||||
|         disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. | ||||
|             Disabled pipes will be loaded but they won't be run unless you | ||||
|             explicitly enable them by calling nlp.enable_pipe. | ||||
|         enable (Iterable[str]): Names of pipeline components to enable. All other | ||||
|         enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other | ||||
|             pipes will be disabled (and can be enabled using `nlp.enable_pipe`). | ||||
|         exclude (Iterable[str]): Names of pipeline components to exclude. | ||||
|         exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. | ||||
|             Excluded components won't be loaded. | ||||
|         meta (Dict[str, Any]): Meta overrides for nlp.meta. | ||||
|         auto_fill (bool): Automatically fill in missing values in config based | ||||
|  | @ -1727,6 +1727,12 @@ class Language: | |||
| 
 | ||||
|         DOCS: https://spacy.io/api/language#from_config | ||||
|         """ | ||||
|         if isinstance(disable, str): | ||||
|             disable = [disable] | ||||
|         if isinstance(enable, str): | ||||
|             enable = [enable] | ||||
|         if isinstance(exclude, str): | ||||
|             exclude = [exclude] | ||||
|         if auto_fill: | ||||
|             config = Config( | ||||
|                 cls.default_config, section_order=CONFIG_SECTION_ORDER | ||||
|  | @ -2031,25 +2037,29 @@ class Language: | |||
| 
 | ||||
|     @staticmethod | ||||
|     def _resolve_component_status( | ||||
|         disable: Iterable[str], enable: Iterable[str], pipe_names: Collection[str] | ||||
|         disable: Union[str, Iterable[str]], | ||||
|         enable: Union[str, Iterable[str]], | ||||
|         pipe_names: Iterable[str], | ||||
|     ) -> Tuple[str, ...]: | ||||
|         """Derives whether (1) `disable` and `enable` values are consistent and (2) | ||||
|         resolves those to a single set of disabled components. Raises an error in | ||||
|         case of inconsistency. | ||||
| 
 | ||||
|         disable (Iterable[str]): Names of components or serialization fields to disable. | ||||
|         enable (Iterable[str]): Names of pipeline components to enable. | ||||
|         disable (Union[str, Iterable[str]]): Name(s) of component(s) or serialization fields to disable. | ||||
|         enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. | ||||
|         pipe_names (Iterable[str]): Names of all pipeline components. | ||||
| 
 | ||||
|         RETURNS (Tuple[str, ...]): Names of components to exclude from pipeline w.r.t. | ||||
|                                    specified includes and excludes. | ||||
|         """ | ||||
| 
 | ||||
|         if disable is not None and isinstance(disable, str): | ||||
|         if isinstance(disable, str): | ||||
|             disable = [disable] | ||||
|         to_disable = disable | ||||
| 
 | ||||
|         if enable: | ||||
|             if isinstance(enable, str): | ||||
|                 enable = [enable] | ||||
|             to_disable = [ | ||||
|                 pipe_name for pipe_name in pipe_names if pipe_name not in enable | ||||
|             ] | ||||
|  |  | |||
|  | @ -1,5 +1,5 @@ | |||
| # cython: infer_types=True, cython: profile=True | ||||
| from typing import List | ||||
| from typing import List, Iterable | ||||
| 
 | ||||
| from libcpp.vector cimport vector | ||||
| from libc.stdint cimport int32_t, int8_t | ||||
|  | @ -868,20 +868,27 @@ class _SetPredicate: | |||
| 
 | ||||
|     def __call__(self, Token token): | ||||
|         if self.is_extension: | ||||
|             value = get_string_id(token._.get(self.attr)) | ||||
|             value = token._.get(self.attr) | ||||
|         else: | ||||
|             value = get_token_attr_for_matcher(token.c, self.attr) | ||||
| 
 | ||||
|         if self.predicate in ("IS_SUBSET", "IS_SUPERSET", "INTERSECTS"): | ||||
|         if self.predicate in ("IN", "NOT_IN"): | ||||
|             if isinstance(value, (str, int)): | ||||
|                 value = get_string_id(value) | ||||
|             else: | ||||
|                 return False | ||||
|         elif self.predicate in ("IS_SUBSET", "IS_SUPERSET", "INTERSECTS"): | ||||
|             # ensure that all values are enclosed in a set | ||||
|             if self.attr == MORPH: | ||||
|                 # break up MORPH into individual Feat=Val values | ||||
|                 value = set(get_string_id(v) for v in MorphAnalysis.from_id(self.vocab, value)) | ||||
|             elif isinstance(value, (str, int)): | ||||
|                 value = set((get_string_id(value),)) | ||||
|             elif isinstance(value, Iterable) and all(isinstance(v, (str, int)) for v in value): | ||||
|                 value = set(get_string_id(v) for v in value) | ||||
|             else: | ||||
|                 # treat a single value as a list | ||||
|                 if isinstance(value, (str, int)): | ||||
|                     value = set([get_string_id(value)]) | ||||
|                 else: | ||||
|                     value = set(get_string_id(v) for v in value) | ||||
|                 return False | ||||
| 
 | ||||
|         if self.predicate == "IN": | ||||
|             return value in self.value | ||||
|         elif self.predicate == "NOT_IN": | ||||
|  |  | |||
|  | @ -256,6 +256,11 @@ def ko_tokenizer_tokenizer(): | |||
|     return nlp.tokenizer | ||||
| 
 | ||||
| 
 | ||||
| @pytest.fixture(scope="module") | ||||
| def la_tokenizer(): | ||||
|     return get_lang_class("la")().tokenizer | ||||
| 
 | ||||
| 
 | ||||
| @pytest.fixture(scope="session") | ||||
| def ko_tokenizer_natto(): | ||||
|     pytest.importorskip("natto") | ||||
|  |  | |||
							
								
								
									
										0
									
								
								spacy/tests/lang/la/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								spacy/tests/lang/la/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
								
								
									
										8
									
								
								spacy/tests/lang/la/test_exception.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								spacy/tests/lang/la/test_exception.py
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,8 @@ | |||
| import pytest | ||||
| 
 | ||||
| 
 | ||||
| def test_la_tokenizer_handles_exc_in_text(la_tokenizer): | ||||
|     text = "scio te omnia facturum, ut nobiscum quam primum sis" | ||||
|     tokens = la_tokenizer(text) | ||||
|     assert len(tokens) == 11 | ||||
|     assert tokens[6].text == "nobis" | ||||
							
								
								
									
										35
									
								
								spacy/tests/lang/la/test_text.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										35
									
								
								spacy/tests/lang/la/test_text.py
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,35 @@ | |||
| import pytest | ||||
| from spacy.lang.la.lex_attrs import like_num | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.parametrize( | ||||
|     "text,match", | ||||
|     [ | ||||
|         ("IIII", True), | ||||
|         ("VI", True), | ||||
|         ("vi", True), | ||||
|         ("IV", True), | ||||
|         ("iv", True), | ||||
|         ("IX", True), | ||||
|         ("ix", True), | ||||
|         ("MMXXII", True), | ||||
|         ("0", True), | ||||
|         ("1", True), | ||||
|         ("quattuor", True), | ||||
|         ("decem", True), | ||||
|         ("tertius", True), | ||||
|         ("canis", False), | ||||
|         ("MMXX11", False), | ||||
|         (",", False), | ||||
|     ], | ||||
| ) | ||||
| def test_lex_attrs_like_number(la_tokenizer, text, match): | ||||
|     tokens = la_tokenizer(text) | ||||
|     assert len(tokens) == 1 | ||||
|     assert tokens[0].like_num == match | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.parametrize("word", ["quinque"]) | ||||
| def test_la_lex_attrs_capitals(word): | ||||
|     assert like_num(word) | ||||
|     assert like_num(word.upper()) | ||||
|  | @ -368,6 +368,16 @@ def test_matcher_intersect_value_operator(en_vocab): | |||
|     doc[0]._.ext = ["A", "B"] | ||||
|     assert len(matcher(doc)) == 1 | ||||
| 
 | ||||
|     # INTERSECTS matches nothing for iterables that aren't all str or int | ||||
|     matcher = Matcher(en_vocab) | ||||
|     pattern = [{"_": {"ext": {"INTERSECTS": ["Abx", "C"]}}}] | ||||
|     matcher.add("M", [pattern]) | ||||
|     doc = Doc(en_vocab, words=["a", "b", "c"]) | ||||
|     doc[0]._.ext = [["Abx"], "B"] | ||||
|     assert len(matcher(doc)) == 0 | ||||
|     doc[0]._.ext = ["Abx", "B"] | ||||
|     assert len(matcher(doc)) == 1 | ||||
| 
 | ||||
|     # INTERSECTS with an empty pattern list matches nothing | ||||
|     matcher = Matcher(en_vocab) | ||||
|     pattern = [{"_": {"ext": {"INTERSECTS": []}}}] | ||||
|  | @ -476,14 +486,22 @@ def test_matcher_extension_set_membership(en_vocab): | |||
|     assert len(matches) == 0 | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.xfail(reason="IN predicate must handle sequence values in extensions") | ||||
| def test_matcher_extension_in_set_predicate(en_vocab): | ||||
|     matcher = Matcher(en_vocab) | ||||
|     Token.set_extension("ext", default=[]) | ||||
|     pattern = [{"_": {"ext": {"IN": ["A", "C"]}}}] | ||||
|     matcher.add("M", [pattern]) | ||||
|     doc = Doc(en_vocab, words=["a", "b", "c"]) | ||||
| 
 | ||||
|     # The IN predicate expects an exact match between the | ||||
|     # extension value and one of the pattern's values. | ||||
|     doc[0]._.ext = ["A", "B"] | ||||
|     assert len(matcher(doc)) == 0 | ||||
| 
 | ||||
|     doc[0]._.ext = ["A"] | ||||
|     assert len(matcher(doc)) == 0 | ||||
| 
 | ||||
|     doc[0]._.ext = "A" | ||||
|     assert len(matcher(doc)) == 1 | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
|  | @ -17,6 +17,7 @@ def test_build_dependencies(): | |||
|         "types-dataclasses", | ||||
|         "types-mock", | ||||
|         "types-requests", | ||||
|         "types-setuptools", | ||||
|     ] | ||||
|     # ignore language-specific packages that shouldn't be installed by all | ||||
|     libs_ignore_setup = [ | ||||
|  |  | |||
|  | @ -618,6 +618,7 @@ def test_load_disable_enable() -> None: | |||
|         base_nlp.to_disk(tmp_dir) | ||||
|         to_disable = ["parser", "tagger"] | ||||
|         to_enable = ["tagger", "parser"] | ||||
|         single_str = "tagger" | ||||
| 
 | ||||
|         # Setting only `disable`. | ||||
|         nlp = spacy.load(tmp_dir, disable=to_disable) | ||||
|  | @ -632,6 +633,16 @@ def test_load_disable_enable() -> None: | |||
|             ] | ||||
|         ) | ||||
| 
 | ||||
|         # Loading with a string representing one component | ||||
|         nlp = spacy.load(tmp_dir, exclude=single_str) | ||||
|         assert single_str not in nlp.component_names | ||||
| 
 | ||||
|         nlp = spacy.load(tmp_dir, disable=single_str) | ||||
|         assert single_str in nlp.component_names | ||||
|         assert single_str not in nlp.pipe_names | ||||
|         assert nlp._disabled == {single_str} | ||||
|         assert nlp.disabled == [single_str] | ||||
| 
 | ||||
|         # Testing consistent enable/disable combination. | ||||
|         nlp = spacy.load( | ||||
|             tmp_dir, | ||||
|  |  | |||
|  | @ -670,3 +670,25 @@ def test_dot_in_factory_names(nlp): | |||
| 
 | ||||
|     with pytest.raises(ValueError, match="not permitted"): | ||||
|         Language.factory("my.evil.component.v1", func=evil_component) | ||||
| 
 | ||||
| 
 | ||||
| def test_component_return(): | ||||
|     """Test that an error is raised if components return a type other than a | ||||
|     doc.""" | ||||
|     nlp = English() | ||||
| 
 | ||||
|     @Language.component("test_component_good_pipe") | ||||
|     def good_pipe(doc): | ||||
|         return doc | ||||
| 
 | ||||
|     nlp.add_pipe("test_component_good_pipe") | ||||
|     nlp("text") | ||||
|     nlp.remove_pipe("test_component_good_pipe") | ||||
| 
 | ||||
|     @Language.component("test_component_bad_pipe") | ||||
|     def bad_pipe(doc): | ||||
|         return doc.text | ||||
| 
 | ||||
|     nlp.add_pipe("test_component_bad_pipe") | ||||
|     with pytest.raises(ValueError, match="instead of a Doc"): | ||||
|         nlp("text") | ||||
|  |  | |||
|  | @ -10,7 +10,8 @@ from spacy.ml._precomputable_affine import _backprop_precomputable_affine_paddin | |||
| from spacy.util import dot_to_object, SimpleFrozenList, import_file | ||||
| from spacy.util import to_ternary_int | ||||
| from thinc.api import Config, Optimizer, ConfigValidationError | ||||
| from thinc.api import set_current_ops | ||||
| from thinc.api import get_current_ops, set_current_ops, NumpyOps, CupyOps, MPSOps | ||||
| from thinc.compat import has_cupy_gpu, has_torch_mps_gpu | ||||
| from spacy.training.batchers import minibatch_by_words | ||||
| from spacy.lang.en import English | ||||
| from spacy.lang.nl import Dutch | ||||
|  | @ -18,7 +19,6 @@ from spacy.language import DEFAULT_CONFIG_PATH | |||
| from spacy.schemas import ConfigSchemaTraining, TokenPattern, TokenPatternSchema | ||||
| from pydantic import ValidationError | ||||
| 
 | ||||
| from thinc.api import get_current_ops, NumpyOps, CupyOps | ||||
| 
 | ||||
| from .util import get_random_doc, make_tempdir | ||||
| 
 | ||||
|  | @ -111,26 +111,25 @@ def test_PrecomputableAffine(nO=4, nI=5, nF=3, nP=2): | |||
| 
 | ||||
| def test_prefer_gpu(): | ||||
|     current_ops = get_current_ops() | ||||
|     try: | ||||
|         import cupy  # noqa: F401 | ||||
| 
 | ||||
|         prefer_gpu() | ||||
|     if has_cupy_gpu: | ||||
|         assert prefer_gpu() | ||||
|         assert isinstance(get_current_ops(), CupyOps) | ||||
|     except ImportError: | ||||
|     elif has_torch_mps_gpu: | ||||
|         assert prefer_gpu() | ||||
|         assert isinstance(get_current_ops(), MPSOps) | ||||
|     else: | ||||
|         assert not prefer_gpu() | ||||
|     set_current_ops(current_ops) | ||||
| 
 | ||||
| 
 | ||||
| def test_require_gpu(): | ||||
|     current_ops = get_current_ops() | ||||
|     try: | ||||
|         import cupy  # noqa: F401 | ||||
| 
 | ||||
|     if has_cupy_gpu: | ||||
|         require_gpu() | ||||
|         assert isinstance(get_current_ops(), CupyOps) | ||||
|     except ImportError: | ||||
|         with pytest.raises(ValueError): | ||||
|             require_gpu() | ||||
|     elif has_torch_mps_gpu: | ||||
|         require_gpu() | ||||
|         assert isinstance(get_current_ops(), MPSOps) | ||||
|     set_current_ops(current_ops) | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
							
								
								
									
								spacy/tests/training/test_logger.py (new file, normal mode, 30 lines added)
									
								
							|  | @ -0,0 +1,30 @@ | |||
| import pytest | ||||
| import spacy | ||||
| 
 | ||||
| from spacy.training import loggers | ||||
| 
 | ||||
| 
 | ||||
| @pytest.fixture() | ||||
| def nlp(): | ||||
|     nlp = spacy.blank("en") | ||||
|     nlp.add_pipe("ner") | ||||
|     return nlp | ||||
| 
 | ||||
| 
 | ||||
| @pytest.fixture() | ||||
| def info(): | ||||
|     return { | ||||
|         "losses": {"ner": 100}, | ||||
|         "other_scores": {"ENTS_F": 0.85, "ENTS_P": 0.90, "ENTS_R": 0.80}, | ||||
|         "epoch": 100, | ||||
|         "step": 125, | ||||
|         "score": 85, | ||||
|     } | ||||
| 
 | ||||
| 
 | ||||
| def test_console_logger(nlp, info): | ||||
|     console_logger = loggers.console_logger( | ||||
|         progress_bar=True, console_output=True, output_file=None | ||||
|     ) | ||||
|     log_step, finalize = console_logger(nlp) | ||||
|     log_step(info) | ||||
|  | @ -1,4 +1,4 @@ | |||
| from typing import Any, Dict, Iterable | ||||
| from typing import Any, Dict, Iterable, Optional | ||||
| from .doc import Doc | ||||
| from .span import Span | ||||
| 
 | ||||
|  | @ -24,4 +24,4 @@ class SpanGroup: | |||
|     def __getitem__(self, i: int) -> Span: ... | ||||
|     def to_bytes(self) -> bytes: ... | ||||
|     def from_bytes(self, bytes_data: bytes) -> SpanGroup: ... | ||||
|     def copy(self) -> SpanGroup: ... | ||||
|     def copy(self, doc: Optional[Doc] = ...) -> SpanGroup: ... | ||||
|  |  | |||
|  | @ -244,15 +244,18 @@ cdef class SpanGroup: | |||
|     cdef void push_back(self, const shared_ptr[SpanC] &span): | ||||
|         self.c.push_back(span) | ||||
| 
 | ||||
|     def copy(self)  -> SpanGroup: | ||||
|     def copy(self, doc: Optional["Doc"] = None) -> SpanGroup: | ||||
|         """Clones the span group. | ||||
| 
 | ||||
|         doc (Doc): New reference document to which the copy is bound. | ||||
|         RETURNS (SpanGroup): A copy of the span group. | ||||
| 
 | ||||
|         DOCS: https://spacy.io/api/spangroup#copy | ||||
|         """ | ||||
|         if doc is None: | ||||
|             doc = self.doc | ||||
|         return SpanGroup( | ||||
|             self.doc, | ||||
|             doc, | ||||
|             name=self.name, | ||||
|             attrs=deepcopy(self.attrs), | ||||
|             spans=list(self), | ||||
|  |  | |||
|  | @ -42,7 +42,8 @@ class SpanGroups(UserDict): | |||
|     def copy(self, doc: Optional["Doc"] = None) -> "SpanGroups": | ||||
|         if doc is None: | ||||
|             doc = self._ensure_doc() | ||||
|         return SpanGroups(doc).from_bytes(self.to_bytes()) | ||||
|         data_copy = ((k, v.copy(doc=doc)) for k, v in self.items()) | ||||
|         return SpanGroups(doc, items=data_copy) | ||||
| 
 | ||||
|     def setdefault(self, key, default=None): | ||||
|         if not isinstance(default, SpanGroup): | ||||
|  |  | |||
|  | @ -1,10 +1,13 @@ | |||
| from typing import TYPE_CHECKING, Dict, Any, Tuple, Callable, List, Optional, IO | ||||
| from typing import TYPE_CHECKING, Dict, Any, Tuple, Callable, List, Optional, IO, Union | ||||
| from wasabi import Printer | ||||
| from pathlib import Path | ||||
| import tqdm | ||||
| import sys | ||||
| import srsly | ||||
| 
 | ||||
| from ..util import registry | ||||
| from ..errors import Errors | ||||
| from .. import util | ||||
| 
 | ||||
| if TYPE_CHECKING: | ||||
|     from ..language import Language  # noqa: F401 | ||||
|  | @ -23,13 +26,44 @@ def setup_table( | |||
|     return final_cols, final_widths, ["r" for _ in final_widths] | ||||
| 
 | ||||
| 
 | ||||
| @registry.loggers("spacy.ConsoleLogger.v1") | ||||
| def console_logger(progress_bar: bool = False): | ||||
| @registry.loggers("spacy.ConsoleLogger.v2") | ||||
| def console_logger( | ||||
|     progress_bar: bool = False, | ||||
|     console_output: bool = True, | ||||
|     output_file: Optional[Union[str, Path]] = None, | ||||
| ): | ||||
|     """The ConsoleLogger.v2 prints out training logs in the console and/or saves them to a jsonl file. | ||||
|     progress_bar (bool): Whether the logger should print the progress bar. | ||||
|     console_output (bool): Whether the logger should print the logs on the console. | ||||
|     output_file (Optional[Union[str, Path]]): The file to save the training logs to. | ||||
|     """ | ||||
|     _log_exist = False | ||||
|     if output_file: | ||||
|         output_file = util.ensure_path(output_file)  # type: ignore | ||||
|         if output_file.exists():  # type: ignore | ||||
|             _log_exist = True | ||||
|         if not output_file.parents[0].exists():  # type: ignore | ||||
|             output_file.parents[0].mkdir(parents=True)  # type: ignore | ||||
| 
 | ||||
|     def setup_printer( | ||||
|         nlp: "Language", stdout: IO = sys.stdout, stderr: IO = sys.stderr | ||||
|     ) -> Tuple[Callable[[Optional[Dict[str, Any]]], None], Callable[[], None]]: | ||||
|         write = lambda text: print(text, file=stdout, flush=True) | ||||
|         msg = Printer(no_print=True) | ||||
| 
 | ||||
|         nonlocal output_file | ||||
|         output_stream = None | ||||
|         if _log_exist: | ||||
|             write( | ||||
|                 msg.warn( | ||||
|                     f"Saving logs is disabled because {output_file} already exists." | ||||
|                 ) | ||||
|             ) | ||||
|             output_file = None | ||||
|         elif output_file: | ||||
|             write(msg.info(f"Saving results to {output_file}")) | ||||
|             output_stream = open(output_file, "w", encoding="utf-8") | ||||
| 
 | ||||
|         # ensure that only trainable components are logged | ||||
|         logged_pipes = [ | ||||
|             name | ||||
|  | @ -40,13 +74,15 @@ def console_logger(progress_bar: bool = False): | |||
|         score_weights = nlp.config["training"]["score_weights"] | ||||
|         score_cols = [col for col, value in score_weights.items() if value is not None] | ||||
|         loss_cols = [f"Loss {pipe}" for pipe in logged_pipes] | ||||
|         spacing = 2 | ||||
|         table_header, table_widths, table_aligns = setup_table( | ||||
|             cols=["E", "#"] + loss_cols + score_cols + ["Score"], | ||||
|             widths=[3, 6] + [8 for _ in loss_cols] + [6 for _ in score_cols] + [6], | ||||
|         ) | ||||
|         write(msg.row(table_header, widths=table_widths, spacing=spacing)) | ||||
|         write(msg.row(["-" * width for width in table_widths], spacing=spacing)) | ||||
| 
 | ||||
|         if console_output: | ||||
|             spacing = 2 | ||||
|             table_header, table_widths, table_aligns = setup_table( | ||||
|                 cols=["E", "#"] + loss_cols + score_cols + ["Score"], | ||||
|                 widths=[3, 6] + [8 for _ in loss_cols] + [6 for _ in score_cols] + [6], | ||||
|             ) | ||||
|             write(msg.row(table_header, widths=table_widths, spacing=spacing)) | ||||
|             write(msg.row(["-" * width for width in table_widths], spacing=spacing)) | ||||
|         progress = None | ||||
| 
 | ||||
|         def log_step(info: Optional[Dict[str, Any]]) -> None: | ||||
|  | @ -57,12 +93,15 @@ def console_logger(progress_bar: bool = False): | |||
|                 if progress is not None: | ||||
|                     progress.update(1) | ||||
|                 return | ||||
|             losses = [ | ||||
|                 "{0:.2f}".format(float(info["losses"][pipe_name])) | ||||
|                 for pipe_name in logged_pipes | ||||
|             ] | ||||
| 
 | ||||
|             losses = [] | ||||
|             log_losses = {} | ||||
|             for pipe_name in logged_pipes: | ||||
|                 losses.append("{0:.2f}".format(float(info["losses"][pipe_name]))) | ||||
|                 log_losses[pipe_name] = float(info["losses"][pipe_name]) | ||||
| 
 | ||||
|             scores = [] | ||||
|             log_scores = {} | ||||
|             for col in score_cols: | ||||
|                 score = info["other_scores"].get(col, 0.0) | ||||
|                 try: | ||||
|  | @ -73,6 +112,7 @@ def console_logger(progress_bar: bool = False): | |||
|                 if col != "speed": | ||||
|                     score *= 100 | ||||
|                 scores.append("{0:.2f}".format(score)) | ||||
|                 log_scores[str(col)] = score | ||||
| 
 | ||||
|             data = ( | ||||
|                 [info["epoch"], info["step"]] | ||||
|  | @ -80,20 +120,36 @@ def console_logger(progress_bar: bool = False): | |||
|                 + scores | ||||
|                 + ["{0:.2f}".format(float(info["score"]))] | ||||
|             ) | ||||
| 
 | ||||
|             if output_stream: | ||||
|                 # Write to log file per log_step | ||||
|                 log_data = { | ||||
|                     "epoch": info["epoch"], | ||||
|                     "step": info["step"], | ||||
|                     "losses": log_losses, | ||||
|                     "scores": log_scores, | ||||
|                     "score": float(info["score"]), | ||||
|                 } | ||||
|                 output_stream.write(srsly.json_dumps(log_data) + "\n") | ||||
| 
 | ||||
|             if progress is not None: | ||||
|                 progress.close() | ||||
|             write( | ||||
|                 msg.row(data, widths=table_widths, aligns=table_aligns, spacing=spacing) | ||||
|             ) | ||||
|             if progress_bar: | ||||
|                 # Set disable=None, so that it disables on non-TTY | ||||
|                 progress = tqdm.tqdm( | ||||
|                     total=eval_frequency, disable=None, leave=False, file=stderr | ||||
|             if console_output: | ||||
|                 write( | ||||
|                     msg.row( | ||||
|                         data, widths=table_widths, aligns=table_aligns, spacing=spacing | ||||
|                     ) | ||||
|                 ) | ||||
|                 progress.set_description(f"Epoch {info['epoch']+1}") | ||||
|                 if progress_bar: | ||||
|                     # Set disable=None, so that it disables on non-TTY | ||||
|                     progress = tqdm.tqdm( | ||||
|                         total=eval_frequency, disable=None, leave=False, file=stderr | ||||
|                     ) | ||||
|                     progress.set_description(f"Epoch {info['epoch']+1}") | ||||
| 
 | ||||
|         def finalize() -> None: | ||||
|             pass | ||||
|             if output_stream: | ||||
|                 output_stream.close() | ||||
| 
 | ||||
|         return log_step, finalize | ||||
| 
 | ||||
|  |  | |||
|  | @ -398,9 +398,9 @@ def load_model( | |||
|     name: Union[str, Path], | ||||
|     *, | ||||
|     vocab: Union["Vocab", bool] = True, | ||||
|     disable: Iterable[str] = SimpleFrozenList(), | ||||
|     enable: Iterable[str] = SimpleFrozenList(), | ||||
|     exclude: Iterable[str] = SimpleFrozenList(), | ||||
|     disable: Union[str, Iterable[str]] = SimpleFrozenList(), | ||||
|     enable: Union[str, Iterable[str]] = SimpleFrozenList(), | ||||
|     exclude: Union[str, Iterable[str]] = SimpleFrozenList(), | ||||
|     config: Union[Dict[str, Any], Config] = SimpleFrozenDict(), | ||||
| ) -> "Language": | ||||
|     """Load a model from a package or data path. | ||||
|  | @ -408,9 +408,9 @@ def load_model( | |||
|     name (str): Package name or model path. | ||||
|     vocab (Vocab / True): Optional vocab to pass in on initialization. If True, | ||||
|         a new Vocab object will be created. | ||||
|     disable (Iterable[str]): Names of pipeline components to disable. | ||||
|     enable (Iterable[str]): Names of pipeline components to enable. All others will be disabled. | ||||
|     exclude (Iterable[str]):  Names of pipeline components to exclude. | ||||
|     disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. | ||||
|     enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All others will be disabled. | ||||
|     exclude (Union[str, Iterable[str]]):  Name(s) of pipeline component(s) to exclude. | ||||
|     config (Dict[str, Any] / Config): Config overrides as nested dict or dict | ||||
|         keyed by section values in dot notation. | ||||
|     RETURNS (Language): The loaded nlp object. | ||||
|  | @ -440,9 +440,9 @@ def load_model_from_package( | |||
|     name: str, | ||||
|     *, | ||||
|     vocab: Union["Vocab", bool] = True, | ||||
|     disable: Iterable[str] = SimpleFrozenList(), | ||||
|     enable: Iterable[str] = SimpleFrozenList(), | ||||
|     exclude: Iterable[str] = SimpleFrozenList(), | ||||
|     disable: Union[str, Iterable[str]] = SimpleFrozenList(), | ||||
|     enable: Union[str, Iterable[str]] = SimpleFrozenList(), | ||||
|     exclude: Union[str, Iterable[str]] = SimpleFrozenList(), | ||||
|     config: Union[Dict[str, Any], Config] = SimpleFrozenDict(), | ||||
| ) -> "Language": | ||||
|     """Load a model from an installed package. | ||||
|  | @ -450,12 +450,12 @@ def load_model_from_package( | |||
|     name (str): The package name. | ||||
|     vocab (Vocab / True): Optional vocab to pass in on initialization. If True, | ||||
|         a new Vocab object will be created. | ||||
|     disable (Iterable[str]): Names of pipeline components to disable. Disabled | ||||
|     disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled | ||||
|         pipes will be loaded but they won't be run unless you explicitly | ||||
|         enable them by calling nlp.enable_pipe. | ||||
|     enable (Iterable[str]): Names of pipeline components to enable. All other | ||||
|     enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other | ||||
|         pipes will be disabled (and can be enabled using `nlp.enable_pipe`). | ||||
|     exclude (Iterable[str]): Names of pipeline components to exclude. Excluded | ||||
|     exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded | ||||
|         components won't be loaded. | ||||
|     config (Dict[str, Any] / Config): Config overrides as nested dict or dict | ||||
|         keyed by section values in dot notation. | ||||
|  | @ -470,9 +470,9 @@ def load_model_from_path( | |||
|     *, | ||||
|     meta: Optional[Dict[str, Any]] = None, | ||||
|     vocab: Union["Vocab", bool] = True, | ||||
|     disable: Iterable[str] = SimpleFrozenList(), | ||||
|     enable: Iterable[str] = SimpleFrozenList(), | ||||
|     exclude: Iterable[str] = SimpleFrozenList(), | ||||
|     disable: Union[str, Iterable[str]] = SimpleFrozenList(), | ||||
|     enable: Union[str, Iterable[str]] = SimpleFrozenList(), | ||||
|     exclude: Union[str, Iterable[str]] = SimpleFrozenList(), | ||||
|     config: Union[Dict[str, Any], Config] = SimpleFrozenDict(), | ||||
| ) -> "Language": | ||||
|     """Load a model from a data directory path. Creates Language class with | ||||
|  | @ -482,12 +482,12 @@ def load_model_from_path( | |||
|     meta (Dict[str, Any]): Optional model meta. | ||||
|     vocab (Vocab / True): Optional vocab to pass in on initialization. If True, | ||||
|         a new Vocab object will be created. | ||||
|     disable (Iterable[str]): Names of pipeline components to disable. Disabled | ||||
|     disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled | ||||
|         pipes will be loaded but they won't be run unless you explicitly | ||||
|         enable them by calling nlp.enable_pipe. | ||||
|     enable (Iterable[str]): Names of pipeline components to enable. All other | ||||
|     enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other | ||||
|         pipes will be disabled (and can be enabled using `nlp.enable_pipe`). | ||||
|     exclude (Iterable[str]): Names of pipeline components to exclude. Excluded | ||||
|     exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded | ||||
|         components won't be loaded. | ||||
|     config (Dict[str, Any] / Config): Config overrides as nested dict or dict | ||||
|         keyed by section values in dot notation. | ||||
|  | @ -516,9 +516,9 @@ def load_model_from_config( | |||
|     *, | ||||
|     meta: Dict[str, Any] = SimpleFrozenDict(), | ||||
|     vocab: Union["Vocab", bool] = True, | ||||
|     disable: Iterable[str] = SimpleFrozenList(), | ||||
|     enable: Iterable[str] = SimpleFrozenList(), | ||||
|     exclude: Iterable[str] = SimpleFrozenList(), | ||||
|     disable: Union[str, Iterable[str]] = SimpleFrozenList(), | ||||
|     enable: Union[str, Iterable[str]] = SimpleFrozenList(), | ||||
|     exclude: Union[str, Iterable[str]] = SimpleFrozenList(), | ||||
|     auto_fill: bool = False, | ||||
|     validate: bool = True, | ||||
| ) -> "Language": | ||||
|  | @ -529,12 +529,12 @@ def load_model_from_config( | |||
|     meta (Dict[str, Any]): Optional model meta. | ||||
|     vocab (Vocab / True): Optional vocab to pass in on initialization. If True, | ||||
|         a new Vocab object will be created. | ||||
|     disable (Iterable[str]): Names of pipeline components to disable. Disabled | ||||
|     disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled | ||||
|         pipes will be loaded but they won't be run unless you explicitly | ||||
|         enable them by calling nlp.enable_pipe. | ||||
|     enable (Iterable[str]): Names of pipeline components to enable. All other | ||||
|     enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other | ||||
|         pipes will be disabled (and can be enabled using `nlp.enable_pipe`). | ||||
|     exclude (Iterable[str]): Names of pipeline components to exclude. Excluded | ||||
|     exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded | ||||
|         components won't be loaded. | ||||
|     auto_fill (bool): Whether to auto-fill config with missing defaults. | ||||
|     validate (bool): Whether to show config validation errors. | ||||
|  | @ -616,9 +616,9 @@ def load_model_from_init_py( | |||
|     init_file: Union[Path, str], | ||||
|     *, | ||||
|     vocab: Union["Vocab", bool] = True, | ||||
|     disable: Iterable[str] = SimpleFrozenList(), | ||||
|     enable: Iterable[str] = SimpleFrozenList(), | ||||
|     exclude: Iterable[str] = SimpleFrozenList(), | ||||
|     disable: Union[str, Iterable[str]] = SimpleFrozenList(), | ||||
|     enable: Union[str, Iterable[str]] = SimpleFrozenList(), | ||||
|     exclude: Union[str, Iterable[str]] = SimpleFrozenList(), | ||||
|     config: Union[Dict[str, Any], Config] = SimpleFrozenDict(), | ||||
| ) -> "Language": | ||||
|     """Helper function to use in the `load()` method of a model package's | ||||
|  | @ -626,12 +626,12 @@ def load_model_from_init_py( | |||
| 
 | ||||
|     vocab (Vocab / True): Optional vocab to pass in on initialization. If True, | ||||
|         a new Vocab object will be created. | ||||
|     disable (Iterable[str]): Names of pipeline components to disable. Disabled | ||||
|     disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled | ||||
|         pipes will be loaded but they won't be run unless you explicitly | ||||
|         enable them by calling nlp.enable_pipe. | ||||
|     enable (Iterable[str]): Names of pipeline components to enable. All other | ||||
|     enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other | ||||
|         pipes will be disabled (and can be enabled using `nlp.enable_pipe`). | ||||
|     exclude (Iterable[str]): Names of pipeline components to exclude. Excluded | ||||
|     exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded | ||||
|         components won't be loaded. | ||||
|     config (Dict[str, Any] / Config): Config overrides as nested dict or dict | ||||
|         keyed by section values in dot notation. | ||||
|  |  | |||
|  | @ -77,14 +77,15 @@ $ python -m spacy info [--markdown] [--silent] [--exclude] | |||
| $ python -m spacy info [model] [--markdown] [--silent] [--exclude] | ||||
| ``` | ||||
| 
 | ||||
| | Name                                             | Description                                                                                   | | ||||
| | ------------------------------------------------ | --------------------------------------------------------------------------------------------- | | ||||
| | `model`                                          | A trained pipeline, i.e. package name or path (optional). ~~Optional[str] \(option)~~         | | ||||
| | `--markdown`, `-md`                              | Print information as Markdown. ~~bool (flag)~~                                                | | ||||
| | `--silent`, `-s` <Tag variant="new">2.0.12</Tag> | Don't print anything, just return the values. ~~bool (flag)~~                                 | | ||||
| | `--exclude`, `-e`                                | Comma-separated keys to exclude from the print-out. Defaults to `"labels"`. ~~Optional[str]~~ | | ||||
| | `--help`, `-h`                                   | Show help message and available arguments. ~~bool (flag)~~                                    | | ||||
| | **PRINTS**                                       | Information about your spaCy installation.                                                    | | ||||
| | Name                                             | Description                                                                                                             | | ||||
| | ------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------- | | ||||
| | `model`                                          | A trained pipeline, i.e. package name or path (optional). ~~Optional[str] \(option)~~                                   | | ||||
| | `--markdown`, `-md`                              | Print information as Markdown. ~~bool (flag)~~                                                                          | | ||||
| | `--silent`, `-s` <Tag variant="new">2.0.12</Tag> | Don't print anything, just return the values. ~~bool (flag)~~                                                           | | ||||
| | `--exclude`, `-e`                                | Comma-separated keys to exclude from the print-out. Defaults to `"labels"`. ~~Optional[str]~~                           | | ||||
| | `--url`, `-u` <Tag variant="new">3.5.0</Tag>     | Print the URL to download the most recent compatible version of the pipeline. Requires a pipeline name. ~~bool (flag)~~ | | ||||
| | `--help`, `-h`                                   | Show help message and available arguments. ~~bool (flag)~~                                                              | | ||||
| | **PRINTS**                                       | Information about your spaCy installation.                                                                              | | ||||
| 
 | ||||
| ## validate {#validate new="2" tag="command"} | ||||
| 
 | ||||
|  |  | |||
|  | @ -63,17 +63,18 @@ spaCy loads a model under the hood based on its | |||
| > nlp = Language.from_config(config) | ||||
| > ``` | ||||
| 
 | ||||
| | Name           | Description                                                                                                                                                                                                                                      | | ||||
| | -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | ||||
| | `config`       | The loaded config. ~~Union[Dict[str, Any], Config]~~                                                                                                                                                                                             | | ||||
| | _keyword-only_ |                                                                                                                                                                                                                                                  | | ||||
| | `vocab`        | A `Vocab` object. If `True`, a vocab is created using the default language data settings. ~~Vocab~~                                                                                                                                              | | ||||
| | `disable`      | Names of pipeline components to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [`nlp.enable_pipe`](/api/language#enable_pipe). ~~List[str]~~ | | ||||
| | `exclude`      | Names of pipeline components to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~List[str]~~                                                                                                             | | ||||
| | `meta`         | [Meta data](/api/data-formats#meta) overrides. ~~Dict[str, Any]~~                                                                                                                                                                                | | ||||
| | `auto_fill`    | Whether to automatically fill in missing values in the config, based on defaults and function argument annotations. Defaults to `True`. ~~bool~~                                                                                                 | | ||||
| | `validate`     | Whether to validate the component config and arguments against the types expected by the factory. Defaults to `True`. ~~bool~~                                                                                                                   | | ||||
| | **RETURNS**    | The initialized object. ~~Language~~                                                                                                                                                                                                             | | ||||
| | Name                                  | Description                                                                                                                                                                                                                                                          | | ||||
| | ------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ||||
| | `config`                              | The loaded config. ~~Union[Dict[str, Any], Config]~~                                                                                                                                                                                                                 | | ||||
| | _keyword-only_                        |                                                                                                                                                                                                                                                                      | | ||||
| | `vocab`                               | A `Vocab` object. If `True`, a vocab is created using the default language data settings. ~~Vocab~~                                                                                                                                                                  | | ||||
| | `disable`                             | Name(s) of pipeline component(s) to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ | | ||||
| | `enable` <Tag variant="new">3.4</Tag> | Name(s) of pipeline component(s) to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled, but can be enabled again using [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~                                  | | ||||
| | `exclude`                             | Name(s) of pipeline component(s) to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~Union[str, Iterable[str]]~~                                                                                                             | | ||||
| | `meta`                                | [Meta data](/api/data-formats#meta) overrides. ~~Dict[str, Any]~~                                                                                                                                                                                                    | | ||||
| | `auto_fill`                           | Whether to automatically fill in missing values in the config, based on defaults and function argument annotations. Defaults to `True`. ~~bool~~                                                                                                                     | | ||||
| | `validate`                            | Whether to validate the component config and arguments against the types expected by the factory. Defaults to `True`. ~~bool~~                                                                                                                                       | | ||||
| | **RETURNS**                           | The initialized object. ~~Language~~                                                                                                                                                                                                                                 | | ||||
| 
 | ||||
| ## Language.component {#component tag="classmethod" new="3"} | ||||
| 
 | ||||
|  | @ -695,8 +696,8 @@ As of spaCy v3.0, the `disable_pipes` method has been renamed to `select_pipes`: | |||
| | Name           | Description                                                                                            | | ||||
| | -------------- | ------------------------------------------------------------------------------------------------------ | | ||||
| | _keyword-only_ |                                                                                                        | | ||||
| | `disable`      | Name(s) of pipeline components to disable. ~~Optional[Union[str, Iterable[str]]]~~                     | | ||||
| | `enable`       | Name(s) of pipeline components that will not be disabled. ~~Optional[Union[str, Iterable[str]]]~~      | | ||||
| | `disable`      | Name(s) of pipeline component(s) to disable. ~~Optional[Union[str, Iterable[str]]]~~                   | | ||||
| | `enable`       | Name(s) of pipeline component(s) that will not be disabled. ~~Optional[Union[str, Iterable[str]]]~~    | | ||||
| | **RETURNS**    | The disabled pipes that can be restored by calling the object's `.restore()` method. ~~DisabledPipes~~ | | ||||
| 
 | ||||
| ## Language.get_factory_meta {#get_factory_meta tag="classmethod" new="3"} | ||||
|  |  | |||
|  | @ -248,6 +248,59 @@ added to an existing vectors table. See more details in | |||
| 
 | ||||
| ## Loggers {#loggers} | ||||
| 
 | ||||
| These functions are available from `@spacy.registry.loggers`. | ||||
| 
 | ||||
| ### spacy.ConsoleLogger.v1 {#ConsoleLogger_v1} | ||||
| 
 | ||||
| > #### Example config | ||||
| > | ||||
| > ```ini | ||||
| > [training.logger] | ||||
| > @loggers = "spacy.ConsoleLogger.v1" | ||||
| > progress_bar = true | ||||
| > ``` | ||||
| 
 | ||||
| Writes the results of a training step to the console in a tabular format. | ||||
| 
 | ||||
| <Accordion title="Example console output" spaced> | ||||
| 
 | ||||
| ```cli | ||||
| $ python -m spacy train config.cfg | ||||
| ``` | ||||
| 
 | ||||
| ``` | ||||
| ℹ Using CPU | ||||
| ℹ Loading config and nlp from: config.cfg | ||||
| ℹ Pipeline: ['tok2vec', 'tagger'] | ||||
| ℹ Start training | ||||
| ℹ Training. Initial learn rate: 0.0 | ||||
| 
 | ||||
| E     #        LOSS TOK2VEC   LOSS TAGGER   TAG_ACC   SCORE | ||||
| ---   ------   ------------   -----------   -------   ------ | ||||
|   0        0           0.00         86.20      0.22     0.00 | ||||
|   0      200           3.08      18968.78     34.00     0.34 | ||||
|   0      400          31.81      22539.06     33.64     0.34 | ||||
|   0      600          92.13      22794.91     43.80     0.44 | ||||
|   0      800         183.62      21541.39     56.05     0.56 | ||||
|   0     1000         352.49      25461.82     65.15     0.65 | ||||
|   0     1200         422.87      23708.82     71.84     0.72 | ||||
|   0     1400         601.92      24994.79     76.57     0.77 | ||||
|   0     1600         662.57      22268.02     80.20     0.80 | ||||
|   0     1800        1101.50      28413.77     82.56     0.83 | ||||
|   0     2000        1253.43      28736.36     85.00     0.85 | ||||
|   0     2200        1411.02      28237.53     87.42     0.87 | ||||
|   0     2400        1605.35      28439.95     88.70     0.89 | ||||
| ``` | ||||
| 
 | ||||
| Note that the cumulative loss keeps increasing within one epoch, but should | ||||
| start decreasing across epochs. | ||||
| 
 | ||||
|  </Accordion> | ||||
| 
 | ||||
| | Name           | Description                                                | | ||||
| | -------------- | ---------------------------------------------------------- | | ||||
| | `progress_bar` | Whether the logger should print the progress bar. ~~bool~~ | | ||||
| 
 | ||||
| Logging utilities for spaCy are implemented in the | ||||
| [`spacy-loggers`](https://github.com/explosion/spacy-loggers) repo, and the | ||||
| functions are typically available from `@spacy.registry.loggers`. | ||||
|  |  | |||
|  | @ -255,9 +255,10 @@ Return a copy of the span group. | |||
| > new_group = doc.spans["errors"].copy() | ||||
| > ``` | ||||
| 
 | ||||
| | Name        | Description                                     | | ||||
| | ----------- | ----------------------------------------------- | | ||||
| | **RETURNS** | A copy of the `SpanGroup` object. ~~SpanGroup~~ | | ||||
| | Name        | Description                                                                                        | | ||||
| | ----------- | -------------------------------------------------------------------------------------------------- | | ||||
| | `doc`       | The document to which the copy is bound. Defaults to `None` for the current doc. ~~Optional[Doc]~~ | | ||||
| | **RETURNS** | A copy of the `SpanGroup` object. ~~SpanGroup~~                                                    | | ||||
| 
 | ||||
| ## SpanGroup.to_bytes {#to_bytes tag="method"} | ||||
| 
 | ||||
|  |  | |||
|  | @ -45,16 +45,16 @@ specified separately using the new `exclude` keyword argument. | |||
| > nlp = spacy.load("en_core_web_sm", exclude=["parser", "tagger"]) | ||||
| > ``` | ||||
| 
 | ||||
| | Name                                 | Description                                                                                                                                                                                                                                    | | ||||
| | ------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ||||
| | `name`                               | Pipeline to load, i.e. package name or path. ~~Union[str, Path]~~                                                                                                                                                                              | | ||||
| | _keyword-only_                       |                                                                                                                                                                                                                                                | | ||||
| | `vocab`                              | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~                                                                                                          | | ||||
| | `disable`                            | Names of pipeline components to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [nlp.enable_pipe](/api/language#enable_pipe). ~~List[str]~~ | | ||||
| | `enable`                             | Names of pipeline components to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled. ~~List[str]~~                                                                                                               | | ||||
| | `exclude` <Tag variant="new">3</Tag> | Names of pipeline components to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~List[str]~~                                                                                                           | | ||||
| | `config` <Tag variant="new">3</Tag>  | Optional config overrides, either as nested dict or dict keyed by section value in dot notation, e.g. `"components.name.value"`. ~~Union[Dict[str, Any], Config]~~                                                                             | | ||||
| | **RETURNS**                          | A `Language` object with the loaded pipeline. ~~Language~~                                                                                                                                                                                     | | ||||
| | Name                                  | Description                                                                                                                                                                                                                                                        | | ||||
| | ------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | ||||
| | `name`                                | Pipeline to load, i.e. package name or path. ~~Union[str, Path]~~                                                                                                                                                                                                  | | ||||
| | _keyword-only_                        |                                                                                                                                                                                                                                                                    | | ||||
| | `vocab`                               | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~                                                                                                                              | | ||||
| | `disable`                             | Name(s) of pipeline component(s) to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [nlp.enable_pipe](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ | | ||||
| | `enable` <Tag variant="new">3.4</Tag> | Name(s) of pipeline component(s) to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled. ~~Union[str, Iterable[str]]~~                                                                                                               | | ||||
| | `exclude` <Tag variant="new">3</Tag>  | Name(s) of pipeline component(s) to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~Union[str, Iterable[str]]~~                                                                                                           | | ||||
| | `config` <Tag variant="new">3</Tag>   | Optional config overrides, either as nested dict or dict keyed by section value in dot notation, e.g. `"components.name.value"`. ~~Union[Dict[str, Any], Config]~~                                                                                                 | | ||||
| | **RETURNS**                           | A `Language` object with the loaded pipeline. ~~Language~~                                                                                                                                                                                                         | | ||||
| 
 | ||||
| Essentially, `spacy.load()` is a convenience wrapper that reads the pipeline's | ||||
| [`config.cfg`](/api/data-formats#config), uses the language and pipeline | ||||
|  | @ -275,8 +275,8 @@ Render a dependency parse tree or named entity visualization. | |||
| 
 | ||||
| ### displacy.parse_deps {#displacy.parse_deps tag="method" new="2"} | ||||
| 
 | ||||
| Generate dependency parse in `{'words': [], 'arcs': []}` format. | ||||
| For use with the `manual=True` argument in `displacy.render`. | ||||
| Generate dependency parse in `{'words': [], 'arcs': []}` format. For use with | ||||
| the `manual=True` argument in `displacy.render`. | ||||
| 
 | ||||
| > #### Example | ||||
| > | ||||
|  | @ -297,8 +297,8 @@ For use with the `manual=True` argument in `displacy.render`. | |||
| 
 | ||||
| ### displacy.parse_ents {#displacy.parse_ents tag="method" new="2"} | ||||
| 
 | ||||
| Generate named entities in `[{start: i, end: i, label: 'label'}]` format. | ||||
| For use with the `manual=True` argument in `displacy.render`. | ||||
| Generate named entities in `[{start: i, end: i, label: 'label'}]` format. For | ||||
| use with the `manual=True` argument in `displacy.render`. | ||||
| 
 | ||||
| > #### Example | ||||
| > | ||||
|  | @ -319,8 +319,8 @@ For use with the `manual=True` argument in `displacy.render`. | |||
| 
 | ||||
| ### displacy.parse_spans {#displacy.parse_spans tag="method" new="2"} | ||||
| 
 | ||||
| Generate spans in `[{start_token: i, end_token: i, label: 'label'}]` format. | ||||
| For use with the `manual=True` argument in `displacy.render`. | ||||
| Generate spans in `[{start_token: i, end_token: i, label: 'label'}]` format. For | ||||
| use with the `manual=True` argument in `displacy.render`. | ||||
| 
 | ||||
| > #### Example | ||||
| > | ||||
|  | @ -451,7 +451,7 @@ factories. | |||
| | Registry name     | Description                                                                                                                                                                                                                                        | | ||||
| | ----------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ||||
| | `architectures`   | Registry for functions that create [model architectures](/api/architectures). Can be used to register custom model architectures and reference them in the `config.cfg`.                                                                           | | ||||
| | `augmenters`      | Registry for functions that create [data augmentation](#augmenters) callbacks for corpora and other training data iterators.                                                                                                                       | | ||||
| | `augmenters`      | Registry for functions that create [data augmentation](#augmenters) callbacks for corpora and other training data iterators.                                                                                                                       | | ||||
| | `batchers`        | Registry for training and evaluation [data batchers](#batchers).                                                                                                                                                                                   | | ||||
| | `callbacks`       | Registry for custom callbacks to [modify the `nlp` object](/usage/training#custom-code-nlp-callbacks) before training.                                                                                                                             | | ||||
| | `displacy_colors` | Registry for custom color scheme for the [`displacy` NER visualizer](/usage/visualizers). Automatically reads from [entry points](/usage/saving-loading#entry-points).                                                                             | | ||||
|  | @ -505,7 +505,7 @@ finished. To log each training step, a | |||
| and the accuracy scores on the development set. | ||||
| 
 | ||||
| The built-in, default logger is the ConsoleLogger, which prints results to the | ||||
| console in tabular format. The | ||||
| console in tabular format and saves them to a `jsonl` file. The | ||||
| [spacy-loggers](https://github.com/explosion/spacy-loggers) package, included as | ||||
| a dependency of spaCy, enables other loggers, such as one that sends results to | ||||
| a [Weights & Biases](https://www.wandb.com/) dashboard. | ||||
|  | @ -513,16 +513,20 @@ a [Weights & Biases](https://www.wandb.com/) dashboard. | |||
| Instead of using one of the built-in loggers, you can | ||||
| [implement your own](/usage/training#custom-logging). | ||||
| 
 | ||||
| #### spacy.ConsoleLogger.v1 {#ConsoleLogger tag="registered function"} | ||||
| #### spacy.ConsoleLogger.v2 {#ConsoleLogger tag="registered function"} | ||||
| 
 | ||||
| > #### Example config | ||||
| > | ||||
| > ```ini | ||||
| > [training.logger] | ||||
| > @loggers = "spacy.ConsoleLogger.v1" | ||||
| > @loggers = "spacy.ConsoleLogger.v2" | ||||
| > progress_bar = true | ||||
| > console_output = true | ||||
| > output_file = "training_log.jsonl" | ||||
| > ``` | ||||
| 
 | ||||
| Writes the results of a training step to the console in a tabular format. | ||||
| Writes the results of a training step to the console in a tabular format and | ||||
| saves them to a `jsonl` file. | ||||
| 
 | ||||
| <Accordion title="Example console output" spaced> | ||||
| 
 | ||||
|  | @ -536,22 +540,23 @@ $ python -m spacy train config.cfg | |||
| ℹ Pipeline: ['tok2vec', 'tagger'] | ||||
| ℹ Start training | ||||
| ℹ Training. Initial learn rate: 0.0 | ||||
| ℹ Saving results to training_log.jsonl | ||||
| 
 | ||||
| E     #        LOSS TOK2VEC   LOSS TAGGER   TAG_ACC   SCORE | ||||
| ---   ------   ------------   -----------   -------   ------ | ||||
|   1        0           0.00         86.20      0.22     0.00 | ||||
|   1      200           3.08      18968.78     34.00     0.34 | ||||
|   1      400          31.81      22539.06     33.64     0.34 | ||||
|   1      600          92.13      22794.91     43.80     0.44 | ||||
|   1      800         183.62      21541.39     56.05     0.56 | ||||
|   1     1000         352.49      25461.82     65.15     0.65 | ||||
|   1     1200         422.87      23708.82     71.84     0.72 | ||||
|   1     1400         601.92      24994.79     76.57     0.77 | ||||
|   1     1600         662.57      22268.02     80.20     0.80 | ||||
|   1     1800        1101.50      28413.77     82.56     0.83 | ||||
|   1     2000        1253.43      28736.36     85.00     0.85 | ||||
|   1     2200        1411.02      28237.53     87.42     0.87 | ||||
|   1     2400        1605.35      28439.95     88.70     0.89 | ||||
|   0        0           0.00         86.20      0.22     0.00 | ||||
|   0      200           3.08      18968.78     34.00     0.34 | ||||
|   0      400          31.81      22539.06     33.64     0.34 | ||||
|   0      600          92.13      22794.91     43.80     0.44 | ||||
|   0      800         183.62      21541.39     56.05     0.56 | ||||
|   0     1000         352.49      25461.82     65.15     0.65 | ||||
|   0     1200         422.87      23708.82     71.84     0.72 | ||||
|   0     1400         601.92      24994.79     76.57     0.77 | ||||
|   0     1600         662.57      22268.02     80.20     0.80 | ||||
|   0     1800        1101.50      28413.77     82.56     0.83 | ||||
|   0     2000        1253.43      28736.36     85.00     0.85 | ||||
|   0     2200        1411.02      28237.53     87.42     0.87 | ||||
|   0     2400        1605.35      28439.95     88.70     0.89 | ||||
| ``` | ||||
| 
 | ||||
| Note that the cumulative loss keeps increasing within one epoch, but should | ||||
|  | @ -559,6 +564,12 @@ start decreasing across epochs. | |||
| 
 | ||||
|  </Accordion> | ||||
| 
 | ||||
| | Name             | Description                                                           | | ||||
| | ---------------- | --------------------------------------------------------------------- | | ||||
| | `progress_bar`   | Whether the logger should print the progress bar. ~~bool~~            | | ||||
| | `console_output` | Whether the logger should print the logs on the console. ~~bool~~     | | ||||
| | `output_file`    | The file to save the training logs to. ~~Optional[Union[str, Path]]~~ | | ||||
| 
 | ||||
| ## Readers {#readers} | ||||
| 
 | ||||
| ### File readers {#file-readers source="github.com/explosion/srsly" new="3"} | ||||
|  | @ -1038,15 +1049,16 @@ and create a `Language` object. The model data will then be loaded in via | |||
| > nlp = util.load_model("/path/to/data") | ||||
| > ``` | ||||
| 
 | ||||
| | Name                                 | Description                                                                                                                                                                                                                                      | | ||||
| | ------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | ||||
| | `name`                               | Package name or path. ~~str~~                                                                                                                                                                                                                    | | ||||
| | _keyword-only_                       |                                                                                                                                                                                                                                                  | | ||||
| | `vocab`                              | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~                                                                                                            | | ||||
| | `disable`                            | Names of pipeline components to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [`nlp.enable_pipe`](/api/language#enable_pipe). ~~List[str]~~ | | ||||
| | `exclude` <Tag variant="new">3</Tag> | Names of pipeline components to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~List[str]~~                                                                                                             | | ||||
| | `config` <Tag variant="new">3</Tag>  | Config overrides as nested dict or flat dict keyed by section values in dot notation, e.g. `"nlp.pipeline"`. ~~Union[Dict[str, Any], Config]~~                                                                                                   | | ||||
| | **RETURNS**                          | `Language` class with the loaded pipeline. ~~Language~~                                                                                                                                                                                          | | ||||
| | Name                                  | Description                                                                                                                                                                                                                                                          | | ||||
| | ------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ||||
| | `name`                                | Package name or path. ~~str~~                                                                                                                                                                                                                                        | | ||||
| | _keyword-only_                        |                                                                                                                                                                                                                                                                      | | ||||
| | `vocab`                               | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~                                                                                                                                | | ||||
| | `disable`                             | Name(s) of pipeline component(s) to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ | | ||||
| | `enable` <Tag variant="new">3.4</Tag> | Name(s) of pipeline component(s) to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled, but can be enabled again using [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~                                  | | ||||
| | `exclude` <Tag variant="new">3</Tag>  | Name(s) of pipeline component(s) to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~Union[str, Iterable[str]]~~                                                                                                             | | ||||
| | `config` <Tag variant="new">3</Tag>   | Config overrides as nested dict or flat dict keyed by section values in dot notation, e.g. `"nlp.pipeline"`. ~~Union[Dict[str, Any], Config]~~                                                                                                                       | | ||||
| | **RETURNS**                           | `Language` class with the loaded pipeline. ~~Language~~                                                                                                                                                                                                              | | ||||
| 
 | ||||
| ### util.load_model_from_init_py {#util.load_model_from_init_py tag="function" new="2"} | ||||
| 
 | ||||
|  | @ -1062,15 +1074,16 @@ A helper function to use in the `load()` method of a pipeline package's | |||
| >     return load_model_from_init_py(__file__, **overrides) | ||||
| > ``` | ||||
| 
 | ||||
| | Name                                 | Description                                                                                                                                                                                                                                    | | ||||
| | ------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ||||
| | `init_file`                          | Path to package's `__init__.py`, i.e. `__file__`. ~~Union[str, Path]~~                                                                                                                                                                         | | ||||
| | _keyword-only_                       |                                                                                                                                                                                                                                                | | ||||
| | `vocab` <Tag variant="new">3</Tag>   | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~                                                                                                          | | ||||
| | `disable`                            | Names of pipeline components to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [nlp.enable_pipe](/api/language#enable_pipe). ~~List[str]~~ | | ||||
| | `exclude` <Tag variant="new">3</Tag> | Names of pipeline components to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~List[str]~~                                                                                                           | | ||||
| | `config` <Tag variant="new">3</Tag>  | Config overrides as nested dict or flat dict keyed by section values in dot notation, e.g. `"nlp.pipeline"`. ~~Union[Dict[str, Any], Config]~~                                                                                                 | | ||||
| | **RETURNS**                          | `Language` class with the loaded pipeline. ~~Language~~                                                                                                                                                                                        | | ||||
| | Name                                  | Description                                                                                                                                                                                                                                                          | | ||||
| | ------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ||||
| | `init_file`                           | Path to package's `__init__.py`, i.e. `__file__`. ~~Union[str, Path]~~                                                                                                                                                                                               | | ||||
| | _keyword-only_                        |                                                                                                                                                                                                                                                                      | | ||||
| | `vocab` <Tag variant="new">3</Tag>    | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~                                                                                                                                | | ||||
| | `disable`                             | Name(s) of pipeline component(s) to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ | | ||||
| | `enable` <Tag variant="new">3.4</Tag> | Name(s) of pipeline component(s) to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled, but can be enabled again using [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~                                  | | ||||
| | `exclude` <Tag variant="new">3</Tag>  | Name(s) of pipeline component(s) to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~Union[str, Iterable[str]]~~                                                                                                             | | ||||
| | `config` <Tag variant="new">3</Tag>   | Config overrides as nested dict or flat dict keyed by section values in dot notation, e.g. `"nlp.pipeline"`. ~~Union[Dict[str, Any], Config]~~                                                                                                                       | | ||||
| | **RETURNS**                           | `Language` class with the loaded pipeline. ~~Language~~                                                                                                                                                                                                              | | ||||
| 
 | ||||
| ### util.load_config {#util.load_config tag="function" new="3"} | ||||
| 
 | ||||
|  |  | |||
|  | @ -396,15 +396,32 @@ pipeline package can be found. | |||
| To download a trained pipeline directly using | ||||
| [pip](https://pypi.python.org/pypi/pip), point `pip install` to the URL or local | ||||
| path of the wheel file or archive. Installing the wheel is usually more | ||||
| efficient. To find the direct link to a package, head over to the | ||||
| [releases](https://github.com/explosion/spacy-models/releases), right click on | ||||
| the archive link and copy it to your clipboard. | ||||
| efficient. | ||||
| 
 | ||||
| > #### Pipeline Package URLs {#pipeline-urls} | ||||
| > | ||||
| > Pretrained pipeline distributions are hosted on | ||||
| > [GitHub Releases](https://github.com/explosion/spacy-models/releases), and you | ||||
| > can find download links there, as well as on the model page. You can also get | ||||
| > URLs directly from the command line by using `spacy info` with the `--url` | ||||
| > flag, which may be useful for automation. | ||||
| > | ||||
| > ```bash | ||||
| > spacy info en_core_web_sm --url | ||||
| > ``` | ||||
| > | ||||
| > This command will print the URL for the latest version of a pipeline | ||||
| > compatible with the version of spaCy you're using. Note that an internet | ||||
| > connection is required to look up the compatibility information. | ||||
| 
 | ||||
| ```bash | ||||
| # With external URL | ||||
| $ pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0-py3-none-any.whl | ||||
| $ pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz | ||||
| 
 | ||||
| # Using spacy info to get the external URL | ||||
| $ pip install $(spacy info en_core_web_sm --url) | ||||
| 
 | ||||
| # With local file | ||||
| $ pip install /Users/you/en_core_web_sm-3.0.0-py3-none-any.whl | ||||
| $ pip install /Users/you/en_core_web_sm-3.0.0.tar.gz | ||||
|  | @ -545,21 +562,16 @@ should be specifying them directly. | |||
| Because pipeline packages are valid Python packages, you can add them to your | ||||
| application's `requirements.txt`. If you're running your own internal PyPI | ||||
| installation, you can upload the pipeline packages there. pip's | ||||
| [requirements file format](https://pip.pypa.io/en/latest/reference/pip_install/#requirements-file-format) | ||||
| supports both package names to download via a PyPi server, as well as direct | ||||
| URLs. | ||||
| [requirements file format](https://pip.pypa.io/en/latest/reference/requirements-file-format/) | ||||
| supports both package names to download via a PyPI server, as well as | ||||
| [direct URLs](#pipeline-urls). | ||||
| 
 | ||||
| ```text | ||||
| ### requirements.txt | ||||
| spacy>=3.0.0,<4.0.0 | ||||
| https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz#egg=en_core_web_sm | ||||
| en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.4.0/en_core_web_sm-3.4.0-py3-none-any.whl | ||||
| ``` | ||||
| 
 | ||||
| Specifying `#egg=` with the package name tells pip which package to expect from | ||||
| the download URL. This way, the package won't be re-downloaded and overwritten | ||||
| if it's already installed - just like when you're downloading a package from | ||||
| PyPi. | ||||
| 
 | ||||
| All pipeline packages are versioned and specify their spaCy dependency. This | ||||
| ensures cross-compatibility and lets you specify exact version requirements for | ||||
| each pipeline. If you've [trained](/usage/training) your own pipeline, you can | ||||
|  |  | |||
|  | @ -1192,7 +1192,7 @@ | |||
|             "slogan": "Fast, flexible and transparent sentiment analysis", | ||||
|             "description": "Asent is a rule-based sentiment analysis library for Python made using spaCy. It is inspired by VADER, but uses a more modular ruleset, that allows the user to change e.g. the method for finding negations. Furthermore it includes visualisers to visualize the model predictions, making the model easily interpretable.", | ||||
|             "github": "kennethenevoldsen/asent", | ||||
|             "pip": "aseny", | ||||
|             "pip": "asent", | ||||
|             "code_example": [ | ||||
|                 "import spacy", | ||||
|                 "import asent", | ||||
|  |  | |||
|  | @ -76,6 +76,7 @@ const MODEL_META = { | |||
|     benchmark_ner: 'NER accuracy', | ||||
|     benchmark_speed: 'Speed', | ||||
|     compat: 'Latest compatible package version for your spaCy installation', | ||||
|     download_link: 'Download link for the pipeline', | ||||
| } | ||||
| 
 | ||||
| const LABEL_SCHEME_META = { | ||||
|  | @ -138,6 +139,13 @@ function formatAccuracy(data, lang) { | |||
|         .filter(item => item) | ||||
| } | ||||
| 
 | ||||
| function formatDownloadLink(lang, name, version) { | ||||
|   const fullName = `${lang}_${name}-${version}` | ||||
|   const filename = `${fullName}-py3-none-any.whl` | ||||
|   const url = `https://github.com/explosion/spacy-models/releases/download/${fullName}/${filename}` | ||||
|   return <Link to={url} hideIcon>{filename}</Link> | ||||
| } | ||||
| 
 | ||||
| function formatModelMeta(data) { | ||||
|     return { | ||||
|         fullName: `${data.lang}_${data.name}-${data.version}`, | ||||
|  | @ -154,6 +162,7 @@ function formatModelMeta(data) { | |||
|         labels: isEmptyObj(data.labels) ? null : data.labels, | ||||
|         vectors: formatVectors(data.vectors), | ||||
|         accuracy: formatAccuracy(data.performance, data.lang), | ||||
|         download_link: formatDownloadLink(data.lang, data.name, data.version), | ||||
|     } | ||||
| } | ||||
| 
 | ||||
|  | @ -244,6 +253,7 @@ const Model = ({ | |||
|         { label: 'Components', content: components, help: MODEL_META.components }, | ||||
|         { label: 'Pipeline', content: pipeline, help: MODEL_META.pipeline }, | ||||
|         { label: 'Vectors', content: meta.vectors, help: MODEL_META.vecs }, | ||||
|         { label: 'Download Link', content: meta.download_link, help: MODEL_META.download_link }, | ||||
|         { label: 'Sources', content: sources, help: MODEL_META.sources }, | ||||
|         { label: 'Author', content: author }, | ||||
|         { label: 'License', content: license }, | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user