mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-30 23:47:31 +03:00 
			
		
		
		
	Replace links to nightly docs [ci skip]
This commit is contained in:
		
							parent
							
								
									b26a3daa9a
								
							
						
					
					
						commit
						d0c3775712
					
				|  | @ -29,7 +29,7 @@ COMMAND = "python -m spacy" | |||
| NAME = "spacy" | ||||
| HELP = """spaCy Command-line Interface | ||||
| 
 | ||||
| DOCS: https://nightly.spacy.io/api/cli | ||||
| DOCS: https://spacy.io/api/cli | ||||
| """ | ||||
| PROJECT_HELP = f"""Command-line interface for spaCy projects and templates. | ||||
| You'd typically start by cloning a project template to a local directory and | ||||
|  |  | |||
|  | @ -64,7 +64,7 @@ def convert_cli( | |||
|     is written to stdout, so you can pipe them forward to a JSON file: | ||||
|     $ spacy convert some_file.conllu --file-type json > some_file.json | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/cli#convert | ||||
|     DOCS: https://spacy.io/api/cli#convert | ||||
|     """ | ||||
|     if isinstance(file_type, FileTypes): | ||||
|         # We get an instance of the FileTypes from the CLI so we need its string value | ||||
|  | @ -268,6 +268,6 @@ def _get_converter(msg, converter, input_path): | |||
|             msg.warn( | ||||
|                 "Can't automatically detect NER format. " | ||||
|                 "Conversion may not succeed. " | ||||
|                 "See https://nightly.spacy.io/api/cli#convert" | ||||
|                 "See https://spacy.io/api/cli#convert" | ||||
|             ) | ||||
|     return converter | ||||
|  |  | |||
|  | @ -34,7 +34,7 @@ def debug_config_cli( | |||
|     as command line options. For instance, --training.batch_size 128 overrides | ||||
|     the value of "batch_size" in the block "[training]". | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/cli#debug-config | ||||
|     DOCS: https://spacy.io/api/cli#debug-config | ||||
|     """ | ||||
|     overrides = parse_config_overrides(ctx.args) | ||||
|     import_code(code_path) | ||||
|  |  | |||
|  | @ -50,7 +50,7 @@ def debug_data_cli( | |||
|     useful stats, and can help you find problems like invalid entity annotations, | ||||
|     cyclic dependencies, low data labels and more. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/cli#debug-data | ||||
|     DOCS: https://spacy.io/api/cli#debug-data | ||||
|     """ | ||||
|     if ctx.command.name == "debug-data": | ||||
|         msg.warn( | ||||
|  |  | |||
|  | @ -40,7 +40,7 @@ def debug_model_cli( | |||
|     Analyze a Thinc model implementation. Includes checks for internal structure | ||||
|     and activations during training. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/cli#debug-model | ||||
|     DOCS: https://spacy.io/api/cli#debug-model | ||||
|     """ | ||||
|     setup_gpu(use_gpu) | ||||
|     layers = string_to_list(layers, intify=True) | ||||
|  |  | |||
|  | @ -28,7 +28,7 @@ def download_cli( | |||
|     additional arguments provided to this command will be passed to `pip install` | ||||
|     on package installation. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/cli#download | ||||
|     DOCS: https://spacy.io/api/cli#download | ||||
|     AVAILABLE PACKAGES: https://spacy.io/models | ||||
|     """ | ||||
|     download(model, direct, *ctx.args) | ||||
|  | @ -80,7 +80,7 @@ def get_compatibility() -> dict: | |||
|             f"Couldn't fetch compatibility table. Please find a package for your spaCy " | ||||
|             f"installation (v{about.__version__}), and download it manually. " | ||||
|             f"For more details, see the documentation: " | ||||
|             f"https://nightly.spacy.io/usage/models", | ||||
|             f"https://spacy.io/usage/models", | ||||
|             exits=1, | ||||
|         ) | ||||
|     comp_table = r.json() | ||||
|  |  | |||
|  | @ -36,7 +36,7 @@ def evaluate_cli( | |||
|     dependency parses in a HTML file, set as output directory as the | ||||
|     displacy_path argument. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/cli#evaluate | ||||
|     DOCS: https://spacy.io/api/cli#evaluate | ||||
|     """ | ||||
|     import_code(code_path) | ||||
|     evaluate( | ||||
|  |  | |||
|  | @ -23,7 +23,7 @@ def info_cli( | |||
|     print its meta information. Flag --markdown prints details in Markdown for easy | ||||
|     copy-pasting to GitHub issues. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/cli#info | ||||
|     DOCS: https://spacy.io/api/cli#info | ||||
|     """ | ||||
|     exclude = string_to_list(exclude) | ||||
|     info(model, markdown=markdown, silent=silent, exclude=exclude) | ||||
|  |  | |||
|  | @ -41,7 +41,7 @@ def init_config_cli( | |||
|     optimal settings for your use case. This includes the choice of architecture, | ||||
|     pretrained weights and related hyperparameters. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/cli#init-config | ||||
|     DOCS: https://spacy.io/api/cli#init-config | ||||
|     """ | ||||
|     if isinstance(optimize, Optimizations):  # instance of enum from the CLI | ||||
|         optimize = optimize.value | ||||
|  | @ -78,9 +78,9 @@ def init_fill_config_cli( | |||
|     from the default config and will create all objects, check the registered | ||||
|     functions for their default values and update the base config. This command | ||||
|     can be used with a config generated via the training quickstart widget: | ||||
|     https://nightly.spacy.io/usage/training#quickstart | ||||
|     https://spacy.io/usage/training#quickstart | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/cli#init-fill-config | ||||
|     DOCS: https://spacy.io/api/cli#init-fill-config | ||||
|     """ | ||||
|     fill_config(output_file, base_path, pretraining=pretraining, diff=diff) | ||||
| 
 | ||||
|  |  | |||
|  | @ -38,7 +38,7 @@ def package_cli( | |||
|     registered functions like pipeline components), they are copied into the | ||||
|     package and imported in the __init__.py. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/cli#package | ||||
|     DOCS: https://spacy.io/api/cli#package | ||||
|     """ | ||||
|     create_sdist, create_wheel = get_build_formats(string_to_list(build)) | ||||
|     code_paths = [Path(p.strip()) for p in string_to_list(code_paths)] | ||||
|  |  | |||
|  | @ -44,7 +44,7 @@ def pretrain_cli( | |||
|     all settings are the same between pretraining and training. Ideally, | ||||
|     this is done by using the same config file for both commands. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/cli#pretrain | ||||
|     DOCS: https://spacy.io/api/cli#pretrain | ||||
|     """ | ||||
|     config_overrides = parse_config_overrides(ctx.args) | ||||
|     import_code(code_path) | ||||
|  |  | |||
|  | @ -30,7 +30,7 @@ def profile_cli( | |||
|     It can either be provided as a JSONL file, or be read from sys.sytdin. | ||||
|     If no input file is specified, the IMDB dataset is loaded via Thinc. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/cli#debug-profile | ||||
|     DOCS: https://spacy.io/api/cli#debug-profile | ||||
|     """ | ||||
|     if ctx.parent.command.name == NAME:  # called as top-level command | ||||
|         msg.warn( | ||||
|  |  | |||
|  | @ -22,7 +22,7 @@ def project_assets_cli( | |||
|     provided in the project.yml, the file is only downloaded if no local file | ||||
|     with the same checksum exists. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/cli#project-assets | ||||
|     DOCS: https://spacy.io/api/cli#project-assets | ||||
|     """ | ||||
|     project_assets(project_dir, sparse_checkout=sparse_checkout) | ||||
| 
 | ||||
|  |  | |||
|  | @ -25,7 +25,7 @@ def project_clone_cli( | |||
|     defaults to the official spaCy template repo, but can be customized | ||||
|     (including using a private repo). | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/cli#project-clone | ||||
|     DOCS: https://spacy.io/api/cli#project-clone | ||||
|     """ | ||||
|     if dest is None: | ||||
|         dest = Path.cwd() / Path(name).parts[-1] | ||||
|  |  | |||
|  | @ -5,7 +5,7 @@ from ...util import working_dir | |||
| from .._util import project_cli, Arg, Opt, PROJECT_FILE, load_project_config | ||||
| 
 | ||||
| 
 | ||||
| DOCS_URL = "https://nightly.spacy.io" | ||||
| DOCS_URL = "https://spacy.io" | ||||
| INTRO = f"""> ⚠️ This project template uses the new [**spaCy v3.0**]({DOCS_URL}), which | ||||
| > is currently available as a nightly pre-release. You can install it from pip as `spacy-nightly`: | ||||
| > `pip install spacy-nightly`. Make sure to use a fresh virtual environment.""" | ||||
|  | @ -44,7 +44,7 @@ def project_document_cli( | |||
|     auto-generated section and only the auto-generated docs will be replaced | ||||
|     when you re-run the command. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/cli#project-document | ||||
|     DOCS: https://spacy.io/api/cli#project-document | ||||
|     """ | ||||
|     project_document(project_dir, output_file, no_emoji=no_emoji) | ||||
| 
 | ||||
|  |  | |||
|  | @ -34,7 +34,7 @@ def project_update_dvc_cli( | |||
|     workflow is used. The DVC config will only be updated if the project.yml | ||||
|     changed. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/cli#project-dvc | ||||
|     DOCS: https://spacy.io/api/cli#project-dvc | ||||
|     """ | ||||
|     project_update_dvc(project_dir, workflow, verbose=verbose, force=force) | ||||
| 
 | ||||
|  |  | |||
|  | @ -19,7 +19,7 @@ def project_pull_cli( | |||
|     A storage can be anything that the smart-open library can upload to, e.g. | ||||
|     AWS, Google Cloud Storage, SSH, local directories etc. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/cli#project-pull | ||||
|     DOCS: https://spacy.io/api/cli#project-pull | ||||
|     """ | ||||
|     for url, output_path in project_pull(project_dir, remote): | ||||
|         if url is not None: | ||||
|  |  | |||
|  | @ -18,7 +18,7 @@ def project_push_cli( | |||
|     the smart-open library can upload to, e.g. AWS, Google Cloud Storage, SSH, | ||||
|     local directories etc. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/cli#project-push | ||||
|     DOCS: https://spacy.io/api/cli#project-push | ||||
|     """ | ||||
|     for output_path, url in project_push(project_dir, remote): | ||||
|         if url is None: | ||||
|  |  | |||
|  | @ -28,7 +28,7 @@ def project_run_cli( | |||
|     commands define dependencies and/or outputs, they will only be re-run if | ||||
|     state has changed. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/cli#project-run | ||||
|     DOCS: https://spacy.io/api/cli#project-run | ||||
|     """ | ||||
|     if show_help or not subcommand: | ||||
|         print_run_help(project_dir, subcommand) | ||||
|  |  | |||
|  | @ -37,7 +37,7 @@ def train_cli( | |||
|     used to register custom functions and architectures that can then be | ||||
|     referenced in the config. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/cli#train | ||||
|     DOCS: https://spacy.io/api/cli#train | ||||
|     """ | ||||
|     util.logger.setLevel(logging.DEBUG if verbose else logging.INFO) | ||||
|     # Make sure all files and paths exists if they are needed | ||||
|  |  | |||
|  | @ -17,7 +17,7 @@ def validate_cli(): | |||
|     if the installed packages are compatible and shows upgrade instructions if | ||||
|     available. Should be run after `pip install -U spacy`. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/cli#validate | ||||
|     DOCS: https://spacy.io/api/cli#validate | ||||
|     """ | ||||
|     validate() | ||||
| 
 | ||||
|  |  | |||
|  | @ -1,8 +1,8 @@ | |||
| """ | ||||
| spaCy's built in visualization suite for dependencies and named entities. | ||||
| 
 | ||||
| DOCS: https://nightly.spacy.io/api/top-level#displacy | ||||
| USAGE: https://nightly.spacy.io/usage/visualizers | ||||
| DOCS: https://spacy.io/api/top-level#displacy | ||||
| USAGE: https://spacy.io/usage/visualizers | ||||
| """ | ||||
| from typing import Union, Iterable, Optional, Dict, Any, Callable | ||||
| import warnings | ||||
|  | @ -37,8 +37,8 @@ def render( | |||
|     manual (bool): Don't parse `Doc` and instead expect a dict/list of dicts. | ||||
|     RETURNS (str): Rendered HTML markup. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/top-level#displacy.render | ||||
|     USAGE: https://nightly.spacy.io/usage/visualizers | ||||
|     DOCS: https://spacy.io/api/top-level#displacy.render | ||||
|     USAGE: https://spacy.io/usage/visualizers | ||||
|     """ | ||||
|     factories = { | ||||
|         "dep": (DependencyRenderer, parse_deps), | ||||
|  | @ -88,8 +88,8 @@ def serve( | |||
|     port (int): Port to serve visualisation. | ||||
|     host (str): Host to serve visualisation. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/top-level#displacy.serve | ||||
|     USAGE: https://nightly.spacy.io/usage/visualizers | ||||
|     DOCS: https://spacy.io/api/top-level#displacy.serve | ||||
|     USAGE: https://spacy.io/usage/visualizers | ||||
|     """ | ||||
|     from wsgiref import simple_server | ||||
| 
 | ||||
|  |  | |||
|  | @ -20,7 +20,7 @@ class Warnings: | |||
|             "generate a dependency visualization for it. Make sure the Doc " | ||||
|             "was processed with a model that supports dependency parsing, and " | ||||
|             "not just a language class like `English()`. For more info, see " | ||||
|             "the docs:\nhttps://nightly.spacy.io/usage/models") | ||||
|             "the docs:\nhttps://spacy.io/usage/models") | ||||
|     W006 = ("No entities to visualize found in Doc object. If this is " | ||||
|             "surprising to you, make sure the Doc was processed using a model " | ||||
|             "that supports named entity recognition, and check the `doc.ents` " | ||||
|  | @ -86,7 +86,7 @@ class Warnings: | |||
|             "the config block to replace its token-to-vector listener with a copy " | ||||
|             "and make it independent. For example, `replace_listeners = " | ||||
|             "[\"model.tok2vec\"]` See the documentation for details: " | ||||
|             "https://nightly.spacy.io/usage/training#config-components-listeners") | ||||
|             "https://spacy.io/usage/training#config-components-listeners") | ||||
|     W087 = ("Component '{name}' will be (re)trained, but the component '{listener}' " | ||||
|             "depends on it via a listener and is frozen. This means that the " | ||||
|             "performance of '{listener}' will be degraded. You can either freeze " | ||||
|  | @ -95,12 +95,12 @@ class Warnings: | |||
|             "the config block to replace its token-to-vector listener with a copy " | ||||
|             "and make it independent. For example, `replace_listeners = " | ||||
|             "[\"model.tok2vec\"]` See the documentation for details: " | ||||
|             "https://nightly.spacy.io/usage/training#config-components-listeners") | ||||
|             "https://spacy.io/usage/training#config-components-listeners") | ||||
|     W088 = ("The pipeline component {name} implements a `begin_training` " | ||||
|             "method, which won't be called by spaCy. As of v3.0, `begin_training` " | ||||
|             "has been renamed to `initialize`, so you likely want to rename the " | ||||
|             "component method. See the documentation for details: " | ||||
|             "https://nightly.spacy.io/api/language#initialize") | ||||
|             "https://spacy.io/api/language#initialize") | ||||
|     W089 = ("As of spaCy v3.0, the `nlp.begin_training` method has been renamed " | ||||
|             "to `nlp.initialize`.") | ||||
|     W090 = ("Could not locate any {format} files in path '{path}'.") | ||||
|  | @ -180,7 +180,7 @@ class Errors: | |||
|     E010 = ("Word vectors set to length 0. This may be because you don't have " | ||||
|             "a model installed or loaded, or because your model doesn't " | ||||
|             "include word vectors. For more info, see the docs:\n" | ||||
|             "https://nightly.spacy.io/usage/models") | ||||
|             "https://spacy.io/usage/models") | ||||
|     E011 = ("Unknown operator: '{op}'. Options: {opts}") | ||||
|     E012 = ("Cannot add pattern for zero tokens to matcher.\nKey: {key}") | ||||
|     E016 = ("MultitaskObjective target should be function or one of: dep, " | ||||
|  | @ -211,7 +211,7 @@ class Errors: | |||
|     E028 = ("`words` expects a list of unicode strings, but got bytes instance: {value}") | ||||
|     E029 = ("`noun_chunks` requires the dependency parse, which requires a " | ||||
|             "statistical model to be installed and loaded. For more info, see " | ||||
|             "the documentation:\nhttps://nightly.spacy.io/usage/models") | ||||
|             "the documentation:\nhttps://spacy.io/usage/models") | ||||
|     E030 = ("Sentence boundaries unset. You can add the 'sentencizer' " | ||||
|             "component to the pipeline with: `nlp.add_pipe('sentencizer')`. " | ||||
|             "Alternatively, add the dependency parser or sentence recognizer, " | ||||
|  | @ -318,7 +318,7 @@ class Errors: | |||
|     E102 = ("Can't merge non-disjoint spans. '{token}' is already part of " | ||||
|             "tokens to merge. If you want to find the longest non-overlapping " | ||||
|             "spans, you can use the util.filter_spans helper:\n" | ||||
|             "https://nightly.spacy.io/api/top-level#util.filter_spans") | ||||
|             "https://spacy.io/api/top-level#util.filter_spans") | ||||
|     E103 = ("Trying to set conflicting doc.ents: '{span1}' and '{span2}'. A " | ||||
|             "token can only be part of one entity, so make sure the entities " | ||||
|             "you're setting don't overlap.") | ||||
|  | @ -536,9 +536,9 @@ class Errors: | |||
|             "solve this, remove the existing directories in the output directory.") | ||||
|     E902 = ("The sentence-per-line IOB/IOB2 file is not formatted correctly. " | ||||
|             "Try checking whitespace and delimiters. See " | ||||
|             "https://nightly.spacy.io/api/cli#convert") | ||||
|             "https://spacy.io/api/cli#convert") | ||||
|     E903 = ("The token-per-line NER file is not formatted correctly. Try checking " | ||||
|             "whitespace and delimiters. See https://nightly.spacy.io/api/cli#convert") | ||||
|             "whitespace and delimiters. See https://spacy.io/api/cli#convert") | ||||
|     E904 = ("Cannot initialize StaticVectors layer: nO dimension unset. This " | ||||
|             "dimension refers to the output width, after the linear projection " | ||||
|             "has been applied.") | ||||
|  |  | |||
|  | @ -23,7 +23,7 @@ cdef class Candidate: | |||
|     algorithm which will disambiguate the various candidates to the correct one. | ||||
|     Each candidate (alias, entity) pair is assigned to a certain prior probability. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/kb/#candidate_init | ||||
|     DOCS: https://spacy.io/api/kb/#candidate_init | ||||
|     """ | ||||
| 
 | ||||
|     def __init__(self, KnowledgeBase kb, entity_hash, entity_freq, entity_vector, alias_hash, prior_prob): | ||||
|  | @ -81,7 +81,7 @@ cdef class KnowledgeBase: | |||
|     """A `KnowledgeBase` instance stores unique identifiers for entities and their textual aliases, | ||||
|     to support entity linking of named entities to real-world concepts. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/kb | ||||
|     DOCS: https://spacy.io/api/kb | ||||
|     """ | ||||
| 
 | ||||
|     def __init__(self, Vocab vocab, entity_vector_length): | ||||
|  |  | |||
|  | @ -104,7 +104,7 @@ class Language: | |||
|         object and processing pipeline. | ||||
|     lang (str): Two-letter language ID, i.e. ISO code. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/language | ||||
|     DOCS: https://spacy.io/api/language | ||||
|     """ | ||||
| 
 | ||||
|     Defaults = BaseDefaults | ||||
|  | @ -141,7 +141,7 @@ class Language: | |||
|             returns a tokenizer. | ||||
|         batch_size (int): Default batch size for pipe and evaluate. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/language#init | ||||
|         DOCS: https://spacy.io/api/language#init | ||||
|         """ | ||||
|         # We're only calling this to import all factories provided via entry | ||||
|         # points. The factory decorator applied to these functions takes care | ||||
|  | @ -194,7 +194,7 @@ class Language: | |||
| 
 | ||||
|         RETURNS (Dict[str, Any]): The meta. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/language#meta | ||||
|         DOCS: https://spacy.io/api/language#meta | ||||
|         """ | ||||
|         spacy_version = util.get_model_version_range(about.__version__) | ||||
|         if self.vocab.lang: | ||||
|  | @ -235,7 +235,7 @@ class Language: | |||
| 
 | ||||
|         RETURNS (thinc.api.Config): The config. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/language#config | ||||
|         DOCS: https://spacy.io/api/language#config | ||||
|         """ | ||||
|         self._config.setdefault("nlp", {}) | ||||
|         self._config.setdefault("training", {}) | ||||
|  | @ -444,7 +444,7 @@ class Language: | |||
|             the score won't be shown in the logs or be weighted. | ||||
|         func (Optional[Callable]): Factory function if not used as a decorator. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/language#factory | ||||
|         DOCS: https://spacy.io/api/language#factory | ||||
|         """ | ||||
|         if not isinstance(name, str): | ||||
|             raise ValueError(Errors.E963.format(decorator="factory")) | ||||
|  | @ -524,7 +524,7 @@ class Language: | |||
|             Used for pipeline analysis. | ||||
|         func (Optional[Callable]): Factory function if not used as a decorator. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/language#component | ||||
|         DOCS: https://spacy.io/api/language#component | ||||
|         """ | ||||
|         if name is not None and not isinstance(name, str): | ||||
|             raise ValueError(Errors.E963.format(decorator="component")) | ||||
|  | @ -590,7 +590,7 @@ class Language: | |||
|         name (str): Name of pipeline component to get. | ||||
|         RETURNS (callable): The pipeline component. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/language#get_pipe | ||||
|         DOCS: https://spacy.io/api/language#get_pipe | ||||
|         """ | ||||
|         for pipe_name, component in self._components: | ||||
|             if pipe_name == name: | ||||
|  | @ -619,7 +619,7 @@ class Language: | |||
|             arguments and types expected by the factory. | ||||
|         RETURNS (Callable[[Doc], Doc]): The pipeline component. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/language#create_pipe | ||||
|         DOCS: https://spacy.io/api/language#create_pipe | ||||
|         """ | ||||
|         name = name if name is not None else factory_name | ||||
|         if not isinstance(config, dict): | ||||
|  | @ -740,7 +740,7 @@ class Language: | |||
|             arguments and types expected by the factory. | ||||
|         RETURNS (Callable[[Doc], Doc]): The pipeline component. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/language#add_pipe | ||||
|         DOCS: https://spacy.io/api/language#add_pipe | ||||
|         """ | ||||
|         if not isinstance(factory_name, str): | ||||
|             bad_val = repr(factory_name) | ||||
|  | @ -838,7 +838,7 @@ class Language: | |||
|         name (str): Name of the component. | ||||
|         RETURNS (bool): Whether a component of the name exists in the pipeline. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/language#has_pipe | ||||
|         DOCS: https://spacy.io/api/language#has_pipe | ||||
|         """ | ||||
|         return name in self.pipe_names | ||||
| 
 | ||||
|  | @ -860,7 +860,7 @@ class Language: | |||
|             arguments and types expected by the factory. | ||||
|         RETURNS (Callable[[Doc], Doc]): The new pipeline component. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/language#replace_pipe | ||||
|         DOCS: https://spacy.io/api/language#replace_pipe | ||||
|         """ | ||||
|         if name not in self.pipe_names: | ||||
|             raise ValueError(Errors.E001.format(name=name, opts=self.pipe_names)) | ||||
|  | @ -891,7 +891,7 @@ class Language: | |||
|         old_name (str): Name of the component to rename. | ||||
|         new_name (str): New name of the component. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/language#rename_pipe | ||||
|         DOCS: https://spacy.io/api/language#rename_pipe | ||||
|         """ | ||||
|         if old_name not in self.component_names: | ||||
|             raise ValueError( | ||||
|  | @ -916,7 +916,7 @@ class Language: | |||
|         name (str): Name of the component to remove. | ||||
|         RETURNS (tuple): A `(name, component)` tuple of the removed component. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/language#remove_pipe | ||||
|         DOCS: https://spacy.io/api/language#remove_pipe | ||||
|         """ | ||||
|         if name not in self.component_names: | ||||
|             raise ValueError(Errors.E001.format(name=name, opts=self.component_names)) | ||||
|  | @ -972,7 +972,7 @@ class Language: | |||
|             keyword arguments for specific components. | ||||
|         RETURNS (Doc): A container for accessing the annotations. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/language#call | ||||
|         DOCS: https://spacy.io/api/language#call | ||||
|         """ | ||||
|         doc = self.make_doc(text) | ||||
|         if component_cfg is None: | ||||
|  | @ -1023,7 +1023,7 @@ class Language: | |||
|         disable (str or iterable): The name(s) of the pipes to disable | ||||
|         enable (str or iterable): The name(s) of the pipes to enable - all others will be disabled | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/language#select_pipes | ||||
|         DOCS: https://spacy.io/api/language#select_pipes | ||||
|         """ | ||||
|         if enable is None and disable is None: | ||||
|             raise ValueError(Errors.E991) | ||||
|  | @ -1081,7 +1081,7 @@ class Language: | |||
|         exclude (Iterable[str]): Names of components that shouldn't be updated. | ||||
|         RETURNS (Dict[str, float]): The updated losses dictionary | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/language#update | ||||
|         DOCS: https://spacy.io/api/language#update | ||||
|         """ | ||||
|         if _ is not None: | ||||
|             raise ValueError(Errors.E989) | ||||
|  | @ -1144,7 +1144,7 @@ class Language: | |||
|             >>>     raw_batch = [Example.from_dict(nlp.make_doc(text), {}) for text in next(raw_text_batches)] | ||||
|             >>>     nlp.rehearse(raw_batch) | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/language#rehearse | ||||
|         DOCS: https://spacy.io/api/language#rehearse | ||||
|         """ | ||||
|         if len(examples) == 0: | ||||
|             return | ||||
|  | @ -1199,7 +1199,7 @@ class Language: | |||
|             provided, will be created using the .create_optimizer() method. | ||||
|         RETURNS (thinc.api.Optimizer): The optimizer. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/language#initialize | ||||
|         DOCS: https://spacy.io/api/language#initialize | ||||
|         """ | ||||
|         if get_examples is None: | ||||
|             util.logger.debug( | ||||
|  | @ -1266,7 +1266,7 @@ class Language: | |||
| 
 | ||||
|         RETURNS (Optimizer): The optimizer. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/language#resume_training | ||||
|         DOCS: https://spacy.io/api/language#resume_training | ||||
|         """ | ||||
|         ops = get_current_ops() | ||||
|         if self.vocab.vectors.data.shape[1] >= 1: | ||||
|  | @ -1293,7 +1293,7 @@ class Language: | |||
|             Function that deals with a failing batch of documents. This callable function should take in | ||||
|             the component's name, the component itself, the offending batch of documents, and the exception | ||||
|             that was thrown. | ||||
|         DOCS: https://nightly.spacy.io/api/language#set_error_handler | ||||
|         DOCS: https://spacy.io/api/language#set_error_handler | ||||
|         """ | ||||
|         self.default_error_handler = error_handler | ||||
|         for name, pipe in self.pipeline: | ||||
|  | @ -1322,7 +1322,7 @@ class Language: | |||
| 
 | ||||
|         RETURNS (Scorer): The scorer containing the evaluation results. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/language#evaluate | ||||
|         DOCS: https://spacy.io/api/language#evaluate | ||||
|         """ | ||||
|         examples = list(examples) | ||||
|         validate_examples(examples, "Language.evaluate") | ||||
|  | @ -1377,7 +1377,7 @@ class Language: | |||
|             >>> with nlp.use_params(optimizer.averages): | ||||
|             >>>     nlp.to_disk("/tmp/checkpoint") | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/language#use_params | ||||
|         DOCS: https://spacy.io/api/language#use_params | ||||
|         """ | ||||
|         if not params: | ||||
|             yield | ||||
|  | @ -1424,7 +1424,7 @@ class Language: | |||
|         n_process (int): Number of processors to process texts. If -1, set `multiprocessing.cpu_count()`. | ||||
|         YIELDS (Doc): Documents in the order of the original text. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/language#pipe | ||||
|         DOCS: https://spacy.io/api/language#pipe | ||||
|         """ | ||||
|         if n_process == -1: | ||||
|             n_process = mp.cpu_count() | ||||
|  | @ -1568,7 +1568,7 @@ class Language: | |||
|             the types expected by the factory. | ||||
|         RETURNS (Language): The initialized Language class. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/language#from_config | ||||
|         DOCS: https://spacy.io/api/language#from_config | ||||
|         """ | ||||
|         if auto_fill: | ||||
|             config = Config( | ||||
|  | @ -1712,7 +1712,7 @@ class Language: | |||
|             either be an empty list to not replace any listeners, or a complete | ||||
|             (!) list of the paths to all listener layers used by the model. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/language#replace_listeners | ||||
|         DOCS: https://spacy.io/api/language#replace_listeners | ||||
|         """ | ||||
|         if tok2vec_name not in self.pipe_names: | ||||
|             err = Errors.E889.format( | ||||
|  | @ -1782,7 +1782,7 @@ class Language: | |||
|             it doesn't exist. | ||||
|         exclude (list): Names of components or serialization fields to exclude. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/language#to_disk | ||||
|         DOCS: https://spacy.io/api/language#to_disk | ||||
|         """ | ||||
|         path = util.ensure_path(path) | ||||
|         serializers = {} | ||||
|  | @ -1811,7 +1811,7 @@ class Language: | |||
|         exclude (list): Names of components or serialization fields to exclude. | ||||
|         RETURNS (Language): The modified `Language` object. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/language#from_disk | ||||
|         DOCS: https://spacy.io/api/language#from_disk | ||||
|         """ | ||||
| 
 | ||||
|         def deserialize_meta(path: Path) -> None: | ||||
|  | @ -1859,7 +1859,7 @@ class Language: | |||
|         exclude (list): Names of components or serialization fields to exclude. | ||||
|         RETURNS (bytes): The serialized form of the `Language` object. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/language#to_bytes | ||||
|         DOCS: https://spacy.io/api/language#to_bytes | ||||
|         """ | ||||
|         serializers = {} | ||||
|         serializers["vocab"] = lambda: self.vocab.to_bytes() | ||||
|  | @ -1883,7 +1883,7 @@ class Language: | |||
|         exclude (list): Names of components or serialization fields to exclude. | ||||
|         RETURNS (Language): The `Language` object. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/language#from_bytes | ||||
|         DOCS: https://spacy.io/api/language#from_bytes | ||||
|         """ | ||||
| 
 | ||||
|         def deserialize_meta(b): | ||||
|  |  | |||
|  | @ -30,7 +30,7 @@ cdef class Lexeme: | |||
|     tag, dependency parse, or lemma (lemmatization depends on the | ||||
|     part-of-speech tag). | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/lexeme | ||||
|     DOCS: https://spacy.io/api/lexeme | ||||
|     """ | ||||
|     def __init__(self, Vocab vocab, attr_t orth): | ||||
|         """Create a Lexeme object. | ||||
|  |  | |||
|  | @ -57,7 +57,7 @@ class Table(OrderedDict): | |||
|         data (dict): The dictionary. | ||||
|         name (str): Optional table name for reference. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/lookups#table.from_dict | ||||
|         DOCS: https://spacy.io/api/lookups#table.from_dict | ||||
|         """ | ||||
|         self = cls(name=name) | ||||
|         self.update(data) | ||||
|  | @ -69,7 +69,7 @@ class Table(OrderedDict): | |||
|         name (str): Optional table name for reference. | ||||
|         data (dict): Initial data, used to hint Bloom Filter. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/lookups#table.init | ||||
|         DOCS: https://spacy.io/api/lookups#table.init | ||||
|         """ | ||||
|         OrderedDict.__init__(self) | ||||
|         self.name = name | ||||
|  | @ -135,7 +135,7 @@ class Table(OrderedDict): | |||
| 
 | ||||
|         RETURNS (bytes): The serialized table. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/lookups#table.to_bytes | ||||
|         DOCS: https://spacy.io/api/lookups#table.to_bytes | ||||
|         """ | ||||
|         data = { | ||||
|             "name": self.name, | ||||
|  | @ -150,7 +150,7 @@ class Table(OrderedDict): | |||
|         bytes_data (bytes): The data to load. | ||||
|         RETURNS (Table): The loaded table. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/lookups#table.from_bytes | ||||
|         DOCS: https://spacy.io/api/lookups#table.from_bytes | ||||
|         """ | ||||
|         loaded = srsly.msgpack_loads(bytes_data) | ||||
|         data = loaded.get("dict", {}) | ||||
|  | @ -172,7 +172,7 @@ class Lookups: | |||
|     def __init__(self) -> None: | ||||
|         """Initialize the Lookups object. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/lookups#init | ||||
|         DOCS: https://spacy.io/api/lookups#init | ||||
|         """ | ||||
|         self._tables = {} | ||||
| 
 | ||||
|  | @ -201,7 +201,7 @@ class Lookups: | |||
|         data (dict): Optional data to add to the table. | ||||
|         RETURNS (Table): The newly added table. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/lookups#add_table | ||||
|         DOCS: https://spacy.io/api/lookups#add_table | ||||
|         """ | ||||
|         if name in self.tables: | ||||
|             raise ValueError(Errors.E158.format(name=name)) | ||||
|  | @ -215,7 +215,7 @@ class Lookups: | |||
|         name (str): Name of the table to set. | ||||
|         table (Table): The Table to set. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/lookups#set_table | ||||
|         DOCS: https://spacy.io/api/lookups#set_table | ||||
|         """ | ||||
|         self._tables[name] = table | ||||
| 
 | ||||
|  | @ -227,7 +227,7 @@ class Lookups: | |||
|         default (Any): Optional default value to return if table doesn't exist. | ||||
|         RETURNS (Table): The table. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/lookups#get_table | ||||
|         DOCS: https://spacy.io/api/lookups#get_table | ||||
|         """ | ||||
|         if name not in self._tables: | ||||
|             if default == UNSET: | ||||
|  | @ -241,7 +241,7 @@ class Lookups: | |||
|         name (str): Name of the table to remove. | ||||
|         RETURNS (Table): The removed table. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/lookups#remove_table | ||||
|         DOCS: https://spacy.io/api/lookups#remove_table | ||||
|         """ | ||||
|         if name not in self._tables: | ||||
|             raise KeyError(Errors.E159.format(name=name, tables=self.tables)) | ||||
|  | @ -253,7 +253,7 @@ class Lookups: | |||
|         name (str): Name of the table. | ||||
|         RETURNS (bool): Whether a table of that name exists. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/lookups#has_table | ||||
|         DOCS: https://spacy.io/api/lookups#has_table | ||||
|         """ | ||||
|         return name in self._tables | ||||
| 
 | ||||
|  | @ -262,7 +262,7 @@ class Lookups: | |||
| 
 | ||||
|         RETURNS (bytes): The serialized Lookups. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/lookups#to_bytes | ||||
|         DOCS: https://spacy.io/api/lookups#to_bytes | ||||
|         """ | ||||
|         return srsly.msgpack_dumps(self._tables) | ||||
| 
 | ||||
|  | @ -272,7 +272,7 @@ class Lookups: | |||
|         bytes_data (bytes): The data to load. | ||||
|         RETURNS (Lookups): The loaded Lookups. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/lookups#from_bytes | ||||
|         DOCS: https://spacy.io/api/lookups#from_bytes | ||||
|         """ | ||||
|         self._tables = {} | ||||
|         for key, value in srsly.msgpack_loads(bytes_data).items(): | ||||
|  | @ -287,7 +287,7 @@ class Lookups: | |||
| 
 | ||||
|         path (str / Path): The file path. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/lookups#to_disk | ||||
|         DOCS: https://spacy.io/api/lookups#to_disk | ||||
|         """ | ||||
|         path = ensure_path(path) | ||||
|         if not path.exists(): | ||||
|  | @ -305,7 +305,7 @@ class Lookups: | |||
|         path (str / Path): The directory path. | ||||
|         RETURNS (Lookups): The loaded lookups. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/lookups#from_disk | ||||
|         DOCS: https://spacy.io/api/lookups#from_disk | ||||
|         """ | ||||
|         path = ensure_path(path) | ||||
|         filepath = path / filename | ||||
|  |  | |||
|  | @ -32,8 +32,8 @@ DEF PADDING = 5 | |||
| cdef class Matcher: | ||||
|     """Match sequences of tokens, based on pattern rules. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/matcher | ||||
|     USAGE: https://nightly.spacy.io/usage/rule-based-matching | ||||
|     DOCS: https://spacy.io/api/matcher | ||||
|     USAGE: https://spacy.io/usage/rule-based-matching | ||||
|     """ | ||||
| 
 | ||||
|     def __init__(self, vocab, validate=True): | ||||
|  |  | |||
|  | @ -20,8 +20,8 @@ cdef class PhraseMatcher: | |||
|     sequences based on lists of token descriptions, the `PhraseMatcher` accepts | ||||
|     match patterns in the form of `Doc` objects. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/phrasematcher | ||||
|     USAGE: https://nightly.spacy.io/usage/rule-based-matching#phrasematcher | ||||
|     DOCS: https://spacy.io/api/phrasematcher | ||||
|     USAGE: https://spacy.io/usage/rule-based-matching#phrasematcher | ||||
| 
 | ||||
|     Adapted from FlashText: https://github.com/vi3k6i5/flashtext | ||||
|     MIT License (see `LICENSE`) | ||||
|  | @ -35,7 +35,7 @@ cdef class PhraseMatcher: | |||
|         attr (int / str): Token attribute to match on. | ||||
|         validate (bool): Perform additional validation when patterns are added. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/phrasematcher#init | ||||
|         DOCS: https://spacy.io/api/phrasematcher#init | ||||
|         """ | ||||
|         self.vocab = vocab | ||||
|         self._callbacks = {} | ||||
|  | @ -64,7 +64,7 @@ cdef class PhraseMatcher: | |||
| 
 | ||||
|         RETURNS (int): The number of rules. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/phrasematcher#len | ||||
|         DOCS: https://spacy.io/api/phrasematcher#len | ||||
|         """ | ||||
|         return len(self._callbacks) | ||||
| 
 | ||||
|  | @ -74,7 +74,7 @@ cdef class PhraseMatcher: | |||
|         key (str): The match ID. | ||||
|         RETURNS (bool): Whether the matcher contains rules for this match ID. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/phrasematcher#contains | ||||
|         DOCS: https://spacy.io/api/phrasematcher#contains | ||||
|         """ | ||||
|         return key in self._callbacks | ||||
| 
 | ||||
|  | @ -88,7 +88,7 @@ cdef class PhraseMatcher: | |||
| 
 | ||||
|         key (str): The match ID. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/phrasematcher#remove | ||||
|         DOCS: https://spacy.io/api/phrasematcher#remove | ||||
|         """ | ||||
|         if key not in self._docs: | ||||
|             raise KeyError(key) | ||||
|  | @ -167,7 +167,7 @@ cdef class PhraseMatcher: | |||
|             as variable arguments. Will be ignored if a list of patterns is | ||||
|             provided as the second argument. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/phrasematcher#add | ||||
|         DOCS: https://spacy.io/api/phrasematcher#add | ||||
|         """ | ||||
|         if docs is None or hasattr(docs, "__call__"):  # old API | ||||
|             on_match = docs | ||||
|  | @ -241,7 +241,7 @@ cdef class PhraseMatcher: | |||
|             `doc[start:end]`. The `match_id` is an integer. If as_spans is set | ||||
|             to True, a list of Span objects is returned. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/phrasematcher#call | ||||
|         DOCS: https://spacy.io/api/phrasematcher#call | ||||
|         """ | ||||
|         matches = [] | ||||
|         if doc is None or len(doc) == 0: | ||||
|  |  | |||
|  | @ -32,7 +32,7 @@ class AttributeRuler(Pipe): | |||
|     """Set token-level attributes for tokens matched by Matcher patterns. | ||||
|     Additionally supports importing patterns from tag maps and morph rules. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/attributeruler | ||||
|     DOCS: https://spacy.io/api/attributeruler | ||||
|     """ | ||||
| 
 | ||||
|     def __init__( | ||||
|  | @ -48,7 +48,7 @@ class AttributeRuler(Pipe): | |||
| 
 | ||||
|         RETURNS (AttributeRuler): The AttributeRuler component. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/attributeruler#init | ||||
|         DOCS: https://spacy.io/api/attributeruler#init | ||||
|         """ | ||||
|         self.name = name | ||||
|         self.vocab = vocab | ||||
|  | @ -94,7 +94,7 @@ class AttributeRuler(Pipe): | |||
|         doc (Doc): The document to process. | ||||
|         RETURNS (Doc): The processed Doc. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/attributeruler#call | ||||
|         DOCS: https://spacy.io/api/attributeruler#call | ||||
|         """ | ||||
|         error_handler = self.get_error_handler() | ||||
|         try: | ||||
|  | @ -143,7 +143,7 @@ class AttributeRuler(Pipe): | |||
|         tag_map (dict): The tag map that maps fine-grained tags to | ||||
|             coarse-grained tags and morphological features. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/attributeruler#load_from_morph_rules | ||||
|         DOCS: https://spacy.io/api/attributeruler#load_from_morph_rules | ||||
|         """ | ||||
|         for tag, attrs in tag_map.items(): | ||||
|             pattern = [{"TAG": tag}] | ||||
|  | @ -165,7 +165,7 @@ class AttributeRuler(Pipe): | |||
|             fine-grained tags to coarse-grained tags, lemmas and morphological | ||||
|             features. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/attributeruler#load_from_morph_rules | ||||
|         DOCS: https://spacy.io/api/attributeruler#load_from_morph_rules | ||||
|         """ | ||||
|         for tag in morph_rules: | ||||
|             for word in morph_rules[tag]: | ||||
|  | @ -193,7 +193,7 @@ class AttributeRuler(Pipe): | |||
|         index (int): The index of the token in the matched span to modify. May | ||||
|             be negative to index from the end of the span. Defaults to 0. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/attributeruler#add | ||||
|         DOCS: https://spacy.io/api/attributeruler#add | ||||
|         """ | ||||
|         # We need to make a string here, because otherwise the ID we pass back | ||||
|         # will be interpreted as the hash of a string, rather than an ordinal. | ||||
|  | @ -211,7 +211,7 @@ class AttributeRuler(Pipe): | |||
|             as the arguments to AttributeRuler.add (patterns/attrs/index) to | ||||
|             add as patterns. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/attributeruler#add_patterns | ||||
|         DOCS: https://spacy.io/api/attributeruler#add_patterns | ||||
|         """ | ||||
|         for p in patterns: | ||||
|             self.add(**p) | ||||
|  | @ -236,7 +236,7 @@ class AttributeRuler(Pipe): | |||
|             Scorer.score_token_attr for the attributes "tag", "pos", "morph" | ||||
|             and "lemma" for the target token attributes. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/tagger#score | ||||
|         DOCS: https://spacy.io/api/tagger#score | ||||
|         """ | ||||
| 
 | ||||
|         def morph_key_getter(token, attr): | ||||
|  | @ -273,7 +273,7 @@ class AttributeRuler(Pipe): | |||
|         exclude (Iterable[str]): String names of serialization fields to exclude. | ||||
|         RETURNS (bytes): The serialized object. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/attributeruler#to_bytes | ||||
|         DOCS: https://spacy.io/api/attributeruler#to_bytes | ||||
|         """ | ||||
|         serialize = {} | ||||
|         serialize["vocab"] = self.vocab.to_bytes | ||||
|  | @ -289,7 +289,7 @@ class AttributeRuler(Pipe): | |||
|         exclude (Iterable[str]): String names of serialization fields to exclude. | ||||
|         returns (AttributeRuler): The loaded object. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/attributeruler#from_bytes | ||||
|         DOCS: https://spacy.io/api/attributeruler#from_bytes | ||||
|         """ | ||||
| 
 | ||||
|         def load_patterns(b): | ||||
|  | @ -310,7 +310,7 @@ class AttributeRuler(Pipe): | |||
|         path (Union[Path, str]): A path to a directory. | ||||
|         exclude (Iterable[str]): String names of serialization fields to exclude. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/attributeruler#to_disk | ||||
|         DOCS: https://spacy.io/api/attributeruler#to_disk | ||||
|         """ | ||||
|         serialize = { | ||||
|             "vocab": lambda p: self.vocab.to_disk(p), | ||||
|  | @ -327,7 +327,7 @@ class AttributeRuler(Pipe): | |||
|         exclude (Iterable[str]): String names of serialization fields to exclude. | ||||
|         RETURNS (AttributeRuler): The loaded object. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/attributeruler#from_disk | ||||
|         DOCS: https://spacy.io/api/attributeruler#from_disk | ||||
|         """ | ||||
| 
 | ||||
|         def load_patterns(p): | ||||
|  |  | |||
|  | @ -202,7 +202,7 @@ def make_beam_parser( | |||
| cdef class DependencyParser(Parser): | ||||
|     """Pipeline component for dependency parsing. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/dependencyparser | ||||
|     DOCS: https://spacy.io/api/dependencyparser | ||||
|     """ | ||||
|     TransitionSystem = ArcEager | ||||
| 
 | ||||
|  | @ -243,7 +243,7 @@ cdef class DependencyParser(Parser): | |||
|         RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans | ||||
|             and Scorer.score_deps. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/dependencyparser#score | ||||
|         DOCS: https://spacy.io/api/dependencyparser#score | ||||
|         """ | ||||
|         def has_sents(doc): | ||||
|             return doc.has_annotation("SENT_START") | ||||
|  |  | |||
|  | @ -94,7 +94,7 @@ def make_entity_linker( | |||
| class EntityLinker(TrainablePipe): | ||||
|     """Pipeline component for named entity linking. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/entitylinker | ||||
|     DOCS: https://spacy.io/api/entitylinker | ||||
|     """ | ||||
| 
 | ||||
|     NIL = "NIL"  # string used to refer to a non-existing link | ||||
|  | @ -124,7 +124,7 @@ class EntityLinker(TrainablePipe): | |||
|         get_candidates (Callable[[KnowledgeBase, "Span"], Iterable[Candidate]]): Function that | ||||
|             produces a list of candidates, given a certain knowledge base and a textual mention. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/entitylinker#init | ||||
|         DOCS: https://spacy.io/api/entitylinker#init | ||||
|         """ | ||||
|         self.vocab = vocab | ||||
|         self.model = model | ||||
|  | @ -171,7 +171,7 @@ class EntityLinker(TrainablePipe): | |||
|             Note that providing this argument, will overwrite all data accumulated in the current KB. | ||||
|             Use this only when loading a KB as-such from file. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/entitylinker#initialize | ||||
|         DOCS: https://spacy.io/api/entitylinker#initialize | ||||
|         """ | ||||
|         validate_get_examples(get_examples, "EntityLinker.initialize") | ||||
|         if kb_loader is not None: | ||||
|  | @ -207,7 +207,7 @@ class EntityLinker(TrainablePipe): | |||
|             Updated using the component name as the key. | ||||
|         RETURNS (Dict[str, float]): The updated losses dictionary. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/entitylinker#update | ||||
|         DOCS: https://spacy.io/api/entitylinker#update | ||||
|         """ | ||||
|         self.validate_kb() | ||||
|         if losses is None: | ||||
|  | @ -283,7 +283,7 @@ class EntityLinker(TrainablePipe): | |||
|         docs (Iterable[Doc]): The documents to predict. | ||||
|         RETURNS (List[int]): The models prediction for each document. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/entitylinker#predict | ||||
|         DOCS: https://spacy.io/api/entitylinker#predict | ||||
|         """ | ||||
|         self.validate_kb() | ||||
|         entity_count = 0 | ||||
|  | @ -380,7 +380,7 @@ class EntityLinker(TrainablePipe): | |||
|         docs (Iterable[Doc]): The documents to modify. | ||||
|         kb_ids (List[str]): The IDs to set, produced by EntityLinker.predict. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/entitylinker#set_annotations | ||||
|         DOCS: https://spacy.io/api/entitylinker#set_annotations | ||||
|         """ | ||||
|         count_ents = len([ent for doc in docs for ent in doc.ents]) | ||||
|         if count_ents != len(kb_ids): | ||||
|  | @ -399,7 +399,7 @@ class EntityLinker(TrainablePipe): | |||
|         examples (Iterable[Example]): The examples to score. | ||||
|         RETURNS (Dict[str, Any]): The scores. | ||||
| 
 | ||||
|         DOCS TODO: https://nightly.spacy.io/api/entity_linker#score | ||||
|         DOCS TODO: https://spacy.io/api/entity_linker#score | ||||
|         """ | ||||
|         validate_examples(examples, "EntityLinker.score") | ||||
|         return Scorer.score_links(examples, negative_labels=[self.NIL]) | ||||
|  | @ -412,7 +412,7 @@ class EntityLinker(TrainablePipe): | |||
|         path (str / Path): Path to a directory. | ||||
|         exclude (Iterable[str]): String names of serialization fields to exclude. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/entitylinker#to_disk | ||||
|         DOCS: https://spacy.io/api/entitylinker#to_disk | ||||
|         """ | ||||
|         serialize = {} | ||||
|         serialize["vocab"] = lambda p: self.vocab.to_disk(p) | ||||
|  | @ -430,7 +430,7 @@ class EntityLinker(TrainablePipe): | |||
|         exclude (Iterable[str]): String names of serialization fields to exclude. | ||||
|         RETURNS (EntityLinker): The modified EntityLinker object. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/entitylinker#from_disk | ||||
|         DOCS: https://spacy.io/api/entitylinker#from_disk | ||||
|         """ | ||||
| 
 | ||||
|         def load_model(p): | ||||
|  |  | |||
|  | @ -59,8 +59,8 @@ class EntityRuler(Pipe): | |||
|     purely rule-based entity recognition system. After initialization, the | ||||
|     component is typically added to the pipeline using `nlp.add_pipe`. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/entityruler | ||||
|     USAGE: https://nightly.spacy.io/usage/rule-based-matching#entityruler | ||||
|     DOCS: https://spacy.io/api/entityruler | ||||
|     USAGE: https://spacy.io/usage/rule-based-matching#entityruler | ||||
|     """ | ||||
| 
 | ||||
|     def __init__( | ||||
|  | @ -94,7 +94,7 @@ class EntityRuler(Pipe): | |||
|             added by the model, overwrite them by matches if necessary. | ||||
|         ent_id_sep (str): Separator used internally for entity IDs. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/entityruler#init | ||||
|         DOCS: https://spacy.io/api/entityruler#init | ||||
|         """ | ||||
|         self.nlp = nlp | ||||
|         self.name = name | ||||
|  | @ -133,7 +133,7 @@ class EntityRuler(Pipe): | |||
|         doc (Doc): The Doc object in the pipeline. | ||||
|         RETURNS (Doc): The Doc with added entities, if available. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/entityruler#call | ||||
|         DOCS: https://spacy.io/api/entityruler#call | ||||
|         """ | ||||
|         error_handler = self.get_error_handler() | ||||
|         try: | ||||
|  | @ -183,7 +183,7 @@ class EntityRuler(Pipe): | |||
| 
 | ||||
|         RETURNS (set): The string labels. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/entityruler#labels | ||||
|         DOCS: https://spacy.io/api/entityruler#labels | ||||
|         """ | ||||
|         keys = set(self.token_patterns.keys()) | ||||
|         keys.update(self.phrase_patterns.keys()) | ||||
|  | @ -211,7 +211,7 @@ class EntityRuler(Pipe): | |||
|         nlp (Language): The current nlp object the component is part of. | ||||
|         patterns Optional[Iterable[PatternType]]: The list of patterns. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/entityruler#initialize | ||||
|         DOCS: https://spacy.io/api/entityruler#initialize | ||||
|         """ | ||||
|         self.clear() | ||||
|         if patterns: | ||||
|  | @ -223,7 +223,7 @@ class EntityRuler(Pipe): | |||
| 
 | ||||
|         RETURNS (set): The string entity ids. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/entityruler#ent_ids | ||||
|         DOCS: https://spacy.io/api/entityruler#ent_ids | ||||
|         """ | ||||
|         keys = set(self.token_patterns.keys()) | ||||
|         keys.update(self.phrase_patterns.keys()) | ||||
|  | @ -241,7 +241,7 @@ class EntityRuler(Pipe): | |||
| 
 | ||||
|         RETURNS (list): The original patterns, one dictionary per pattern. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/entityruler#patterns | ||||
|         DOCS: https://spacy.io/api/entityruler#patterns | ||||
|         """ | ||||
|         all_patterns = [] | ||||
|         for label, patterns in self.token_patterns.items(): | ||||
|  | @ -268,7 +268,7 @@ class EntityRuler(Pipe): | |||
| 
 | ||||
|         patterns (list): The patterns to add. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/entityruler#add_patterns | ||||
|         DOCS: https://spacy.io/api/entityruler#add_patterns | ||||
|         """ | ||||
| 
 | ||||
|         # disable the nlp components after this one in case they hadn't been initialized / deserialised yet | ||||
|  | @ -366,7 +366,7 @@ class EntityRuler(Pipe): | |||
|         patterns_bytes (bytes): The bytestring to load. | ||||
|         RETURNS (EntityRuler): The loaded entity ruler. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/entityruler#from_bytes | ||||
|         DOCS: https://spacy.io/api/entityruler#from_bytes | ||||
|         """ | ||||
|         cfg = srsly.msgpack_loads(patterns_bytes) | ||||
|         self.clear() | ||||
|  | @ -388,7 +388,7 @@ class EntityRuler(Pipe): | |||
| 
 | ||||
|         RETURNS (bytes): The serialized patterns. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/entityruler#to_bytes | ||||
|         DOCS: https://spacy.io/api/entityruler#to_bytes | ||||
|         """ | ||||
|         serial = { | ||||
|             "overwrite": self.overwrite, | ||||
|  | @ -407,7 +407,7 @@ class EntityRuler(Pipe): | |||
|         path (str / Path): The JSONL file to load. | ||||
|         RETURNS (EntityRuler): The loaded entity ruler. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/entityruler#from_disk | ||||
|         DOCS: https://spacy.io/api/entityruler#from_disk | ||||
|         """ | ||||
|         path = ensure_path(path) | ||||
|         self.clear() | ||||
|  | @ -443,7 +443,7 @@ class EntityRuler(Pipe): | |||
| 
 | ||||
|         path (str / Path): The JSONL file to save. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/entityruler#to_disk | ||||
|         DOCS: https://spacy.io/api/entityruler#to_disk | ||||
|         """ | ||||
|         path = ensure_path(path) | ||||
|         cfg = { | ||||
|  |  | |||
|  | @ -18,7 +18,7 @@ def merge_noun_chunks(doc: Doc) -> Doc: | |||
|     doc (Doc): The Doc object. | ||||
|     RETURNS (Doc): The Doc object with merged noun chunks. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/pipeline-functions#merge_noun_chunks | ||||
|     DOCS: https://spacy.io/api/pipeline-functions#merge_noun_chunks | ||||
|     """ | ||||
|     if not doc.has_annotation("DEP"): | ||||
|         return doc | ||||
|  | @ -40,7 +40,7 @@ def merge_entities(doc: Doc): | |||
|     doc (Doc): The Doc object. | ||||
|     RETURNS (Doc): The Doc object with merged entities. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/pipeline-functions#merge_entities | ||||
|     DOCS: https://spacy.io/api/pipeline-functions#merge_entities | ||||
|     """ | ||||
|     with doc.retokenize() as retokenizer: | ||||
|         for ent in doc.ents: | ||||
|  | @ -57,7 +57,7 @@ def merge_subtokens(doc: Doc, label: str = "subtok") -> Doc: | |||
|     label (str): The subtoken dependency label. | ||||
|     RETURNS (Doc): The Doc object with merged subtokens. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/pipeline-functions#merge_subtokens | ||||
|     DOCS: https://spacy.io/api/pipeline-functions#merge_subtokens | ||||
|     """ | ||||
|     # TODO: make stateful component with "label" config | ||||
|     merger = Matcher(doc.vocab) | ||||
|  |  | |||
|  | @ -32,7 +32,7 @@ class Lemmatizer(Pipe): | |||
|     The Lemmatizer supports simple part-of-speech-sensitive suffix rules and | ||||
|     lookup tables. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/lemmatizer | ||||
|     DOCS: https://spacy.io/api/lemmatizer | ||||
|     """ | ||||
| 
 | ||||
|     @classmethod | ||||
|  | @ -68,7 +68,7 @@ class Lemmatizer(Pipe): | |||
|         overwrite (bool): Whether to overwrite existing lemmas. Defaults to | ||||
|             `False`. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/lemmatizer#init | ||||
|         DOCS: https://spacy.io/api/lemmatizer#init | ||||
|         """ | ||||
|         self.vocab = vocab | ||||
|         self.model = model | ||||
|  | @ -98,7 +98,7 @@ class Lemmatizer(Pipe): | |||
|         doc (Doc): The Doc to process. | ||||
|         RETURNS (Doc): The processed Doc. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/lemmatizer#call | ||||
|         DOCS: https://spacy.io/api/lemmatizer#call | ||||
|         """ | ||||
|         if not self._validated: | ||||
|             self._validate_tables(Errors.E1004) | ||||
|  | @ -159,7 +159,7 @@ class Lemmatizer(Pipe): | |||
|         token (Token): The token to lemmatize. | ||||
|         RETURNS (list): The available lemmas for the string. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/lemmatizer#lookup_lemmatize | ||||
|         DOCS: https://spacy.io/api/lemmatizer#lookup_lemmatize | ||||
|         """ | ||||
|         lookup_table = self.lookups.get_table("lemma_lookup", {}) | ||||
|         result = lookup_table.get(token.text, token.text) | ||||
|  | @ -173,7 +173,7 @@ class Lemmatizer(Pipe): | |||
|         token (Token): The token to lemmatize. | ||||
|         RETURNS (list): The available lemmas for the string. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/lemmatizer#rule_lemmatize | ||||
|         DOCS: https://spacy.io/api/lemmatizer#rule_lemmatize | ||||
|         """ | ||||
|         cache_key = (token.orth, token.pos, token.morph) | ||||
|         if cache_key in self.cache: | ||||
|  | @ -241,7 +241,7 @@ class Lemmatizer(Pipe): | |||
|         token (Token): The token. | ||||
|         RETURNS (bool): Whether the token is a base form. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/lemmatizer#is_base_form | ||||
|         DOCS: https://spacy.io/api/lemmatizer#is_base_form | ||||
|         """ | ||||
|         return False | ||||
| 
 | ||||
|  | @ -251,7 +251,7 @@ class Lemmatizer(Pipe): | |||
|         examples (Iterable[Example]): The examples to score. | ||||
|         RETURNS (Dict[str, Any]): The scores. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/lemmatizer#score | ||||
|         DOCS: https://spacy.io/api/lemmatizer#score | ||||
|         """ | ||||
|         validate_examples(examples, "Lemmatizer.score") | ||||
|         return Scorer.score_token_attr(examples, "lemma", **kwargs) | ||||
|  | @ -264,7 +264,7 @@ class Lemmatizer(Pipe): | |||
|         path (str / Path): Path to a directory. | ||||
|         exclude (Iterable[str]): String names of serialization fields to exclude. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/lemmatizer#to_disk | ||||
|         DOCS: https://spacy.io/api/lemmatizer#to_disk | ||||
|         """ | ||||
|         serialize = {} | ||||
|         serialize["vocab"] = lambda p: self.vocab.to_disk(p) | ||||
|  | @ -280,7 +280,7 @@ class Lemmatizer(Pipe): | |||
|         exclude (Iterable[str]): String names of serialization fields to exclude. | ||||
|         RETURNS (Lemmatizer): The modified Lemmatizer object. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/lemmatizer#from_disk | ||||
|         DOCS: https://spacy.io/api/lemmatizer#from_disk | ||||
|         """ | ||||
|         deserialize = {} | ||||
|         deserialize["vocab"] = lambda p: self.vocab.from_disk(p) | ||||
|  | @ -295,7 +295,7 @@ class Lemmatizer(Pipe): | |||
|         exclude (Iterable[str]): String names of serialization fields to exclude. | ||||
|         RETURNS (bytes): The serialized object. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/lemmatizer#to_bytes | ||||
|         DOCS: https://spacy.io/api/lemmatizer#to_bytes | ||||
|         """ | ||||
|         serialize = {} | ||||
|         serialize["vocab"] = self.vocab.to_bytes | ||||
|  | @ -311,7 +311,7 @@ class Lemmatizer(Pipe): | |||
|         exclude (Iterable[str]): String names of serialization fields to exclude. | ||||
|         RETURNS (Lemmatizer): The loaded Lemmatizer. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/lemmatizer#from_bytes | ||||
|         DOCS: https://spacy.io/api/lemmatizer#from_bytes | ||||
|         """ | ||||
|         deserialize = {} | ||||
|         deserialize["vocab"] = lambda b: self.vocab.from_bytes(b) | ||||
|  |  | |||
|  | @ -75,7 +75,7 @@ class Morphologizer(Tagger): | |||
|         name (str): The component instance name, used to add entries to the | ||||
|             losses during training. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/morphologizer#init | ||||
|         DOCS: https://spacy.io/api/morphologizer#init | ||||
|         """ | ||||
|         self.vocab = vocab | ||||
|         self.model = model | ||||
|  | @ -104,7 +104,7 @@ class Morphologizer(Tagger): | |||
|         label (str): The label to add. | ||||
|         RETURNS (int): 0 if label is already present, otherwise 1. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/morphologizer#add_label | ||||
|         DOCS: https://spacy.io/api/morphologizer#add_label | ||||
|         """ | ||||
|         if not isinstance(label, str): | ||||
|             raise ValueError(Errors.E187) | ||||
|  | @ -134,7 +134,7 @@ class Morphologizer(Tagger): | |||
|             returns a representative sample of gold-standard Example objects. | ||||
|         nlp (Language): The current nlp object the component is part of. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/morphologizer#initialize | ||||
|         DOCS: https://spacy.io/api/morphologizer#initialize | ||||
|         """ | ||||
|         validate_get_examples(get_examples, "Morphologizer.initialize") | ||||
|         if labels is not None: | ||||
|  | @ -185,7 +185,7 @@ class Morphologizer(Tagger): | |||
|         docs (Iterable[Doc]): The documents to modify. | ||||
|         batch_tag_ids: The IDs to set, produced by Morphologizer.predict. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/morphologizer#set_annotations | ||||
|         DOCS: https://spacy.io/api/morphologizer#set_annotations | ||||
|         """ | ||||
|         if isinstance(docs, Doc): | ||||
|             docs = [docs] | ||||
|  | @ -208,7 +208,7 @@ class Morphologizer(Tagger): | |||
|         scores: Scores representing the model's predictions. | ||||
|         RETURNS (Tuple[float, float]): The loss and the gradient. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/morphologizer#get_loss | ||||
|         DOCS: https://spacy.io/api/morphologizer#get_loss | ||||
|         """ | ||||
|         validate_examples(examples, "Morphologizer.get_loss") | ||||
|         loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False) | ||||
|  | @ -254,7 +254,7 @@ class Morphologizer(Tagger): | |||
|             Scorer.score_token_attr for the attributes "pos" and "morph" and | ||||
|             Scorer.score_token_attr_per_feat for the attribute "morph". | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/morphologizer#score | ||||
|         DOCS: https://spacy.io/api/morphologizer#score | ||||
|         """ | ||||
|         def morph_key_getter(token, attr): | ||||
|             return getattr(token, attr).key | ||||
|  |  | |||
|  | @ -163,7 +163,7 @@ def make_beam_ner( | |||
| cdef class EntityRecognizer(Parser): | ||||
|     """Pipeline component for named entity recognition. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/entityrecognizer | ||||
|     DOCS: https://spacy.io/api/entityrecognizer | ||||
|     """ | ||||
|     TransitionSystem = BiluoPushDown | ||||
| 
 | ||||
|  | @ -194,7 +194,7 @@ cdef class EntityRecognizer(Parser): | |||
|         examples (Iterable[Example]): The examples to score. | ||||
|         RETURNS (Dict[str, Any]): The NER precision, recall and f-scores. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/entityrecognizer#score | ||||
|         DOCS: https://spacy.io/api/entityrecognizer#score | ||||
|         """ | ||||
|         validate_examples(examples, "EntityRecognizer.score") | ||||
|         return get_ner_prf(examples) | ||||
|  |  | |||
|  | @ -16,7 +16,7 @@ cdef class Pipe: | |||
|     Trainable pipeline components like the EntityRecognizer or TextCategorizer | ||||
|     should inherit from the subclass 'TrainablePipe'. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/pipe | ||||
|     DOCS: https://spacy.io/api/pipe | ||||
|     """ | ||||
| 
 | ||||
|     @classmethod | ||||
|  | @ -34,7 +34,7 @@ cdef class Pipe: | |||
|         docs (Doc): The Doc to process. | ||||
|         RETURNS (Doc): The processed Doc. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/pipe#call | ||||
|         DOCS: https://spacy.io/api/pipe#call | ||||
|         """ | ||||
|         raise NotImplementedError(Errors.E931.format(parent="Pipe", method="__call__", name=self.name)) | ||||
| 
 | ||||
|  | @ -47,7 +47,7 @@ cdef class Pipe: | |||
|         batch_size (int): The number of documents to buffer. | ||||
|         YIELDS (Doc): Processed documents in order. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/pipe#pipe | ||||
|         DOCS: https://spacy.io/api/pipe#pipe | ||||
|         """ | ||||
|         error_handler = self.get_error_handler() | ||||
|         for doc in stream: | ||||
|  | @ -69,7 +69,7 @@ cdef class Pipe: | |||
|             returns a representative sample of gold-standard Example objects. | ||||
|         nlp (Language): The current nlp object the component is part of. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/pipe#initialize | ||||
|         DOCS: https://spacy.io/api/pipe#initialize | ||||
|         """ | ||||
|         pass | ||||
| 
 | ||||
|  | @ -79,7 +79,7 @@ cdef class Pipe: | |||
|         examples (Iterable[Example]): The examples to score. | ||||
|         RETURNS (Dict[str, Any]): The scores. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/pipe#score | ||||
|         DOCS: https://spacy.io/api/pipe#score | ||||
|         """ | ||||
|         return {} | ||||
| 
 | ||||
|  | @ -111,7 +111,7 @@ cdef class Pipe: | |||
|             the component's name, the component itself, the offending batch of documents, and the exception | ||||
|             that was thrown. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/pipe#set_error_handler | ||||
|         DOCS: https://spacy.io/api/pipe#set_error_handler | ||||
|         """ | ||||
|         self.error_handler = error_handler | ||||
| 
 | ||||
|  | @ -120,7 +120,7 @@ cdef class Pipe: | |||
| 
 | ||||
|         RETURNS (Callable): The error handler, or if it's not set a default function that just reraises. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/pipe#get_error_handler | ||||
|         DOCS: https://spacy.io/api/pipe#get_error_handler | ||||
|         """ | ||||
|         if hasattr(self, "error_handler"): | ||||
|             return self.error_handler | ||||
|  |  | |||
|  | @ -26,7 +26,7 @@ def make_sentencizer( | |||
| class Sentencizer(Pipe): | ||||
|     """Segment the Doc into sentences using a rule-based strategy. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/sentencizer | ||||
|     DOCS: https://spacy.io/api/sentencizer | ||||
|     """ | ||||
| 
 | ||||
|     default_punct_chars = ['!', '.', '?', '։', '؟', '۔', '܀', '܁', '܂', '߹', | ||||
|  | @ -48,7 +48,7 @@ class Sentencizer(Pipe): | |||
|             serialized with the nlp object. | ||||
|         RETURNS (Sentencizer): The sentencizer component. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/sentencizer#init | ||||
|         DOCS: https://spacy.io/api/sentencizer#init | ||||
|         """ | ||||
|         self.name = name | ||||
|         if punct_chars: | ||||
|  | @ -62,7 +62,7 @@ class Sentencizer(Pipe): | |||
|         doc (Doc): The document to process. | ||||
|         RETURNS (Doc): The processed Doc. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/sentencizer#call | ||||
|         DOCS: https://spacy.io/api/sentencizer#call | ||||
|         """ | ||||
|         error_handler = self.get_error_handler() | ||||
|         try: | ||||
|  | @ -142,7 +142,7 @@ class Sentencizer(Pipe): | |||
|         examples (Iterable[Example]): The examples to score. | ||||
|         RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/sentencizer#score | ||||
|         DOCS: https://spacy.io/api/sentencizer#score | ||||
|         """ | ||||
|         def has_sents(doc): | ||||
|             return doc.has_annotation("SENT_START") | ||||
|  | @ -157,7 +157,7 @@ class Sentencizer(Pipe): | |||
| 
 | ||||
|         RETURNS (bytes): The serialized object. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/sentencizer#to_bytes | ||||
|         DOCS: https://spacy.io/api/sentencizer#to_bytes | ||||
|         """ | ||||
|         return srsly.msgpack_dumps({"punct_chars": list(self.punct_chars)}) | ||||
| 
 | ||||
|  | @ -167,7 +167,7 @@ class Sentencizer(Pipe): | |||
|         bytes_data (bytes): The data to load. | ||||
|         returns (Sentencizer): The loaded object. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/sentencizer#from_bytes | ||||
|         DOCS: https://spacy.io/api/sentencizer#from_bytes | ||||
|         """ | ||||
|         cfg = srsly.msgpack_loads(bytes_data) | ||||
|         self.punct_chars = set(cfg.get("punct_chars", self.default_punct_chars)) | ||||
|  | @ -176,7 +176,7 @@ class Sentencizer(Pipe): | |||
|     def to_disk(self, path, *, exclude=tuple()): | ||||
|         """Serialize the sentencizer to disk. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/sentencizer#to_disk | ||||
|         DOCS: https://spacy.io/api/sentencizer#to_disk | ||||
|         """ | ||||
|         path = util.ensure_path(path) | ||||
|         path = path.with_suffix(".json") | ||||
|  | @ -186,7 +186,7 @@ class Sentencizer(Pipe): | |||
|     def from_disk(self, path, *, exclude=tuple()): | ||||
|         """Load the sentencizer from disk. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/sentencizer#from_disk | ||||
|         DOCS: https://spacy.io/api/sentencizer#from_disk | ||||
|         """ | ||||
|         path = util.ensure_path(path) | ||||
|         path = path.with_suffix(".json") | ||||
|  |  | |||
|  | @ -44,7 +44,7 @@ def make_senter(nlp: Language, name: str, model: Model): | |||
| class SentenceRecognizer(Tagger): | ||||
|     """Pipeline component for sentence segmentation. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/sentencerecognizer | ||||
|     DOCS: https://spacy.io/api/sentencerecognizer | ||||
|     """ | ||||
|     def __init__(self, vocab, model, name="senter"): | ||||
|         """Initialize a sentence recognizer. | ||||
|  | @ -54,7 +54,7 @@ class SentenceRecognizer(Tagger): | |||
|         name (str): The component instance name, used to add entries to the | ||||
|             losses during training. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/sentencerecognizer#init | ||||
|         DOCS: https://spacy.io/api/sentencerecognizer#init | ||||
|         """ | ||||
|         self.vocab = vocab | ||||
|         self.model = model | ||||
|  | @ -80,7 +80,7 @@ class SentenceRecognizer(Tagger): | |||
|         docs (Iterable[Doc]): The documents to modify. | ||||
|         batch_tag_ids: The IDs to set, produced by SentenceRecognizer.predict. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/sentencerecognizer#set_annotations | ||||
|         DOCS: https://spacy.io/api/sentencerecognizer#set_annotations | ||||
|         """ | ||||
|         if isinstance(docs, Doc): | ||||
|             docs = [docs] | ||||
|  | @ -105,7 +105,7 @@ class SentenceRecognizer(Tagger): | |||
|         scores: Scores representing the model's predictions. | ||||
|         RETURNS (Tuple[float, float]): The loss and the gradient. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/sentencerecognizer#get_loss | ||||
|         DOCS: https://spacy.io/api/sentencerecognizer#get_loss | ||||
|         """ | ||||
|         validate_examples(examples, "SentenceRecognizer.get_loss") | ||||
|         labels = self.labels | ||||
|  | @ -135,7 +135,7 @@ class SentenceRecognizer(Tagger): | |||
|             returns a representative sample of gold-standard Example objects. | ||||
|         nlp (Language): The current nlp object the component is part of. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/sentencerecognizer#initialize | ||||
|         DOCS: https://spacy.io/api/sentencerecognizer#initialize | ||||
|         """ | ||||
|         validate_get_examples(get_examples, "SentenceRecognizer.initialize") | ||||
|         doc_sample = [] | ||||
|  | @ -158,7 +158,7 @@ class SentenceRecognizer(Tagger): | |||
| 
 | ||||
|         examples (Iterable[Example]): The examples to score. | ||||
|         RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans. | ||||
|         DOCS: https://nightly.spacy.io/api/sentencerecognizer#score | ||||
|         DOCS: https://spacy.io/api/sentencerecognizer#score | ||||
|         """ | ||||
|         def has_sents(doc): | ||||
|             return doc.has_annotation("SENT_START") | ||||
|  |  | |||
|  | @ -58,7 +58,7 @@ def make_tagger(nlp: Language, name: str, model: Model): | |||
| class Tagger(TrainablePipe): | ||||
|     """Pipeline component for part-of-speech tagging. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/tagger | ||||
|     DOCS: https://spacy.io/api/tagger | ||||
|     """ | ||||
|     def __init__(self, vocab, model, name="tagger"): | ||||
|         """Initialize a part-of-speech tagger. | ||||
|  | @ -68,7 +68,7 @@ class Tagger(TrainablePipe): | |||
|         name (str): The component instance name, used to add entries to the | ||||
|             losses during training. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/tagger#init | ||||
|         DOCS: https://spacy.io/api/tagger#init | ||||
|         """ | ||||
|         self.vocab = vocab | ||||
|         self.model = model | ||||
|  | @ -85,7 +85,7 @@ class Tagger(TrainablePipe): | |||
| 
 | ||||
|         RETURNS (Tuple[str]): The labels. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/tagger#labels | ||||
|         DOCS: https://spacy.io/api/tagger#labels | ||||
|         """ | ||||
|         return tuple(self.cfg["labels"]) | ||||
| 
 | ||||
|  | @ -100,7 +100,7 @@ class Tagger(TrainablePipe): | |||
|         docs (Iterable[Doc]): The documents to predict. | ||||
|         RETURNS: The models prediction for each document. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/tagger#predict | ||||
|         DOCS: https://spacy.io/api/tagger#predict | ||||
|         """ | ||||
|         if not any(len(doc) for doc in docs): | ||||
|             # Handle cases where there are no tokens in any docs. | ||||
|  | @ -129,7 +129,7 @@ class Tagger(TrainablePipe): | |||
|         docs (Iterable[Doc]): The documents to modify. | ||||
|         batch_tag_ids: The IDs to set, produced by Tagger.predict. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/tagger#set_annotations | ||||
|         DOCS: https://spacy.io/api/tagger#set_annotations | ||||
|         """ | ||||
|         if isinstance(docs, Doc): | ||||
|             docs = [docs] | ||||
|  | @ -155,7 +155,7 @@ class Tagger(TrainablePipe): | |||
|             Updated using the component name as the key. | ||||
|         RETURNS (Dict[str, float]): The updated losses dictionary. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/tagger#update | ||||
|         DOCS: https://spacy.io/api/tagger#update | ||||
|         """ | ||||
|         if losses is None: | ||||
|             losses = {} | ||||
|  | @ -190,7 +190,7 @@ class Tagger(TrainablePipe): | |||
|             Updated using the component name as the key. | ||||
|         RETURNS (Dict[str, float]): The updated losses dictionary. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/tagger#rehearse | ||||
|         DOCS: https://spacy.io/api/tagger#rehearse | ||||
|         """ | ||||
|         if losses is None: | ||||
|             losses = {} | ||||
|  | @ -219,7 +219,7 @@ class Tagger(TrainablePipe): | |||
|         scores: Scores representing the model's predictions. | ||||
|         RETURNS (Tuple[float, float]): The loss and the gradient. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/tagger#get_loss | ||||
|         DOCS: https://spacy.io/api/tagger#get_loss | ||||
|         """ | ||||
|         validate_examples(examples, "Tagger.get_loss") | ||||
|         loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False) | ||||
|  | @ -246,7 +246,7 @@ class Tagger(TrainablePipe): | |||
|             `init labels` command. If no labels are provided, the get_examples | ||||
|             callback is used to extract the labels from the data. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/tagger#initialize | ||||
|         DOCS: https://spacy.io/api/tagger#initialize | ||||
|         """ | ||||
|         validate_get_examples(get_examples, "Tagger.initialize") | ||||
|         if labels is not None: | ||||
|  | @ -278,7 +278,7 @@ class Tagger(TrainablePipe): | |||
|         label (str): The label to add. | ||||
|         RETURNS (int): 0 if label is already present, otherwise 1. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/tagger#add_label | ||||
|         DOCS: https://spacy.io/api/tagger#add_label | ||||
|         """ | ||||
|         if not isinstance(label, str): | ||||
|             raise ValueError(Errors.E187) | ||||
|  | @ -296,7 +296,7 @@ class Tagger(TrainablePipe): | |||
|         RETURNS (Dict[str, Any]): The scores, produced by | ||||
|             Scorer.score_token_attr for the attributes "tag". | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/tagger#score | ||||
|         DOCS: https://spacy.io/api/tagger#score | ||||
|         """ | ||||
|         validate_examples(examples, "Tagger.score") | ||||
|         return Scorer.score_token_attr(examples, "tag", **kwargs) | ||||
|  |  | |||
|  | @ -104,7 +104,7 @@ def make_textcat( | |||
| class TextCategorizer(TrainablePipe): | ||||
|     """Pipeline component for single-label text classification. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/textcategorizer | ||||
|     DOCS: https://spacy.io/api/textcategorizer | ||||
|     """ | ||||
| 
 | ||||
|     def __init__( | ||||
|  | @ -118,7 +118,7 @@ class TextCategorizer(TrainablePipe): | |||
|             losses during training. | ||||
|         threshold (float): Cutoff to consider a prediction "positive". | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/textcategorizer#init | ||||
|         DOCS: https://spacy.io/api/textcategorizer#init | ||||
|         """ | ||||
|         self.vocab = vocab | ||||
|         self.model = model | ||||
|  | @ -131,7 +131,7 @@ class TextCategorizer(TrainablePipe): | |||
|     def labels(self) -> Tuple[str]: | ||||
|         """RETURNS (Tuple[str]): The labels currently added to the component. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/textcategorizer#labels | ||||
|         DOCS: https://spacy.io/api/textcategorizer#labels | ||||
|         """ | ||||
|         return tuple(self.cfg["labels"]) | ||||
| 
 | ||||
|  | @ -139,7 +139,7 @@ class TextCategorizer(TrainablePipe): | |||
|     def label_data(self) -> List[str]: | ||||
|         """RETURNS (List[str]): Information about the component's labels. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/textcategorizer#label_data | ||||
|         DOCS: https://spacy.io/api/textcategorizer#label_data | ||||
|         """ | ||||
|         return self.labels | ||||
| 
 | ||||
|  | @ -149,7 +149,7 @@ class TextCategorizer(TrainablePipe): | |||
|         docs (Iterable[Doc]): The documents to predict. | ||||
|         RETURNS: The models prediction for each document. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/textcategorizer#predict | ||||
|         DOCS: https://spacy.io/api/textcategorizer#predict | ||||
|         """ | ||||
|         if not any(len(doc) for doc in docs): | ||||
|             # Handle cases where there are no tokens in any docs. | ||||
|  | @ -167,7 +167,7 @@ class TextCategorizer(TrainablePipe): | |||
|         docs (Iterable[Doc]): The documents to modify. | ||||
|         scores: The scores to set, produced by TextCategorizer.predict. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/textcategorizer#set_annotations | ||||
|         DOCS: https://spacy.io/api/textcategorizer#set_annotations | ||||
|         """ | ||||
|         for i, doc in enumerate(docs): | ||||
|             for j, label in enumerate(self.labels): | ||||
|  | @ -191,7 +191,7 @@ class TextCategorizer(TrainablePipe): | |||
|             Updated using the component name as the key. | ||||
|         RETURNS (Dict[str, float]): The updated losses dictionary. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/textcategorizer#update | ||||
|         DOCS: https://spacy.io/api/textcategorizer#update | ||||
|         """ | ||||
|         if losses is None: | ||||
|             losses = {} | ||||
|  | @ -230,7 +230,7 @@ class TextCategorizer(TrainablePipe): | |||
|             Updated using the component name as the key. | ||||
|         RETURNS (Dict[str, float]): The updated losses dictionary. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/textcategorizer#rehearse | ||||
|         DOCS: https://spacy.io/api/textcategorizer#rehearse | ||||
|         """ | ||||
|         if losses is not None: | ||||
|             losses.setdefault(self.name, 0.0) | ||||
|  | @ -275,7 +275,7 @@ class TextCategorizer(TrainablePipe): | |||
|         scores: Scores representing the model's predictions. | ||||
|         RETURNS (Tuple[float, float]): The loss and the gradient. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/textcategorizer#get_loss | ||||
|         DOCS: https://spacy.io/api/textcategorizer#get_loss | ||||
|         """ | ||||
|         validate_examples(examples, "TextCategorizer.get_loss") | ||||
|         self._validate_categories(examples) | ||||
|  | @ -292,7 +292,7 @@ class TextCategorizer(TrainablePipe): | |||
|         label (str): The label to add. | ||||
|         RETURNS (int): 0 if label is already present, otherwise 1. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/textcategorizer#add_label | ||||
|         DOCS: https://spacy.io/api/textcategorizer#add_label | ||||
|         """ | ||||
|         if not isinstance(label, str): | ||||
|             raise ValueError(Errors.E187) | ||||
|  | @ -321,7 +321,7 @@ class TextCategorizer(TrainablePipe): | |||
|             `init labels` command. If no labels are provided, the get_examples | ||||
|             callback is used to extract the labels from the data. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/textcategorizer#initialize | ||||
|         DOCS: https://spacy.io/api/textcategorizer#initialize | ||||
|         """ | ||||
|         validate_get_examples(get_examples, "TextCategorizer.initialize") | ||||
|         self._validate_categories(get_examples()) | ||||
|  | @ -354,7 +354,7 @@ class TextCategorizer(TrainablePipe): | |||
|         examples (Iterable[Example]): The examples to score. | ||||
|         RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_cats. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/textcategorizer#score | ||||
|         DOCS: https://spacy.io/api/textcategorizer#score | ||||
|         """ | ||||
|         validate_examples(examples, "TextCategorizer.score") | ||||
|         self._validate_categories(examples) | ||||
|  |  | |||
|  | @ -104,7 +104,7 @@ def make_multilabel_textcat( | |||
| class MultiLabel_TextCategorizer(TextCategorizer): | ||||
|     """Pipeline component for multi-label text classification. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/multilabel_textcategorizer | ||||
|     DOCS: https://spacy.io/api/multilabel_textcategorizer | ||||
|     """ | ||||
| 
 | ||||
|     def __init__( | ||||
|  | @ -123,7 +123,7 @@ class MultiLabel_TextCategorizer(TextCategorizer): | |||
|             losses during training. | ||||
|         threshold (float): Cutoff to consider a prediction "positive". | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/multilabel_textcategorizer#init | ||||
|         DOCS: https://spacy.io/api/multilabel_textcategorizer#init | ||||
|         """ | ||||
|         self.vocab = vocab | ||||
|         self.model = model | ||||
|  | @ -149,7 +149,7 @@ class MultiLabel_TextCategorizer(TextCategorizer): | |||
|             `init labels` command. If no labels are provided, the get_examples | ||||
|             callback is used to extract the labels from the data. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/multilabel_textcategorizer#initialize | ||||
|         DOCS: https://spacy.io/api/multilabel_textcategorizer#initialize | ||||
|         """ | ||||
|         validate_get_examples(get_examples, "MultiLabel_TextCategorizer.initialize") | ||||
|         if labels is None: | ||||
|  | @ -173,7 +173,7 @@ class MultiLabel_TextCategorizer(TextCategorizer): | |||
|         examples (Iterable[Example]): The examples to score. | ||||
|         RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_cats. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/multilabel_textcategorizer#score | ||||
|         DOCS: https://spacy.io/api/multilabel_textcategorizer#score | ||||
|         """ | ||||
|         validate_examples(examples, "MultiLabel_TextCategorizer.score") | ||||
|         return Scorer.score_cats( | ||||
|  |  | |||
|  | @ -55,7 +55,7 @@ class Tok2Vec(TrainablePipe): | |||
|             a list of Doc objects as input, and output a list of 2d float arrays. | ||||
|         name (str): The component instance name. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/tok2vec#init | ||||
|         DOCS: https://spacy.io/api/tok2vec#init | ||||
|         """ | ||||
|         self.vocab = vocab | ||||
|         self.model = model | ||||
|  | @ -115,7 +115,7 @@ class Tok2Vec(TrainablePipe): | |||
|         docs (Iterable[Doc]): The documents to predict. | ||||
|         RETURNS: Vector representations for each token in the documents. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/tok2vec#predict | ||||
|         DOCS: https://spacy.io/api/tok2vec#predict | ||||
|         """ | ||||
|         tokvecs = self.model.predict(docs) | ||||
|         batch_id = Tok2VecListener.get_batch_id(docs) | ||||
|  | @ -129,7 +129,7 @@ class Tok2Vec(TrainablePipe): | |||
|         docs (Iterable[Doc]): The documents to modify. | ||||
|         tokvecses: The tensors to set, produced by Tok2Vec.predict. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/tok2vec#set_annotations | ||||
|         DOCS: https://spacy.io/api/tok2vec#set_annotations | ||||
|         """ | ||||
|         for doc, tokvecs in zip(docs, tokvecses): | ||||
|             assert tokvecs.shape[0] == len(doc) | ||||
|  | @ -153,7 +153,7 @@ class Tok2Vec(TrainablePipe): | |||
|             Updated using the component name as the key. | ||||
|         RETURNS (Dict[str, float]): The updated losses dictionary. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/tok2vec#update | ||||
|         DOCS: https://spacy.io/api/tok2vec#update | ||||
|         """ | ||||
|         if losses is None: | ||||
|             losses = {} | ||||
|  | @ -204,7 +204,7 @@ class Tok2Vec(TrainablePipe): | |||
|             returns a representative sample of gold-standard Example objects. | ||||
|         nlp (Language): The current nlp object the component is part of. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/tok2vec#initialize | ||||
|         DOCS: https://spacy.io/api/tok2vec#initialize | ||||
|         """ | ||||
|         validate_get_examples(get_examples, "Tok2Vec.initialize") | ||||
|         doc_sample = [] | ||||
|  |  | |||
|  | @ -20,7 +20,7 @@ cdef class TrainablePipe(Pipe): | |||
|     from it and it defines the interface that components should follow to | ||||
|     function as trainable components in a spaCy pipeline. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/pipe | ||||
|     DOCS: https://spacy.io/api/pipe | ||||
|     """ | ||||
|     def __init__(self, vocab: Vocab, model: Model, name: str, **cfg): | ||||
|         """Initialize a pipeline component. | ||||
|  | @ -30,7 +30,7 @@ cdef class TrainablePipe(Pipe): | |||
|         name (str): The component instance name. | ||||
|         **cfg: Additional settings and config parameters. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/pipe#init | ||||
|         DOCS: https://spacy.io/api/pipe#init | ||||
|         """ | ||||
|         self.vocab = vocab | ||||
|         self.model = model | ||||
|  | @ -45,7 +45,7 @@ cdef class TrainablePipe(Pipe): | |||
|         docs (Doc): The Doc to process. | ||||
|         RETURNS (Doc): The processed Doc. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/pipe#call | ||||
|         DOCS: https://spacy.io/api/pipe#call | ||||
|         """ | ||||
|         error_handler = self.get_error_handler() | ||||
|         try: | ||||
|  | @ -67,7 +67,7 @@ cdef class TrainablePipe(Pipe): | |||
|             the exception. | ||||
|         YIELDS (Doc): Processed documents in order. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/pipe#pipe | ||||
|         DOCS: https://spacy.io/api/pipe#pipe | ||||
|         """ | ||||
|         error_handler = self.get_error_handler() | ||||
|         for docs in util.minibatch(stream, size=batch_size): | ||||
|  | @ -85,7 +85,7 @@ cdef class TrainablePipe(Pipe): | |||
|         docs (Iterable[Doc]): The documents to predict. | ||||
|         RETURNS: Vector representations of the predictions. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/pipe#predict | ||||
|         DOCS: https://spacy.io/api/pipe#predict | ||||
|         """ | ||||
|         raise NotImplementedError(Errors.E931.format(parent="TrainablePipe", method="predict", name=self.name)) | ||||
| 
 | ||||
|  | @ -95,7 +95,7 @@ cdef class TrainablePipe(Pipe): | |||
|         docs (Iterable[Doc]): The documents to modify. | ||||
|         scores: The scores to assign. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/pipe#set_annotations | ||||
|         DOCS: https://spacy.io/api/pipe#set_annotations | ||||
|         """ | ||||
|         raise NotImplementedError(Errors.E931.format(parent="TrainablePipe", method="set_annotations", name=self.name)) | ||||
| 
 | ||||
|  | @ -114,7 +114,7 @@ cdef class TrainablePipe(Pipe): | |||
|             Updated using the component name as the key. | ||||
|         RETURNS (Dict[str, float]): The updated losses dictionary. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/pipe#update | ||||
|         DOCS: https://spacy.io/api/pipe#update | ||||
|         """ | ||||
|         if losses is None: | ||||
|             losses = {} | ||||
|  | @ -151,7 +151,7 @@ cdef class TrainablePipe(Pipe): | |||
|             Updated using the component name as the key. | ||||
|         RETURNS (Dict[str, float]): The updated losses dictionary. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/pipe#rehearse | ||||
|         DOCS: https://spacy.io/api/pipe#rehearse | ||||
|         """ | ||||
|         pass | ||||
| 
 | ||||
|  | @ -163,7 +163,7 @@ cdef class TrainablePipe(Pipe): | |||
|         scores: Scores representing the model's predictions. | ||||
|         RETURNS (Tuple[float, float]): The loss and the gradient. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/pipe#get_loss | ||||
|         DOCS: https://spacy.io/api/pipe#get_loss | ||||
|         """ | ||||
|         raise NotImplementedError(Errors.E931.format(parent="TrainablePipe", method="get_loss", name=self.name)) | ||||
| 
 | ||||
|  | @ -172,7 +172,7 @@ cdef class TrainablePipe(Pipe): | |||
| 
 | ||||
|         RETURNS (thinc.api.Optimizer): The optimizer. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/pipe#create_optimizer | ||||
|         DOCS: https://spacy.io/api/pipe#create_optimizer | ||||
|         """ | ||||
|         return util.create_default_optimizer() | ||||
| 
 | ||||
|  | @ -186,7 +186,7 @@ cdef class TrainablePipe(Pipe): | |||
|             returns a representative sample of gold-standard Example objects. | ||||
|         nlp (Language): The current nlp object the component is part of. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/pipe#initialize | ||||
|         DOCS: https://spacy.io/api/pipe#initialize | ||||
|         """ | ||||
|         raise NotImplementedError(Errors.E931.format(parent="TrainablePipe", method="initialize", name=self.name)) | ||||
| 
 | ||||
|  | @ -199,7 +199,7 @@ cdef class TrainablePipe(Pipe): | |||
|         label (str): The label to add. | ||||
|         RETURNS (int): 0 if label is already present, otherwise 1. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/pipe#add_label | ||||
|         DOCS: https://spacy.io/api/pipe#add_label | ||||
|         """ | ||||
|         raise NotImplementedError(Errors.E931.format(parent="Pipe", method="add_label", name=self.name)) | ||||
| 
 | ||||
|  | @ -229,7 +229,7 @@ cdef class TrainablePipe(Pipe): | |||
| 
 | ||||
|         params (dict): The parameter values to use in the model. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/pipe#use_params | ||||
|         DOCS: https://spacy.io/api/pipe#use_params | ||||
|         """ | ||||
|         with self.model.use_params(params): | ||||
|             yield | ||||
|  | @ -241,7 +241,7 @@ cdef class TrainablePipe(Pipe): | |||
| 
 | ||||
|         sgd (thinc.api.Optimizer): The optimizer. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/pipe#finish_update | ||||
|         DOCS: https://spacy.io/api/pipe#finish_update | ||||
|         """ | ||||
|         self.model.finish_update(sgd) | ||||
| 
 | ||||
|  | @ -261,7 +261,7 @@ cdef class TrainablePipe(Pipe): | |||
|         exclude (Iterable[str]): String names of serialization fields to exclude. | ||||
|         RETURNS (bytes): The serialized object. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/pipe#to_bytes | ||||
|         DOCS: https://spacy.io/api/pipe#to_bytes | ||||
|         """ | ||||
|         self._validate_serialization_attrs() | ||||
|         serialize = {} | ||||
|  | @ -277,7 +277,7 @@ cdef class TrainablePipe(Pipe): | |||
|         exclude (Iterable[str]): String names of serialization fields to exclude. | ||||
|         RETURNS (TrainablePipe): The loaded object. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/pipe#from_bytes | ||||
|         DOCS: https://spacy.io/api/pipe#from_bytes | ||||
|         """ | ||||
|         self._validate_serialization_attrs() | ||||
| 
 | ||||
|  | @ -301,7 +301,7 @@ cdef class TrainablePipe(Pipe): | |||
|         path (str / Path): Path to a directory. | ||||
|         exclude (Iterable[str]): String names of serialization fields to exclude. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/pipe#to_disk | ||||
|         DOCS: https://spacy.io/api/pipe#to_disk | ||||
|         """ | ||||
|         self._validate_serialization_attrs() | ||||
|         serialize = {} | ||||
|  | @ -318,7 +318,7 @@ cdef class TrainablePipe(Pipe): | |||
|         exclude (Iterable[str]): String names of serialization fields to exclude. | ||||
|         RETURNS (TrainablePipe): The loaded object. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/pipe#from_disk | ||||
|         DOCS: https://spacy.io/api/pipe#from_disk | ||||
|         """ | ||||
|         self._validate_serialization_attrs() | ||||
| 
 | ||||
|  |  | |||
|  | @ -103,7 +103,7 @@ class Scorer: | |||
|     ) -> None: | ||||
|         """Initialize the Scorer. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/scorer#init | ||||
|         DOCS: https://spacy.io/api/scorer#init | ||||
|         """ | ||||
|         self.nlp = nlp | ||||
|         self.cfg = cfg | ||||
|  | @ -119,7 +119,7 @@ class Scorer: | |||
|         examples (Iterable[Example]): The predicted annotations + correct annotations. | ||||
|         RETURNS (Dict): A dictionary of scores. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/scorer#score | ||||
|         DOCS: https://spacy.io/api/scorer#score | ||||
|         """ | ||||
|         scores = {} | ||||
|         if hasattr(self.nlp.tokenizer, "score"): | ||||
|  | @ -139,7 +139,7 @@ class Scorer: | |||
|         RETURNS (Dict[str, Any]): A dictionary containing the scores | ||||
|             token_acc/p/r/f. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/scorer#score_tokenization | ||||
|         DOCS: https://spacy.io/api/scorer#score_tokenization | ||||
|         """ | ||||
|         acc_score = PRFScore() | ||||
|         prf_score = PRFScore() | ||||
|  | @ -198,7 +198,7 @@ class Scorer: | |||
|         RETURNS (Dict[str, Any]): A dictionary containing the accuracy score | ||||
|             under the key attr_acc. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/scorer#score_token_attr | ||||
|         DOCS: https://spacy.io/api/scorer#score_token_attr | ||||
|         """ | ||||
|         tag_score = PRFScore() | ||||
|         for example in examples: | ||||
|  | @ -317,7 +317,7 @@ class Scorer: | |||
|         RETURNS (Dict[str, Any]): A dictionary containing the PRF scores under | ||||
|             the keys attr_p/r/f and the per-type PRF scores under attr_per_type. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/scorer#score_spans | ||||
|         DOCS: https://spacy.io/api/scorer#score_spans | ||||
|         """ | ||||
|         score = PRFScore() | ||||
|         score_per_type = dict() | ||||
|  | @ -413,7 +413,7 @@ class Scorer: | |||
|                 attr_f_per_type, | ||||
|                 attr_auc_per_type | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/scorer#score_cats | ||||
|         DOCS: https://spacy.io/api/scorer#score_cats | ||||
|         """ | ||||
|         if threshold is None: | ||||
|             threshold = 0.5 if multi_label else 0.0 | ||||
|  | @ -519,7 +519,7 @@ class Scorer: | |||
|         negative_labels (Iterable[str]): The string values that refer to no annotation (e.g. "NIL") | ||||
|         RETURNS (Dict[str, Any]): A dictionary containing the scores. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/scorer#score_links | ||||
|         DOCS: https://spacy.io/api/scorer#score_links | ||||
|         """ | ||||
|         f_per_type = {} | ||||
|         for example in examples: | ||||
|  | @ -603,7 +603,7 @@ class Scorer: | |||
|         RETURNS (Dict[str, Any]): A dictionary containing the scores: | ||||
|             attr_uas, attr_las, and attr_las_per_type. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/scorer#score_deps | ||||
|         DOCS: https://spacy.io/api/scorer#score_deps | ||||
|         """ | ||||
|         unlabelled = PRFScore() | ||||
|         labelled = PRFScore() | ||||
|  |  | |||
|  | @ -91,7 +91,7 @@ cdef Utf8Str* _allocate(Pool mem, const unsigned char* chars, uint32_t length) e | |||
| cdef class StringStore: | ||||
|     """Look up strings by 64-bit hashes. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/stringstore | ||||
|     DOCS: https://spacy.io/api/stringstore | ||||
|     """ | ||||
|     def __init__(self, strings=None, freeze=False): | ||||
|         """Create the StringStore. | ||||
|  |  | |||
|  | @ -31,7 +31,7 @@ cdef class Tokenizer: | |||
|     """Segment text, and create Doc objects with the discovered segment | ||||
|     boundaries. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/tokenizer | ||||
|     DOCS: https://spacy.io/api/tokenizer | ||||
|     """ | ||||
|     def __init__(self, Vocab vocab, rules=None, prefix_search=None, | ||||
|                  suffix_search=None, infix_finditer=None, token_match=None, | ||||
|  | @ -54,7 +54,7 @@ cdef class Tokenizer: | |||
|         EXAMPLE: | ||||
|             >>> tokenizer = Tokenizer(nlp.vocab) | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/tokenizer#init | ||||
|         DOCS: https://spacy.io/api/tokenizer#init | ||||
|         """ | ||||
|         self.mem = Pool() | ||||
|         self._cache = PreshMap() | ||||
|  | @ -147,7 +147,7 @@ cdef class Tokenizer: | |||
|         string (str): The string to tokenize. | ||||
|         RETURNS (Doc): A container for linguistic annotations. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/tokenizer#call | ||||
|         DOCS: https://spacy.io/api/tokenizer#call | ||||
|         """ | ||||
|         doc = self._tokenize_affixes(string, True) | ||||
|         self._apply_special_cases(doc) | ||||
|  | @ -209,7 +209,7 @@ cdef class Tokenizer: | |||
|         Defaults to 1000. | ||||
|         YIELDS (Doc): A sequence of Doc objects, in order. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/tokenizer#pipe | ||||
|         DOCS: https://spacy.io/api/tokenizer#pipe | ||||
|         """ | ||||
|         for text in texts: | ||||
|             yield self(text) | ||||
|  | @ -529,7 +529,7 @@ cdef class Tokenizer: | |||
|             and `.end()` methods, denoting the placement of internal segment | ||||
|             separators, e.g. hyphens. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/tokenizer#find_infix | ||||
|         DOCS: https://spacy.io/api/tokenizer#find_infix | ||||
|         """ | ||||
|         if self.infix_finditer is None: | ||||
|             return 0 | ||||
|  | @ -542,7 +542,7 @@ cdef class Tokenizer: | |||
|         string (str): The string to segment. | ||||
|         RETURNS (int): The length of the prefix if present, otherwise `None`. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/tokenizer#find_prefix | ||||
|         DOCS: https://spacy.io/api/tokenizer#find_prefix | ||||
|         """ | ||||
|         if self.prefix_search is None: | ||||
|             return 0 | ||||
|  | @ -556,7 +556,7 @@ cdef class Tokenizer: | |||
|         string (str): The string to segment. | ||||
|         Returns (int): The length of the suffix if present, otherwise `None`. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/tokenizer#find_suffix | ||||
|         DOCS: https://spacy.io/api/tokenizer#find_suffix | ||||
|         """ | ||||
|         if self.suffix_search is None: | ||||
|             return 0 | ||||
|  | @ -596,7 +596,7 @@ cdef class Tokenizer: | |||
|             a token and its attributes. The `ORTH` fields of the attributes | ||||
|             must exactly match the string when they are concatenated. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/tokenizer#add_special_case | ||||
|         DOCS: https://spacy.io/api/tokenizer#add_special_case | ||||
|         """ | ||||
|         self._validate_special_case(string, substrings) | ||||
|         substrings = list(substrings) | ||||
|  | @ -635,7 +635,7 @@ cdef class Tokenizer: | |||
|         string (str): The string to tokenize. | ||||
|         RETURNS (list): A list of (pattern_string, token_string) tuples | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/tokenizer#explain | ||||
|         DOCS: https://spacy.io/api/tokenizer#explain | ||||
|         """ | ||||
|         prefix_search = self.prefix_search | ||||
|         suffix_search = self.suffix_search | ||||
|  | @ -718,7 +718,7 @@ cdef class Tokenizer: | |||
|             it doesn't exist. | ||||
|         exclude (list): String names of serialization fields to exclude. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/tokenizer#to_disk | ||||
|         DOCS: https://spacy.io/api/tokenizer#to_disk | ||||
|         """ | ||||
|         path = util.ensure_path(path) | ||||
|         with path.open("wb") as file_: | ||||
|  | @ -732,7 +732,7 @@ cdef class Tokenizer: | |||
|         exclude (list): String names of serialization fields to exclude. | ||||
|         RETURNS (Tokenizer): The modified `Tokenizer` object. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/tokenizer#from_disk | ||||
|         DOCS: https://spacy.io/api/tokenizer#from_disk | ||||
|         """ | ||||
|         path = util.ensure_path(path) | ||||
|         with path.open("rb") as file_: | ||||
|  | @ -746,7 +746,7 @@ cdef class Tokenizer: | |||
|         exclude (list): String names of serialization fields to exclude. | ||||
|         RETURNS (bytes): The serialized form of the `Tokenizer` object. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/tokenizer#to_bytes | ||||
|         DOCS: https://spacy.io/api/tokenizer#to_bytes | ||||
|         """ | ||||
|         serializers = { | ||||
|             "vocab": lambda: self.vocab.to_bytes(), | ||||
|  | @ -766,7 +766,7 @@ cdef class Tokenizer: | |||
|         exclude (list): String names of serialization fields to exclude. | ||||
|         RETURNS (Tokenizer): The `Tokenizer` object. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/tokenizer#from_bytes | ||||
|         DOCS: https://spacy.io/api/tokenizer#from_bytes | ||||
|         """ | ||||
|         data = {} | ||||
|         deserializers = { | ||||
|  |  | |||
|  | @ -24,8 +24,8 @@ from ..strings import get_string_id | |||
| cdef class Retokenizer: | ||||
|     """Helper class for doc.retokenize() context manager. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/doc#retokenize | ||||
|     USAGE: https://nightly.spacy.io/usage/linguistic-features#retokenization | ||||
|     DOCS: https://spacy.io/api/doc#retokenize | ||||
|     USAGE: https://spacy.io/usage/linguistic-features#retokenization | ||||
|     """ | ||||
|     cdef Doc doc | ||||
|     cdef list merges | ||||
|  | @ -47,7 +47,7 @@ cdef class Retokenizer: | |||
|         span (Span): The span to merge. | ||||
|         attrs (dict): Attributes to set on the merged token. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/doc#retokenizer.merge | ||||
|         DOCS: https://spacy.io/api/doc#retokenizer.merge | ||||
|         """ | ||||
|         if (span.start, span.end) in self._spans_to_merge: | ||||
|             return | ||||
|  | @ -73,7 +73,7 @@ cdef class Retokenizer: | |||
|         attrs (dict): Attributes to set on all split tokens. Attribute names | ||||
|             mapped to list of per-token attribute values. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/doc#retokenizer.split | ||||
|         DOCS: https://spacy.io/api/doc#retokenizer.split | ||||
|         """ | ||||
|         if ''.join(orths) != token.text: | ||||
|             raise ValueError(Errors.E117.format(new=''.join(orths), old=token.text)) | ||||
|  |  | |||
|  | @ -62,7 +62,7 @@ class DocBin: | |||
|         store_user_data (bool): Whether to write the `Doc.user_data` to bytes/file. | ||||
|         docs (Iterable[Doc]): Docs to add. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/docbin#init | ||||
|         DOCS: https://spacy.io/api/docbin#init | ||||
|         """ | ||||
|         attrs = sorted([intify_attr(attr) for attr in attrs]) | ||||
|         self.version = "0.1" | ||||
|  | @ -88,7 +88,7 @@ class DocBin: | |||
| 
 | ||||
|         doc (Doc): The Doc object to add. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/docbin#add | ||||
|         DOCS: https://spacy.io/api/docbin#add | ||||
|         """ | ||||
|         array = doc.to_array(self.attrs) | ||||
|         if len(array.shape) == 1: | ||||
|  | @ -122,7 +122,7 @@ class DocBin: | |||
|         vocab (Vocab): The shared vocab. | ||||
|         YIELDS (Doc): The Doc objects. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/docbin#get_docs | ||||
|         DOCS: https://spacy.io/api/docbin#get_docs | ||||
|         """ | ||||
|         for string in self.strings: | ||||
|             vocab[string] | ||||
|  | @ -153,7 +153,7 @@ class DocBin: | |||
| 
 | ||||
|         other (DocBin): The DocBin to merge into the current bin. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/docbin#merge | ||||
|         DOCS: https://spacy.io/api/docbin#merge | ||||
|         """ | ||||
|         if self.attrs != other.attrs: | ||||
|             raise ValueError( | ||||
|  | @ -180,7 +180,7 @@ class DocBin: | |||
| 
 | ||||
|         RETURNS (bytes): The serialized DocBin. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/docbin#to_bytes | ||||
|         DOCS: https://spacy.io/api/docbin#to_bytes | ||||
|         """ | ||||
|         for tokens in self.tokens: | ||||
|             assert len(tokens.shape) == 2, tokens.shape  # this should never happen | ||||
|  | @ -208,7 +208,7 @@ class DocBin: | |||
|         bytes_data (bytes): The data to load from. | ||||
|         RETURNS (DocBin): The loaded DocBin. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/docbin#from_bytes | ||||
|         DOCS: https://spacy.io/api/docbin#from_bytes | ||||
|         """ | ||||
|         try: | ||||
|             msg = srsly.msgpack_loads(zlib.decompress(bytes_data)) | ||||
|  | @ -240,7 +240,7 @@ class DocBin: | |||
| 
 | ||||
|         path (str / Path): The file path. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/docbin#to_disk | ||||
|         DOCS: https://spacy.io/api/docbin#to_disk | ||||
|         """ | ||||
|         path = ensure_path(path) | ||||
|         with path.open("wb") as file_: | ||||
|  | @ -252,7 +252,7 @@ class DocBin: | |||
|         path (str / Path): The file path. | ||||
|         RETURNS (DocBin): The loaded DocBin. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/docbin#to_disk | ||||
|         DOCS: https://spacy.io/api/docbin#to_disk | ||||
|         """ | ||||
|         path = ensure_path(path) | ||||
|         with path.open("rb") as file_: | ||||
|  |  | |||
|  | @ -116,7 +116,7 @@ cdef class Doc: | |||
|         >>> from spacy.tokens import Doc | ||||
|         >>> doc = Doc(nlp.vocab, words=["hello", "world", "!"], spaces=[True, False, False]) | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/doc | ||||
|     DOCS: https://spacy.io/api/doc | ||||
|     """ | ||||
| 
 | ||||
|     @classmethod | ||||
|  | @ -130,8 +130,8 @@ cdef class Doc: | |||
|         method (callable): Optional method for method extension. | ||||
|         force (bool): Force overwriting existing attribute. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/doc#set_extension | ||||
|         USAGE: https://nightly.spacy.io/usage/processing-pipelines#custom-components-attributes | ||||
|         DOCS: https://spacy.io/api/doc#set_extension | ||||
|         USAGE: https://spacy.io/usage/processing-pipelines#custom-components-attributes | ||||
|         """ | ||||
|         if cls.has_extension(name) and not kwargs.get("force", False): | ||||
|             raise ValueError(Errors.E090.format(name=name, obj="Doc")) | ||||
|  | @ -144,7 +144,7 @@ cdef class Doc: | |||
|         name (str): Name of the extension. | ||||
|         RETURNS (tuple): A `(default, method, getter, setter)` tuple. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/doc#get_extension | ||||
|         DOCS: https://spacy.io/api/doc#get_extension | ||||
|         """ | ||||
|         return Underscore.doc_extensions.get(name) | ||||
| 
 | ||||
|  | @ -155,7 +155,7 @@ cdef class Doc: | |||
|         name (str): Name of the extension. | ||||
|         RETURNS (bool): Whether the extension has been registered. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/doc#has_extension | ||||
|         DOCS: https://spacy.io/api/doc#has_extension | ||||
|         """ | ||||
|         return name in Underscore.doc_extensions | ||||
| 
 | ||||
|  | @ -167,7 +167,7 @@ cdef class Doc: | |||
|         RETURNS (tuple): A `(default, method, getter, setter)` tuple of the | ||||
|             removed extension. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/doc#remove_extension | ||||
|         DOCS: https://spacy.io/api/doc#remove_extension | ||||
|         """ | ||||
|         if not cls.has_extension(name): | ||||
|             raise ValueError(Errors.E046.format(name=name)) | ||||
|  | @ -219,7 +219,7 @@ cdef class Doc: | |||
|             length as words, as IOB tags to assign as token.ent_iob and | ||||
|             token.ent_type. Defaults to None. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/doc#init | ||||
|         DOCS: https://spacy.io/api/doc#init | ||||
|         """ | ||||
|         self.vocab = vocab | ||||
|         size = max(20, (len(words) if words is not None else 0)) | ||||
|  | @ -399,7 +399,7 @@ cdef class Doc: | |||
|             every token in the doc. | ||||
|         RETURNS (bool): Whether annotation is present. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/doc#has_annotation | ||||
|         DOCS: https://spacy.io/api/doc#has_annotation | ||||
|         """ | ||||
| 
 | ||||
|         # empty docs are always annotated | ||||
|  | @ -450,7 +450,7 @@ cdef class Doc: | |||
|             You can use negative indices and open-ended ranges, which have | ||||
|             their normal Python semantics. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/doc#getitem | ||||
|         DOCS: https://spacy.io/api/doc#getitem | ||||
|         """ | ||||
|         if isinstance(i, slice): | ||||
|             start, stop = util.normalize_slice(len(self), i.start, i.stop, i.step) | ||||
|  | @ -467,7 +467,7 @@ cdef class Doc: | |||
|         than-Python speeds are required, you can instead access the annotations | ||||
|         as a numpy array, or access the underlying C data directly from Cython. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/doc#iter | ||||
|         DOCS: https://spacy.io/api/doc#iter | ||||
|         """ | ||||
|         cdef int i | ||||
|         for i in range(self.length): | ||||
|  | @ -478,7 +478,7 @@ cdef class Doc: | |||
| 
 | ||||
|         RETURNS (int): The number of tokens in the document. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/doc#len | ||||
|         DOCS: https://spacy.io/api/doc#len | ||||
|         """ | ||||
|         return self.length | ||||
| 
 | ||||
|  | @ -519,7 +519,7 @@ cdef class Doc: | |||
|             partially covered by the character span). Defaults to "strict". | ||||
|         RETURNS (Span): The newly constructed object. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/doc#char_span | ||||
|         DOCS: https://spacy.io/api/doc#char_span | ||||
|         """ | ||||
|         if not isinstance(label, int): | ||||
|             label = self.vocab.strings.add(label) | ||||
|  | @ -562,7 +562,7 @@ cdef class Doc: | |||
|             `Span`, `Token` and `Lexeme` objects. | ||||
|         RETURNS (float): A scalar similarity score. Higher is more similar. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/doc#similarity | ||||
|         DOCS: https://spacy.io/api/doc#similarity | ||||
|         """ | ||||
|         if "similarity" in self.user_hooks: | ||||
|             return self.user_hooks["similarity"](self, other) | ||||
|  | @ -595,7 +595,7 @@ cdef class Doc: | |||
| 
 | ||||
|         RETURNS (bool): Whether a word vector is associated with the object. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/doc#has_vector | ||||
|         DOCS: https://spacy.io/api/doc#has_vector | ||||
|         """ | ||||
|         if "has_vector" in self.user_hooks: | ||||
|             return self.user_hooks["has_vector"](self) | ||||
|  | @ -613,7 +613,7 @@ cdef class Doc: | |||
|         RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array | ||||
|             representing the document's semantics. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/doc#vector | ||||
|         DOCS: https://spacy.io/api/doc#vector | ||||
|         """ | ||||
|         def __get__(self): | ||||
|             if "vector" in self.user_hooks: | ||||
|  | @ -641,7 +641,7 @@ cdef class Doc: | |||
| 
 | ||||
|         RETURNS (float): The L2 norm of the vector representation. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/doc#vector_norm | ||||
|         DOCS: https://spacy.io/api/doc#vector_norm | ||||
|         """ | ||||
|         def __get__(self): | ||||
|             if "vector_norm" in self.user_hooks: | ||||
|  | @ -681,7 +681,7 @@ cdef class Doc: | |||
| 
 | ||||
|         RETURNS (tuple): Entities in the document, one `Span` per entity. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/doc#ents | ||||
|         DOCS: https://spacy.io/api/doc#ents | ||||
|         """ | ||||
|         def __get__(self): | ||||
|             cdef int i | ||||
|  | @ -827,7 +827,7 @@ cdef class Doc: | |||
| 
 | ||||
|         YIELDS (Span): Noun chunks in the document. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/doc#noun_chunks | ||||
|         DOCS: https://spacy.io/api/doc#noun_chunks | ||||
|         """ | ||||
|         if self.noun_chunks_iterator is None: | ||||
|             raise NotImplementedError(Errors.E894.format(lang=self.vocab.lang)) | ||||
|  | @ -850,7 +850,7 @@ cdef class Doc: | |||
| 
 | ||||
|         YIELDS (Span): Sentences in the document. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/doc#sents | ||||
|         DOCS: https://spacy.io/api/doc#sents | ||||
|         """ | ||||
|         if not self.has_annotation("SENT_START"): | ||||
|             raise ValueError(Errors.E030) | ||||
|  | @ -959,7 +959,7 @@ cdef class Doc: | |||
|         attr_id (int): The attribute ID to key the counts. | ||||
|         RETURNS (dict): A dictionary mapping attributes to integer counts. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/doc#count_by | ||||
|         DOCS: https://spacy.io/api/doc#count_by | ||||
|         """ | ||||
|         cdef int i | ||||
|         cdef attr_t attr | ||||
|  | @ -1006,7 +1006,7 @@ cdef class Doc: | |||
|         array (numpy.ndarray[ndim=2, dtype='int32']): The attribute values. | ||||
|         RETURNS (Doc): Itself. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/doc#from_array | ||||
|         DOCS: https://spacy.io/api/doc#from_array | ||||
|         """ | ||||
|         # Handle scalar/list inputs of strings/ints for py_attr_ids | ||||
|         # See also #3064 | ||||
|  | @ -1098,7 +1098,7 @@ cdef class Doc: | |||
|         attrs (list): Optional list of attribute ID ints or attribute name strings. | ||||
|         RETURNS (Doc): A doc that contains the concatenated docs, or None if no docs were given. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/doc#from_docs | ||||
|         DOCS: https://spacy.io/api/doc#from_docs | ||||
|         """ | ||||
|         if not docs: | ||||
|             return None | ||||
|  | @ -1170,7 +1170,7 @@ cdef class Doc: | |||
|         RETURNS (np.array[ndim=2, dtype=numpy.int32]): LCA matrix with shape | ||||
|             (n, n), where n = len(self). | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/doc#get_lca_matrix | ||||
|         DOCS: https://spacy.io/api/doc#get_lca_matrix | ||||
|         """ | ||||
|         return numpy.asarray(_get_lca_matrix(self, 0, len(self))) | ||||
| 
 | ||||
|  | @ -1203,7 +1203,7 @@ cdef class Doc: | |||
|             it doesn't exist. Paths may be either strings or Path-like objects. | ||||
|         exclude (Iterable[str]): String names of serialization fields to exclude. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/doc#to_disk | ||||
|         DOCS: https://spacy.io/api/doc#to_disk | ||||
|         """ | ||||
|         path = util.ensure_path(path) | ||||
|         with path.open("wb") as file_: | ||||
|  | @ -1218,7 +1218,7 @@ cdef class Doc: | |||
|         exclude (list): String names of serialization fields to exclude. | ||||
|         RETURNS (Doc): The modified `Doc` object. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/doc#from_disk | ||||
|         DOCS: https://spacy.io/api/doc#from_disk | ||||
|         """ | ||||
|         path = util.ensure_path(path) | ||||
|         with path.open("rb") as file_: | ||||
|  | @ -1232,7 +1232,7 @@ cdef class Doc: | |||
|         RETURNS (bytes): A losslessly serialized copy of the `Doc`, including | ||||
|             all annotations. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/doc#to_bytes | ||||
|         DOCS: https://spacy.io/api/doc#to_bytes | ||||
|         """ | ||||
|         return srsly.msgpack_dumps(self.to_dict(exclude=exclude)) | ||||
| 
 | ||||
|  | @ -1243,7 +1243,7 @@ cdef class Doc: | |||
|         exclude (list): String names of serialization fields to exclude. | ||||
|         RETURNS (Doc): Itself. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/doc#from_bytes | ||||
|         DOCS: https://spacy.io/api/doc#from_bytes | ||||
|         """ | ||||
|         return self.from_dict(srsly.msgpack_loads(bytes_data), exclude=exclude) | ||||
| 
 | ||||
|  | @ -1254,7 +1254,7 @@ cdef class Doc: | |||
|         RETURNS (bytes): A losslessly serialized copy of the `Doc`, including | ||||
|             all annotations. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/doc#to_bytes | ||||
|         DOCS: https://spacy.io/api/doc#to_bytes | ||||
|         """ | ||||
|         array_head = Doc._get_array_attrs() | ||||
|         strings = set() | ||||
|  | @ -1302,7 +1302,7 @@ cdef class Doc: | |||
|         exclude (list): String names of serialization fields to exclude. | ||||
|         RETURNS (Doc): Itself. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/doc#from_dict | ||||
|         DOCS: https://spacy.io/api/doc#from_dict | ||||
|         """ | ||||
|         if self.length != 0: | ||||
|             raise ValueError(Errors.E033.format(length=self.length)) | ||||
|  | @ -1373,8 +1373,8 @@ cdef class Doc: | |||
|         retokenization are invalidated, although they may accidentally | ||||
|         continue to work. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/doc#retokenize | ||||
|         USAGE: https://nightly.spacy.io/usage/linguistic-features#retokenization | ||||
|         DOCS: https://spacy.io/api/doc#retokenize | ||||
|         USAGE: https://spacy.io/usage/linguistic-features#retokenization | ||||
|         """ | ||||
|         return Retokenizer(self) | ||||
| 
 | ||||
|  |  | |||
|  | @ -24,7 +24,7 @@ from .underscore import Underscore, get_ext_args | |||
| cdef class Span: | ||||
|     """A slice from a Doc object. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/span | ||||
|     DOCS: https://spacy.io/api/span | ||||
|     """ | ||||
|     @classmethod | ||||
|     def set_extension(cls, name, **kwargs): | ||||
|  | @ -37,8 +37,8 @@ cdef class Span: | |||
|         method (callable): Optional method for method extension. | ||||
|         force (bool): Force overwriting existing attribute. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/span#set_extension | ||||
|         USAGE: https://nightly.spacy.io/usage/processing-pipelines#custom-components-attributes | ||||
|         DOCS: https://spacy.io/api/span#set_extension | ||||
|         USAGE: https://spacy.io/usage/processing-pipelines#custom-components-attributes | ||||
|         """ | ||||
|         if cls.has_extension(name) and not kwargs.get("force", False): | ||||
|             raise ValueError(Errors.E090.format(name=name, obj="Span")) | ||||
|  | @ -51,7 +51,7 @@ cdef class Span: | |||
|         name (str): Name of the extension. | ||||
|         RETURNS (tuple): A `(default, method, getter, setter)` tuple. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/span#get_extension | ||||
|         DOCS: https://spacy.io/api/span#get_extension | ||||
|         """ | ||||
|         return Underscore.span_extensions.get(name) | ||||
| 
 | ||||
|  | @ -62,7 +62,7 @@ cdef class Span: | |||
|         name (str): Name of the extension. | ||||
|         RETURNS (bool): Whether the extension has been registered. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/span#has_extension | ||||
|         DOCS: https://spacy.io/api/span#has_extension | ||||
|         """ | ||||
|         return name in Underscore.span_extensions | ||||
| 
 | ||||
|  | @ -74,7 +74,7 @@ cdef class Span: | |||
|         RETURNS (tuple): A `(default, method, getter, setter)` tuple of the | ||||
|             removed extension. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/span#remove_extension | ||||
|         DOCS: https://spacy.io/api/span#remove_extension | ||||
|         """ | ||||
|         if not cls.has_extension(name): | ||||
|             raise ValueError(Errors.E046.format(name=name)) | ||||
|  | @ -92,7 +92,7 @@ cdef class Span: | |||
|         vector (ndarray[ndim=1, dtype='float32']): A meaning representation | ||||
|             of the span. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/span#init | ||||
|         DOCS: https://spacy.io/api/span#init | ||||
|         """ | ||||
|         if not (0 <= start <= end <= len(doc)): | ||||
|             raise IndexError(Errors.E035.format(start=start, end=end, length=len(doc))) | ||||
|  | @ -162,7 +162,7 @@ cdef class Span: | |||
| 
 | ||||
|         RETURNS (int): The number of tokens in the span. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/span#len | ||||
|         DOCS: https://spacy.io/api/span#len | ||||
|         """ | ||||
|         if self.c.end < self.c.start: | ||||
|             return 0 | ||||
|  | @ -178,7 +178,7 @@ cdef class Span: | |||
|             the span to get. | ||||
|         RETURNS (Token or Span): The token at `span[i]`. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/span#getitem | ||||
|         DOCS: https://spacy.io/api/span#getitem | ||||
|         """ | ||||
|         if isinstance(i, slice): | ||||
|             start, end = normalize_slice(len(self), i.start, i.stop, i.step) | ||||
|  | @ -198,7 +198,7 @@ cdef class Span: | |||
| 
 | ||||
|         YIELDS (Token): A `Token` object. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/span#iter | ||||
|         DOCS: https://spacy.io/api/span#iter | ||||
|         """ | ||||
|         for i in range(self.c.start, self.c.end): | ||||
|             yield self.doc[i] | ||||
|  | @ -218,7 +218,7 @@ cdef class Span: | |||
|         copy_user_data (bool): Whether or not to copy the original doc's user data. | ||||
|         RETURNS (Doc): The `Doc` copy of the span. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/span#as_doc | ||||
|         DOCS: https://spacy.io/api/span#as_doc | ||||
|         """ | ||||
|         words = [t.text for t in self] | ||||
|         spaces = [bool(t.whitespace_) for t in self] | ||||
|  | @ -291,7 +291,7 @@ cdef class Span: | |||
|         RETURNS (np.array[ndim=2, dtype=numpy.int32]): LCA matrix with shape | ||||
|             (n, n), where n = len(self). | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/span#get_lca_matrix | ||||
|         DOCS: https://spacy.io/api/span#get_lca_matrix | ||||
|         """ | ||||
|         return numpy.asarray(_get_lca_matrix(self.doc, self.c.start, self.c.end)) | ||||
| 
 | ||||
|  | @ -303,7 +303,7 @@ cdef class Span: | |||
|             `Span`, `Token` and `Lexeme` objects. | ||||
|         RETURNS (float): A scalar similarity score. Higher is more similar. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/span#similarity | ||||
|         DOCS: https://spacy.io/api/span#similarity | ||||
|         """ | ||||
|         if "similarity" in self.doc.user_span_hooks: | ||||
|             return self.doc.user_span_hooks["similarity"](self, other) | ||||
|  | @ -385,7 +385,7 @@ cdef class Span: | |||
| 
 | ||||
|         RETURNS (tuple): Entities in the span, one `Span` per entity. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/span#ents | ||||
|         DOCS: https://spacy.io/api/span#ents | ||||
|         """ | ||||
|         cdef Span ent | ||||
|         ents = [] | ||||
|  | @ -404,7 +404,7 @@ cdef class Span: | |||
| 
 | ||||
|         RETURNS (bool): Whether a word vector is associated with the object. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/span#has_vector | ||||
|         DOCS: https://spacy.io/api/span#has_vector | ||||
|         """ | ||||
|         if "has_vector" in self.doc.user_span_hooks: | ||||
|             return self.doc.user_span_hooks["has_vector"](self) | ||||
|  | @ -423,7 +423,7 @@ cdef class Span: | |||
|         RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array | ||||
|             representing the span's semantics. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/span#vector | ||||
|         DOCS: https://spacy.io/api/span#vector | ||||
|         """ | ||||
|         if "vector" in self.doc.user_span_hooks: | ||||
|             return self.doc.user_span_hooks["vector"](self) | ||||
|  | @ -437,7 +437,7 @@ cdef class Span: | |||
| 
 | ||||
|         RETURNS (float): The L2 norm of the vector representation. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/span#vector_norm | ||||
|         DOCS: https://spacy.io/api/span#vector_norm | ||||
|         """ | ||||
|         if "vector_norm" in self.doc.user_span_hooks: | ||||
|             return self.doc.user_span_hooks["vector"](self) | ||||
|  | @ -501,7 +501,7 @@ cdef class Span: | |||
| 
 | ||||
|         YIELDS (Span): Noun chunks in the span. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/span#noun_chunks | ||||
|         DOCS: https://spacy.io/api/span#noun_chunks | ||||
|         """ | ||||
|         for span in self.doc.noun_chunks: | ||||
|             if span.start >= self.start and span.end <= self.end: | ||||
|  | @ -515,7 +515,7 @@ cdef class Span: | |||
| 
 | ||||
|         RETURNS (Token): The root token. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/span#root | ||||
|         DOCS: https://spacy.io/api/span#root | ||||
|         """ | ||||
|         if "root" in self.doc.user_span_hooks: | ||||
|             return self.doc.user_span_hooks["root"](self) | ||||
|  | @ -571,7 +571,7 @@ cdef class Span: | |||
| 
 | ||||
|         RETURNS (tuple): A tuple of Token objects. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/span#lefts | ||||
|         DOCS: https://spacy.io/api/span#lefts | ||||
|         """ | ||||
|         return self.root.conjuncts | ||||
| 
 | ||||
|  | @ -582,7 +582,7 @@ cdef class Span: | |||
| 
 | ||||
|         YIELDS (Token):A left-child of a token of the span. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/span#lefts | ||||
|         DOCS: https://spacy.io/api/span#lefts | ||||
|         """ | ||||
|         for token in reversed(self):  # Reverse, so we get tokens in order | ||||
|             for left in token.lefts: | ||||
|  | @ -596,7 +596,7 @@ cdef class Span: | |||
| 
 | ||||
|         YIELDS (Token): A right-child of a token of the span. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/span#rights | ||||
|         DOCS: https://spacy.io/api/span#rights | ||||
|         """ | ||||
|         for token in self: | ||||
|             for right in token.rights: | ||||
|  | @ -611,7 +611,7 @@ cdef class Span: | |||
|         RETURNS (int): The number of leftward immediate children of the | ||||
|             span, in the syntactic dependency parse. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/span#n_lefts | ||||
|         DOCS: https://spacy.io/api/span#n_lefts | ||||
|         """ | ||||
|         return len(list(self.lefts)) | ||||
| 
 | ||||
|  | @ -623,7 +623,7 @@ cdef class Span: | |||
|         RETURNS (int): The number of rightward immediate children of the | ||||
|             span, in the syntactic dependency parse. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/span#n_rights | ||||
|         DOCS: https://spacy.io/api/span#n_rights | ||||
|         """ | ||||
|         return len(list(self.rights)) | ||||
| 
 | ||||
|  | @ -633,7 +633,7 @@ cdef class Span: | |||
| 
 | ||||
|         YIELDS (Token): A token within the span, or a descendant from it. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/span#subtree | ||||
|         DOCS: https://spacy.io/api/span#subtree | ||||
|         """ | ||||
|         for word in self.lefts: | ||||
|             yield from word.subtree | ||||
|  |  | |||
|  | @ -27,7 +27,7 @@ cdef class SpanGroup: | |||
|         >>> doc.spans["errors"] = [doc[0:1], doc[2:4]] | ||||
|         >>> assert isinstance(doc.spans["errors"], SpanGroup) | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/spangroup | ||||
|     DOCS: https://spacy.io/api/spangroup | ||||
|     """ | ||||
|     def __init__(self, doc, *, name="", attrs={}, spans=[]): | ||||
|         """Create a SpanGroup. | ||||
|  | @ -37,7 +37,7 @@ cdef class SpanGroup: | |||
|         attrs (Dict[str, Any]): Optional JSON-serializable attributes to attach. | ||||
|         spans (Iterable[Span]): The spans to add to the group. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/spangroup#init | ||||
|         DOCS: https://spacy.io/api/spangroup#init | ||||
|         """ | ||||
|         # We need to make this a weak reference, so that the Doc object can | ||||
|         # own the SpanGroup without circular references. We do want to get | ||||
|  | @ -56,7 +56,7 @@ cdef class SpanGroup: | |||
|     def doc(self): | ||||
|         """RETURNS (Doc): The reference document. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/spangroup#doc | ||||
|         DOCS: https://spacy.io/api/spangroup#doc | ||||
|         """ | ||||
|         return self._doc_ref() | ||||
| 
 | ||||
|  | @ -64,7 +64,7 @@ cdef class SpanGroup: | |||
|     def has_overlap(self): | ||||
|         """RETURNS (bool): Whether the group contains overlapping spans. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/spangroup#has_overlap | ||||
|         DOCS: https://spacy.io/api/spangroup#has_overlap | ||||
|         """ | ||||
|         if not len(self): | ||||
|             return False | ||||
|  | @ -79,7 +79,7 @@ cdef class SpanGroup: | |||
|     def __len__(self): | ||||
|         """RETURNS (int): The number of spans in the group. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/spangroup#len | ||||
|         DOCS: https://spacy.io/api/spangroup#len | ||||
|         """ | ||||
|         return self.c.size() | ||||
| 
 | ||||
|  | @ -89,7 +89,7 @@ cdef class SpanGroup: | |||
| 
 | ||||
|         span (Span): The span to append. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/spangroup#append | ||||
|         DOCS: https://spacy.io/api/spangroup#append | ||||
|         """ | ||||
|         if span.doc is not self.doc: | ||||
|             raise ValueError("Cannot add span to group: refers to different Doc.") | ||||
|  | @ -101,7 +101,7 @@ cdef class SpanGroup: | |||
| 
 | ||||
|         spans (Iterable[Span]): The spans to add. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/spangroup#extend | ||||
|         DOCS: https://spacy.io/api/spangroup#extend | ||||
|         """ | ||||
|         cdef Span span | ||||
|         for span in spans: | ||||
|  | @ -113,7 +113,7 @@ cdef class SpanGroup: | |||
|         i (int): The item index. | ||||
|         RETURNS (Span): The span at the given index. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/spangroup#getitem | ||||
|         DOCS: https://spacy.io/api/spangroup#getitem | ||||
|         """ | ||||
|         cdef int size = self.c.size() | ||||
|         if i < -size or i >= size: | ||||
|  | @ -127,7 +127,7 @@ cdef class SpanGroup: | |||
| 
 | ||||
|         RETURNS (bytes): The serialized span group. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/spangroup#to_bytes | ||||
|         DOCS: https://spacy.io/api/spangroup#to_bytes | ||||
|         """ | ||||
|         output = {"name": self.name, "attrs": self.attrs, "spans": []} | ||||
|         for i in range(self.c.size()): | ||||
|  | @ -159,7 +159,7 @@ cdef class SpanGroup: | |||
|         bytes_data (bytes): The span group to load. | ||||
|         RETURNS (SpanGroup): The deserialized span group. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/spangroup#from_bytes | ||||
|         DOCS: https://spacy.io/api/spangroup#from_bytes | ||||
|         """ | ||||
|         msg = srsly.msgpack_loads(bytes_data) | ||||
|         self.name = msg["name"] | ||||
|  |  | |||
|  | @ -27,7 +27,7 @@ cdef class Token: | |||
|     """An individual token – i.e. a word, punctuation symbol, whitespace, | ||||
|     etc. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/token | ||||
|     DOCS: https://spacy.io/api/token | ||||
|     """ | ||||
|     @classmethod | ||||
|     def set_extension(cls, name, **kwargs): | ||||
|  | @ -40,8 +40,8 @@ cdef class Token: | |||
|         method (callable): Optional method for method extension. | ||||
|         force (bool): Force overwriting existing attribute. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/token#set_extension | ||||
|         USAGE: https://nightly.spacy.io/usage/processing-pipelines#custom-components-attributes | ||||
|         DOCS: https://spacy.io/api/token#set_extension | ||||
|         USAGE: https://spacy.io/usage/processing-pipelines#custom-components-attributes | ||||
|         """ | ||||
|         if cls.has_extension(name) and not kwargs.get("force", False): | ||||
|             raise ValueError(Errors.E090.format(name=name, obj="Token")) | ||||
|  | @ -54,7 +54,7 @@ cdef class Token: | |||
|         name (str): Name of the extension. | ||||
|         RETURNS (tuple): A `(default, method, getter, setter)` tuple. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/token#get_extension | ||||
|         DOCS: https://spacy.io/api/token#get_extension | ||||
|         """ | ||||
|         return Underscore.token_extensions.get(name) | ||||
| 
 | ||||
|  | @ -65,7 +65,7 @@ cdef class Token: | |||
|         name (str): Name of the extension. | ||||
|         RETURNS (bool): Whether the extension has been registered. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/token#has_extension | ||||
|         DOCS: https://spacy.io/api/token#has_extension | ||||
|         """ | ||||
|         return name in Underscore.token_extensions | ||||
| 
 | ||||
|  | @ -77,7 +77,7 @@ cdef class Token: | |||
|         RETURNS (tuple): A `(default, method, getter, setter)` tuple of the | ||||
|             removed extension. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/token#remove_extension | ||||
|         DOCS: https://spacy.io/api/token#remove_extension | ||||
|         """ | ||||
|         if not cls.has_extension(name): | ||||
|             raise ValueError(Errors.E046.format(name=name)) | ||||
|  | @ -90,7 +90,7 @@ cdef class Token: | |||
|         doc (Doc): The parent document. | ||||
|         offset (int): The index of the token within the document. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/token#init | ||||
|         DOCS: https://spacy.io/api/token#init | ||||
|         """ | ||||
|         self.vocab = vocab | ||||
|         self.doc = doc | ||||
|  | @ -105,7 +105,7 @@ cdef class Token: | |||
| 
 | ||||
|         RETURNS (int): The number of unicode characters in the token. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/token#len | ||||
|         DOCS: https://spacy.io/api/token#len | ||||
|         """ | ||||
|         return self.c.lex.length | ||||
| 
 | ||||
|  | @ -168,7 +168,7 @@ cdef class Token: | |||
|         flag_id (int): The ID of the flag attribute. | ||||
|         RETURNS (bool): Whether the flag is set. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/token#check_flag | ||||
|         DOCS: https://spacy.io/api/token#check_flag | ||||
|         """ | ||||
|         return Lexeme.c_check_flag(self.c.lex, flag_id) | ||||
| 
 | ||||
|  | @ -178,7 +178,7 @@ cdef class Token: | |||
|         i (int): The relative position of the token to get. Defaults to 1. | ||||
|         RETURNS (Token): The token at position `self.doc[self.i+i]`. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/token#nbor | ||||
|         DOCS: https://spacy.io/api/token#nbor | ||||
|         """ | ||||
|         if self.i+i < 0 or (self.i+i >= len(self.doc)): | ||||
|             raise IndexError(Errors.E042.format(i=self.i, j=i, length=len(self.doc))) | ||||
|  | @ -192,7 +192,7 @@ cdef class Token: | |||
|             `Span`, `Token` and `Lexeme` objects. | ||||
|         RETURNS (float): A scalar similarity score. Higher is more similar. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/token#similarity | ||||
|         DOCS: https://spacy.io/api/token#similarity | ||||
|         """ | ||||
|         if "similarity" in self.doc.user_token_hooks: | ||||
|             return self.doc.user_token_hooks["similarity"](self, other) | ||||
|  | @ -388,7 +388,7 @@ cdef class Token: | |||
| 
 | ||||
|         RETURNS (bool): Whether a word vector is associated with the object. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/token#has_vector | ||||
|         DOCS: https://spacy.io/api/token#has_vector | ||||
|         """ | ||||
|         if "has_vector" in self.doc.user_token_hooks: | ||||
|             return self.doc.user_token_hooks["has_vector"](self) | ||||
|  | @ -403,7 +403,7 @@ cdef class Token: | |||
|         RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array | ||||
|             representing the token's semantics. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/token#vector | ||||
|         DOCS: https://spacy.io/api/token#vector | ||||
|         """ | ||||
|         if "vector" in self.doc.user_token_hooks: | ||||
|             return self.doc.user_token_hooks["vector"](self) | ||||
|  | @ -418,7 +418,7 @@ cdef class Token: | |||
| 
 | ||||
|         RETURNS (float): The L2 norm of the vector representation. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/token#vector_norm | ||||
|         DOCS: https://spacy.io/api/token#vector_norm | ||||
|         """ | ||||
|         if "vector_norm" in self.doc.user_token_hooks: | ||||
|             return self.doc.user_token_hooks["vector_norm"](self) | ||||
|  | @ -441,7 +441,7 @@ cdef class Token: | |||
|         RETURNS (int): The number of leftward immediate children of the | ||||
|             word, in the syntactic dependency parse. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/token#n_lefts | ||||
|         DOCS: https://spacy.io/api/token#n_lefts | ||||
|         """ | ||||
|         return self.c.l_kids | ||||
| 
 | ||||
|  | @ -453,7 +453,7 @@ cdef class Token: | |||
|         RETURNS (int): The number of rightward immediate children of the | ||||
|             word, in the syntactic dependency parse. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/token#n_rights | ||||
|         DOCS: https://spacy.io/api/token#n_rights | ||||
|         """ | ||||
|         return self.c.r_kids | ||||
| 
 | ||||
|  | @ -485,7 +485,7 @@ cdef class Token: | |||
|         RETURNS (bool / None): Whether the token starts a sentence. | ||||
|             None if unknown. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/token#is_sent_start | ||||
|         DOCS: https://spacy.io/api/token#is_sent_start | ||||
|         """ | ||||
|         def __get__(self): | ||||
|             if self.c.sent_start == 0: | ||||
|  | @ -514,7 +514,7 @@ cdef class Token: | |||
|         RETURNS (bool / None): Whether the token ends a sentence. | ||||
|             None if unknown. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/token#is_sent_end | ||||
|         DOCS: https://spacy.io/api/token#is_sent_end | ||||
|         """ | ||||
|         def __get__(self): | ||||
|             if self.i + 1 == len(self.doc): | ||||
|  | @ -536,7 +536,7 @@ cdef class Token: | |||
| 
 | ||||
|         YIELDS (Token): A left-child of the token. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/token#lefts | ||||
|         DOCS: https://spacy.io/api/token#lefts | ||||
|         """ | ||||
|         cdef int nr_iter = 0 | ||||
|         cdef const TokenC* ptr = self.c - (self.i - self.c.l_edge) | ||||
|  | @ -556,7 +556,7 @@ cdef class Token: | |||
| 
 | ||||
|         YIELDS (Token): A right-child of the token. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/token#rights | ||||
|         DOCS: https://spacy.io/api/token#rights | ||||
|         """ | ||||
|         cdef const TokenC* ptr = self.c + (self.c.r_edge - self.i) | ||||
|         tokens = [] | ||||
|  | @ -578,7 +578,7 @@ cdef class Token: | |||
| 
 | ||||
|         YIELDS (Token): A child token such that `child.head==self`. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/token#children | ||||
|         DOCS: https://spacy.io/api/token#children | ||||
|         """ | ||||
|         yield from self.lefts | ||||
|         yield from self.rights | ||||
|  | @ -591,7 +591,7 @@ cdef class Token: | |||
|         YIELDS (Token): A descendent token such that | ||||
|             `self.is_ancestor(descendent) or token == self`. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/token#subtree | ||||
|         DOCS: https://spacy.io/api/token#subtree | ||||
|         """ | ||||
|         for word in self.lefts: | ||||
|             yield from word.subtree | ||||
|  | @ -622,7 +622,7 @@ cdef class Token: | |||
|         YIELDS (Token): A sequence of ancestor tokens such that | ||||
|             `ancestor.is_ancestor(self)`. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/token#ancestors | ||||
|         DOCS: https://spacy.io/api/token#ancestors | ||||
|         """ | ||||
|         cdef const TokenC* head_ptr = self.c | ||||
|         # Guard against infinite loop, no token can have | ||||
|  | @ -640,7 +640,7 @@ cdef class Token: | |||
|         descendant (Token): Another token. | ||||
|         RETURNS (bool): Whether this token is the ancestor of the descendant. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/token#is_ancestor | ||||
|         DOCS: https://spacy.io/api/token#is_ancestor | ||||
|         """ | ||||
|         if self.doc is not descendant.doc: | ||||
|             return False | ||||
|  | @ -696,7 +696,7 @@ cdef class Token: | |||
| 
 | ||||
|         RETURNS (tuple): The coordinated tokens. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/token#conjuncts | ||||
|         DOCS: https://spacy.io/api/token#conjuncts | ||||
|         """ | ||||
|         cdef Token word, child | ||||
|         if "conjuncts" in self.doc.user_token_hooks: | ||||
|  |  | |||
|  | @ -97,7 +97,7 @@ class Corpus: | |||
|     augment (Callable[Example, Iterable[Example]]): Optional data augmentation | ||||
|         function, to extrapolate additional examples from your annotations. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/corpus | ||||
|     DOCS: https://spacy.io/api/corpus | ||||
|     """ | ||||
| 
 | ||||
|     def __init__( | ||||
|  | @ -121,7 +121,7 @@ class Corpus: | |||
|         nlp (Language): The current nlp object. | ||||
|         YIELDS (Example): The examples. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/corpus#call | ||||
|         DOCS: https://spacy.io/api/corpus#call | ||||
|         """ | ||||
|         ref_docs = self.read_docbin(nlp.vocab, walk_corpus(self.path, FILE_TYPE)) | ||||
|         if self.gold_preproc: | ||||
|  | @ -206,7 +206,7 @@ class JsonlCorpus: | |||
|     limit (int): Limit corpus to a subset of examples, e.g. for debugging. | ||||
|         Defaults to 0, which indicates no limit. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/corpus#jsonlcorpus | ||||
|     DOCS: https://spacy.io/api/corpus#jsonlcorpus | ||||
|     """ | ||||
| 
 | ||||
|     file_type = "jsonl" | ||||
|  | @ -230,7 +230,7 @@ class JsonlCorpus: | |||
|         nlp (Language): The current nlp object. | ||||
|         YIELDS (Example): The example objects. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/corpus#jsonlcorpus-call | ||||
|         DOCS: https://spacy.io/api/corpus#jsonlcorpus-call | ||||
|         """ | ||||
|         for loc in walk_corpus(self.path, ".jsonl"): | ||||
|             records = srsly.read_jsonl(loc) | ||||
|  |  | |||
|  | @ -44,7 +44,7 @@ cdef class Vectors: | |||
|     the table need to be assigned - so len(list(vectors.keys())) may be | ||||
|     greater or smaller than vectors.shape[0]. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/vectors | ||||
|     DOCS: https://spacy.io/api/vectors | ||||
|     """ | ||||
|     cdef public object name | ||||
|     cdef public object data | ||||
|  | @ -59,7 +59,7 @@ cdef class Vectors: | |||
|         keys (iterable): A sequence of keys, aligned with the data. | ||||
|         name (str): A name to identify the vectors table. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/vectors#init | ||||
|         DOCS: https://spacy.io/api/vectors#init | ||||
|         """ | ||||
|         self.name = name | ||||
|         if data is None: | ||||
|  | @ -83,7 +83,7 @@ cdef class Vectors: | |||
| 
 | ||||
|         RETURNS (tuple): A `(rows, dims)` pair. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/vectors#shape | ||||
|         DOCS: https://spacy.io/api/vectors#shape | ||||
|         """ | ||||
|         return self.data.shape | ||||
| 
 | ||||
|  | @ -93,7 +93,7 @@ cdef class Vectors: | |||
| 
 | ||||
|         RETURNS (int): The vector size. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/vectors#size | ||||
|         DOCS: https://spacy.io/api/vectors#size | ||||
|         """ | ||||
|         return self.data.shape[0] * self.data.shape[1] | ||||
| 
 | ||||
|  | @ -103,7 +103,7 @@ cdef class Vectors: | |||
| 
 | ||||
|         RETURNS (bool): `True` if no slots are available for new keys. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/vectors#is_full | ||||
|         DOCS: https://spacy.io/api/vectors#is_full | ||||
|         """ | ||||
|         return self._unset.size() == 0 | ||||
| 
 | ||||
|  | @ -114,7 +114,7 @@ cdef class Vectors: | |||
| 
 | ||||
|         RETURNS (int): The number of keys in the table. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/vectors#n_keys | ||||
|         DOCS: https://spacy.io/api/vectors#n_keys | ||||
|         """ | ||||
|         return len(self.key2row) | ||||
| 
 | ||||
|  | @ -127,7 +127,7 @@ cdef class Vectors: | |||
|         key (int): The key to get the vector for. | ||||
|         RETURNS (ndarray): The vector for the key. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/vectors#getitem | ||||
|         DOCS: https://spacy.io/api/vectors#getitem | ||||
|         """ | ||||
|         i = self.key2row[key] | ||||
|         if i is None: | ||||
|  | @ -141,7 +141,7 @@ cdef class Vectors: | |||
|         key (int): The key to set the vector for. | ||||
|         vector (ndarray): The vector to set. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/vectors#setitem | ||||
|         DOCS: https://spacy.io/api/vectors#setitem | ||||
|         """ | ||||
|         i = self.key2row[key] | ||||
|         self.data[i] = vector | ||||
|  | @ -153,7 +153,7 @@ cdef class Vectors: | |||
| 
 | ||||
|         YIELDS (int): A key in the table. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/vectors#iter | ||||
|         DOCS: https://spacy.io/api/vectors#iter | ||||
|         """ | ||||
|         yield from self.key2row | ||||
| 
 | ||||
|  | @ -162,7 +162,7 @@ cdef class Vectors: | |||
| 
 | ||||
|         RETURNS (int): The number of vectors in the data. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/vectors#len | ||||
|         DOCS: https://spacy.io/api/vectors#len | ||||
|         """ | ||||
|         return self.data.shape[0] | ||||
| 
 | ||||
|  | @ -172,7 +172,7 @@ cdef class Vectors: | |||
|         key (int): The key to check. | ||||
|         RETURNS (bool): Whether the key has a vector entry. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/vectors#contains | ||||
|         DOCS: https://spacy.io/api/vectors#contains | ||||
|         """ | ||||
|         return key in self.key2row | ||||
| 
 | ||||
|  | @ -189,7 +189,7 @@ cdef class Vectors: | |||
|         inplace (bool): Reallocate the memory. | ||||
|         RETURNS (list): The removed items as a list of `(key, row)` tuples. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/vectors#resize | ||||
|         DOCS: https://spacy.io/api/vectors#resize | ||||
|         """ | ||||
|         xp = get_array_module(self.data) | ||||
|         if inplace: | ||||
|  | @ -224,7 +224,7 @@ cdef class Vectors: | |||
| 
 | ||||
|         YIELDS (ndarray): A vector in the table. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/vectors#values | ||||
|         DOCS: https://spacy.io/api/vectors#values | ||||
|         """ | ||||
|         for row, vector in enumerate(range(self.data.shape[0])): | ||||
|             if not self._unset.count(row): | ||||
|  | @ -235,7 +235,7 @@ cdef class Vectors: | |||
| 
 | ||||
|         YIELDS (tuple): A key/vector pair. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/vectors#items | ||||
|         DOCS: https://spacy.io/api/vectors#items | ||||
|         """ | ||||
|         for key, row in self.key2row.items(): | ||||
|             yield key, self.data[row] | ||||
|  | @ -281,7 +281,7 @@ cdef class Vectors: | |||
|         row (int / None): The row number of a vector to map the key to. | ||||
|         RETURNS (int): The row the vector was added to. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/vectors#add | ||||
|         DOCS: https://spacy.io/api/vectors#add | ||||
|         """ | ||||
|         # use int for all keys and rows in key2row for more efficient access | ||||
|         # and serialization | ||||
|  | @ -368,7 +368,7 @@ cdef class Vectors: | |||
|         path (str / Path): A path to a directory, which will be created if | ||||
|             it doesn't exists. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/vectors#to_disk | ||||
|         DOCS: https://spacy.io/api/vectors#to_disk | ||||
|         """ | ||||
|         xp = get_array_module(self.data) | ||||
|         if xp is numpy: | ||||
|  | @ -396,7 +396,7 @@ cdef class Vectors: | |||
|         path (str / Path): Directory path, string or Path-like object. | ||||
|         RETURNS (Vectors): The modified object. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/vectors#from_disk | ||||
|         DOCS: https://spacy.io/api/vectors#from_disk | ||||
|         """ | ||||
|         def load_key2row(path): | ||||
|             if path.exists(): | ||||
|  | @ -432,7 +432,7 @@ cdef class Vectors: | |||
|         exclude (list): String names of serialization fields to exclude. | ||||
|         RETURNS (bytes): The serialized form of the `Vectors` object. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/vectors#to_bytes | ||||
|         DOCS: https://spacy.io/api/vectors#to_bytes | ||||
|         """ | ||||
|         def serialize_weights(): | ||||
|             if hasattr(self.data, "to_bytes"): | ||||
|  | @ -453,7 +453,7 @@ cdef class Vectors: | |||
|         exclude (list): String names of serialization fields to exclude. | ||||
|         RETURNS (Vectors): The `Vectors` object. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/vectors#from_bytes | ||||
|         DOCS: https://spacy.io/api/vectors#from_bytes | ||||
|         """ | ||||
|         def deserialize_weights(b): | ||||
|             if hasattr(self.data, "from_bytes"): | ||||
|  |  | |||
|  | @ -47,7 +47,7 @@ cdef class Vocab: | |||
|     instance also provides access to the `StringStore`, and owns underlying | ||||
|     C-data that is shared between `Doc` objects. | ||||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/vocab | ||||
|     DOCS: https://spacy.io/api/vocab | ||||
|     """ | ||||
|     def __init__(self, lex_attr_getters=None, strings=tuple(), lookups=None, | ||||
|                  oov_prob=-20., vectors_name=None, writing_system={}, | ||||
|  | @ -110,7 +110,7 @@ cdef class Vocab: | |||
|             available bit will be chosen. | ||||
|         RETURNS (int): The integer ID by which the flag value can be checked. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/vocab#add_flag | ||||
|         DOCS: https://spacy.io/api/vocab#add_flag | ||||
|         """ | ||||
|         if flag_id == -1: | ||||
|             for bit in range(1, 64): | ||||
|  | @ -202,7 +202,7 @@ cdef class Vocab: | |||
|         string (unicode): The ID string. | ||||
|         RETURNS (bool) Whether the string has an entry in the vocabulary. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/vocab#contains | ||||
|         DOCS: https://spacy.io/api/vocab#contains | ||||
|         """ | ||||
|         cdef hash_t int_key | ||||
|         if isinstance(key, bytes): | ||||
|  | @ -219,7 +219,7 @@ cdef class Vocab: | |||
| 
 | ||||
|         YIELDS (Lexeme): An entry in the vocabulary. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/vocab#iter | ||||
|         DOCS: https://spacy.io/api/vocab#iter | ||||
|         """ | ||||
|         cdef attr_t key | ||||
|         cdef size_t addr | ||||
|  | @ -242,7 +242,7 @@ cdef class Vocab: | |||
|             >>> apple = nlp.vocab.strings["apple"] | ||||
|             >>> assert nlp.vocab[apple] == nlp.vocab[u"apple"] | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/vocab#getitem | ||||
|         DOCS: https://spacy.io/api/vocab#getitem | ||||
|         """ | ||||
|         cdef attr_t orth | ||||
|         if isinstance(id_or_string, unicode): | ||||
|  | @ -310,7 +310,7 @@ cdef class Vocab: | |||
|             word was mapped to, and `score` the similarity score between the | ||||
|             two words. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/vocab#prune_vectors | ||||
|         DOCS: https://spacy.io/api/vocab#prune_vectors | ||||
|         """ | ||||
|         xp = get_array_module(self.vectors.data) | ||||
|         # Make sure all vectors are in the vocab | ||||
|  | @ -353,7 +353,7 @@ cdef class Vocab: | |||
|             and shape determined by the `vocab.vectors` instance. Usually, a | ||||
|             numpy ndarray of shape (300,) and dtype float32. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/vocab#get_vector | ||||
|         DOCS: https://spacy.io/api/vocab#get_vector | ||||
|         """ | ||||
|         if isinstance(orth, str): | ||||
|             orth = self.strings.add(orth) | ||||
|  | @ -400,7 +400,7 @@ cdef class Vocab: | |||
|         orth (int / unicode): The word. | ||||
|         vector (numpy.ndarray[ndim=1, dtype='float32']): The vector to set. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/vocab#set_vector | ||||
|         DOCS: https://spacy.io/api/vocab#set_vector | ||||
|         """ | ||||
|         if isinstance(orth, str): | ||||
|             orth = self.strings.add(orth) | ||||
|  | @ -422,7 +422,7 @@ cdef class Vocab: | |||
|         orth (int / unicode): The word. | ||||
|         RETURNS (bool): Whether the word has a vector. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/vocab#has_vector | ||||
|         DOCS: https://spacy.io/api/vocab#has_vector | ||||
|         """ | ||||
|         if isinstance(orth, str): | ||||
|             orth = self.strings.add(orth) | ||||
|  | @ -448,7 +448,7 @@ cdef class Vocab: | |||
|             it doesn't exist. | ||||
|         exclude (list): String names of serialization fields to exclude. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/vocab#to_disk | ||||
|         DOCS: https://spacy.io/api/vocab#to_disk | ||||
|         """ | ||||
|         path = util.ensure_path(path) | ||||
|         if not path.exists(): | ||||
|  | @ -469,7 +469,7 @@ cdef class Vocab: | |||
|         exclude (list): String names of serialization fields to exclude. | ||||
|         RETURNS (Vocab): The modified `Vocab` object. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/vocab#to_disk | ||||
|         DOCS: https://spacy.io/api/vocab#from_disk | ||||
|         """ | ||||
|         path = util.ensure_path(path) | ||||
|         getters = ["strings", "vectors"] | ||||
|  | @ -494,7 +494,7 @@ cdef class Vocab: | |||
|         exclude (list): String names of serialization fields to exclude. | ||||
|         RETURNS (bytes): The serialized form of the `Vocab` object. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/vocab#to_bytes | ||||
|         DOCS: https://spacy.io/api/vocab#to_bytes | ||||
|         """ | ||||
|         def deserialize_vectors(): | ||||
|             if self.vectors is None: | ||||
|  | @ -516,7 +516,7 @@ cdef class Vocab: | |||
|         exclude (list): String names of serialization fields to exclude. | ||||
|         RETURNS (Vocab): The `Vocab` object. | ||||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/vocab#from_bytes | ||||
|         DOCS: https://spacy.io/api/vocab#from_bytes | ||||
|         """ | ||||
|         def serialize_vectors(b): | ||||
|             if self.vectors is None: | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user