Replace links to nightly docs [ci skip]

This commit is contained in:
Ines Montani 2021-01-30 20:09:38 +11:00
parent b26a3daa9a
commit d0c3775712
57 changed files with 389 additions and 389 deletions

View File

@ -29,7 +29,7 @@ COMMAND = "python -m spacy"
NAME = "spacy" NAME = "spacy"
HELP = """spaCy Command-line Interface HELP = """spaCy Command-line Interface
DOCS: https://nightly.spacy.io/api/cli DOCS: https://spacy.io/api/cli
""" """
PROJECT_HELP = f"""Command-line interface for spaCy projects and templates. PROJECT_HELP = f"""Command-line interface for spaCy projects and templates.
You'd typically start by cloning a project template to a local directory and You'd typically start by cloning a project template to a local directory and

View File

@ -64,7 +64,7 @@ def convert_cli(
is written to stdout, so you can pipe them forward to a JSON file: is written to stdout, so you can pipe them forward to a JSON file:
$ spacy convert some_file.conllu --file-type json > some_file.json $ spacy convert some_file.conllu --file-type json > some_file.json
DOCS: https://nightly.spacy.io/api/cli#convert DOCS: https://spacy.io/api/cli#convert
""" """
if isinstance(file_type, FileTypes): if isinstance(file_type, FileTypes):
# We get an instance of the FileTypes from the CLI so we need its string value # We get an instance of the FileTypes from the CLI so we need its string value
@ -268,6 +268,6 @@ def _get_converter(msg, converter, input_path):
msg.warn( msg.warn(
"Can't automatically detect NER format. " "Can't automatically detect NER format. "
"Conversion may not succeed. " "Conversion may not succeed. "
"See https://nightly.spacy.io/api/cli#convert" "See https://spacy.io/api/cli#convert"
) )
return converter return converter

View File

@ -34,7 +34,7 @@ def debug_config_cli(
as command line options. For instance, --training.batch_size 128 overrides as command line options. For instance, --training.batch_size 128 overrides
the value of "batch_size" in the block "[training]". the value of "batch_size" in the block "[training]".
DOCS: https://nightly.spacy.io/api/cli#debug-config DOCS: https://spacy.io/api/cli#debug-config
""" """
overrides = parse_config_overrides(ctx.args) overrides = parse_config_overrides(ctx.args)
import_code(code_path) import_code(code_path)

View File

@ -50,7 +50,7 @@ def debug_data_cli(
useful stats, and can help you find problems like invalid entity annotations, useful stats, and can help you find problems like invalid entity annotations,
cyclic dependencies, low data labels and more. cyclic dependencies, low data labels and more.
DOCS: https://nightly.spacy.io/api/cli#debug-data DOCS: https://spacy.io/api/cli#debug-data
""" """
if ctx.command.name == "debug-data": if ctx.command.name == "debug-data":
msg.warn( msg.warn(

View File

@ -40,7 +40,7 @@ def debug_model_cli(
Analyze a Thinc model implementation. Includes checks for internal structure Analyze a Thinc model implementation. Includes checks for internal structure
and activations during training. and activations during training.
DOCS: https://nightly.spacy.io/api/cli#debug-model DOCS: https://spacy.io/api/cli#debug-model
""" """
setup_gpu(use_gpu) setup_gpu(use_gpu)
layers = string_to_list(layers, intify=True) layers = string_to_list(layers, intify=True)

View File

@ -28,7 +28,7 @@ def download_cli(
additional arguments provided to this command will be passed to `pip install` additional arguments provided to this command will be passed to `pip install`
on package installation. on package installation.
DOCS: https://nightly.spacy.io/api/cli#download DOCS: https://spacy.io/api/cli#download
AVAILABLE PACKAGES: https://spacy.io/models AVAILABLE PACKAGES: https://spacy.io/models
""" """
download(model, direct, *ctx.args) download(model, direct, *ctx.args)
@ -80,7 +80,7 @@ def get_compatibility() -> dict:
f"Couldn't fetch compatibility table. Please find a package for your spaCy " f"Couldn't fetch compatibility table. Please find a package for your spaCy "
f"installation (v{about.__version__}), and download it manually. " f"installation (v{about.__version__}), and download it manually. "
f"For more details, see the documentation: " f"For more details, see the documentation: "
f"https://nightly.spacy.io/usage/models", f"https://spacy.io/usage/models",
exits=1, exits=1,
) )
comp_table = r.json() comp_table = r.json()

View File

@ -36,7 +36,7 @@ def evaluate_cli(
dependency parses in an HTML file, set an output directory as the dependency parses in an HTML file, set an output directory as the
displacy_path argument. displacy_path argument.
DOCS: https://nightly.spacy.io/api/cli#evaluate DOCS: https://spacy.io/api/cli#evaluate
""" """
import_code(code_path) import_code(code_path)
evaluate( evaluate(

View File

@ -23,7 +23,7 @@ def info_cli(
print its meta information. Flag --markdown prints details in Markdown for easy print its meta information. Flag --markdown prints details in Markdown for easy
copy-pasting to GitHub issues. copy-pasting to GitHub issues.
DOCS: https://nightly.spacy.io/api/cli#info DOCS: https://spacy.io/api/cli#info
""" """
exclude = string_to_list(exclude) exclude = string_to_list(exclude)
info(model, markdown=markdown, silent=silent, exclude=exclude) info(model, markdown=markdown, silent=silent, exclude=exclude)

View File

@ -41,7 +41,7 @@ def init_config_cli(
optimal settings for your use case. This includes the choice of architecture, optimal settings for your use case. This includes the choice of architecture,
pretrained weights and related hyperparameters. pretrained weights and related hyperparameters.
DOCS: https://nightly.spacy.io/api/cli#init-config DOCS: https://spacy.io/api/cli#init-config
""" """
if isinstance(optimize, Optimizations): # instance of enum from the CLI if isinstance(optimize, Optimizations): # instance of enum from the CLI
optimize = optimize.value optimize = optimize.value
@ -78,9 +78,9 @@ def init_fill_config_cli(
from the default config and will create all objects, check the registered from the default config and will create all objects, check the registered
functions for their default values and update the base config. This command functions for their default values and update the base config. This command
can be used with a config generated via the training quickstart widget: can be used with a config generated via the training quickstart widget:
https://nightly.spacy.io/usage/training#quickstart https://spacy.io/usage/training#quickstart
DOCS: https://nightly.spacy.io/api/cli#init-fill-config DOCS: https://spacy.io/api/cli#init-fill-config
""" """
fill_config(output_file, base_path, pretraining=pretraining, diff=diff) fill_config(output_file, base_path, pretraining=pretraining, diff=diff)

View File

@ -38,7 +38,7 @@ def package_cli(
registered functions like pipeline components), they are copied into the registered functions like pipeline components), they are copied into the
package and imported in the __init__.py. package and imported in the __init__.py.
DOCS: https://nightly.spacy.io/api/cli#package DOCS: https://spacy.io/api/cli#package
""" """
create_sdist, create_wheel = get_build_formats(string_to_list(build)) create_sdist, create_wheel = get_build_formats(string_to_list(build))
code_paths = [Path(p.strip()) for p in string_to_list(code_paths)] code_paths = [Path(p.strip()) for p in string_to_list(code_paths)]

View File

@ -44,7 +44,7 @@ def pretrain_cli(
all settings are the same between pretraining and training. Ideally, all settings are the same between pretraining and training. Ideally,
this is done by using the same config file for both commands. this is done by using the same config file for both commands.
DOCS: https://nightly.spacy.io/api/cli#pretrain DOCS: https://spacy.io/api/cli#pretrain
""" """
config_overrides = parse_config_overrides(ctx.args) config_overrides = parse_config_overrides(ctx.args)
import_code(code_path) import_code(code_path)

View File

@ -30,7 +30,7 @@ def profile_cli(
It can either be provided as a JSONL file, or be read from sys.stdin. It can either be provided as a JSONL file, or be read from sys.stdin.
If no input file is specified, the IMDB dataset is loaded via Thinc. If no input file is specified, the IMDB dataset is loaded via Thinc.
DOCS: https://nightly.spacy.io/api/cli#debug-profile DOCS: https://spacy.io/api/cli#debug-profile
""" """
if ctx.parent.command.name == NAME: # called as top-level command if ctx.parent.command.name == NAME: # called as top-level command
msg.warn( msg.warn(

View File

@ -22,7 +22,7 @@ def project_assets_cli(
provided in the project.yml, the file is only downloaded if no local file provided in the project.yml, the file is only downloaded if no local file
with the same checksum exists. with the same checksum exists.
DOCS: https://nightly.spacy.io/api/cli#project-assets DOCS: https://spacy.io/api/cli#project-assets
""" """
project_assets(project_dir, sparse_checkout=sparse_checkout) project_assets(project_dir, sparse_checkout=sparse_checkout)

View File

@ -25,7 +25,7 @@ def project_clone_cli(
defaults to the official spaCy template repo, but can be customized defaults to the official spaCy template repo, but can be customized
(including using a private repo). (including using a private repo).
DOCS: https://nightly.spacy.io/api/cli#project-clone DOCS: https://spacy.io/api/cli#project-clone
""" """
if dest is None: if dest is None:
dest = Path.cwd() / Path(name).parts[-1] dest = Path.cwd() / Path(name).parts[-1]

View File

@ -5,7 +5,7 @@ from ...util import working_dir
from .._util import project_cli, Arg, Opt, PROJECT_FILE, load_project_config from .._util import project_cli, Arg, Opt, PROJECT_FILE, load_project_config
DOCS_URL = "https://nightly.spacy.io" DOCS_URL = "https://spacy.io"
INTRO = f"""> ⚠️ This project template uses the new [**spaCy v3.0**]({DOCS_URL}), which INTRO = f"""> ⚠️ This project template uses the new [**spaCy v3.0**]({DOCS_URL}), which
> is currently available as a nightly pre-release. You can install it from pip as `spacy-nightly`: > is currently available as a nightly pre-release. You can install it from pip as `spacy-nightly`:
> `pip install spacy-nightly`. Make sure to use a fresh virtual environment.""" > `pip install spacy-nightly`. Make sure to use a fresh virtual environment."""
@ -44,7 +44,7 @@ def project_document_cli(
auto-generated section and only the auto-generated docs will be replaced auto-generated section and only the auto-generated docs will be replaced
when you re-run the command. when you re-run the command.
DOCS: https://nightly.spacy.io/api/cli#project-document DOCS: https://spacy.io/api/cli#project-document
""" """
project_document(project_dir, output_file, no_emoji=no_emoji) project_document(project_dir, output_file, no_emoji=no_emoji)

View File

@ -34,7 +34,7 @@ def project_update_dvc_cli(
workflow is used. The DVC config will only be updated if the project.yml workflow is used. The DVC config will only be updated if the project.yml
changed. changed.
DOCS: https://nightly.spacy.io/api/cli#project-dvc DOCS: https://spacy.io/api/cli#project-dvc
""" """
project_update_dvc(project_dir, workflow, verbose=verbose, force=force) project_update_dvc(project_dir, workflow, verbose=verbose, force=force)

View File

@ -19,7 +19,7 @@ def project_pull_cli(
A storage can be anything that the smart-open library can upload to, e.g. A storage can be anything that the smart-open library can upload to, e.g.
AWS, Google Cloud Storage, SSH, local directories etc. AWS, Google Cloud Storage, SSH, local directories etc.
DOCS: https://nightly.spacy.io/api/cli#project-pull DOCS: https://spacy.io/api/cli#project-pull
""" """
for url, output_path in project_pull(project_dir, remote): for url, output_path in project_pull(project_dir, remote):
if url is not None: if url is not None:

View File

@ -18,7 +18,7 @@ def project_push_cli(
the smart-open library can upload to, e.g. AWS, Google Cloud Storage, SSH, the smart-open library can upload to, e.g. AWS, Google Cloud Storage, SSH,
local directories etc. local directories etc.
DOCS: https://nightly.spacy.io/api/cli#project-push DOCS: https://spacy.io/api/cli#project-push
""" """
for output_path, url in project_push(project_dir, remote): for output_path, url in project_push(project_dir, remote):
if url is None: if url is None:

View File

@ -28,7 +28,7 @@ def project_run_cli(
commands define dependencies and/or outputs, they will only be re-run if commands define dependencies and/or outputs, they will only be re-run if
state has changed. state has changed.
DOCS: https://nightly.spacy.io/api/cli#project-run DOCS: https://spacy.io/api/cli#project-run
""" """
if show_help or not subcommand: if show_help or not subcommand:
print_run_help(project_dir, subcommand) print_run_help(project_dir, subcommand)

View File

@ -37,7 +37,7 @@ def train_cli(
used to register custom functions and architectures that can then be used to register custom functions and architectures that can then be
referenced in the config. referenced in the config.
DOCS: https://nightly.spacy.io/api/cli#train DOCS: https://spacy.io/api/cli#train
""" """
util.logger.setLevel(logging.DEBUG if verbose else logging.INFO) util.logger.setLevel(logging.DEBUG if verbose else logging.INFO)
# Make sure all files and paths exist if they are needed # Make sure all files and paths exist if they are needed

View File

@ -17,7 +17,7 @@ def validate_cli():
if the installed packages are compatible and shows upgrade instructions if if the installed packages are compatible and shows upgrade instructions if
available. Should be run after `pip install -U spacy`. available. Should be run after `pip install -U spacy`.
DOCS: https://nightly.spacy.io/api/cli#validate DOCS: https://spacy.io/api/cli#validate
""" """
validate() validate()

View File

@ -1,8 +1,8 @@
""" """
spaCy's built-in visualization suite for dependencies and named entities. spaCy's built-in visualization suite for dependencies and named entities.
DOCS: https://nightly.spacy.io/api/top-level#displacy DOCS: https://spacy.io/api/top-level#displacy
USAGE: https://nightly.spacy.io/usage/visualizers USAGE: https://spacy.io/usage/visualizers
""" """
from typing import Union, Iterable, Optional, Dict, Any, Callable from typing import Union, Iterable, Optional, Dict, Any, Callable
import warnings import warnings
@ -37,8 +37,8 @@ def render(
manual (bool): Don't parse `Doc` and instead expect a dict/list of dicts. manual (bool): Don't parse `Doc` and instead expect a dict/list of dicts.
RETURNS (str): Rendered HTML markup. RETURNS (str): Rendered HTML markup.
DOCS: https://nightly.spacy.io/api/top-level#displacy.render DOCS: https://spacy.io/api/top-level#displacy.render
USAGE: https://nightly.spacy.io/usage/visualizers USAGE: https://spacy.io/usage/visualizers
""" """
factories = { factories = {
"dep": (DependencyRenderer, parse_deps), "dep": (DependencyRenderer, parse_deps),
@ -88,8 +88,8 @@ def serve(
port (int): Port to serve visualisation. port (int): Port to serve visualisation.
host (str): Host to serve visualisation. host (str): Host to serve visualisation.
DOCS: https://nightly.spacy.io/api/top-level#displacy.serve DOCS: https://spacy.io/api/top-level#displacy.serve
USAGE: https://nightly.spacy.io/usage/visualizers USAGE: https://spacy.io/usage/visualizers
""" """
from wsgiref import simple_server from wsgiref import simple_server

View File

@ -20,7 +20,7 @@ class Warnings:
"generate a dependency visualization for it. Make sure the Doc " "generate a dependency visualization for it. Make sure the Doc "
"was processed with a model that supports dependency parsing, and " "was processed with a model that supports dependency parsing, and "
"not just a language class like `English()`. For more info, see " "not just a language class like `English()`. For more info, see "
"the docs:\nhttps://nightly.spacy.io/usage/models") "the docs:\nhttps://spacy.io/usage/models")
W006 = ("No entities to visualize found in Doc object. If this is " W006 = ("No entities to visualize found in Doc object. If this is "
"surprising to you, make sure the Doc was processed using a model " "surprising to you, make sure the Doc was processed using a model "
"that supports named entity recognition, and check the `doc.ents` " "that supports named entity recognition, and check the `doc.ents` "
@ -86,7 +86,7 @@ class Warnings:
"the config block to replace its token-to-vector listener with a copy " "the config block to replace its token-to-vector listener with a copy "
"and make it independent. For example, `replace_listeners = " "and make it independent. For example, `replace_listeners = "
"[\"model.tok2vec\"]` See the documentation for details: " "[\"model.tok2vec\"]` See the documentation for details: "
"https://nightly.spacy.io/usage/training#config-components-listeners") "https://spacy.io/usage/training#config-components-listeners")
W087 = ("Component '{name}' will be (re)trained, but the component '{listener}' " W087 = ("Component '{name}' will be (re)trained, but the component '{listener}' "
"depends on it via a listener and is frozen. This means that the " "depends on it via a listener and is frozen. This means that the "
"performance of '{listener}' will be degraded. You can either freeze " "performance of '{listener}' will be degraded. You can either freeze "
@ -95,12 +95,12 @@ class Warnings:
"the config block to replace its token-to-vector listener with a copy " "the config block to replace its token-to-vector listener with a copy "
"and make it independent. For example, `replace_listeners = " "and make it independent. For example, `replace_listeners = "
"[\"model.tok2vec\"]` See the documentation for details: " "[\"model.tok2vec\"]` See the documentation for details: "
"https://nightly.spacy.io/usage/training#config-components-listeners") "https://spacy.io/usage/training#config-components-listeners")
W088 = ("The pipeline component {name} implements a `begin_training` " W088 = ("The pipeline component {name} implements a `begin_training` "
"method, which won't be called by spaCy. As of v3.0, `begin_training` " "method, which won't be called by spaCy. As of v3.0, `begin_training` "
"has been renamed to `initialize`, so you likely want to rename the " "has been renamed to `initialize`, so you likely want to rename the "
"component method. See the documentation for details: " "component method. See the documentation for details: "
"https://nightly.spacy.io/api/language#initialize") "https://spacy.io/api/language#initialize")
W089 = ("As of spaCy v3.0, the `nlp.begin_training` method has been renamed " W089 = ("As of spaCy v3.0, the `nlp.begin_training` method has been renamed "
"to `nlp.initialize`.") "to `nlp.initialize`.")
W090 = ("Could not locate any {format} files in path '{path}'.") W090 = ("Could not locate any {format} files in path '{path}'.")
@ -180,7 +180,7 @@ class Errors:
E010 = ("Word vectors set to length 0. This may be because you don't have " E010 = ("Word vectors set to length 0. This may be because you don't have "
"a model installed or loaded, or because your model doesn't " "a model installed or loaded, or because your model doesn't "
"include word vectors. For more info, see the docs:\n" "include word vectors. For more info, see the docs:\n"
"https://nightly.spacy.io/usage/models") "https://spacy.io/usage/models")
E011 = ("Unknown operator: '{op}'. Options: {opts}") E011 = ("Unknown operator: '{op}'. Options: {opts}")
E012 = ("Cannot add pattern for zero tokens to matcher.\nKey: {key}") E012 = ("Cannot add pattern for zero tokens to matcher.\nKey: {key}")
E016 = ("MultitaskObjective target should be function or one of: dep, " E016 = ("MultitaskObjective target should be function or one of: dep, "
@ -211,7 +211,7 @@ class Errors:
E028 = ("`words` expects a list of unicode strings, but got bytes instance: {value}") E028 = ("`words` expects a list of unicode strings, but got bytes instance: {value}")
E029 = ("`noun_chunks` requires the dependency parse, which requires a " E029 = ("`noun_chunks` requires the dependency parse, which requires a "
"statistical model to be installed and loaded. For more info, see " "statistical model to be installed and loaded. For more info, see "
"the documentation:\nhttps://nightly.spacy.io/usage/models") "the documentation:\nhttps://spacy.io/usage/models")
E030 = ("Sentence boundaries unset. You can add the 'sentencizer' " E030 = ("Sentence boundaries unset. You can add the 'sentencizer' "
"component to the pipeline with: `nlp.add_pipe('sentencizer')`. " "component to the pipeline with: `nlp.add_pipe('sentencizer')`. "
"Alternatively, add the dependency parser or sentence recognizer, " "Alternatively, add the dependency parser or sentence recognizer, "
@ -318,7 +318,7 @@ class Errors:
E102 = ("Can't merge non-disjoint spans. '{token}' is already part of " E102 = ("Can't merge non-disjoint spans. '{token}' is already part of "
"tokens to merge. If you want to find the longest non-overlapping " "tokens to merge. If you want to find the longest non-overlapping "
"spans, you can use the util.filter_spans helper:\n" "spans, you can use the util.filter_spans helper:\n"
"https://nightly.spacy.io/api/top-level#util.filter_spans") "https://spacy.io/api/top-level#util.filter_spans")
E103 = ("Trying to set conflicting doc.ents: '{span1}' and '{span2}'. A " E103 = ("Trying to set conflicting doc.ents: '{span1}' and '{span2}'. A "
"token can only be part of one entity, so make sure the entities " "token can only be part of one entity, so make sure the entities "
"you're setting don't overlap.") "you're setting don't overlap.")
@ -536,9 +536,9 @@ class Errors:
"solve this, remove the existing directories in the output directory.") "solve this, remove the existing directories in the output directory.")
E902 = ("The sentence-per-line IOB/IOB2 file is not formatted correctly. " E902 = ("The sentence-per-line IOB/IOB2 file is not formatted correctly. "
"Try checking whitespace and delimiters. See " "Try checking whitespace and delimiters. See "
"https://nightly.spacy.io/api/cli#convert") "https://spacy.io/api/cli#convert")
E903 = ("The token-per-line NER file is not formatted correctly. Try checking " E903 = ("The token-per-line NER file is not formatted correctly. Try checking "
"whitespace and delimiters. See https://nightly.spacy.io/api/cli#convert") "whitespace and delimiters. See https://spacy.io/api/cli#convert")
E904 = ("Cannot initialize StaticVectors layer: nO dimension unset. This " E904 = ("Cannot initialize StaticVectors layer: nO dimension unset. This "
"dimension refers to the output width, after the linear projection " "dimension refers to the output width, after the linear projection "
"has been applied.") "has been applied.")

View File

@ -23,7 +23,7 @@ cdef class Candidate:
algorithm which will disambiguate the various candidates to the correct one. algorithm which will disambiguate the various candidates to the correct one.
Each candidate (alias, entity) pair is assigned to a certain prior probability. Each candidate (alias, entity) pair is assigned to a certain prior probability.
DOCS: https://nightly.spacy.io/api/kb/#candidate_init DOCS: https://spacy.io/api/kb/#candidate_init
""" """
def __init__(self, KnowledgeBase kb, entity_hash, entity_freq, entity_vector, alias_hash, prior_prob): def __init__(self, KnowledgeBase kb, entity_hash, entity_freq, entity_vector, alias_hash, prior_prob):
@ -81,7 +81,7 @@ cdef class KnowledgeBase:
"""A `KnowledgeBase` instance stores unique identifiers for entities and their textual aliases, """A `KnowledgeBase` instance stores unique identifiers for entities and their textual aliases,
to support entity linking of named entities to real-world concepts. to support entity linking of named entities to real-world concepts.
DOCS: https://nightly.spacy.io/api/kb DOCS: https://spacy.io/api/kb
""" """
def __init__(self, Vocab vocab, entity_vector_length): def __init__(self, Vocab vocab, entity_vector_length):

View File

@ -104,7 +104,7 @@ class Language:
object and processing pipeline. object and processing pipeline.
lang (str): Two-letter language ID, i.e. ISO code. lang (str): Two-letter language ID, i.e. ISO code.
DOCS: https://nightly.spacy.io/api/language DOCS: https://spacy.io/api/language
""" """
Defaults = BaseDefaults Defaults = BaseDefaults
@ -141,7 +141,7 @@ class Language:
returns a tokenizer. returns a tokenizer.
batch_size (int): Default batch size for pipe and evaluate. batch_size (int): Default batch size for pipe and evaluate.
DOCS: https://nightly.spacy.io/api/language#init DOCS: https://spacy.io/api/language#init
""" """
# We're only calling this to import all factories provided via entry # We're only calling this to import all factories provided via entry
# points. The factory decorator applied to these functions takes care # points. The factory decorator applied to these functions takes care
@ -194,7 +194,7 @@ class Language:
RETURNS (Dict[str, Any]): The meta. RETURNS (Dict[str, Any]): The meta.
DOCS: https://nightly.spacy.io/api/language#meta DOCS: https://spacy.io/api/language#meta
""" """
spacy_version = util.get_model_version_range(about.__version__) spacy_version = util.get_model_version_range(about.__version__)
if self.vocab.lang: if self.vocab.lang:
@ -235,7 +235,7 @@ class Language:
RETURNS (thinc.api.Config): The config. RETURNS (thinc.api.Config): The config.
DOCS: https://nightly.spacy.io/api/language#config DOCS: https://spacy.io/api/language#config
""" """
self._config.setdefault("nlp", {}) self._config.setdefault("nlp", {})
self._config.setdefault("training", {}) self._config.setdefault("training", {})
@ -444,7 +444,7 @@ class Language:
the score won't be shown in the logs or be weighted. the score won't be shown in the logs or be weighted.
func (Optional[Callable]): Factory function if not used as a decorator. func (Optional[Callable]): Factory function if not used as a decorator.
DOCS: https://nightly.spacy.io/api/language#factory DOCS: https://spacy.io/api/language#factory
""" """
if not isinstance(name, str): if not isinstance(name, str):
raise ValueError(Errors.E963.format(decorator="factory")) raise ValueError(Errors.E963.format(decorator="factory"))
@ -524,7 +524,7 @@ class Language:
Used for pipeline analysis. Used for pipeline analysis.
func (Optional[Callable]): Factory function if not used as a decorator. func (Optional[Callable]): Factory function if not used as a decorator.
DOCS: https://nightly.spacy.io/api/language#component DOCS: https://spacy.io/api/language#component
""" """
if name is not None and not isinstance(name, str): if name is not None and not isinstance(name, str):
raise ValueError(Errors.E963.format(decorator="component")) raise ValueError(Errors.E963.format(decorator="component"))
@ -590,7 +590,7 @@ class Language:
name (str): Name of pipeline component to get. name (str): Name of pipeline component to get.
RETURNS (callable): The pipeline component. RETURNS (callable): The pipeline component.
DOCS: https://nightly.spacy.io/api/language#get_pipe DOCS: https://spacy.io/api/language#get_pipe
""" """
for pipe_name, component in self._components: for pipe_name, component in self._components:
if pipe_name == name: if pipe_name == name:
@ -619,7 +619,7 @@ class Language:
arguments and types expected by the factory. arguments and types expected by the factory.
RETURNS (Callable[[Doc], Doc]): The pipeline component. RETURNS (Callable[[Doc], Doc]): The pipeline component.
DOCS: https://nightly.spacy.io/api/language#create_pipe DOCS: https://spacy.io/api/language#create_pipe
""" """
name = name if name is not None else factory_name name = name if name is not None else factory_name
if not isinstance(config, dict): if not isinstance(config, dict):
@ -740,7 +740,7 @@ class Language:
arguments and types expected by the factory. arguments and types expected by the factory.
RETURNS (Callable[[Doc], Doc]): The pipeline component. RETURNS (Callable[[Doc], Doc]): The pipeline component.
DOCS: https://nightly.spacy.io/api/language#add_pipe DOCS: https://spacy.io/api/language#add_pipe
""" """
if not isinstance(factory_name, str): if not isinstance(factory_name, str):
bad_val = repr(factory_name) bad_val = repr(factory_name)
@ -838,7 +838,7 @@ class Language:
name (str): Name of the component. name (str): Name of the component.
RETURNS (bool): Whether a component of the name exists in the pipeline. RETURNS (bool): Whether a component of the name exists in the pipeline.
DOCS: https://nightly.spacy.io/api/language#has_pipe DOCS: https://spacy.io/api/language#has_pipe
""" """
return name in self.pipe_names return name in self.pipe_names
@ -860,7 +860,7 @@ class Language:
arguments and types expected by the factory. arguments and types expected by the factory.
RETURNS (Callable[[Doc], Doc]): The new pipeline component. RETURNS (Callable[[Doc], Doc]): The new pipeline component.
DOCS: https://nightly.spacy.io/api/language#replace_pipe DOCS: https://spacy.io/api/language#replace_pipe
""" """
if name not in self.pipe_names: if name not in self.pipe_names:
raise ValueError(Errors.E001.format(name=name, opts=self.pipe_names)) raise ValueError(Errors.E001.format(name=name, opts=self.pipe_names))
@ -891,7 +891,7 @@ class Language:
old_name (str): Name of the component to rename. old_name (str): Name of the component to rename.
new_name (str): New name of the component. new_name (str): New name of the component.
DOCS: https://nightly.spacy.io/api/language#rename_pipe DOCS: https://spacy.io/api/language#rename_pipe
""" """
if old_name not in self.component_names: if old_name not in self.component_names:
raise ValueError( raise ValueError(
@ -916,7 +916,7 @@ class Language:
name (str): Name of the component to remove. name (str): Name of the component to remove.
RETURNS (tuple): A `(name, component)` tuple of the removed component. RETURNS (tuple): A `(name, component)` tuple of the removed component.
DOCS: https://nightly.spacy.io/api/language#remove_pipe DOCS: https://spacy.io/api/language#remove_pipe
""" """
if name not in self.component_names: if name not in self.component_names:
raise ValueError(Errors.E001.format(name=name, opts=self.component_names)) raise ValueError(Errors.E001.format(name=name, opts=self.component_names))
@ -972,7 +972,7 @@ class Language:
keyword arguments for specific components. keyword arguments for specific components.
RETURNS (Doc): A container for accessing the annotations. RETURNS (Doc): A container for accessing the annotations.
DOCS: https://nightly.spacy.io/api/language#call DOCS: https://spacy.io/api/language#call
""" """
doc = self.make_doc(text) doc = self.make_doc(text)
if component_cfg is None: if component_cfg is None:
@ -1023,7 +1023,7 @@ class Language:
disable (str or iterable): The name(s) of the pipes to disable disable (str or iterable): The name(s) of the pipes to disable
enable (str or iterable): The name(s) of the pipes to enable - all others will be disabled enable (str or iterable): The name(s) of the pipes to enable - all others will be disabled
DOCS: https://nightly.spacy.io/api/language#select_pipes DOCS: https://spacy.io/api/language#select_pipes
""" """
if enable is None and disable is None: if enable is None and disable is None:
raise ValueError(Errors.E991) raise ValueError(Errors.E991)
@ -1081,7 +1081,7 @@ class Language:
exclude (Iterable[str]): Names of components that shouldn't be updated. exclude (Iterable[str]): Names of components that shouldn't be updated.
RETURNS (Dict[str, float]): The updated losses dictionary RETURNS (Dict[str, float]): The updated losses dictionary
DOCS: https://nightly.spacy.io/api/language#update DOCS: https://spacy.io/api/language#update
""" """
if _ is not None: if _ is not None:
raise ValueError(Errors.E989) raise ValueError(Errors.E989)
@ -1144,7 +1144,7 @@ class Language:
>>> raw_batch = [Example.from_dict(nlp.make_doc(text), {}) for text in next(raw_text_batches)] >>> raw_batch = [Example.from_dict(nlp.make_doc(text), {}) for text in next(raw_text_batches)]
>>> nlp.rehearse(raw_batch) >>> nlp.rehearse(raw_batch)
DOCS: https://nightly.spacy.io/api/language#rehearse DOCS: https://spacy.io/api/language#rehearse
""" """
if len(examples) == 0: if len(examples) == 0:
return return
@ -1199,7 +1199,7 @@ class Language:
provided, will be created using the .create_optimizer() method. provided, will be created using the .create_optimizer() method.
RETURNS (thinc.api.Optimizer): The optimizer. RETURNS (thinc.api.Optimizer): The optimizer.
DOCS: https://nightly.spacy.io/api/language#initialize DOCS: https://spacy.io/api/language#initialize
""" """
if get_examples is None: if get_examples is None:
util.logger.debug( util.logger.debug(
@ -1266,7 +1266,7 @@ class Language:
RETURNS (Optimizer): The optimizer. RETURNS (Optimizer): The optimizer.
DOCS: https://nightly.spacy.io/api/language#resume_training DOCS: https://spacy.io/api/language#resume_training
""" """
ops = get_current_ops() ops = get_current_ops()
if self.vocab.vectors.data.shape[1] >= 1: if self.vocab.vectors.data.shape[1] >= 1:
@ -1293,7 +1293,7 @@ class Language:
Function that deals with a failing batch of documents. This callable function should take in Function that deals with a failing batch of documents. This callable function should take in
the component's name, the component itself, the offending batch of documents, and the exception the component's name, the component itself, the offending batch of documents, and the exception
that was thrown. that was thrown.
DOCS: https://nightly.spacy.io/api/language#set_error_handler DOCS: https://spacy.io/api/language#set_error_handler
""" """
self.default_error_handler = error_handler self.default_error_handler = error_handler
for name, pipe in self.pipeline: for name, pipe in self.pipeline:
@ -1322,7 +1322,7 @@ class Language:
RETURNS (Scorer): The scorer containing the evaluation results. RETURNS (Scorer): The scorer containing the evaluation results.
DOCS: https://nightly.spacy.io/api/language#evaluate DOCS: https://spacy.io/api/language#evaluate
""" """
examples = list(examples) examples = list(examples)
validate_examples(examples, "Language.evaluate") validate_examples(examples, "Language.evaluate")
@ -1377,7 +1377,7 @@ class Language:
>>> with nlp.use_params(optimizer.averages): >>> with nlp.use_params(optimizer.averages):
>>> nlp.to_disk("/tmp/checkpoint") >>> nlp.to_disk("/tmp/checkpoint")
DOCS: https://nightly.spacy.io/api/language#use_params DOCS: https://spacy.io/api/language#use_params
""" """
if not params: if not params:
yield yield
@ -1424,7 +1424,7 @@ class Language:
n_process (int): Number of processors to process texts. If -1, set to `multiprocessing.cpu_count()`. n_process (int): Number of processors to process texts. If -1, set to `multiprocessing.cpu_count()`.
YIELDS (Doc): Documents in the order of the original text. YIELDS (Doc): Documents in the order of the original text.
DOCS: https://nightly.spacy.io/api/language#pipe DOCS: https://spacy.io/api/language#pipe
""" """
if n_process == -1: if n_process == -1:
n_process = mp.cpu_count() n_process = mp.cpu_count()
@ -1568,7 +1568,7 @@ class Language:
the types expected by the factory. the types expected by the factory.
RETURNS (Language): The initialized Language class. RETURNS (Language): The initialized Language class.
DOCS: https://nightly.spacy.io/api/language#from_config DOCS: https://spacy.io/api/language#from_config
""" """
if auto_fill: if auto_fill:
config = Config( config = Config(
@ -1712,7 +1712,7 @@ class Language:
either be an empty list to not replace any listeners, or a complete either be an empty list to not replace any listeners, or a complete
(!) list of the paths to all listener layers used by the model. (!) list of the paths to all listener layers used by the model.
DOCS: https://nightly.spacy.io/api/language#replace_listeners DOCS: https://spacy.io/api/language#replace_listeners
""" """
if tok2vec_name not in self.pipe_names: if tok2vec_name not in self.pipe_names:
err = Errors.E889.format( err = Errors.E889.format(
@ -1782,7 +1782,7 @@ class Language:
it doesn't exist. it doesn't exist.
exclude (list): Names of components or serialization fields to exclude. exclude (list): Names of components or serialization fields to exclude.
DOCS: https://nightly.spacy.io/api/language#to_disk DOCS: https://spacy.io/api/language#to_disk
""" """
path = util.ensure_path(path) path = util.ensure_path(path)
serializers = {} serializers = {}
@ -1811,7 +1811,7 @@ class Language:
exclude (list): Names of components or serialization fields to exclude. exclude (list): Names of components or serialization fields to exclude.
RETURNS (Language): The modified `Language` object. RETURNS (Language): The modified `Language` object.
DOCS: https://nightly.spacy.io/api/language#from_disk DOCS: https://spacy.io/api/language#from_disk
""" """
def deserialize_meta(path: Path) -> None: def deserialize_meta(path: Path) -> None:
@ -1859,7 +1859,7 @@ class Language:
exclude (list): Names of components or serialization fields to exclude. exclude (list): Names of components or serialization fields to exclude.
RETURNS (bytes): The serialized form of the `Language` object. RETURNS (bytes): The serialized form of the `Language` object.
DOCS: https://nightly.spacy.io/api/language#to_bytes DOCS: https://spacy.io/api/language#to_bytes
""" """
serializers = {} serializers = {}
serializers["vocab"] = lambda: self.vocab.to_bytes() serializers["vocab"] = lambda: self.vocab.to_bytes()
@ -1883,7 +1883,7 @@ class Language:
exclude (list): Names of components or serialization fields to exclude. exclude (list): Names of components or serialization fields to exclude.
RETURNS (Language): The `Language` object. RETURNS (Language): The `Language` object.
DOCS: https://nightly.spacy.io/api/language#from_bytes DOCS: https://spacy.io/api/language#from_bytes
""" """
def deserialize_meta(b): def deserialize_meta(b):

View File

@ -30,7 +30,7 @@ cdef class Lexeme:
tag, dependency parse, or lemma (lemmatization depends on the tag, dependency parse, or lemma (lemmatization depends on the
part-of-speech tag). part-of-speech tag).
DOCS: https://nightly.spacy.io/api/lexeme DOCS: https://spacy.io/api/lexeme
""" """
def __init__(self, Vocab vocab, attr_t orth): def __init__(self, Vocab vocab, attr_t orth):
"""Create a Lexeme object. """Create a Lexeme object.

View File

@ -57,7 +57,7 @@ class Table(OrderedDict):
data (dict): The dictionary. data (dict): The dictionary.
name (str): Optional table name for reference. name (str): Optional table name for reference.
DOCS: https://nightly.spacy.io/api/lookups#table.from_dict DOCS: https://spacy.io/api/lookups#table.from_dict
""" """
self = cls(name=name) self = cls(name=name)
self.update(data) self.update(data)
@ -69,7 +69,7 @@ class Table(OrderedDict):
name (str): Optional table name for reference. name (str): Optional table name for reference.
data (dict): Initial data, used to hint Bloom Filter. data (dict): Initial data, used to hint Bloom Filter.
DOCS: https://nightly.spacy.io/api/lookups#table.init DOCS: https://spacy.io/api/lookups#table.init
""" """
OrderedDict.__init__(self) OrderedDict.__init__(self)
self.name = name self.name = name
@ -135,7 +135,7 @@ class Table(OrderedDict):
RETURNS (bytes): The serialized table. RETURNS (bytes): The serialized table.
DOCS: https://nightly.spacy.io/api/lookups#table.to_bytes DOCS: https://spacy.io/api/lookups#table.to_bytes
""" """
data = { data = {
"name": self.name, "name": self.name,
@ -150,7 +150,7 @@ class Table(OrderedDict):
bytes_data (bytes): The data to load. bytes_data (bytes): The data to load.
RETURNS (Table): The loaded table. RETURNS (Table): The loaded table.
DOCS: https://nightly.spacy.io/api/lookups#table.from_bytes DOCS: https://spacy.io/api/lookups#table.from_bytes
""" """
loaded = srsly.msgpack_loads(bytes_data) loaded = srsly.msgpack_loads(bytes_data)
data = loaded.get("dict", {}) data = loaded.get("dict", {})
@ -172,7 +172,7 @@ class Lookups:
def __init__(self) -> None: def __init__(self) -> None:
"""Initialize the Lookups object. """Initialize the Lookups object.
DOCS: https://nightly.spacy.io/api/lookups#init DOCS: https://spacy.io/api/lookups#init
""" """
self._tables = {} self._tables = {}
@ -201,7 +201,7 @@ class Lookups:
data (dict): Optional data to add to the table. data (dict): Optional data to add to the table.
RETURNS (Table): The newly added table. RETURNS (Table): The newly added table.
DOCS: https://nightly.spacy.io/api/lookups#add_table DOCS: https://spacy.io/api/lookups#add_table
""" """
if name in self.tables: if name in self.tables:
raise ValueError(Errors.E158.format(name=name)) raise ValueError(Errors.E158.format(name=name))
@ -215,7 +215,7 @@ class Lookups:
name (str): Name of the table to set. name (str): Name of the table to set.
table (Table): The Table to set. table (Table): The Table to set.
DOCS: https://nightly.spacy.io/api/lookups#set_table DOCS: https://spacy.io/api/lookups#set_table
""" """
self._tables[name] = table self._tables[name] = table
@ -227,7 +227,7 @@ class Lookups:
default (Any): Optional default value to return if table doesn't exist. default (Any): Optional default value to return if table doesn't exist.
RETURNS (Table): The table. RETURNS (Table): The table.
DOCS: https://nightly.spacy.io/api/lookups#get_table DOCS: https://spacy.io/api/lookups#get_table
""" """
if name not in self._tables: if name not in self._tables:
if default == UNSET: if default == UNSET:
@ -241,7 +241,7 @@ class Lookups:
name (str): Name of the table to remove. name (str): Name of the table to remove.
RETURNS (Table): The removed table. RETURNS (Table): The removed table.
DOCS: https://nightly.spacy.io/api/lookups#remove_table DOCS: https://spacy.io/api/lookups#remove_table
""" """
if name not in self._tables: if name not in self._tables:
raise KeyError(Errors.E159.format(name=name, tables=self.tables)) raise KeyError(Errors.E159.format(name=name, tables=self.tables))
@ -253,7 +253,7 @@ class Lookups:
name (str): Name of the table. name (str): Name of the table.
RETURNS (bool): Whether a table of that name exists. RETURNS (bool): Whether a table of that name exists.
DOCS: https://nightly.spacy.io/api/lookups#has_table DOCS: https://spacy.io/api/lookups#has_table
""" """
return name in self._tables return name in self._tables
@ -262,7 +262,7 @@ class Lookups:
RETURNS (bytes): The serialized Lookups. RETURNS (bytes): The serialized Lookups.
DOCS: https://nightly.spacy.io/api/lookups#to_bytes DOCS: https://spacy.io/api/lookups#to_bytes
""" """
return srsly.msgpack_dumps(self._tables) return srsly.msgpack_dumps(self._tables)
@ -272,7 +272,7 @@ class Lookups:
bytes_data (bytes): The data to load. bytes_data (bytes): The data to load.
RETURNS (Lookups): The loaded Lookups. RETURNS (Lookups): The loaded Lookups.
DOCS: https://nightly.spacy.io/api/lookups#from_bytes DOCS: https://spacy.io/api/lookups#from_bytes
""" """
self._tables = {} self._tables = {}
for key, value in srsly.msgpack_loads(bytes_data).items(): for key, value in srsly.msgpack_loads(bytes_data).items():
@ -287,7 +287,7 @@ class Lookups:
path (str / Path): The file path. path (str / Path): The file path.
DOCS: https://nightly.spacy.io/api/lookups#to_disk DOCS: https://spacy.io/api/lookups#to_disk
""" """
path = ensure_path(path) path = ensure_path(path)
if not path.exists(): if not path.exists():
@ -305,7 +305,7 @@ class Lookups:
path (str / Path): The directory path. path (str / Path): The directory path.
RETURNS (Lookups): The loaded lookups. RETURNS (Lookups): The loaded lookups.
DOCS: https://nightly.spacy.io/api/lookups#from_disk DOCS: https://spacy.io/api/lookups#from_disk
""" """
path = ensure_path(path) path = ensure_path(path)
filepath = path / filename filepath = path / filename

View File

@ -32,8 +32,8 @@ DEF PADDING = 5
cdef class Matcher: cdef class Matcher:
"""Match sequences of tokens, based on pattern rules. """Match sequences of tokens, based on pattern rules.
DOCS: https://nightly.spacy.io/api/matcher DOCS: https://spacy.io/api/matcher
USAGE: https://nightly.spacy.io/usage/rule-based-matching USAGE: https://spacy.io/usage/rule-based-matching
""" """
def __init__(self, vocab, validate=True): def __init__(self, vocab, validate=True):

View File

@ -20,8 +20,8 @@ cdef class PhraseMatcher:
sequences based on lists of token descriptions, the `PhraseMatcher` accepts sequences based on lists of token descriptions, the `PhraseMatcher` accepts
match patterns in the form of `Doc` objects. match patterns in the form of `Doc` objects.
DOCS: https://nightly.spacy.io/api/phrasematcher DOCS: https://spacy.io/api/phrasematcher
USAGE: https://nightly.spacy.io/usage/rule-based-matching#phrasematcher USAGE: https://spacy.io/usage/rule-based-matching#phrasematcher
Adapted from FlashText: https://github.com/vi3k6i5/flashtext Adapted from FlashText: https://github.com/vi3k6i5/flashtext
MIT License (see `LICENSE`) MIT License (see `LICENSE`)
@ -35,7 +35,7 @@ cdef class PhraseMatcher:
attr (int / str): Token attribute to match on. attr (int / str): Token attribute to match on.
validate (bool): Perform additional validation when patterns are added. validate (bool): Perform additional validation when patterns are added.
DOCS: https://nightly.spacy.io/api/phrasematcher#init DOCS: https://spacy.io/api/phrasematcher#init
""" """
self.vocab = vocab self.vocab = vocab
self._callbacks = {} self._callbacks = {}
@ -64,7 +64,7 @@ cdef class PhraseMatcher:
RETURNS (int): The number of rules. RETURNS (int): The number of rules.
DOCS: https://nightly.spacy.io/api/phrasematcher#len DOCS: https://spacy.io/api/phrasematcher#len
""" """
return len(self._callbacks) return len(self._callbacks)
@ -74,7 +74,7 @@ cdef class PhraseMatcher:
key (str): The match ID. key (str): The match ID.
RETURNS (bool): Whether the matcher contains rules for this match ID. RETURNS (bool): Whether the matcher contains rules for this match ID.
DOCS: https://nightly.spacy.io/api/phrasematcher#contains DOCS: https://spacy.io/api/phrasematcher#contains
""" """
return key in self._callbacks return key in self._callbacks
@ -88,7 +88,7 @@ cdef class PhraseMatcher:
key (str): The match ID. key (str): The match ID.
DOCS: https://nightly.spacy.io/api/phrasematcher#remove DOCS: https://spacy.io/api/phrasematcher#remove
""" """
if key not in self._docs: if key not in self._docs:
raise KeyError(key) raise KeyError(key)
@ -167,7 +167,7 @@ cdef class PhraseMatcher:
as variable arguments. Will be ignored if a list of patterns is as variable arguments. Will be ignored if a list of patterns is
provided as the second argument. provided as the second argument.
DOCS: https://nightly.spacy.io/api/phrasematcher#add DOCS: https://spacy.io/api/phrasematcher#add
""" """
if docs is None or hasattr(docs, "__call__"): # old API if docs is None or hasattr(docs, "__call__"): # old API
on_match = docs on_match = docs
@ -241,7 +241,7 @@ cdef class PhraseMatcher:
`doc[start:end]`. The `match_id` is an integer. If as_spans is set `doc[start:end]`. The `match_id` is an integer. If as_spans is set
to True, a list of Span objects is returned. to True, a list of Span objects is returned.
DOCS: https://nightly.spacy.io/api/phrasematcher#call DOCS: https://spacy.io/api/phrasematcher#call
""" """
matches = [] matches = []
if doc is None or len(doc) == 0: if doc is None or len(doc) == 0:

View File

@ -32,7 +32,7 @@ class AttributeRuler(Pipe):
"""Set token-level attributes for tokens matched by Matcher patterns. """Set token-level attributes for tokens matched by Matcher patterns.
Additionally supports importing patterns from tag maps and morph rules. Additionally supports importing patterns from tag maps and morph rules.
DOCS: https://nightly.spacy.io/api/attributeruler DOCS: https://spacy.io/api/attributeruler
""" """
def __init__( def __init__(
@ -48,7 +48,7 @@ class AttributeRuler(Pipe):
RETURNS (AttributeRuler): The AttributeRuler component. RETURNS (AttributeRuler): The AttributeRuler component.
DOCS: https://nightly.spacy.io/api/attributeruler#init DOCS: https://spacy.io/api/attributeruler#init
""" """
self.name = name self.name = name
self.vocab = vocab self.vocab = vocab
@ -94,7 +94,7 @@ class AttributeRuler(Pipe):
doc (Doc): The document to process. doc (Doc): The document to process.
RETURNS (Doc): The processed Doc. RETURNS (Doc): The processed Doc.
DOCS: https://nightly.spacy.io/api/attributeruler#call DOCS: https://spacy.io/api/attributeruler#call
""" """
error_handler = self.get_error_handler() error_handler = self.get_error_handler()
try: try:
@ -143,7 +143,7 @@ class AttributeRuler(Pipe):
tag_map (dict): The tag map that maps fine-grained tags to tag_map (dict): The tag map that maps fine-grained tags to
coarse-grained tags and morphological features. coarse-grained tags and morphological features.
DOCS: https://nightly.spacy.io/api/attributeruler#load_from_morph_rules DOCS: https://spacy.io/api/attributeruler#load_from_morph_rules
""" """
for tag, attrs in tag_map.items(): for tag, attrs in tag_map.items():
pattern = [{"TAG": tag}] pattern = [{"TAG": tag}]
@ -165,7 +165,7 @@ class AttributeRuler(Pipe):
fine-grained tags to coarse-grained tags, lemmas and morphological fine-grained tags to coarse-grained tags, lemmas and morphological
features. features.
DOCS: https://nightly.spacy.io/api/attributeruler#load_from_morph_rules DOCS: https://spacy.io/api/attributeruler#load_from_morph_rules
""" """
for tag in morph_rules: for tag in morph_rules:
for word in morph_rules[tag]: for word in morph_rules[tag]:
@ -193,7 +193,7 @@ class AttributeRuler(Pipe):
index (int): The index of the token in the matched span to modify. May index (int): The index of the token in the matched span to modify. May
be negative to index from the end of the span. Defaults to 0. be negative to index from the end of the span. Defaults to 0.
DOCS: https://nightly.spacy.io/api/attributeruler#add DOCS: https://spacy.io/api/attributeruler#add
""" """
# We need to make a string here, because otherwise the ID we pass back # We need to make a string here, because otherwise the ID we pass back
# will be interpreted as the hash of a string, rather than an ordinal. # will be interpreted as the hash of a string, rather than an ordinal.
@ -211,7 +211,7 @@ class AttributeRuler(Pipe):
as the arguments to AttributeRuler.add (patterns/attrs/index) to as the arguments to AttributeRuler.add (patterns/attrs/index) to
add as patterns. add as patterns.
DOCS: https://nightly.spacy.io/api/attributeruler#add_patterns DOCS: https://spacy.io/api/attributeruler#add_patterns
""" """
for p in patterns: for p in patterns:
self.add(**p) self.add(**p)
@ -236,7 +236,7 @@ class AttributeRuler(Pipe):
Scorer.score_token_attr for the attributes "tag", "pos", "morph" Scorer.score_token_attr for the attributes "tag", "pos", "morph"
and "lemma" for the target token attributes. and "lemma" for the target token attributes.
DOCS: https://nightly.spacy.io/api/tagger#score DOCS: https://spacy.io/api/tagger#score
""" """
def morph_key_getter(token, attr): def morph_key_getter(token, attr):
@ -273,7 +273,7 @@ class AttributeRuler(Pipe):
exclude (Iterable[str]): String names of serialization fields to exclude. exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (bytes): The serialized object. RETURNS (bytes): The serialized object.
DOCS: https://nightly.spacy.io/api/attributeruler#to_bytes DOCS: https://spacy.io/api/attributeruler#to_bytes
""" """
serialize = {} serialize = {}
serialize["vocab"] = self.vocab.to_bytes serialize["vocab"] = self.vocab.to_bytes
@ -289,7 +289,7 @@ class AttributeRuler(Pipe):
exclude (Iterable[str]): String names of serialization fields to exclude. exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (AttributeRuler): The loaded object. RETURNS (AttributeRuler): The loaded object.
DOCS: https://nightly.spacy.io/api/attributeruler#from_bytes DOCS: https://spacy.io/api/attributeruler#from_bytes
""" """
def load_patterns(b): def load_patterns(b):
@ -310,7 +310,7 @@ class AttributeRuler(Pipe):
path (Union[Path, str]): A path to a directory. path (Union[Path, str]): A path to a directory.
exclude (Iterable[str]): String names of serialization fields to exclude. exclude (Iterable[str]): String names of serialization fields to exclude.
DOCS: https://nightly.spacy.io/api/attributeruler#to_disk DOCS: https://spacy.io/api/attributeruler#to_disk
""" """
serialize = { serialize = {
"vocab": lambda p: self.vocab.to_disk(p), "vocab": lambda p: self.vocab.to_disk(p),
@ -327,7 +327,7 @@ class AttributeRuler(Pipe):
exclude (Iterable[str]): String names of serialization fields to exclude. exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (AttributeRuler): The loaded object. RETURNS (AttributeRuler): The loaded object.
DOCS: https://nightly.spacy.io/api/attributeruler#from_disk DOCS: https://spacy.io/api/attributeruler#from_disk
""" """
def load_patterns(p): def load_patterns(p):

View File

@ -202,7 +202,7 @@ def make_beam_parser(
cdef class DependencyParser(Parser): cdef class DependencyParser(Parser):
"""Pipeline component for dependency parsing. """Pipeline component for dependency parsing.
DOCS: https://nightly.spacy.io/api/dependencyparser DOCS: https://spacy.io/api/dependencyparser
""" """
TransitionSystem = ArcEager TransitionSystem = ArcEager
@ -243,7 +243,7 @@ cdef class DependencyParser(Parser):
RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans
and Scorer.score_deps. and Scorer.score_deps.
DOCS: https://nightly.spacy.io/api/dependencyparser#score DOCS: https://spacy.io/api/dependencyparser#score
""" """
def has_sents(doc): def has_sents(doc):
return doc.has_annotation("SENT_START") return doc.has_annotation("SENT_START")

View File

@ -94,7 +94,7 @@ def make_entity_linker(
class EntityLinker(TrainablePipe): class EntityLinker(TrainablePipe):
"""Pipeline component for named entity linking. """Pipeline component for named entity linking.
DOCS: https://nightly.spacy.io/api/entitylinker DOCS: https://spacy.io/api/entitylinker
""" """
NIL = "NIL" # string used to refer to a non-existing link NIL = "NIL" # string used to refer to a non-existing link
@ -124,7 +124,7 @@ class EntityLinker(TrainablePipe):
get_candidates (Callable[[KnowledgeBase, "Span"], Iterable[Candidate]]): Function that get_candidates (Callable[[KnowledgeBase, "Span"], Iterable[Candidate]]): Function that
produces a list of candidates, given a certain knowledge base and a textual mention. produces a list of candidates, given a certain knowledge base and a textual mention.
DOCS: https://nightly.spacy.io/api/entitylinker#init DOCS: https://spacy.io/api/entitylinker#init
""" """
self.vocab = vocab self.vocab = vocab
self.model = model self.model = model
@ -171,7 +171,7 @@ class EntityLinker(TrainablePipe):
Note that providing this argument will overwrite all data accumulated in the current KB. Note that providing this argument will overwrite all data accumulated in the current KB.
Use this only when loading a KB as-such from file. Use this only when loading a KB as-such from file.
DOCS: https://nightly.spacy.io/api/entitylinker#initialize DOCS: https://spacy.io/api/entitylinker#initialize
""" """
validate_get_examples(get_examples, "EntityLinker.initialize") validate_get_examples(get_examples, "EntityLinker.initialize")
if kb_loader is not None: if kb_loader is not None:
@ -207,7 +207,7 @@ class EntityLinker(TrainablePipe):
Updated using the component name as the key. Updated using the component name as the key.
RETURNS (Dict[str, float]): The updated losses dictionary. RETURNS (Dict[str, float]): The updated losses dictionary.
DOCS: https://nightly.spacy.io/api/entitylinker#update DOCS: https://spacy.io/api/entitylinker#update
""" """
self.validate_kb() self.validate_kb()
if losses is None: if losses is None:
@ -283,7 +283,7 @@ class EntityLinker(TrainablePipe):
docs (Iterable[Doc]): The documents to predict. docs (Iterable[Doc]): The documents to predict.
RETURNS (List[str]): The model's prediction for each document. RETURNS (List[str]): The model's prediction for each document.
DOCS: https://nightly.spacy.io/api/entitylinker#predict DOCS: https://spacy.io/api/entitylinker#predict
""" """
self.validate_kb() self.validate_kb()
entity_count = 0 entity_count = 0
@ -380,7 +380,7 @@ class EntityLinker(TrainablePipe):
docs (Iterable[Doc]): The documents to modify. docs (Iterable[Doc]): The documents to modify.
kb_ids (List[str]): The IDs to set, produced by EntityLinker.predict. kb_ids (List[str]): The IDs to set, produced by EntityLinker.predict.
DOCS: https://nightly.spacy.io/api/entitylinker#set_annotations DOCS: https://spacy.io/api/entitylinker#set_annotations
""" """
count_ents = len([ent for doc in docs for ent in doc.ents]) count_ents = len([ent for doc in docs for ent in doc.ents])
if count_ents != len(kb_ids): if count_ents != len(kb_ids):
@ -399,7 +399,7 @@ class EntityLinker(TrainablePipe):
examples (Iterable[Example]): The examples to score. examples (Iterable[Example]): The examples to score.
RETURNS (Dict[str, Any]): The scores. RETURNS (Dict[str, Any]): The scores.
DOCS TODO: https://nightly.spacy.io/api/entity_linker#score DOCS TODO: https://spacy.io/api/entity_linker#score
""" """
validate_examples(examples, "EntityLinker.score") validate_examples(examples, "EntityLinker.score")
return Scorer.score_links(examples, negative_labels=[self.NIL]) return Scorer.score_links(examples, negative_labels=[self.NIL])
@ -412,7 +412,7 @@ class EntityLinker(TrainablePipe):
path (str / Path): Path to a directory. path (str / Path): Path to a directory.
exclude (Iterable[str]): String names of serialization fields to exclude. exclude (Iterable[str]): String names of serialization fields to exclude.
DOCS: https://nightly.spacy.io/api/entitylinker#to_disk DOCS: https://spacy.io/api/entitylinker#to_disk
""" """
serialize = {} serialize = {}
serialize["vocab"] = lambda p: self.vocab.to_disk(p) serialize["vocab"] = lambda p: self.vocab.to_disk(p)
@ -430,7 +430,7 @@ class EntityLinker(TrainablePipe):
exclude (Iterable[str]): String names of serialization fields to exclude. exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (EntityLinker): The modified EntityLinker object. RETURNS (EntityLinker): The modified EntityLinker object.
DOCS: https://nightly.spacy.io/api/entitylinker#from_disk DOCS: https://spacy.io/api/entitylinker#from_disk
""" """
def load_model(p): def load_model(p):

View File

@ -59,8 +59,8 @@ class EntityRuler(Pipe):
purely rule-based entity recognition system. After initialization, the purely rule-based entity recognition system. After initialization, the
component is typically added to the pipeline using `nlp.add_pipe`. component is typically added to the pipeline using `nlp.add_pipe`.
DOCS: https://nightly.spacy.io/api/entityruler DOCS: https://spacy.io/api/entityruler
USAGE: https://nightly.spacy.io/usage/rule-based-matching#entityruler USAGE: https://spacy.io/usage/rule-based-matching#entityruler
""" """
def __init__( def __init__(
@ -94,7 +94,7 @@ class EntityRuler(Pipe):
added by the model, overwrite them by matches if necessary. added by the model, overwrite them by matches if necessary.
ent_id_sep (str): Separator used internally for entity IDs. ent_id_sep (str): Separator used internally for entity IDs.
DOCS: https://nightly.spacy.io/api/entityruler#init DOCS: https://spacy.io/api/entityruler#init
""" """
self.nlp = nlp self.nlp = nlp
self.name = name self.name = name
@ -133,7 +133,7 @@ class EntityRuler(Pipe):
doc (Doc): The Doc object in the pipeline. doc (Doc): The Doc object in the pipeline.
RETURNS (Doc): The Doc with added entities, if available. RETURNS (Doc): The Doc with added entities, if available.
DOCS: https://nightly.spacy.io/api/entityruler#call DOCS: https://spacy.io/api/entityruler#call
""" """
error_handler = self.get_error_handler() error_handler = self.get_error_handler()
try: try:
@ -183,7 +183,7 @@ class EntityRuler(Pipe):
RETURNS (set): The string labels. RETURNS (set): The string labels.
DOCS: https://nightly.spacy.io/api/entityruler#labels DOCS: https://spacy.io/api/entityruler#labels
""" """
keys = set(self.token_patterns.keys()) keys = set(self.token_patterns.keys())
keys.update(self.phrase_patterns.keys()) keys.update(self.phrase_patterns.keys())
@ -211,7 +211,7 @@ class EntityRuler(Pipe):
nlp (Language): The current nlp object the component is part of. nlp (Language): The current nlp object the component is part of.
patterns (Optional[Iterable[PatternType]]): The list of patterns. patterns (Optional[Iterable[PatternType]]): The list of patterns.
DOCS: https://nightly.spacy.io/api/entityruler#initialize DOCS: https://spacy.io/api/entityruler#initialize
""" """
self.clear() self.clear()
if patterns: if patterns:
@ -223,7 +223,7 @@ class EntityRuler(Pipe):
RETURNS (set): The string entity ids. RETURNS (set): The string entity ids.
DOCS: https://nightly.spacy.io/api/entityruler#ent_ids DOCS: https://spacy.io/api/entityruler#ent_ids
""" """
keys = set(self.token_patterns.keys()) keys = set(self.token_patterns.keys())
keys.update(self.phrase_patterns.keys()) keys.update(self.phrase_patterns.keys())
@ -241,7 +241,7 @@ class EntityRuler(Pipe):
RETURNS (list): The original patterns, one dictionary per pattern. RETURNS (list): The original patterns, one dictionary per pattern.
DOCS: https://nightly.spacy.io/api/entityruler#patterns DOCS: https://spacy.io/api/entityruler#patterns
""" """
all_patterns = [] all_patterns = []
for label, patterns in self.token_patterns.items(): for label, patterns in self.token_patterns.items():
@ -268,7 +268,7 @@ class EntityRuler(Pipe):
patterns (list): The patterns to add. patterns (list): The patterns to add.
DOCS: https://nightly.spacy.io/api/entityruler#add_patterns DOCS: https://spacy.io/api/entityruler#add_patterns
""" """
# disable the nlp components after this one in case they hadn't been initialized / deserialised yet # disable the nlp components after this one in case they hadn't been initialized / deserialised yet
@ -366,7 +366,7 @@ class EntityRuler(Pipe):
patterns_bytes (bytes): The bytestring to load. patterns_bytes (bytes): The bytestring to load.
RETURNS (EntityRuler): The loaded entity ruler. RETURNS (EntityRuler): The loaded entity ruler.
DOCS: https://nightly.spacy.io/api/entityruler#from_bytes DOCS: https://spacy.io/api/entityruler#from_bytes
""" """
cfg = srsly.msgpack_loads(patterns_bytes) cfg = srsly.msgpack_loads(patterns_bytes)
self.clear() self.clear()
@ -388,7 +388,7 @@ class EntityRuler(Pipe):
RETURNS (bytes): The serialized patterns. RETURNS (bytes): The serialized patterns.
DOCS: https://nightly.spacy.io/api/entityruler#to_bytes DOCS: https://spacy.io/api/entityruler#to_bytes
""" """
serial = { serial = {
"overwrite": self.overwrite, "overwrite": self.overwrite,
@ -407,7 +407,7 @@ class EntityRuler(Pipe):
path (str / Path): The JSONL file to load. path (str / Path): The JSONL file to load.
RETURNS (EntityRuler): The loaded entity ruler. RETURNS (EntityRuler): The loaded entity ruler.
DOCS: https://nightly.spacy.io/api/entityruler#from_disk DOCS: https://spacy.io/api/entityruler#from_disk
""" """
path = ensure_path(path) path = ensure_path(path)
self.clear() self.clear()
@ -443,7 +443,7 @@ class EntityRuler(Pipe):
path (str / Path): The JSONL file to save. path (str / Path): The JSONL file to save.
DOCS: https://nightly.spacy.io/api/entityruler#to_disk DOCS: https://spacy.io/api/entityruler#to_disk
""" """
path = ensure_path(path) path = ensure_path(path)
cfg = { cfg = {

View File

@ -18,7 +18,7 @@ def merge_noun_chunks(doc: Doc) -> Doc:
doc (Doc): The Doc object. doc (Doc): The Doc object.
RETURNS (Doc): The Doc object with merged noun chunks. RETURNS (Doc): The Doc object with merged noun chunks.
DOCS: https://nightly.spacy.io/api/pipeline-functions#merge_noun_chunks DOCS: https://spacy.io/api/pipeline-functions#merge_noun_chunks
""" """
if not doc.has_annotation("DEP"): if not doc.has_annotation("DEP"):
return doc return doc
@ -40,7 +40,7 @@ def merge_entities(doc: Doc):
doc (Doc): The Doc object. doc (Doc): The Doc object.
RETURNS (Doc): The Doc object with merged entities. RETURNS (Doc): The Doc object with merged entities.
DOCS: https://nightly.spacy.io/api/pipeline-functions#merge_entities DOCS: https://spacy.io/api/pipeline-functions#merge_entities
""" """
with doc.retokenize() as retokenizer: with doc.retokenize() as retokenizer:
for ent in doc.ents: for ent in doc.ents:
@ -57,7 +57,7 @@ def merge_subtokens(doc: Doc, label: str = "subtok") -> Doc:
label (str): The subtoken dependency label. label (str): The subtoken dependency label.
RETURNS (Doc): The Doc object with merged subtokens. RETURNS (Doc): The Doc object with merged subtokens.
DOCS: https://nightly.spacy.io/api/pipeline-functions#merge_subtokens DOCS: https://spacy.io/api/pipeline-functions#merge_subtokens
""" """
# TODO: make stateful component with "label" config # TODO: make stateful component with "label" config
merger = Matcher(doc.vocab) merger = Matcher(doc.vocab)

View File

@ -32,7 +32,7 @@ class Lemmatizer(Pipe):
The Lemmatizer supports simple part-of-speech-sensitive suffix rules and The Lemmatizer supports simple part-of-speech-sensitive suffix rules and
lookup tables. lookup tables.
DOCS: https://nightly.spacy.io/api/lemmatizer DOCS: https://spacy.io/api/lemmatizer
""" """
@classmethod @classmethod
@ -68,7 +68,7 @@ class Lemmatizer(Pipe):
overwrite (bool): Whether to overwrite existing lemmas. Defaults to overwrite (bool): Whether to overwrite existing lemmas. Defaults to
`False`. `False`.
DOCS: https://nightly.spacy.io/api/lemmatizer#init DOCS: https://spacy.io/api/lemmatizer#init
""" """
self.vocab = vocab self.vocab = vocab
self.model = model self.model = model
@ -98,7 +98,7 @@ class Lemmatizer(Pipe):
doc (Doc): The Doc to process. doc (Doc): The Doc to process.
RETURNS (Doc): The processed Doc. RETURNS (Doc): The processed Doc.
DOCS: https://nightly.spacy.io/api/lemmatizer#call DOCS: https://spacy.io/api/lemmatizer#call
""" """
if not self._validated: if not self._validated:
self._validate_tables(Errors.E1004) self._validate_tables(Errors.E1004)
@ -159,7 +159,7 @@ class Lemmatizer(Pipe):
token (Token): The token to lemmatize. token (Token): The token to lemmatize.
RETURNS (list): The available lemmas for the string. RETURNS (list): The available lemmas for the string.
DOCS: https://nightly.spacy.io/api/lemmatizer#lookup_lemmatize DOCS: https://spacy.io/api/lemmatizer#lookup_lemmatize
""" """
lookup_table = self.lookups.get_table("lemma_lookup", {}) lookup_table = self.lookups.get_table("lemma_lookup", {})
result = lookup_table.get(token.text, token.text) result = lookup_table.get(token.text, token.text)
@ -173,7 +173,7 @@ class Lemmatizer(Pipe):
token (Token): The token to lemmatize. token (Token): The token to lemmatize.
RETURNS (list): The available lemmas for the string. RETURNS (list): The available lemmas for the string.
DOCS: https://nightly.spacy.io/api/lemmatizer#rule_lemmatize DOCS: https://spacy.io/api/lemmatizer#rule_lemmatize
""" """
cache_key = (token.orth, token.pos, token.morph) cache_key = (token.orth, token.pos, token.morph)
if cache_key in self.cache: if cache_key in self.cache:
@ -241,7 +241,7 @@ class Lemmatizer(Pipe):
token (Token): The token. token (Token): The token.
RETURNS (bool): Whether the token is a base form. RETURNS (bool): Whether the token is a base form.
DOCS: https://nightly.spacy.io/api/lemmatizer#is_base_form DOCS: https://spacy.io/api/lemmatizer#is_base_form
""" """
return False return False
@ -251,7 +251,7 @@ class Lemmatizer(Pipe):
examples (Iterable[Example]): The examples to score. examples (Iterable[Example]): The examples to score.
RETURNS (Dict[str, Any]): The scores. RETURNS (Dict[str, Any]): The scores.
DOCS: https://nightly.spacy.io/api/lemmatizer#score DOCS: https://spacy.io/api/lemmatizer#score
""" """
validate_examples(examples, "Lemmatizer.score") validate_examples(examples, "Lemmatizer.score")
return Scorer.score_token_attr(examples, "lemma", **kwargs) return Scorer.score_token_attr(examples, "lemma", **kwargs)
@ -264,7 +264,7 @@ class Lemmatizer(Pipe):
path (str / Path): Path to a directory. path (str / Path): Path to a directory.
exclude (Iterable[str]): String names of serialization fields to exclude. exclude (Iterable[str]): String names of serialization fields to exclude.
DOCS: https://nightly.spacy.io/api/lemmatizer#to_disk DOCS: https://spacy.io/api/lemmatizer#to_disk
""" """
serialize = {} serialize = {}
serialize["vocab"] = lambda p: self.vocab.to_disk(p) serialize["vocab"] = lambda p: self.vocab.to_disk(p)
@ -280,7 +280,7 @@ class Lemmatizer(Pipe):
exclude (Iterable[str]): String names of serialization fields to exclude. exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (Lemmatizer): The modified Lemmatizer object. RETURNS (Lemmatizer): The modified Lemmatizer object.
DOCS: https://nightly.spacy.io/api/lemmatizer#from_disk DOCS: https://spacy.io/api/lemmatizer#from_disk
""" """
deserialize = {} deserialize = {}
deserialize["vocab"] = lambda p: self.vocab.from_disk(p) deserialize["vocab"] = lambda p: self.vocab.from_disk(p)
@ -295,7 +295,7 @@ class Lemmatizer(Pipe):
exclude (Iterable[str]): String names of serialization fields to exclude. exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (bytes): The serialized object. RETURNS (bytes): The serialized object.
DOCS: https://nightly.spacy.io/api/lemmatizer#to_bytes DOCS: https://spacy.io/api/lemmatizer#to_bytes
""" """
serialize = {} serialize = {}
serialize["vocab"] = self.vocab.to_bytes serialize["vocab"] = self.vocab.to_bytes
@ -311,7 +311,7 @@ class Lemmatizer(Pipe):
exclude (Iterable[str]): String names of serialization fields to exclude. exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (Lemmatizer): The loaded Lemmatizer. RETURNS (Lemmatizer): The loaded Lemmatizer.
DOCS: https://nightly.spacy.io/api/lemmatizer#from_bytes DOCS: https://spacy.io/api/lemmatizer#from_bytes
""" """
deserialize = {} deserialize = {}
deserialize["vocab"] = lambda b: self.vocab.from_bytes(b) deserialize["vocab"] = lambda b: self.vocab.from_bytes(b)

View File

@ -75,7 +75,7 @@ class Morphologizer(Tagger):
name (str): The component instance name, used to add entries to the name (str): The component instance name, used to add entries to the
losses during training. losses during training.
DOCS: https://nightly.spacy.io/api/morphologizer#init DOCS: https://spacy.io/api/morphologizer#init
""" """
self.vocab = vocab self.vocab = vocab
self.model = model self.model = model
@ -104,7 +104,7 @@ class Morphologizer(Tagger):
label (str): The label to add. label (str): The label to add.
RETURNS (int): 0 if label is already present, otherwise 1. RETURNS (int): 0 if label is already present, otherwise 1.
DOCS: https://nightly.spacy.io/api/morphologizer#add_label DOCS: https://spacy.io/api/morphologizer#add_label
""" """
if not isinstance(label, str): if not isinstance(label, str):
raise ValueError(Errors.E187) raise ValueError(Errors.E187)
@ -134,7 +134,7 @@ class Morphologizer(Tagger):
returns a representative sample of gold-standard Example objects. returns a representative sample of gold-standard Example objects.
nlp (Language): The current nlp object the component is part of. nlp (Language): The current nlp object the component is part of.
DOCS: https://nightly.spacy.io/api/morphologizer#initialize DOCS: https://spacy.io/api/morphologizer#initialize
""" """
validate_get_examples(get_examples, "Morphologizer.initialize") validate_get_examples(get_examples, "Morphologizer.initialize")
if labels is not None: if labels is not None:
@ -185,7 +185,7 @@ class Morphologizer(Tagger):
docs (Iterable[Doc]): The documents to modify. docs (Iterable[Doc]): The documents to modify.
batch_tag_ids: The IDs to set, produced by Morphologizer.predict. batch_tag_ids: The IDs to set, produced by Morphologizer.predict.
DOCS: https://nightly.spacy.io/api/morphologizer#set_annotations DOCS: https://spacy.io/api/morphologizer#set_annotations
""" """
if isinstance(docs, Doc): if isinstance(docs, Doc):
docs = [docs] docs = [docs]
@ -208,7 +208,7 @@ class Morphologizer(Tagger):
scores: Scores representing the model's predictions. scores: Scores representing the model's predictions.
RETURNS (Tuple[float, float]): The loss and the gradient. RETURNS (Tuple[float, float]): The loss and the gradient.
DOCS: https://nightly.spacy.io/api/morphologizer#get_loss DOCS: https://spacy.io/api/morphologizer#get_loss
""" """
validate_examples(examples, "Morphologizer.get_loss") validate_examples(examples, "Morphologizer.get_loss")
loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False) loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False)
@ -254,7 +254,7 @@ class Morphologizer(Tagger):
Scorer.score_token_attr for the attributes "pos" and "morph" and Scorer.score_token_attr for the attributes "pos" and "morph" and
Scorer.score_token_attr_per_feat for the attribute "morph". Scorer.score_token_attr_per_feat for the attribute "morph".
DOCS: https://nightly.spacy.io/api/morphologizer#score DOCS: https://spacy.io/api/morphologizer#score
""" """
def morph_key_getter(token, attr): def morph_key_getter(token, attr):
return getattr(token, attr).key return getattr(token, attr).key

View File

@ -163,7 +163,7 @@ def make_beam_ner(
cdef class EntityRecognizer(Parser): cdef class EntityRecognizer(Parser):
"""Pipeline component for named entity recognition. """Pipeline component for named entity recognition.
DOCS: https://nightly.spacy.io/api/entityrecognizer DOCS: https://spacy.io/api/entityrecognizer
""" """
TransitionSystem = BiluoPushDown TransitionSystem = BiluoPushDown
@ -194,7 +194,7 @@ cdef class EntityRecognizer(Parser):
examples (Iterable[Example]): The examples to score. examples (Iterable[Example]): The examples to score.
RETURNS (Dict[str, Any]): The NER precision, recall and f-scores. RETURNS (Dict[str, Any]): The NER precision, recall and f-scores.
DOCS: https://nightly.spacy.io/api/entityrecognizer#score DOCS: https://spacy.io/api/entityrecognizer#score
""" """
validate_examples(examples, "EntityRecognizer.score") validate_examples(examples, "EntityRecognizer.score")
return get_ner_prf(examples) return get_ner_prf(examples)

View File

@ -16,7 +16,7 @@ cdef class Pipe:
Trainable pipeline components like the EntityRecognizer or TextCategorizer Trainable pipeline components like the EntityRecognizer or TextCategorizer
should inherit from the subclass 'TrainablePipe'. should inherit from the subclass 'TrainablePipe'.
DOCS: https://nightly.spacy.io/api/pipe DOCS: https://spacy.io/api/pipe
""" """
@classmethod @classmethod
@ -34,7 +34,7 @@ cdef class Pipe:
docs (Doc): The Doc to process. docs (Doc): The Doc to process.
RETURNS (Doc): The processed Doc. RETURNS (Doc): The processed Doc.
DOCS: https://nightly.spacy.io/api/pipe#call DOCS: https://spacy.io/api/pipe#call
""" """
raise NotImplementedError(Errors.E931.format(parent="Pipe", method="__call__", name=self.name)) raise NotImplementedError(Errors.E931.format(parent="Pipe", method="__call__", name=self.name))
@ -47,7 +47,7 @@ cdef class Pipe:
batch_size (int): The number of documents to buffer. batch_size (int): The number of documents to buffer.
YIELDS (Doc): Processed documents in order. YIELDS (Doc): Processed documents in order.
DOCS: https://nightly.spacy.io/api/pipe#pipe DOCS: https://spacy.io/api/pipe#pipe
""" """
error_handler = self.get_error_handler() error_handler = self.get_error_handler()
for doc in stream: for doc in stream:
@ -69,7 +69,7 @@ cdef class Pipe:
returns a representative sample of gold-standard Example objects. returns a representative sample of gold-standard Example objects.
nlp (Language): The current nlp object the component is part of. nlp (Language): The current nlp object the component is part of.
DOCS: https://nightly.spacy.io/api/pipe#initialize DOCS: https://spacy.io/api/pipe#initialize
""" """
pass pass
@ -79,7 +79,7 @@ cdef class Pipe:
examples (Iterable[Example]): The examples to score. examples (Iterable[Example]): The examples to score.
RETURNS (Dict[str, Any]): The scores. RETURNS (Dict[str, Any]): The scores.
DOCS: https://nightly.spacy.io/api/pipe#score DOCS: https://spacy.io/api/pipe#score
""" """
return {} return {}
@ -111,7 +111,7 @@ cdef class Pipe:
the component's name, the component itself, the offending batch of documents, and the exception the component's name, the component itself, the offending batch of documents, and the exception
that was thrown. that was thrown.
DOCS: https://nightly.spacy.io/api/pipe#set_error_handler DOCS: https://spacy.io/api/pipe#set_error_handler
""" """
self.error_handler = error_handler self.error_handler = error_handler
@ -120,7 +120,7 @@ cdef class Pipe:
RETURNS (Callable): The error handler, or if it's not set a default function that just reraises. RETURNS (Callable): The error handler, or if it's not set a default function that just reraises.
DOCS: https://nightly.spacy.io/api/pipe#get_error_handler DOCS: https://spacy.io/api/pipe#get_error_handler
""" """
if hasattr(self, "error_handler"): if hasattr(self, "error_handler"):
return self.error_handler return self.error_handler

View File

@ -26,7 +26,7 @@ def make_sentencizer(
class Sentencizer(Pipe): class Sentencizer(Pipe):
"""Segment the Doc into sentences using a rule-based strategy. """Segment the Doc into sentences using a rule-based strategy.
DOCS: https://nightly.spacy.io/api/sentencizer DOCS: https://spacy.io/api/sentencizer
""" """
default_punct_chars = ['!', '.', '?', '։', '؟', '۔', '܀', '܁', '܂', '߹', default_punct_chars = ['!', '.', '?', '։', '؟', '۔', '܀', '܁', '܂', '߹',
@ -48,7 +48,7 @@ class Sentencizer(Pipe):
serialized with the nlp object. serialized with the nlp object.
RETURNS (Sentencizer): The sentencizer component. RETURNS (Sentencizer): The sentencizer component.
DOCS: https://nightly.spacy.io/api/sentencizer#init DOCS: https://spacy.io/api/sentencizer#init
""" """
self.name = name self.name = name
if punct_chars: if punct_chars:
@ -62,7 +62,7 @@ class Sentencizer(Pipe):
doc (Doc): The document to process. doc (Doc): The document to process.
RETURNS (Doc): The processed Doc. RETURNS (Doc): The processed Doc.
DOCS: https://nightly.spacy.io/api/sentencizer#call DOCS: https://spacy.io/api/sentencizer#call
""" """
error_handler = self.get_error_handler() error_handler = self.get_error_handler()
try: try:
@ -142,7 +142,7 @@ class Sentencizer(Pipe):
examples (Iterable[Example]): The examples to score. examples (Iterable[Example]): The examples to score.
RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans. RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans.
DOCS: https://nightly.spacy.io/api/sentencizer#score DOCS: https://spacy.io/api/sentencizer#score
""" """
def has_sents(doc): def has_sents(doc):
return doc.has_annotation("SENT_START") return doc.has_annotation("SENT_START")
@ -157,7 +157,7 @@ class Sentencizer(Pipe):
RETURNS (bytes): The serialized object. RETURNS (bytes): The serialized object.
DOCS: https://nightly.spacy.io/api/sentencizer#to_bytes DOCS: https://spacy.io/api/sentencizer#to_bytes
""" """
return srsly.msgpack_dumps({"punct_chars": list(self.punct_chars)}) return srsly.msgpack_dumps({"punct_chars": list(self.punct_chars)})
@ -167,7 +167,7 @@ class Sentencizer(Pipe):
bytes_data (bytes): The data to load. bytes_data (bytes): The data to load.
RETURNS (Sentencizer): The loaded object. RETURNS (Sentencizer): The loaded object.
DOCS: https://nightly.spacy.io/api/sentencizer#from_bytes DOCS: https://spacy.io/api/sentencizer#from_bytes
""" """
cfg = srsly.msgpack_loads(bytes_data) cfg = srsly.msgpack_loads(bytes_data)
self.punct_chars = set(cfg.get("punct_chars", self.default_punct_chars)) self.punct_chars = set(cfg.get("punct_chars", self.default_punct_chars))
@ -176,7 +176,7 @@ class Sentencizer(Pipe):
def to_disk(self, path, *, exclude=tuple()): def to_disk(self, path, *, exclude=tuple()):
"""Serialize the sentencizer to disk. """Serialize the sentencizer to disk.
DOCS: https://nightly.spacy.io/api/sentencizer#to_disk DOCS: https://spacy.io/api/sentencizer#to_disk
""" """
path = util.ensure_path(path) path = util.ensure_path(path)
path = path.with_suffix(".json") path = path.with_suffix(".json")
@ -186,7 +186,7 @@ class Sentencizer(Pipe):
def from_disk(self, path, *, exclude=tuple()): def from_disk(self, path, *, exclude=tuple()):
"""Load the sentencizer from disk. """Load the sentencizer from disk.
DOCS: https://nightly.spacy.io/api/sentencizer#from_disk DOCS: https://spacy.io/api/sentencizer#from_disk
""" """
path = util.ensure_path(path) path = util.ensure_path(path)
path = path.with_suffix(".json") path = path.with_suffix(".json")

View File

@ -44,7 +44,7 @@ def make_senter(nlp: Language, name: str, model: Model):
class SentenceRecognizer(Tagger): class SentenceRecognizer(Tagger):
"""Pipeline component for sentence segmentation. """Pipeline component for sentence segmentation.
DOCS: https://nightly.spacy.io/api/sentencerecognizer DOCS: https://spacy.io/api/sentencerecognizer
""" """
def __init__(self, vocab, model, name="senter"): def __init__(self, vocab, model, name="senter"):
"""Initialize a sentence recognizer. """Initialize a sentence recognizer.
@ -54,7 +54,7 @@ class SentenceRecognizer(Tagger):
name (str): The component instance name, used to add entries to the name (str): The component instance name, used to add entries to the
losses during training. losses during training.
DOCS: https://nightly.spacy.io/api/sentencerecognizer#init DOCS: https://spacy.io/api/sentencerecognizer#init
""" """
self.vocab = vocab self.vocab = vocab
self.model = model self.model = model
@ -80,7 +80,7 @@ class SentenceRecognizer(Tagger):
docs (Iterable[Doc]): The documents to modify. docs (Iterable[Doc]): The documents to modify.
batch_tag_ids: The IDs to set, produced by SentenceRecognizer.predict. batch_tag_ids: The IDs to set, produced by SentenceRecognizer.predict.
DOCS: https://nightly.spacy.io/api/sentencerecognizer#set_annotations DOCS: https://spacy.io/api/sentencerecognizer#set_annotations
""" """
if isinstance(docs, Doc): if isinstance(docs, Doc):
docs = [docs] docs = [docs]
@ -105,7 +105,7 @@ class SentenceRecognizer(Tagger):
scores: Scores representing the model's predictions. scores: Scores representing the model's predictions.
RETURNS (Tuple[float, float]): The loss and the gradient. RETURNS (Tuple[float, float]): The loss and the gradient.
DOCS: https://nightly.spacy.io/api/sentencerecognizer#get_loss DOCS: https://spacy.io/api/sentencerecognizer#get_loss
""" """
validate_examples(examples, "SentenceRecognizer.get_loss") validate_examples(examples, "SentenceRecognizer.get_loss")
labels = self.labels labels = self.labels
@ -135,7 +135,7 @@ class SentenceRecognizer(Tagger):
returns a representative sample of gold-standard Example objects. returns a representative sample of gold-standard Example objects.
nlp (Language): The current nlp object the component is part of. nlp (Language): The current nlp object the component is part of.
DOCS: https://nightly.spacy.io/api/sentencerecognizer#initialize DOCS: https://spacy.io/api/sentencerecognizer#initialize
""" """
validate_get_examples(get_examples, "SentenceRecognizer.initialize") validate_get_examples(get_examples, "SentenceRecognizer.initialize")
doc_sample = [] doc_sample = []
@ -158,7 +158,7 @@ class SentenceRecognizer(Tagger):
examples (Iterable[Example]): The examples to score. examples (Iterable[Example]): The examples to score.
RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans. RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans.
DOCS: https://nightly.spacy.io/api/sentencerecognizer#score DOCS: https://spacy.io/api/sentencerecognizer#score
""" """
def has_sents(doc): def has_sents(doc):
return doc.has_annotation("SENT_START") return doc.has_annotation("SENT_START")

View File

@ -58,7 +58,7 @@ def make_tagger(nlp: Language, name: str, model: Model):
class Tagger(TrainablePipe): class Tagger(TrainablePipe):
"""Pipeline component for part-of-speech tagging. """Pipeline component for part-of-speech tagging.
DOCS: https://nightly.spacy.io/api/tagger DOCS: https://spacy.io/api/tagger
""" """
def __init__(self, vocab, model, name="tagger"): def __init__(self, vocab, model, name="tagger"):
"""Initialize a part-of-speech tagger. """Initialize a part-of-speech tagger.
@ -68,7 +68,7 @@ class Tagger(TrainablePipe):
name (str): The component instance name, used to add entries to the name (str): The component instance name, used to add entries to the
losses during training. losses during training.
DOCS: https://nightly.spacy.io/api/tagger#init DOCS: https://spacy.io/api/tagger#init
""" """
self.vocab = vocab self.vocab = vocab
self.model = model self.model = model
@ -85,7 +85,7 @@ class Tagger(TrainablePipe):
RETURNS (Tuple[str]): The labels. RETURNS (Tuple[str]): The labels.
DOCS: https://nightly.spacy.io/api/tagger#labels DOCS: https://spacy.io/api/tagger#labels
""" """
return tuple(self.cfg["labels"]) return tuple(self.cfg["labels"])
@ -100,7 +100,7 @@ class Tagger(TrainablePipe):
docs (Iterable[Doc]): The documents to predict. docs (Iterable[Doc]): The documents to predict.
RETURNS: The model's prediction for each document. RETURNS: The model's prediction for each document.
DOCS: https://nightly.spacy.io/api/tagger#predict DOCS: https://spacy.io/api/tagger#predict
""" """
if not any(len(doc) for doc in docs): if not any(len(doc) for doc in docs):
# Handle cases where there are no tokens in any docs. # Handle cases where there are no tokens in any docs.
@ -129,7 +129,7 @@ class Tagger(TrainablePipe):
docs (Iterable[Doc]): The documents to modify. docs (Iterable[Doc]): The documents to modify.
batch_tag_ids: The IDs to set, produced by Tagger.predict. batch_tag_ids: The IDs to set, produced by Tagger.predict.
DOCS: https://nightly.spacy.io/api/tagger#set_annotations DOCS: https://spacy.io/api/tagger#set_annotations
""" """
if isinstance(docs, Doc): if isinstance(docs, Doc):
docs = [docs] docs = [docs]
@ -155,7 +155,7 @@ class Tagger(TrainablePipe):
Updated using the component name as the key. Updated using the component name as the key.
RETURNS (Dict[str, float]): The updated losses dictionary. RETURNS (Dict[str, float]): The updated losses dictionary.
DOCS: https://nightly.spacy.io/api/tagger#update DOCS: https://spacy.io/api/tagger#update
""" """
if losses is None: if losses is None:
losses = {} losses = {}
@ -190,7 +190,7 @@ class Tagger(TrainablePipe):
Updated using the component name as the key. Updated using the component name as the key.
RETURNS (Dict[str, float]): The updated losses dictionary. RETURNS (Dict[str, float]): The updated losses dictionary.
DOCS: https://nightly.spacy.io/api/tagger#rehearse DOCS: https://spacy.io/api/tagger#rehearse
""" """
if losses is None: if losses is None:
losses = {} losses = {}
@ -219,7 +219,7 @@ class Tagger(TrainablePipe):
scores: Scores representing the model's predictions. scores: Scores representing the model's predictions.
RETURNS (Tuple[float, float]): The loss and the gradient. RETURNS (Tuple[float, float]): The loss and the gradient.
DOCS: https://nightly.spacy.io/api/tagger#get_loss DOCS: https://spacy.io/api/tagger#get_loss
""" """
validate_examples(examples, "Tagger.get_loss") validate_examples(examples, "Tagger.get_loss")
loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False) loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False)
@ -246,7 +246,7 @@ class Tagger(TrainablePipe):
`init labels` command. If no labels are provided, the get_examples `init labels` command. If no labels are provided, the get_examples
callback is used to extract the labels from the data. callback is used to extract the labels from the data.
DOCS: https://nightly.spacy.io/api/tagger#initialize DOCS: https://spacy.io/api/tagger#initialize
""" """
validate_get_examples(get_examples, "Tagger.initialize") validate_get_examples(get_examples, "Tagger.initialize")
if labels is not None: if labels is not None:
@ -278,7 +278,7 @@ class Tagger(TrainablePipe):
label (str): The label to add. label (str): The label to add.
RETURNS (int): 0 if label is already present, otherwise 1. RETURNS (int): 0 if label is already present, otherwise 1.
DOCS: https://nightly.spacy.io/api/tagger#add_label DOCS: https://spacy.io/api/tagger#add_label
""" """
if not isinstance(label, str): if not isinstance(label, str):
raise ValueError(Errors.E187) raise ValueError(Errors.E187)
@ -296,7 +296,7 @@ class Tagger(TrainablePipe):
RETURNS (Dict[str, Any]): The scores, produced by RETURNS (Dict[str, Any]): The scores, produced by
Scorer.score_token_attr for the attributes "tag". Scorer.score_token_attr for the attributes "tag".
DOCS: https://nightly.spacy.io/api/tagger#score DOCS: https://spacy.io/api/tagger#score
""" """
validate_examples(examples, "Tagger.score") validate_examples(examples, "Tagger.score")
return Scorer.score_token_attr(examples, "tag", **kwargs) return Scorer.score_token_attr(examples, "tag", **kwargs)

View File

@ -104,7 +104,7 @@ def make_textcat(
class TextCategorizer(TrainablePipe): class TextCategorizer(TrainablePipe):
"""Pipeline component for single-label text classification. """Pipeline component for single-label text classification.
DOCS: https://nightly.spacy.io/api/textcategorizer DOCS: https://spacy.io/api/textcategorizer
""" """
def __init__( def __init__(
@ -118,7 +118,7 @@ class TextCategorizer(TrainablePipe):
losses during training. losses during training.
threshold (float): Cutoff to consider a prediction "positive". threshold (float): Cutoff to consider a prediction "positive".
DOCS: https://nightly.spacy.io/api/textcategorizer#init DOCS: https://spacy.io/api/textcategorizer#init
""" """
self.vocab = vocab self.vocab = vocab
self.model = model self.model = model
@ -131,7 +131,7 @@ class TextCategorizer(TrainablePipe):
def labels(self) -> Tuple[str]: def labels(self) -> Tuple[str]:
"""RETURNS (Tuple[str]): The labels currently added to the component. """RETURNS (Tuple[str]): The labels currently added to the component.
DOCS: https://nightly.spacy.io/api/textcategorizer#labels DOCS: https://spacy.io/api/textcategorizer#labels
""" """
return tuple(self.cfg["labels"]) return tuple(self.cfg["labels"])
@ -139,7 +139,7 @@ class TextCategorizer(TrainablePipe):
def label_data(self) -> List[str]: def label_data(self) -> List[str]:
"""RETURNS (List[str]): Information about the component's labels. """RETURNS (List[str]): Information about the component's labels.
DOCS: https://nightly.spacy.io/api/textcategorizer#label_data DOCS: https://spacy.io/api/textcategorizer#label_data
""" """
return self.labels return self.labels
@ -149,7 +149,7 @@ class TextCategorizer(TrainablePipe):
docs (Iterable[Doc]): The documents to predict. docs (Iterable[Doc]): The documents to predict.
RETURNS: The model's prediction for each document. RETURNS: The model's prediction for each document.
DOCS: https://nightly.spacy.io/api/textcategorizer#predict DOCS: https://spacy.io/api/textcategorizer#predict
""" """
if not any(len(doc) for doc in docs): if not any(len(doc) for doc in docs):
# Handle cases where there are no tokens in any docs. # Handle cases where there are no tokens in any docs.
@ -167,7 +167,7 @@ class TextCategorizer(TrainablePipe):
docs (Iterable[Doc]): The documents to modify. docs (Iterable[Doc]): The documents to modify.
scores: The scores to set, produced by TextCategorizer.predict. scores: The scores to set, produced by TextCategorizer.predict.
DOCS: https://nightly.spacy.io/api/textcategorizer#set_annotations DOCS: https://spacy.io/api/textcategorizer#set_annotations
""" """
for i, doc in enumerate(docs): for i, doc in enumerate(docs):
for j, label in enumerate(self.labels): for j, label in enumerate(self.labels):
@ -191,7 +191,7 @@ class TextCategorizer(TrainablePipe):
Updated using the component name as the key. Updated using the component name as the key.
RETURNS (Dict[str, float]): The updated losses dictionary. RETURNS (Dict[str, float]): The updated losses dictionary.
DOCS: https://nightly.spacy.io/api/textcategorizer#update DOCS: https://spacy.io/api/textcategorizer#update
""" """
if losses is None: if losses is None:
losses = {} losses = {}
@ -230,7 +230,7 @@ class TextCategorizer(TrainablePipe):
Updated using the component name as the key. Updated using the component name as the key.
RETURNS (Dict[str, float]): The updated losses dictionary. RETURNS (Dict[str, float]): The updated losses dictionary.
DOCS: https://nightly.spacy.io/api/textcategorizer#rehearse DOCS: https://spacy.io/api/textcategorizer#rehearse
""" """
if losses is not None: if losses is not None:
losses.setdefault(self.name, 0.0) losses.setdefault(self.name, 0.0)
@ -275,7 +275,7 @@ class TextCategorizer(TrainablePipe):
scores: Scores representing the model's predictions. scores: Scores representing the model's predictions.
RETURNS (Tuple[float, float]): The loss and the gradient. RETURNS (Tuple[float, float]): The loss and the gradient.
DOCS: https://nightly.spacy.io/api/textcategorizer#get_loss DOCS: https://spacy.io/api/textcategorizer#get_loss
""" """
validate_examples(examples, "TextCategorizer.get_loss") validate_examples(examples, "TextCategorizer.get_loss")
self._validate_categories(examples) self._validate_categories(examples)
@ -292,7 +292,7 @@ class TextCategorizer(TrainablePipe):
label (str): The label to add. label (str): The label to add.
RETURNS (int): 0 if label is already present, otherwise 1. RETURNS (int): 0 if label is already present, otherwise 1.
DOCS: https://nightly.spacy.io/api/textcategorizer#add_label DOCS: https://spacy.io/api/textcategorizer#add_label
""" """
if not isinstance(label, str): if not isinstance(label, str):
raise ValueError(Errors.E187) raise ValueError(Errors.E187)
@ -321,7 +321,7 @@ class TextCategorizer(TrainablePipe):
`init labels` command. If no labels are provided, the get_examples `init labels` command. If no labels are provided, the get_examples
callback is used to extract the labels from the data. callback is used to extract the labels from the data.
DOCS: https://nightly.spacy.io/api/textcategorizer#initialize DOCS: https://spacy.io/api/textcategorizer#initialize
""" """
validate_get_examples(get_examples, "TextCategorizer.initialize") validate_get_examples(get_examples, "TextCategorizer.initialize")
self._validate_categories(get_examples()) self._validate_categories(get_examples())
@ -354,7 +354,7 @@ class TextCategorizer(TrainablePipe):
examples (Iterable[Example]): The examples to score. examples (Iterable[Example]): The examples to score.
RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_cats. RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_cats.
DOCS: https://nightly.spacy.io/api/textcategorizer#score DOCS: https://spacy.io/api/textcategorizer#score
""" """
validate_examples(examples, "TextCategorizer.score") validate_examples(examples, "TextCategorizer.score")
self._validate_categories(examples) self._validate_categories(examples)

View File

@ -104,7 +104,7 @@ def make_multilabel_textcat(
class MultiLabel_TextCategorizer(TextCategorizer): class MultiLabel_TextCategorizer(TextCategorizer):
"""Pipeline component for multi-label text classification. """Pipeline component for multi-label text classification.
DOCS: https://nightly.spacy.io/api/multilabel_textcategorizer DOCS: https://spacy.io/api/multilabel_textcategorizer
""" """
def __init__( def __init__(
@ -123,7 +123,7 @@ class MultiLabel_TextCategorizer(TextCategorizer):
losses during training. losses during training.
threshold (float): Cutoff to consider a prediction "positive". threshold (float): Cutoff to consider a prediction "positive".
DOCS: https://nightly.spacy.io/api/multilabel_textcategorizer#init DOCS: https://spacy.io/api/multilabel_textcategorizer#init
""" """
self.vocab = vocab self.vocab = vocab
self.model = model self.model = model
@ -149,7 +149,7 @@ class MultiLabel_TextCategorizer(TextCategorizer):
`init labels` command. If no labels are provided, the get_examples `init labels` command. If no labels are provided, the get_examples
callback is used to extract the labels from the data. callback is used to extract the labels from the data.
DOCS: https://nightly.spacy.io/api/multilabel_textcategorizer#initialize DOCS: https://spacy.io/api/multilabel_textcategorizer#initialize
""" """
validate_get_examples(get_examples, "MultiLabel_TextCategorizer.initialize") validate_get_examples(get_examples, "MultiLabel_TextCategorizer.initialize")
if labels is None: if labels is None:
@ -173,7 +173,7 @@ class MultiLabel_TextCategorizer(TextCategorizer):
examples (Iterable[Example]): The examples to score. examples (Iterable[Example]): The examples to score.
RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_cats. RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_cats.
DOCS: https://nightly.spacy.io/api/multilabel_textcategorizer#score DOCS: https://spacy.io/api/multilabel_textcategorizer#score
""" """
validate_examples(examples, "MultiLabel_TextCategorizer.score") validate_examples(examples, "MultiLabel_TextCategorizer.score")
return Scorer.score_cats( return Scorer.score_cats(

View File

@ -55,7 +55,7 @@ class Tok2Vec(TrainablePipe):
a list of Doc objects as input, and output a list of 2d float arrays. a list of Doc objects as input, and output a list of 2d float arrays.
name (str): The component instance name. name (str): The component instance name.
DOCS: https://nightly.spacy.io/api/tok2vec#init DOCS: https://spacy.io/api/tok2vec#init
""" """
self.vocab = vocab self.vocab = vocab
self.model = model self.model = model
@ -115,7 +115,7 @@ class Tok2Vec(TrainablePipe):
docs (Iterable[Doc]): The documents to predict. docs (Iterable[Doc]): The documents to predict.
RETURNS: Vector representations for each token in the documents. RETURNS: Vector representations for each token in the documents.
DOCS: https://nightly.spacy.io/api/tok2vec#predict DOCS: https://spacy.io/api/tok2vec#predict
""" """
tokvecs = self.model.predict(docs) tokvecs = self.model.predict(docs)
batch_id = Tok2VecListener.get_batch_id(docs) batch_id = Tok2VecListener.get_batch_id(docs)
@ -129,7 +129,7 @@ class Tok2Vec(TrainablePipe):
docs (Iterable[Doc]): The documents to modify. docs (Iterable[Doc]): The documents to modify.
tokvecses: The tensors to set, produced by Tok2Vec.predict. tokvecses: The tensors to set, produced by Tok2Vec.predict.
DOCS: https://nightly.spacy.io/api/tok2vec#set_annotations DOCS: https://spacy.io/api/tok2vec#set_annotations
""" """
for doc, tokvecs in zip(docs, tokvecses): for doc, tokvecs in zip(docs, tokvecses):
assert tokvecs.shape[0] == len(doc) assert tokvecs.shape[0] == len(doc)
@ -153,7 +153,7 @@ class Tok2Vec(TrainablePipe):
Updated using the component name as the key. Updated using the component name as the key.
RETURNS (Dict[str, float]): The updated losses dictionary. RETURNS (Dict[str, float]): The updated losses dictionary.
DOCS: https://nightly.spacy.io/api/tok2vec#update DOCS: https://spacy.io/api/tok2vec#update
""" """
if losses is None: if losses is None:
losses = {} losses = {}
@ -204,7 +204,7 @@ class Tok2Vec(TrainablePipe):
returns a representative sample of gold-standard Example objects. returns a representative sample of gold-standard Example objects.
nlp (Language): The current nlp object the component is part of. nlp (Language): The current nlp object the component is part of.
DOCS: https://nightly.spacy.io/api/tok2vec#initialize DOCS: https://spacy.io/api/tok2vec#initialize
""" """
validate_get_examples(get_examples, "Tok2Vec.initialize") validate_get_examples(get_examples, "Tok2Vec.initialize")
doc_sample = [] doc_sample = []

View File

@ -20,7 +20,7 @@ cdef class TrainablePipe(Pipe):
from it and it defines the interface that components should follow to from it and it defines the interface that components should follow to
function as trainable components in a spaCy pipeline. function as trainable components in a spaCy pipeline.
DOCS: https://nightly.spacy.io/api/pipe DOCS: https://spacy.io/api/pipe
""" """
def __init__(self, vocab: Vocab, model: Model, name: str, **cfg): def __init__(self, vocab: Vocab, model: Model, name: str, **cfg):
"""Initialize a pipeline component. """Initialize a pipeline component.
@ -30,7 +30,7 @@ cdef class TrainablePipe(Pipe):
name (str): The component instance name. name (str): The component instance name.
**cfg: Additional settings and config parameters. **cfg: Additional settings and config parameters.
DOCS: https://nightly.spacy.io/api/pipe#init DOCS: https://spacy.io/api/pipe#init
""" """
self.vocab = vocab self.vocab = vocab
self.model = model self.model = model
@ -45,7 +45,7 @@ cdef class TrainablePipe(Pipe):
docs (Doc): The Doc to process. docs (Doc): The Doc to process.
RETURNS (Doc): The processed Doc. RETURNS (Doc): The processed Doc.
DOCS: https://nightly.spacy.io/api/pipe#call DOCS: https://spacy.io/api/pipe#call
""" """
error_handler = self.get_error_handler() error_handler = self.get_error_handler()
try: try:
@ -67,7 +67,7 @@ cdef class TrainablePipe(Pipe):
the exception. the exception.
YIELDS (Doc): Processed documents in order. YIELDS (Doc): Processed documents in order.
DOCS: https://nightly.spacy.io/api/pipe#pipe DOCS: https://spacy.io/api/pipe#pipe
""" """
error_handler = self.get_error_handler() error_handler = self.get_error_handler()
for docs in util.minibatch(stream, size=batch_size): for docs in util.minibatch(stream, size=batch_size):
@ -85,7 +85,7 @@ cdef class TrainablePipe(Pipe):
docs (Iterable[Doc]): The documents to predict. docs (Iterable[Doc]): The documents to predict.
RETURNS: Vector representations of the predictions. RETURNS: Vector representations of the predictions.
DOCS: https://nightly.spacy.io/api/pipe#predict DOCS: https://spacy.io/api/pipe#predict
""" """
raise NotImplementedError(Errors.E931.format(parent="TrainablePipe", method="predict", name=self.name)) raise NotImplementedError(Errors.E931.format(parent="TrainablePipe", method="predict", name=self.name))
@ -95,7 +95,7 @@ cdef class TrainablePipe(Pipe):
docs (Iterable[Doc]): The documents to modify. docs (Iterable[Doc]): The documents to modify.
scores: The scores to assign. scores: The scores to assign.
DOCS: https://nightly.spacy.io/api/pipe#set_annotations DOCS: https://spacy.io/api/pipe#set_annotations
""" """
raise NotImplementedError(Errors.E931.format(parent="TrainablePipe", method="set_annotations", name=self.name)) raise NotImplementedError(Errors.E931.format(parent="TrainablePipe", method="set_annotations", name=self.name))
@ -114,7 +114,7 @@ cdef class TrainablePipe(Pipe):
Updated using the component name as the key. Updated using the component name as the key.
RETURNS (Dict[str, float]): The updated losses dictionary. RETURNS (Dict[str, float]): The updated losses dictionary.
DOCS: https://nightly.spacy.io/api/pipe#update DOCS: https://spacy.io/api/pipe#update
""" """
if losses is None: if losses is None:
losses = {} losses = {}
@ -151,7 +151,7 @@ cdef class TrainablePipe(Pipe):
Updated using the component name as the key. Updated using the component name as the key.
RETURNS (Dict[str, float]): The updated losses dictionary. RETURNS (Dict[str, float]): The updated losses dictionary.
DOCS: https://nightly.spacy.io/api/pipe#rehearse DOCS: https://spacy.io/api/pipe#rehearse
""" """
pass pass
@ -163,7 +163,7 @@ cdef class TrainablePipe(Pipe):
scores: Scores representing the model's predictions. scores: Scores representing the model's predictions.
RETURNS (Tuple[float, float]): The loss and the gradient. RETURNS (Tuple[float, float]): The loss and the gradient.
DOCS: https://nightly.spacy.io/api/pipe#get_loss DOCS: https://spacy.io/api/pipe#get_loss
""" """
raise NotImplementedError(Errors.E931.format(parent="TrainablePipe", method="get_loss", name=self.name)) raise NotImplementedError(Errors.E931.format(parent="TrainablePipe", method="get_loss", name=self.name))
@ -172,7 +172,7 @@ cdef class TrainablePipe(Pipe):
RETURNS (thinc.api.Optimizer): The optimizer. RETURNS (thinc.api.Optimizer): The optimizer.
DOCS: https://nightly.spacy.io/api/pipe#create_optimizer DOCS: https://spacy.io/api/pipe#create_optimizer
""" """
return util.create_default_optimizer() return util.create_default_optimizer()
@ -186,7 +186,7 @@ cdef class TrainablePipe(Pipe):
returns a representative sample of gold-standard Example objects. returns a representative sample of gold-standard Example objects.
nlp (Language): The current nlp object the component is part of. nlp (Language): The current nlp object the component is part of.
DOCS: https://nightly.spacy.io/api/pipe#initialize DOCS: https://spacy.io/api/pipe#initialize
""" """
raise NotImplementedError(Errors.E931.format(parent="TrainablePipe", method="initialize", name=self.name)) raise NotImplementedError(Errors.E931.format(parent="TrainablePipe", method="initialize", name=self.name))
@ -199,7 +199,7 @@ cdef class TrainablePipe(Pipe):
label (str): The label to add. label (str): The label to add.
RETURNS (int): 0 if label is already present, otherwise 1. RETURNS (int): 0 if label is already present, otherwise 1.
DOCS: https://nightly.spacy.io/api/pipe#add_label DOCS: https://spacy.io/api/pipe#add_label
""" """
raise NotImplementedError(Errors.E931.format(parent="Pipe", method="add_label", name=self.name)) raise NotImplementedError(Errors.E931.format(parent="Pipe", method="add_label", name=self.name))
@ -229,7 +229,7 @@ cdef class TrainablePipe(Pipe):
params (dict): The parameter values to use in the model. params (dict): The parameter values to use in the model.
DOCS: https://nightly.spacy.io/api/pipe#use_params DOCS: https://spacy.io/api/pipe#use_params
""" """
with self.model.use_params(params): with self.model.use_params(params):
yield yield
@ -241,7 +241,7 @@ cdef class TrainablePipe(Pipe):
sgd (thinc.api.Optimizer): The optimizer. sgd (thinc.api.Optimizer): The optimizer.
DOCS: https://nightly.spacy.io/api/pipe#finish_update DOCS: https://spacy.io/api/pipe#finish_update
""" """
self.model.finish_update(sgd) self.model.finish_update(sgd)
@ -261,7 +261,7 @@ cdef class TrainablePipe(Pipe):
exclude (Iterable[str]): String names of serialization fields to exclude. exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (bytes): The serialized object. RETURNS (bytes): The serialized object.
DOCS: https://nightly.spacy.io/api/pipe#to_bytes DOCS: https://spacy.io/api/pipe#to_bytes
""" """
self._validate_serialization_attrs() self._validate_serialization_attrs()
serialize = {} serialize = {}
@ -277,7 +277,7 @@ cdef class TrainablePipe(Pipe):
exclude (Iterable[str]): String names of serialization fields to exclude. exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (TrainablePipe): The loaded object. RETURNS (TrainablePipe): The loaded object.
DOCS: https://nightly.spacy.io/api/pipe#from_bytes DOCS: https://spacy.io/api/pipe#from_bytes
""" """
self._validate_serialization_attrs() self._validate_serialization_attrs()
@ -301,7 +301,7 @@ cdef class TrainablePipe(Pipe):
path (str / Path): Path to a directory. path (str / Path): Path to a directory.
exclude (Iterable[str]): String names of serialization fields to exclude. exclude (Iterable[str]): String names of serialization fields to exclude.
DOCS: https://nightly.spacy.io/api/pipe#to_disk DOCS: https://spacy.io/api/pipe#to_disk
""" """
self._validate_serialization_attrs() self._validate_serialization_attrs()
serialize = {} serialize = {}
@ -318,7 +318,7 @@ cdef class TrainablePipe(Pipe):
exclude (Iterable[str]): String names of serialization fields to exclude. exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (TrainablePipe): The loaded object. RETURNS (TrainablePipe): The loaded object.
DOCS: https://nightly.spacy.io/api/pipe#from_disk DOCS: https://spacy.io/api/pipe#from_disk
""" """
self._validate_serialization_attrs() self._validate_serialization_attrs()

View File

@ -103,7 +103,7 @@ class Scorer:
) -> None: ) -> None:
"""Initialize the Scorer. """Initialize the Scorer.
DOCS: https://nightly.spacy.io/api/scorer#init DOCS: https://spacy.io/api/scorer#init
""" """
self.nlp = nlp self.nlp = nlp
self.cfg = cfg self.cfg = cfg
@ -119,7 +119,7 @@ class Scorer:
examples (Iterable[Example]): The predicted annotations + correct annotations. examples (Iterable[Example]): The predicted annotations + correct annotations.
RETURNS (Dict): A dictionary of scores. RETURNS (Dict): A dictionary of scores.
DOCS: https://nightly.spacy.io/api/scorer#score DOCS: https://spacy.io/api/scorer#score
""" """
scores = {} scores = {}
if hasattr(self.nlp.tokenizer, "score"): if hasattr(self.nlp.tokenizer, "score"):
@ -139,7 +139,7 @@ class Scorer:
RETURNS (Dict[str, Any]): A dictionary containing the scores RETURNS (Dict[str, Any]): A dictionary containing the scores
token_acc/p/r/f. token_acc/p/r/f.
DOCS: https://nightly.spacy.io/api/scorer#score_tokenization DOCS: https://spacy.io/api/scorer#score_tokenization
""" """
acc_score = PRFScore() acc_score = PRFScore()
prf_score = PRFScore() prf_score = PRFScore()
@ -198,7 +198,7 @@ class Scorer:
RETURNS (Dict[str, Any]): A dictionary containing the accuracy score RETURNS (Dict[str, Any]): A dictionary containing the accuracy score
under the key attr_acc. under the key attr_acc.
DOCS: https://nightly.spacy.io/api/scorer#score_token_attr DOCS: https://spacy.io/api/scorer#score_token_attr
""" """
tag_score = PRFScore() tag_score = PRFScore()
for example in examples: for example in examples:
@ -317,7 +317,7 @@ class Scorer:
RETURNS (Dict[str, Any]): A dictionary containing the PRF scores under RETURNS (Dict[str, Any]): A dictionary containing the PRF scores under
the keys attr_p/r/f and the per-type PRF scores under attr_per_type. the keys attr_p/r/f and the per-type PRF scores under attr_per_type.
DOCS: https://nightly.spacy.io/api/scorer#score_spans DOCS: https://spacy.io/api/scorer#score_spans
""" """
score = PRFScore() score = PRFScore()
score_per_type = dict() score_per_type = dict()
@ -413,7 +413,7 @@ class Scorer:
attr_f_per_type, attr_f_per_type,
attr_auc_per_type attr_auc_per_type
DOCS: https://nightly.spacy.io/api/scorer#score_cats DOCS: https://spacy.io/api/scorer#score_cats
""" """
if threshold is None: if threshold is None:
threshold = 0.5 if multi_label else 0.0 threshold = 0.5 if multi_label else 0.0
@ -519,7 +519,7 @@ class Scorer:
negative_labels (Iterable[str]): The string values that refer to no annotation (e.g. "NIL") negative_labels (Iterable[str]): The string values that refer to no annotation (e.g. "NIL")
RETURNS (Dict[str, Any]): A dictionary containing the scores. RETURNS (Dict[str, Any]): A dictionary containing the scores.
DOCS: https://nightly.spacy.io/api/scorer#score_links DOCS: https://spacy.io/api/scorer#score_links
""" """
f_per_type = {} f_per_type = {}
for example in examples: for example in examples:
@ -603,7 +603,7 @@ class Scorer:
RETURNS (Dict[str, Any]): A dictionary containing the scores: RETURNS (Dict[str, Any]): A dictionary containing the scores:
attr_uas, attr_las, and attr_las_per_type. attr_uas, attr_las, and attr_las_per_type.
DOCS: https://nightly.spacy.io/api/scorer#score_deps DOCS: https://spacy.io/api/scorer#score_deps
""" """
unlabelled = PRFScore() unlabelled = PRFScore()
labelled = PRFScore() labelled = PRFScore()

View File

@ -91,7 +91,7 @@ cdef Utf8Str* _allocate(Pool mem, const unsigned char* chars, uint32_t length) e
cdef class StringStore: cdef class StringStore:
"""Look up strings by 64-bit hashes. """Look up strings by 64-bit hashes.
DOCS: https://nightly.spacy.io/api/stringstore DOCS: https://spacy.io/api/stringstore
""" """
def __init__(self, strings=None, freeze=False): def __init__(self, strings=None, freeze=False):
"""Create the StringStore. """Create the StringStore.

View File

@ -31,7 +31,7 @@ cdef class Tokenizer:
"""Segment text, and create Doc objects with the discovered segment """Segment text, and create Doc objects with the discovered segment
boundaries. boundaries.
DOCS: https://nightly.spacy.io/api/tokenizer DOCS: https://spacy.io/api/tokenizer
""" """
def __init__(self, Vocab vocab, rules=None, prefix_search=None, def __init__(self, Vocab vocab, rules=None, prefix_search=None,
suffix_search=None, infix_finditer=None, token_match=None, suffix_search=None, infix_finditer=None, token_match=None,
@ -54,7 +54,7 @@ cdef class Tokenizer:
EXAMPLE: EXAMPLE:
>>> tokenizer = Tokenizer(nlp.vocab) >>> tokenizer = Tokenizer(nlp.vocab)
DOCS: https://nightly.spacy.io/api/tokenizer#init DOCS: https://spacy.io/api/tokenizer#init
""" """
self.mem = Pool() self.mem = Pool()
self._cache = PreshMap() self._cache = PreshMap()
@ -147,7 +147,7 @@ cdef class Tokenizer:
string (str): The string to tokenize. string (str): The string to tokenize.
RETURNS (Doc): A container for linguistic annotations. RETURNS (Doc): A container for linguistic annotations.
DOCS: https://nightly.spacy.io/api/tokenizer#call DOCS: https://spacy.io/api/tokenizer#call
""" """
doc = self._tokenize_affixes(string, True) doc = self._tokenize_affixes(string, True)
self._apply_special_cases(doc) self._apply_special_cases(doc)
@ -209,7 +209,7 @@ cdef class Tokenizer:
Defaults to 1000. Defaults to 1000.
YIELDS (Doc): A sequence of Doc objects, in order. YIELDS (Doc): A sequence of Doc objects, in order.
DOCS: https://nightly.spacy.io/api/tokenizer#pipe DOCS: https://spacy.io/api/tokenizer#pipe
""" """
for text in texts: for text in texts:
yield self(text) yield self(text)
@ -529,7 +529,7 @@ cdef class Tokenizer:
and `.end()` methods, denoting the placement of internal segment and `.end()` methods, denoting the placement of internal segment
separators, e.g. hyphens. separators, e.g. hyphens.
DOCS: https://nightly.spacy.io/api/tokenizer#find_infix DOCS: https://spacy.io/api/tokenizer#find_infix
""" """
if self.infix_finditer is None: if self.infix_finditer is None:
return 0 return 0
@ -542,7 +542,7 @@ cdef class Tokenizer:
string (str): The string to segment. string (str): The string to segment.
RETURNS (int): The length of the prefix if present, otherwise `None`. RETURNS (int): The length of the prefix if present, otherwise `None`.
DOCS: https://nightly.spacy.io/api/tokenizer#find_prefix DOCS: https://spacy.io/api/tokenizer#find_prefix
""" """
if self.prefix_search is None: if self.prefix_search is None:
return 0 return 0
@ -556,7 +556,7 @@ cdef class Tokenizer:
string (str): The string to segment. string (str): The string to segment.
RETURNS (int): The length of the suffix if present, otherwise `None`. RETURNS (int): The length of the suffix if present, otherwise `None`.
DOCS: https://nightly.spacy.io/api/tokenizer#find_suffix DOCS: https://spacy.io/api/tokenizer#find_suffix
""" """
if self.suffix_search is None: if self.suffix_search is None:
return 0 return 0
@ -596,7 +596,7 @@ cdef class Tokenizer:
a token and its attributes. The `ORTH` fields of the attributes a token and its attributes. The `ORTH` fields of the attributes
must exactly match the string when they are concatenated. must exactly match the string when they are concatenated.
DOCS: https://nightly.spacy.io/api/tokenizer#add_special_case DOCS: https://spacy.io/api/tokenizer#add_special_case
""" """
self._validate_special_case(string, substrings) self._validate_special_case(string, substrings)
substrings = list(substrings) substrings = list(substrings)
@ -635,7 +635,7 @@ cdef class Tokenizer:
string (str): The string to tokenize. string (str): The string to tokenize.
RETURNS (list): A list of (pattern_string, token_string) tuples RETURNS (list): A list of (pattern_string, token_string) tuples
DOCS: https://nightly.spacy.io/api/tokenizer#explain DOCS: https://spacy.io/api/tokenizer#explain
""" """
prefix_search = self.prefix_search prefix_search = self.prefix_search
suffix_search = self.suffix_search suffix_search = self.suffix_search
@ -718,7 +718,7 @@ cdef class Tokenizer:
it doesn't exist. it doesn't exist.
exclude (list): String names of serialization fields to exclude. exclude (list): String names of serialization fields to exclude.
DOCS: https://nightly.spacy.io/api/tokenizer#to_disk DOCS: https://spacy.io/api/tokenizer#to_disk
""" """
path = util.ensure_path(path) path = util.ensure_path(path)
with path.open("wb") as file_: with path.open("wb") as file_:
@ -732,7 +732,7 @@ cdef class Tokenizer:
exclude (list): String names of serialization fields to exclude. exclude (list): String names of serialization fields to exclude.
RETURNS (Tokenizer): The modified `Tokenizer` object. RETURNS (Tokenizer): The modified `Tokenizer` object.
DOCS: https://nightly.spacy.io/api/tokenizer#from_disk DOCS: https://spacy.io/api/tokenizer#from_disk
""" """
path = util.ensure_path(path) path = util.ensure_path(path)
with path.open("rb") as file_: with path.open("rb") as file_:
@ -746,7 +746,7 @@ cdef class Tokenizer:
exclude (list): String names of serialization fields to exclude. exclude (list): String names of serialization fields to exclude.
RETURNS (bytes): The serialized form of the `Tokenizer` object. RETURNS (bytes): The serialized form of the `Tokenizer` object.
DOCS: https://nightly.spacy.io/api/tokenizer#to_bytes DOCS: https://spacy.io/api/tokenizer#to_bytes
""" """
serializers = { serializers = {
"vocab": lambda: self.vocab.to_bytes(), "vocab": lambda: self.vocab.to_bytes(),
@ -766,7 +766,7 @@ cdef class Tokenizer:
exclude (list): String names of serialization fields to exclude. exclude (list): String names of serialization fields to exclude.
RETURNS (Tokenizer): The `Tokenizer` object. RETURNS (Tokenizer): The `Tokenizer` object.
DOCS: https://nightly.spacy.io/api/tokenizer#from_bytes DOCS: https://spacy.io/api/tokenizer#from_bytes
""" """
data = {} data = {}
deserializers = { deserializers = {

View File

@ -24,8 +24,8 @@ from ..strings import get_string_id
cdef class Retokenizer: cdef class Retokenizer:
"""Helper class for doc.retokenize() context manager. """Helper class for doc.retokenize() context manager.
DOCS: https://nightly.spacy.io/api/doc#retokenize DOCS: https://spacy.io/api/doc#retokenize
USAGE: https://nightly.spacy.io/usage/linguistic-features#retokenization USAGE: https://spacy.io/usage/linguistic-features#retokenization
""" """
cdef Doc doc cdef Doc doc
cdef list merges cdef list merges
@ -47,7 +47,7 @@ cdef class Retokenizer:
span (Span): The span to merge. span (Span): The span to merge.
attrs (dict): Attributes to set on the merged token. attrs (dict): Attributes to set on the merged token.
DOCS: https://nightly.spacy.io/api/doc#retokenizer.merge DOCS: https://spacy.io/api/doc#retokenizer.merge
""" """
if (span.start, span.end) in self._spans_to_merge: if (span.start, span.end) in self._spans_to_merge:
return return
@ -73,7 +73,7 @@ cdef class Retokenizer:
attrs (dict): Attributes to set on all split tokens. Attribute names attrs (dict): Attributes to set on all split tokens. Attribute names
mapped to list of per-token attribute values. mapped to list of per-token attribute values.
DOCS: https://nightly.spacy.io/api/doc#retokenizer.split DOCS: https://spacy.io/api/doc#retokenizer.split
""" """
if ''.join(orths) != token.text: if ''.join(orths) != token.text:
raise ValueError(Errors.E117.format(new=''.join(orths), old=token.text)) raise ValueError(Errors.E117.format(new=''.join(orths), old=token.text))

View File

@ -62,7 +62,7 @@ class DocBin:
store_user_data (bool): Whether to write the `Doc.user_data` to bytes/file. store_user_data (bool): Whether to write the `Doc.user_data` to bytes/file.
docs (Iterable[Doc]): Docs to add. docs (Iterable[Doc]): Docs to add.
DOCS: https://nightly.spacy.io/api/docbin#init DOCS: https://spacy.io/api/docbin#init
""" """
attrs = sorted([intify_attr(attr) for attr in attrs]) attrs = sorted([intify_attr(attr) for attr in attrs])
self.version = "0.1" self.version = "0.1"
@ -88,7 +88,7 @@ class DocBin:
doc (Doc): The Doc object to add. doc (Doc): The Doc object to add.
DOCS: https://nightly.spacy.io/api/docbin#add DOCS: https://spacy.io/api/docbin#add
""" """
array = doc.to_array(self.attrs) array = doc.to_array(self.attrs)
if len(array.shape) == 1: if len(array.shape) == 1:
@ -122,7 +122,7 @@ class DocBin:
vocab (Vocab): The shared vocab. vocab (Vocab): The shared vocab.
YIELDS (Doc): The Doc objects. YIELDS (Doc): The Doc objects.
DOCS: https://nightly.spacy.io/api/docbin#get_docs DOCS: https://spacy.io/api/docbin#get_docs
""" """
for string in self.strings: for string in self.strings:
vocab[string] vocab[string]
@ -153,7 +153,7 @@ class DocBin:
other (DocBin): The DocBin to merge into the current bin. other (DocBin): The DocBin to merge into the current bin.
DOCS: https://nightly.spacy.io/api/docbin#merge DOCS: https://spacy.io/api/docbin#merge
""" """
if self.attrs != other.attrs: if self.attrs != other.attrs:
raise ValueError( raise ValueError(
@ -180,7 +180,7 @@ class DocBin:
RETURNS (bytes): The serialized DocBin. RETURNS (bytes): The serialized DocBin.
DOCS: https://nightly.spacy.io/api/docbin#to_bytes DOCS: https://spacy.io/api/docbin#to_bytes
""" """
for tokens in self.tokens: for tokens in self.tokens:
assert len(tokens.shape) == 2, tokens.shape # this should never happen assert len(tokens.shape) == 2, tokens.shape # this should never happen
@ -208,7 +208,7 @@ class DocBin:
bytes_data (bytes): The data to load from. bytes_data (bytes): The data to load from.
RETURNS (DocBin): The loaded DocBin. RETURNS (DocBin): The loaded DocBin.
DOCS: https://nightly.spacy.io/api/docbin#from_bytes DOCS: https://spacy.io/api/docbin#from_bytes
""" """
try: try:
msg = srsly.msgpack_loads(zlib.decompress(bytes_data)) msg = srsly.msgpack_loads(zlib.decompress(bytes_data))
@ -240,7 +240,7 @@ class DocBin:
path (str / Path): The file path. path (str / Path): The file path.
DOCS: https://nightly.spacy.io/api/docbin#to_disk DOCS: https://spacy.io/api/docbin#to_disk
""" """
path = ensure_path(path) path = ensure_path(path)
with path.open("wb") as file_: with path.open("wb") as file_:
@ -252,7 +252,7 @@ class DocBin:
path (str / Path): The file path. path (str / Path): The file path.
RETURNS (DocBin): The loaded DocBin. RETURNS (DocBin): The loaded DocBin.
DOCS: https://nightly.spacy.io/api/docbin#from_disk DOCS: https://spacy.io/api/docbin#from_disk
""" """
path = ensure_path(path) path = ensure_path(path)
with path.open("rb") as file_: with path.open("rb") as file_:

View File

@ -116,7 +116,7 @@ cdef class Doc:
>>> from spacy.tokens import Doc >>> from spacy.tokens import Doc
>>> doc = Doc(nlp.vocab, words=["hello", "world", "!"], spaces=[True, False, False]) >>> doc = Doc(nlp.vocab, words=["hello", "world", "!"], spaces=[True, False, False])
DOCS: https://nightly.spacy.io/api/doc DOCS: https://spacy.io/api/doc
""" """
@classmethod @classmethod
@ -130,8 +130,8 @@ cdef class Doc:
method (callable): Optional method for method extension. method (callable): Optional method for method extension.
force (bool): Force overwriting existing attribute. force (bool): Force overwriting existing attribute.
DOCS: https://nightly.spacy.io/api/doc#set_extension DOCS: https://spacy.io/api/doc#set_extension
USAGE: https://nightly.spacy.io/usage/processing-pipelines#custom-components-attributes USAGE: https://spacy.io/usage/processing-pipelines#custom-components-attributes
""" """
if cls.has_extension(name) and not kwargs.get("force", False): if cls.has_extension(name) and not kwargs.get("force", False):
raise ValueError(Errors.E090.format(name=name, obj="Doc")) raise ValueError(Errors.E090.format(name=name, obj="Doc"))
@ -144,7 +144,7 @@ cdef class Doc:
name (str): Name of the extension. name (str): Name of the extension.
RETURNS (tuple): A `(default, method, getter, setter)` tuple. RETURNS (tuple): A `(default, method, getter, setter)` tuple.
DOCS: https://nightly.spacy.io/api/doc#get_extension DOCS: https://spacy.io/api/doc#get_extension
""" """
return Underscore.doc_extensions.get(name) return Underscore.doc_extensions.get(name)
@ -155,7 +155,7 @@ cdef class Doc:
name (str): Name of the extension. name (str): Name of the extension.
RETURNS (bool): Whether the extension has been registered. RETURNS (bool): Whether the extension has been registered.
DOCS: https://nightly.spacy.io/api/doc#has_extension DOCS: https://spacy.io/api/doc#has_extension
""" """
return name in Underscore.doc_extensions return name in Underscore.doc_extensions
@ -167,7 +167,7 @@ cdef class Doc:
RETURNS (tuple): A `(default, method, getter, setter)` tuple of the RETURNS (tuple): A `(default, method, getter, setter)` tuple of the
removed extension. removed extension.
DOCS: https://nightly.spacy.io/api/doc#remove_extension DOCS: https://spacy.io/api/doc#remove_extension
""" """
if not cls.has_extension(name): if not cls.has_extension(name):
raise ValueError(Errors.E046.format(name=name)) raise ValueError(Errors.E046.format(name=name))
@ -219,7 +219,7 @@ cdef class Doc:
length as words, as IOB tags to assign as token.ent_iob and length as words, as IOB tags to assign as token.ent_iob and
token.ent_type. Defaults to None. token.ent_type. Defaults to None.
DOCS: https://nightly.spacy.io/api/doc#init DOCS: https://spacy.io/api/doc#init
""" """
self.vocab = vocab self.vocab = vocab
size = max(20, (len(words) if words is not None else 0)) size = max(20, (len(words) if words is not None else 0))
@ -399,7 +399,7 @@ cdef class Doc:
every token in the doc. every token in the doc.
RETURNS (bool): Whether annotation is present. RETURNS (bool): Whether annotation is present.
DOCS: https://nightly.spacy.io/api/doc#has_annotation DOCS: https://spacy.io/api/doc#has_annotation
""" """
# empty docs are always annotated # empty docs are always annotated
@ -450,7 +450,7 @@ cdef class Doc:
You can use negative indices and open-ended ranges, which have You can use negative indices and open-ended ranges, which have
their normal Python semantics. their normal Python semantics.
DOCS: https://nightly.spacy.io/api/doc#getitem DOCS: https://spacy.io/api/doc#getitem
""" """
if isinstance(i, slice): if isinstance(i, slice):
start, stop = util.normalize_slice(len(self), i.start, i.stop, i.step) start, stop = util.normalize_slice(len(self), i.start, i.stop, i.step)
@ -467,7 +467,7 @@ cdef class Doc:
than-Python speeds are required, you can instead access the annotations than-Python speeds are required, you can instead access the annotations
as a numpy array, or access the underlying C data directly from Cython. as a numpy array, or access the underlying C data directly from Cython.
DOCS: https://nightly.spacy.io/api/doc#iter DOCS: https://spacy.io/api/doc#iter
""" """
cdef int i cdef int i
for i in range(self.length): for i in range(self.length):
@ -478,7 +478,7 @@ cdef class Doc:
RETURNS (int): The number of tokens in the document. RETURNS (int): The number of tokens in the document.
DOCS: https://nightly.spacy.io/api/doc#len DOCS: https://spacy.io/api/doc#len
""" """
return self.length return self.length
@ -519,7 +519,7 @@ cdef class Doc:
partially covered by the character span). Defaults to "strict". partially covered by the character span). Defaults to "strict".
RETURNS (Span): The newly constructed object. RETURNS (Span): The newly constructed object.
DOCS: https://nightly.spacy.io/api/doc#char_span DOCS: https://spacy.io/api/doc#char_span
""" """
if not isinstance(label, int): if not isinstance(label, int):
label = self.vocab.strings.add(label) label = self.vocab.strings.add(label)
@ -562,7 +562,7 @@ cdef class Doc:
`Span`, `Token` and `Lexeme` objects. `Span`, `Token` and `Lexeme` objects.
RETURNS (float): A scalar similarity score. Higher is more similar. RETURNS (float): A scalar similarity score. Higher is more similar.
DOCS: https://nightly.spacy.io/api/doc#similarity DOCS: https://spacy.io/api/doc#similarity
""" """
if "similarity" in self.user_hooks: if "similarity" in self.user_hooks:
return self.user_hooks["similarity"](self, other) return self.user_hooks["similarity"](self, other)
@ -595,7 +595,7 @@ cdef class Doc:
RETURNS (bool): Whether a word vector is associated with the object. RETURNS (bool): Whether a word vector is associated with the object.
DOCS: https://nightly.spacy.io/api/doc#has_vector DOCS: https://spacy.io/api/doc#has_vector
""" """
if "has_vector" in self.user_hooks: if "has_vector" in self.user_hooks:
return self.user_hooks["has_vector"](self) return self.user_hooks["has_vector"](self)
@ -613,7 +613,7 @@ cdef class Doc:
RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
representing the document's semantics. representing the document's semantics.
DOCS: https://nightly.spacy.io/api/doc#vector DOCS: https://spacy.io/api/doc#vector
""" """
def __get__(self): def __get__(self):
if "vector" in self.user_hooks: if "vector" in self.user_hooks:
@ -641,7 +641,7 @@ cdef class Doc:
RETURNS (float): The L2 norm of the vector representation. RETURNS (float): The L2 norm of the vector representation.
DOCS: https://nightly.spacy.io/api/doc#vector_norm DOCS: https://spacy.io/api/doc#vector_norm
""" """
def __get__(self): def __get__(self):
if "vector_norm" in self.user_hooks: if "vector_norm" in self.user_hooks:
@ -681,7 +681,7 @@ cdef class Doc:
RETURNS (tuple): Entities in the document, one `Span` per entity. RETURNS (tuple): Entities in the document, one `Span` per entity.
DOCS: https://nightly.spacy.io/api/doc#ents DOCS: https://spacy.io/api/doc#ents
""" """
def __get__(self): def __get__(self):
cdef int i cdef int i
@ -827,7 +827,7 @@ cdef class Doc:
YIELDS (Span): Noun chunks in the document. YIELDS (Span): Noun chunks in the document.
DOCS: https://nightly.spacy.io/api/doc#noun_chunks DOCS: https://spacy.io/api/doc#noun_chunks
""" """
if self.noun_chunks_iterator is None: if self.noun_chunks_iterator is None:
raise NotImplementedError(Errors.E894.format(lang=self.vocab.lang)) raise NotImplementedError(Errors.E894.format(lang=self.vocab.lang))
@ -850,7 +850,7 @@ cdef class Doc:
YIELDS (Span): Sentences in the document. YIELDS (Span): Sentences in the document.
DOCS: https://nightly.spacy.io/api/doc#sents DOCS: https://spacy.io/api/doc#sents
""" """
if not self.has_annotation("SENT_START"): if not self.has_annotation("SENT_START"):
raise ValueError(Errors.E030) raise ValueError(Errors.E030)
@ -959,7 +959,7 @@ cdef class Doc:
attr_id (int): The attribute ID to key the counts. attr_id (int): The attribute ID to key the counts.
RETURNS (dict): A dictionary mapping attributes to integer counts. RETURNS (dict): A dictionary mapping attributes to integer counts.
DOCS: https://nightly.spacy.io/api/doc#count_by DOCS: https://spacy.io/api/doc#count_by
""" """
cdef int i cdef int i
cdef attr_t attr cdef attr_t attr
@ -1006,7 +1006,7 @@ cdef class Doc:
array (numpy.ndarray[ndim=2, dtype='int32']): The attribute values. array (numpy.ndarray[ndim=2, dtype='int32']): The attribute values.
RETURNS (Doc): Itself. RETURNS (Doc): Itself.
DOCS: https://nightly.spacy.io/api/doc#from_array DOCS: https://spacy.io/api/doc#from_array
""" """
# Handle scalar/list inputs of strings/ints for py_attr_ids # Handle scalar/list inputs of strings/ints for py_attr_ids
# See also #3064 # See also #3064
@ -1098,7 +1098,7 @@ cdef class Doc:
attrs (list): Optional list of attribute ID ints or attribute name strings. attrs (list): Optional list of attribute ID ints or attribute name strings.
RETURNS (Doc): A doc that contains the concatenated docs, or None if no docs were given. RETURNS (Doc): A doc that contains the concatenated docs, or None if no docs were given.
DOCS: https://nightly.spacy.io/api/doc#from_docs DOCS: https://spacy.io/api/doc#from_docs
""" """
if not docs: if not docs:
return None return None
@ -1170,7 +1170,7 @@ cdef class Doc:
RETURNS (np.array[ndim=2, dtype=numpy.int32]): LCA matrix with shape RETURNS (np.array[ndim=2, dtype=numpy.int32]): LCA matrix with shape
(n, n), where n = len(self). (n, n), where n = len(self).
DOCS: https://nightly.spacy.io/api/doc#get_lca_matrix DOCS: https://spacy.io/api/doc#get_lca_matrix
""" """
return numpy.asarray(_get_lca_matrix(self, 0, len(self))) return numpy.asarray(_get_lca_matrix(self, 0, len(self)))
@ -1203,7 +1203,7 @@ cdef class Doc:
it doesn't exist. Paths may be either strings or Path-like objects. it doesn't exist. Paths may be either strings or Path-like objects.
exclude (Iterable[str]): String names of serialization fields to exclude. exclude (Iterable[str]): String names of serialization fields to exclude.
DOCS: https://nightly.spacy.io/api/doc#to_disk DOCS: https://spacy.io/api/doc#to_disk
""" """
path = util.ensure_path(path) path = util.ensure_path(path)
with path.open("wb") as file_: with path.open("wb") as file_:
@ -1218,7 +1218,7 @@ cdef class Doc:
exclude (list): String names of serialization fields to exclude. exclude (list): String names of serialization fields to exclude.
RETURNS (Doc): The modified `Doc` object. RETURNS (Doc): The modified `Doc` object.
DOCS: https://nightly.spacy.io/api/doc#from_disk DOCS: https://spacy.io/api/doc#from_disk
""" """
path = util.ensure_path(path) path = util.ensure_path(path)
with path.open("rb") as file_: with path.open("rb") as file_:
@ -1232,7 +1232,7 @@ cdef class Doc:
RETURNS (bytes): A losslessly serialized copy of the `Doc`, including RETURNS (bytes): A losslessly serialized copy of the `Doc`, including
all annotations. all annotations.
DOCS: https://nightly.spacy.io/api/doc#to_bytes DOCS: https://spacy.io/api/doc#to_bytes
""" """
return srsly.msgpack_dumps(self.to_dict(exclude=exclude)) return srsly.msgpack_dumps(self.to_dict(exclude=exclude))
@ -1243,7 +1243,7 @@ cdef class Doc:
exclude (list): String names of serialization fields to exclude. exclude (list): String names of serialization fields to exclude.
RETURNS (Doc): Itself. RETURNS (Doc): Itself.
DOCS: https://nightly.spacy.io/api/doc#from_bytes DOCS: https://spacy.io/api/doc#from_bytes
""" """
return self.from_dict(srsly.msgpack_loads(bytes_data), exclude=exclude) return self.from_dict(srsly.msgpack_loads(bytes_data), exclude=exclude)
@ -1254,7 +1254,7 @@ cdef class Doc:
RETURNS (bytes): A losslessly serialized copy of the `Doc`, including RETURNS (bytes): A losslessly serialized copy of the `Doc`, including
all annotations. all annotations.
DOCS: https://nightly.spacy.io/api/doc#to_bytes DOCS: https://spacy.io/api/doc#to_bytes
""" """
array_head = Doc._get_array_attrs() array_head = Doc._get_array_attrs()
strings = set() strings = set()
@ -1302,7 +1302,7 @@ cdef class Doc:
exclude (list): String names of serialization fields to exclude. exclude (list): String names of serialization fields to exclude.
RETURNS (Doc): Itself. RETURNS (Doc): Itself.
DOCS: https://nightly.spacy.io/api/doc#from_dict DOCS: https://spacy.io/api/doc#from_dict
""" """
if self.length != 0: if self.length != 0:
raise ValueError(Errors.E033.format(length=self.length)) raise ValueError(Errors.E033.format(length=self.length))
@ -1373,8 +1373,8 @@ cdef class Doc:
retokenization are invalidated, although they may accidentally retokenization are invalidated, although they may accidentally
continue to work. continue to work.
DOCS: https://nightly.spacy.io/api/doc#retokenize DOCS: https://spacy.io/api/doc#retokenize
USAGE: https://nightly.spacy.io/usage/linguistic-features#retokenization USAGE: https://spacy.io/usage/linguistic-features#retokenization
""" """
return Retokenizer(self) return Retokenizer(self)

View File

@ -24,7 +24,7 @@ from .underscore import Underscore, get_ext_args
cdef class Span: cdef class Span:
"""A slice from a Doc object. """A slice from a Doc object.
DOCS: https://nightly.spacy.io/api/span DOCS: https://spacy.io/api/span
""" """
@classmethod @classmethod
def set_extension(cls, name, **kwargs): def set_extension(cls, name, **kwargs):
@ -37,8 +37,8 @@ cdef class Span:
method (callable): Optional method for method extension. method (callable): Optional method for method extension.
force (bool): Force overwriting existing attribute. force (bool): Force overwriting existing attribute.
DOCS: https://nightly.spacy.io/api/span#set_extension DOCS: https://spacy.io/api/span#set_extension
USAGE: https://nightly.spacy.io/usage/processing-pipelines#custom-components-attributes USAGE: https://spacy.io/usage/processing-pipelines#custom-components-attributes
""" """
if cls.has_extension(name) and not kwargs.get("force", False): if cls.has_extension(name) and not kwargs.get("force", False):
raise ValueError(Errors.E090.format(name=name, obj="Span")) raise ValueError(Errors.E090.format(name=name, obj="Span"))
@ -51,7 +51,7 @@ cdef class Span:
name (str): Name of the extension. name (str): Name of the extension.
RETURNS (tuple): A `(default, method, getter, setter)` tuple. RETURNS (tuple): A `(default, method, getter, setter)` tuple.
DOCS: https://nightly.spacy.io/api/span#get_extension DOCS: https://spacy.io/api/span#get_extension
""" """
return Underscore.span_extensions.get(name) return Underscore.span_extensions.get(name)
@ -62,7 +62,7 @@ cdef class Span:
name (str): Name of the extension. name (str): Name of the extension.
RETURNS (bool): Whether the extension has been registered. RETURNS (bool): Whether the extension has been registered.
DOCS: https://nightly.spacy.io/api/span#has_extension DOCS: https://spacy.io/api/span#has_extension
""" """
return name in Underscore.span_extensions return name in Underscore.span_extensions
@ -74,7 +74,7 @@ cdef class Span:
RETURNS (tuple): A `(default, method, getter, setter)` tuple of the RETURNS (tuple): A `(default, method, getter, setter)` tuple of the
removed extension. removed extension.
DOCS: https://nightly.spacy.io/api/span#remove_extension DOCS: https://spacy.io/api/span#remove_extension
""" """
if not cls.has_extension(name): if not cls.has_extension(name):
raise ValueError(Errors.E046.format(name=name)) raise ValueError(Errors.E046.format(name=name))
@ -92,7 +92,7 @@ cdef class Span:
vector (ndarray[ndim=1, dtype='float32']): A meaning representation vector (ndarray[ndim=1, dtype='float32']): A meaning representation
of the span. of the span.
DOCS: https://nightly.spacy.io/api/span#init DOCS: https://spacy.io/api/span#init
""" """
if not (0 <= start <= end <= len(doc)): if not (0 <= start <= end <= len(doc)):
raise IndexError(Errors.E035.format(start=start, end=end, length=len(doc))) raise IndexError(Errors.E035.format(start=start, end=end, length=len(doc)))
@ -162,7 +162,7 @@ cdef class Span:
RETURNS (int): The number of tokens in the span. RETURNS (int): The number of tokens in the span.
DOCS: https://nightly.spacy.io/api/span#len DOCS: https://spacy.io/api/span#len
""" """
if self.c.end < self.c.start: if self.c.end < self.c.start:
return 0 return 0
@ -178,7 +178,7 @@ cdef class Span:
the span to get. the span to get.
RETURNS (Token or Span): The token at `span[i]`. RETURNS (Token or Span): The token at `span[i]`.
DOCS: https://nightly.spacy.io/api/span#getitem DOCS: https://spacy.io/api/span#getitem
""" """
if isinstance(i, slice): if isinstance(i, slice):
start, end = normalize_slice(len(self), i.start, i.stop, i.step) start, end = normalize_slice(len(self), i.start, i.stop, i.step)
@ -198,7 +198,7 @@ cdef class Span:
YIELDS (Token): A `Token` object. YIELDS (Token): A `Token` object.
DOCS: https://nightly.spacy.io/api/span#iter DOCS: https://spacy.io/api/span#iter
""" """
for i in range(self.c.start, self.c.end): for i in range(self.c.start, self.c.end):
yield self.doc[i] yield self.doc[i]
@ -218,7 +218,7 @@ cdef class Span:
copy_user_data (bool): Whether or not to copy the original doc's user data. copy_user_data (bool): Whether or not to copy the original doc's user data.
RETURNS (Doc): The `Doc` copy of the span. RETURNS (Doc): The `Doc` copy of the span.
DOCS: https://nightly.spacy.io/api/span#as_doc DOCS: https://spacy.io/api/span#as_doc
""" """
words = [t.text for t in self] words = [t.text for t in self]
spaces = [bool(t.whitespace_) for t in self] spaces = [bool(t.whitespace_) for t in self]
@ -291,7 +291,7 @@ cdef class Span:
RETURNS (np.array[ndim=2, dtype=numpy.int32]): LCA matrix with shape RETURNS (np.array[ndim=2, dtype=numpy.int32]): LCA matrix with shape
(n, n), where n = len(self). (n, n), where n = len(self).
DOCS: https://nightly.spacy.io/api/span#get_lca_matrix DOCS: https://spacy.io/api/span#get_lca_matrix
""" """
return numpy.asarray(_get_lca_matrix(self.doc, self.c.start, self.c.end)) return numpy.asarray(_get_lca_matrix(self.doc, self.c.start, self.c.end))
@ -303,7 +303,7 @@ cdef class Span:
`Span`, `Token` and `Lexeme` objects. `Span`, `Token` and `Lexeme` objects.
RETURNS (float): A scalar similarity score. Higher is more similar. RETURNS (float): A scalar similarity score. Higher is more similar.
DOCS: https://nightly.spacy.io/api/span#similarity DOCS: https://spacy.io/api/span#similarity
""" """
if "similarity" in self.doc.user_span_hooks: if "similarity" in self.doc.user_span_hooks:
return self.doc.user_span_hooks["similarity"](self, other) return self.doc.user_span_hooks["similarity"](self, other)
@ -385,7 +385,7 @@ cdef class Span:
RETURNS (tuple): Entities in the span, one `Span` per entity. RETURNS (tuple): Entities in the span, one `Span` per entity.
DOCS: https://nightly.spacy.io/api/span#ents DOCS: https://spacy.io/api/span#ents
""" """
cdef Span ent cdef Span ent
ents = [] ents = []
@ -404,7 +404,7 @@ cdef class Span:
RETURNS (bool): Whether a word vector is associated with the object. RETURNS (bool): Whether a word vector is associated with the object.
DOCS: https://nightly.spacy.io/api/span#has_vector DOCS: https://spacy.io/api/span#has_vector
""" """
if "has_vector" in self.doc.user_span_hooks: if "has_vector" in self.doc.user_span_hooks:
return self.doc.user_span_hooks["has_vector"](self) return self.doc.user_span_hooks["has_vector"](self)
@ -423,7 +423,7 @@ cdef class Span:
RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
representing the span's semantics. representing the span's semantics.
DOCS: https://nightly.spacy.io/api/span#vector DOCS: https://spacy.io/api/span#vector
""" """
if "vector" in self.doc.user_span_hooks: if "vector" in self.doc.user_span_hooks:
return self.doc.user_span_hooks["vector"](self) return self.doc.user_span_hooks["vector"](self)
@ -437,7 +437,7 @@ cdef class Span:
RETURNS (float): The L2 norm of the vector representation. RETURNS (float): The L2 norm of the vector representation.
DOCS: https://nightly.spacy.io/api/span#vector_norm DOCS: https://spacy.io/api/span#vector_norm
""" """
if "vector_norm" in self.doc.user_span_hooks: if "vector_norm" in self.doc.user_span_hooks:
return self.doc.user_span_hooks["vector"](self) return self.doc.user_span_hooks["vector"](self)
@ -501,7 +501,7 @@ cdef class Span:
YIELDS (Span): Noun chunks in the span. YIELDS (Span): Noun chunks in the span.
DOCS: https://nightly.spacy.io/api/span#noun_chunks DOCS: https://spacy.io/api/span#noun_chunks
""" """
for span in self.doc.noun_chunks: for span in self.doc.noun_chunks:
if span.start >= self.start and span.end <= self.end: if span.start >= self.start and span.end <= self.end:
@ -515,7 +515,7 @@ cdef class Span:
RETURNS (Token): The root token. RETURNS (Token): The root token.
DOCS: https://nightly.spacy.io/api/span#root DOCS: https://spacy.io/api/span#root
""" """
if "root" in self.doc.user_span_hooks: if "root" in self.doc.user_span_hooks:
return self.doc.user_span_hooks["root"](self) return self.doc.user_span_hooks["root"](self)
@ -571,7 +571,7 @@ cdef class Span:
RETURNS (tuple): A tuple of Token objects. RETURNS (tuple): A tuple of Token objects.
DOCS: https://nightly.spacy.io/api/span#conjuncts DOCS: https://spacy.io/api/span#conjuncts
""" """
return self.root.conjuncts return self.root.conjuncts
@ -582,7 +582,7 @@ cdef class Span:
YIELDS (Token): A left-child of a token of the span. YIELDS (Token): A left-child of a token of the span.
DOCS: https://nightly.spacy.io/api/span#lefts DOCS: https://spacy.io/api/span#lefts
""" """
for token in reversed(self): # Reverse, so we get tokens in order for token in reversed(self): # Reverse, so we get tokens in order
for left in token.lefts: for left in token.lefts:
@ -596,7 +596,7 @@ cdef class Span:
YIELDS (Token): A right-child of a token of the span. YIELDS (Token): A right-child of a token of the span.
DOCS: https://nightly.spacy.io/api/span#rights DOCS: https://spacy.io/api/span#rights
""" """
for token in self: for token in self:
for right in token.rights: for right in token.rights:
@ -611,7 +611,7 @@ cdef class Span:
RETURNS (int): The number of leftward immediate children of the RETURNS (int): The number of leftward immediate children of the
span, in the syntactic dependency parse. span, in the syntactic dependency parse.
DOCS: https://nightly.spacy.io/api/span#n_lefts DOCS: https://spacy.io/api/span#n_lefts
""" """
return len(list(self.lefts)) return len(list(self.lefts))
@ -623,7 +623,7 @@ cdef class Span:
RETURNS (int): The number of rightward immediate children of the RETURNS (int): The number of rightward immediate children of the
span, in the syntactic dependency parse. span, in the syntactic dependency parse.
DOCS: https://nightly.spacy.io/api/span#n_rights DOCS: https://spacy.io/api/span#n_rights
""" """
return len(list(self.rights)) return len(list(self.rights))
@ -633,7 +633,7 @@ cdef class Span:
YIELDS (Token): A token within the span, or a descendant from it. YIELDS (Token): A token within the span, or a descendant from it.
DOCS: https://nightly.spacy.io/api/span#subtree DOCS: https://spacy.io/api/span#subtree
""" """
for word in self.lefts: for word in self.lefts:
yield from word.subtree yield from word.subtree

View File

@ -27,7 +27,7 @@ cdef class SpanGroup:
>>> doc.spans["errors"] = [doc[0:1], doc[2:4]] >>> doc.spans["errors"] = [doc[0:1], doc[2:4]]
>>> assert isinstance(doc.spans["errors"], SpanGroup) >>> assert isinstance(doc.spans["errors"], SpanGroup)
DOCS: https://nightly.spacy.io/api/spangroup DOCS: https://spacy.io/api/spangroup
""" """
def __init__(self, doc, *, name="", attrs={}, spans=[]): def __init__(self, doc, *, name="", attrs={}, spans=[]):
"""Create a SpanGroup. """Create a SpanGroup.
@ -37,7 +37,7 @@ cdef class SpanGroup:
attrs (Dict[str, Any]): Optional JSON-serializable attributes to attach. attrs (Dict[str, Any]): Optional JSON-serializable attributes to attach.
spans (Iterable[Span]): The spans to add to the group. spans (Iterable[Span]): The spans to add to the group.
DOCS: https://nightly.spacy.io/api/spangroup#init DOCS: https://spacy.io/api/spangroup#init
""" """
# We need to make this a weak reference, so that the Doc object can # We need to make this a weak reference, so that the Doc object can
# own the SpanGroup without circular references. We do want to get # own the SpanGroup without circular references. We do want to get
@ -56,7 +56,7 @@ cdef class SpanGroup:
def doc(self): def doc(self):
"""RETURNS (Doc): The reference document. """RETURNS (Doc): The reference document.
DOCS: https://nightly.spacy.io/api/spangroup#doc DOCS: https://spacy.io/api/spangroup#doc
""" """
return self._doc_ref() return self._doc_ref()
@ -64,7 +64,7 @@ cdef class SpanGroup:
def has_overlap(self): def has_overlap(self):
"""RETURNS (bool): Whether the group contains overlapping spans. """RETURNS (bool): Whether the group contains overlapping spans.
DOCS: https://nightly.spacy.io/api/spangroup#has_overlap DOCS: https://spacy.io/api/spangroup#has_overlap
""" """
if not len(self): if not len(self):
return False return False
@ -79,7 +79,7 @@ cdef class SpanGroup:
def __len__(self): def __len__(self):
"""RETURNS (int): The number of spans in the group. """RETURNS (int): The number of spans in the group.
DOCS: https://nightly.spacy.io/api/spangroup#len DOCS: https://spacy.io/api/spangroup#len
""" """
return self.c.size() return self.c.size()
@ -89,7 +89,7 @@ cdef class SpanGroup:
span (Span): The span to append. span (Span): The span to append.
DOCS: https://nightly.spacy.io/api/spangroup#append DOCS: https://spacy.io/api/spangroup#append
""" """
if span.doc is not self.doc: if span.doc is not self.doc:
raise ValueError("Cannot add span to group: refers to different Doc.") raise ValueError("Cannot add span to group: refers to different Doc.")
@ -101,7 +101,7 @@ cdef class SpanGroup:
spans (Iterable[Span]): The spans to add. spans (Iterable[Span]): The spans to add.
DOCS: https://nightly.spacy.io/api/spangroup#extend DOCS: https://spacy.io/api/spangroup#extend
""" """
cdef Span span cdef Span span
for span in spans: for span in spans:
@ -113,7 +113,7 @@ cdef class SpanGroup:
i (int): The item index. i (int): The item index.
RETURNS (Span): The span at the given index. RETURNS (Span): The span at the given index.
DOCS: https://nightly.spacy.io/api/spangroup#getitem DOCS: https://spacy.io/api/spangroup#getitem
""" """
cdef int size = self.c.size() cdef int size = self.c.size()
if i < -size or i >= size: if i < -size or i >= size:
@ -127,7 +127,7 @@ cdef class SpanGroup:
RETURNS (bytes): The serialized span group. RETURNS (bytes): The serialized span group.
DOCS: https://nightly.spacy.io/api/spangroup#to_bytes DOCS: https://spacy.io/api/spangroup#to_bytes
""" """
output = {"name": self.name, "attrs": self.attrs, "spans": []} output = {"name": self.name, "attrs": self.attrs, "spans": []}
for i in range(self.c.size()): for i in range(self.c.size()):
@ -159,7 +159,7 @@ cdef class SpanGroup:
bytes_data (bytes): The span group to load. bytes_data (bytes): The span group to load.
RETURNS (SpanGroup): The deserialized span group. RETURNS (SpanGroup): The deserialized span group.
DOCS: https://nightly.spacy.io/api/spangroup#from_bytes DOCS: https://spacy.io/api/spangroup#from_bytes
""" """
msg = srsly.msgpack_loads(bytes_data) msg = srsly.msgpack_loads(bytes_data)
self.name = msg["name"] self.name = msg["name"]

View File

@ -27,7 +27,7 @@ cdef class Token:
"""An individual token i.e. a word, punctuation symbol, whitespace, """An individual token i.e. a word, punctuation symbol, whitespace,
etc. etc.
DOCS: https://nightly.spacy.io/api/token DOCS: https://spacy.io/api/token
""" """
@classmethod @classmethod
def set_extension(cls, name, **kwargs): def set_extension(cls, name, **kwargs):
@ -40,8 +40,8 @@ cdef class Token:
method (callable): Optional method for method extension. method (callable): Optional method for method extension.
force (bool): Force overwriting existing attribute. force (bool): Force overwriting existing attribute.
DOCS: https://nightly.spacy.io/api/token#set_extension DOCS: https://spacy.io/api/token#set_extension
USAGE: https://nightly.spacy.io/usage/processing-pipelines#custom-components-attributes USAGE: https://spacy.io/usage/processing-pipelines#custom-components-attributes
""" """
if cls.has_extension(name) and not kwargs.get("force", False): if cls.has_extension(name) and not kwargs.get("force", False):
raise ValueError(Errors.E090.format(name=name, obj="Token")) raise ValueError(Errors.E090.format(name=name, obj="Token"))
@ -54,7 +54,7 @@ cdef class Token:
name (str): Name of the extension. name (str): Name of the extension.
RETURNS (tuple): A `(default, method, getter, setter)` tuple. RETURNS (tuple): A `(default, method, getter, setter)` tuple.
DOCS: https://nightly.spacy.io/api/token#get_extension DOCS: https://spacy.io/api/token#get_extension
""" """
return Underscore.token_extensions.get(name) return Underscore.token_extensions.get(name)
@ -65,7 +65,7 @@ cdef class Token:
name (str): Name of the extension. name (str): Name of the extension.
RETURNS (bool): Whether the extension has been registered. RETURNS (bool): Whether the extension has been registered.
DOCS: https://nightly.spacy.io/api/token#has_extension DOCS: https://spacy.io/api/token#has_extension
""" """
return name in Underscore.token_extensions return name in Underscore.token_extensions
@ -77,7 +77,7 @@ cdef class Token:
RETURNS (tuple): A `(default, method, getter, setter)` tuple of the RETURNS (tuple): A `(default, method, getter, setter)` tuple of the
removed extension. removed extension.
DOCS: https://nightly.spacy.io/api/token#remove_extension DOCS: https://spacy.io/api/token#remove_extension
""" """
if not cls.has_extension(name): if not cls.has_extension(name):
raise ValueError(Errors.E046.format(name=name)) raise ValueError(Errors.E046.format(name=name))
@ -90,7 +90,7 @@ cdef class Token:
doc (Doc): The parent document. doc (Doc): The parent document.
offset (int): The index of the token within the document. offset (int): The index of the token within the document.
DOCS: https://nightly.spacy.io/api/token#init DOCS: https://spacy.io/api/token#init
""" """
self.vocab = vocab self.vocab = vocab
self.doc = doc self.doc = doc
@ -105,7 +105,7 @@ cdef class Token:
RETURNS (int): The number of unicode characters in the token. RETURNS (int): The number of unicode characters in the token.
DOCS: https://nightly.spacy.io/api/token#len DOCS: https://spacy.io/api/token#len
""" """
return self.c.lex.length return self.c.lex.length
@ -168,7 +168,7 @@ cdef class Token:
flag_id (int): The ID of the flag attribute. flag_id (int): The ID of the flag attribute.
RETURNS (bool): Whether the flag is set. RETURNS (bool): Whether the flag is set.
DOCS: https://nightly.spacy.io/api/token#check_flag DOCS: https://spacy.io/api/token#check_flag
""" """
return Lexeme.c_check_flag(self.c.lex, flag_id) return Lexeme.c_check_flag(self.c.lex, flag_id)
@ -178,7 +178,7 @@ cdef class Token:
i (int): The relative position of the token to get. Defaults to 1. i (int): The relative position of the token to get. Defaults to 1.
RETURNS (Token): The token at position `self.doc[self.i+i]`. RETURNS (Token): The token at position `self.doc[self.i+i]`.
DOCS: https://nightly.spacy.io/api/token#nbor DOCS: https://spacy.io/api/token#nbor
""" """
if self.i+i < 0 or (self.i+i >= len(self.doc)): if self.i+i < 0 or (self.i+i >= len(self.doc)):
raise IndexError(Errors.E042.format(i=self.i, j=i, length=len(self.doc))) raise IndexError(Errors.E042.format(i=self.i, j=i, length=len(self.doc)))
@ -192,7 +192,7 @@ cdef class Token:
`Span`, `Token` and `Lexeme` objects. `Span`, `Token` and `Lexeme` objects.
RETURNS (float): A scalar similarity score. Higher is more similar. RETURNS (float): A scalar similarity score. Higher is more similar.
DOCS: https://nightly.spacy.io/api/token#similarity DOCS: https://spacy.io/api/token#similarity
""" """
if "similarity" in self.doc.user_token_hooks: if "similarity" in self.doc.user_token_hooks:
return self.doc.user_token_hooks["similarity"](self, other) return self.doc.user_token_hooks["similarity"](self, other)
@ -388,7 +388,7 @@ cdef class Token:
RETURNS (bool): Whether a word vector is associated with the object. RETURNS (bool): Whether a word vector is associated with the object.
DOCS: https://nightly.spacy.io/api/token#has_vector DOCS: https://spacy.io/api/token#has_vector
""" """
if "has_vector" in self.doc.user_token_hooks: if "has_vector" in self.doc.user_token_hooks:
return self.doc.user_token_hooks["has_vector"](self) return self.doc.user_token_hooks["has_vector"](self)
@ -403,7 +403,7 @@ cdef class Token:
RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
representing the token's semantics. representing the token's semantics.
DOCS: https://nightly.spacy.io/api/token#vector DOCS: https://spacy.io/api/token#vector
""" """
if "vector" in self.doc.user_token_hooks: if "vector" in self.doc.user_token_hooks:
return self.doc.user_token_hooks["vector"](self) return self.doc.user_token_hooks["vector"](self)
@ -418,7 +418,7 @@ cdef class Token:
RETURNS (float): The L2 norm of the vector representation. RETURNS (float): The L2 norm of the vector representation.
DOCS: https://nightly.spacy.io/api/token#vector_norm DOCS: https://spacy.io/api/token#vector_norm
""" """
if "vector_norm" in self.doc.user_token_hooks: if "vector_norm" in self.doc.user_token_hooks:
return self.doc.user_token_hooks["vector_norm"](self) return self.doc.user_token_hooks["vector_norm"](self)
@ -441,7 +441,7 @@ cdef class Token:
RETURNS (int): The number of leftward immediate children of the RETURNS (int): The number of leftward immediate children of the
word, in the syntactic dependency parse. word, in the syntactic dependency parse.
DOCS: https://nightly.spacy.io/api/token#n_lefts DOCS: https://spacy.io/api/token#n_lefts
""" """
return self.c.l_kids return self.c.l_kids
@ -453,7 +453,7 @@ cdef class Token:
RETURNS (int): The number of rightward immediate children of the RETURNS (int): The number of rightward immediate children of the
word, in the syntactic dependency parse. word, in the syntactic dependency parse.
DOCS: https://nightly.spacy.io/api/token#n_rights DOCS: https://spacy.io/api/token#n_rights
""" """
return self.c.r_kids return self.c.r_kids
@ -485,7 +485,7 @@ cdef class Token:
RETURNS (bool / None): Whether the token starts a sentence. RETURNS (bool / None): Whether the token starts a sentence.
None if unknown. None if unknown.
DOCS: https://nightly.spacy.io/api/token#is_sent_start DOCS: https://spacy.io/api/token#is_sent_start
""" """
def __get__(self): def __get__(self):
if self.c.sent_start == 0: if self.c.sent_start == 0:
@ -514,7 +514,7 @@ cdef class Token:
RETURNS (bool / None): Whether the token ends a sentence. RETURNS (bool / None): Whether the token ends a sentence.
None if unknown. None if unknown.
DOCS: https://nightly.spacy.io/api/token#is_sent_end DOCS: https://spacy.io/api/token#is_sent_end
""" """
def __get__(self): def __get__(self):
if self.i + 1 == len(self.doc): if self.i + 1 == len(self.doc):
@ -536,7 +536,7 @@ cdef class Token:
YIELDS (Token): A left-child of the token. YIELDS (Token): A left-child of the token.
DOCS: https://nightly.spacy.io/api/token#lefts DOCS: https://spacy.io/api/token#lefts
""" """
cdef int nr_iter = 0 cdef int nr_iter = 0
cdef const TokenC* ptr = self.c - (self.i - self.c.l_edge) cdef const TokenC* ptr = self.c - (self.i - self.c.l_edge)
@ -556,7 +556,7 @@ cdef class Token:
YIELDS (Token): A right-child of the token. YIELDS (Token): A right-child of the token.
DOCS: https://nightly.spacy.io/api/token#rights DOCS: https://spacy.io/api/token#rights
""" """
cdef const TokenC* ptr = self.c + (self.c.r_edge - self.i) cdef const TokenC* ptr = self.c + (self.c.r_edge - self.i)
tokens = [] tokens = []
@ -578,7 +578,7 @@ cdef class Token:
YIELDS (Token): A child token such that `child.head==self`. YIELDS (Token): A child token such that `child.head==self`.
DOCS: https://nightly.spacy.io/api/token#children DOCS: https://spacy.io/api/token#children
""" """
yield from self.lefts yield from self.lefts
yield from self.rights yield from self.rights
@ -591,7 +591,7 @@ cdef class Token:
YIELDS (Token): A descendent token such that YIELDS (Token): A descendent token such that
`self.is_ancestor(descendent) or descendent == self`. `self.is_ancestor(descendent) or descendent == self`.
DOCS: https://nightly.spacy.io/api/token#subtree DOCS: https://spacy.io/api/token#subtree
""" """
for word in self.lefts: for word in self.lefts:
yield from word.subtree yield from word.subtree
@ -622,7 +622,7 @@ cdef class Token:
YIELDS (Token): A sequence of ancestor tokens such that YIELDS (Token): A sequence of ancestor tokens such that
`ancestor.is_ancestor(self)`. `ancestor.is_ancestor(self)`.
DOCS: https://nightly.spacy.io/api/token#ancestors DOCS: https://spacy.io/api/token#ancestors
""" """
cdef const TokenC* head_ptr = self.c cdef const TokenC* head_ptr = self.c
# Guard against infinite loop, no token can have # Guard against infinite loop, no token can have
@ -640,7 +640,7 @@ cdef class Token:
descendant (Token): Another token. descendant (Token): Another token.
RETURNS (bool): Whether this token is the ancestor of the descendant. RETURNS (bool): Whether this token is the ancestor of the descendant.
DOCS: https://nightly.spacy.io/api/token#is_ancestor DOCS: https://spacy.io/api/token#is_ancestor
""" """
if self.doc is not descendant.doc: if self.doc is not descendant.doc:
return False return False
@ -655,8 +655,8 @@ cdef class Token:
return not Token.missing_head(self.c) return not Token.missing_head(self.c)
property head: property head:
"""The syntactic parent, or "governor", of this token. """The syntactic parent, or "governor", of this token.
If token.has_head() is `False`, this method will return itself. If token.has_head() is `False`, this method will return itself.
RETURNS (Token): The token predicted by the parser to be the head of RETURNS (Token): The token predicted by the parser to be the head of
the current token. the current token.
@ -696,7 +696,7 @@ cdef class Token:
RETURNS (tuple): The coordinated tokens. RETURNS (tuple): The coordinated tokens.
DOCS: https://nightly.spacy.io/api/token#conjuncts DOCS: https://spacy.io/api/token#conjuncts
""" """
cdef Token word, child cdef Token word, child
if "conjuncts" in self.doc.user_token_hooks: if "conjuncts" in self.doc.user_token_hooks:

View File

@ -97,7 +97,7 @@ class Corpus:
augment (Callable[Example, Iterable[Example]]): Optional data augmentation augment (Callable[Example, Iterable[Example]]): Optional data augmentation
function, to extrapolate additional examples from your annotations. function, to extrapolate additional examples from your annotations.
DOCS: https://nightly.spacy.io/api/corpus DOCS: https://spacy.io/api/corpus
""" """
def __init__( def __init__(
@ -121,7 +121,7 @@ class Corpus:
nlp (Language): The current nlp object. nlp (Language): The current nlp object.
YIELDS (Example): The examples. YIELDS (Example): The examples.
DOCS: https://nightly.spacy.io/api/corpus#call DOCS: https://spacy.io/api/corpus#call
""" """
ref_docs = self.read_docbin(nlp.vocab, walk_corpus(self.path, FILE_TYPE)) ref_docs = self.read_docbin(nlp.vocab, walk_corpus(self.path, FILE_TYPE))
if self.gold_preproc: if self.gold_preproc:
@ -206,7 +206,7 @@ class JsonlCorpus:
limit (int): Limit corpus to a subset of examples, e.g. for debugging. limit (int): Limit corpus to a subset of examples, e.g. for debugging.
Defaults to 0, which indicates no limit. Defaults to 0, which indicates no limit.
DOCS: https://nightly.spacy.io/api/corpus#jsonlcorpus DOCS: https://spacy.io/api/corpus#jsonlcorpus
""" """
file_type = "jsonl" file_type = "jsonl"
@ -230,7 +230,7 @@ class JsonlCorpus:
nlp (Language): The current nlp object. nlp (Language): The current nlp object.
YIELDS (Example): The example objects. YIELDS (Example): The example objects.
DOCS: https://nightly.spacy.io/api/corpus#jsonlcorpus-call DOCS: https://spacy.io/api/corpus#jsonlcorpus-call
""" """
for loc in walk_corpus(self.path, ".jsonl"): for loc in walk_corpus(self.path, ".jsonl"):
records = srsly.read_jsonl(loc) records = srsly.read_jsonl(loc)

View File

@ -44,7 +44,7 @@ cdef class Vectors:
the table need to be assigned - so len(list(vectors.keys())) may be the table need to be assigned - so len(list(vectors.keys())) may be
greater or smaller than vectors.shape[0]. greater or smaller than vectors.shape[0].
DOCS: https://nightly.spacy.io/api/vectors DOCS: https://spacy.io/api/vectors
""" """
cdef public object name cdef public object name
cdef public object data cdef public object data
@ -59,7 +59,7 @@ cdef class Vectors:
keys (iterable): A sequence of keys, aligned with the data. keys (iterable): A sequence of keys, aligned with the data.
name (str): A name to identify the vectors table. name (str): A name to identify the vectors table.
DOCS: https://nightly.spacy.io/api/vectors#init DOCS: https://spacy.io/api/vectors#init
""" """
self.name = name self.name = name
if data is None: if data is None:
@ -83,7 +83,7 @@ cdef class Vectors:
RETURNS (tuple): A `(rows, dims)` pair. RETURNS (tuple): A `(rows, dims)` pair.
DOCS: https://nightly.spacy.io/api/vectors#shape DOCS: https://spacy.io/api/vectors#shape
""" """
return self.data.shape return self.data.shape
@ -93,7 +93,7 @@ cdef class Vectors:
RETURNS (int): The vector size. RETURNS (int): The vector size.
DOCS: https://nightly.spacy.io/api/vectors#size DOCS: https://spacy.io/api/vectors#size
""" """
return self.data.shape[0] * self.data.shape[1] return self.data.shape[0] * self.data.shape[1]
@ -103,7 +103,7 @@ cdef class Vectors:
RETURNS (bool): `True` if no slots are available for new keys. RETURNS (bool): `True` if no slots are available for new keys.
DOCS: https://nightly.spacy.io/api/vectors#is_full DOCS: https://spacy.io/api/vectors#is_full
""" """
return self._unset.size() == 0 return self._unset.size() == 0
@ -114,7 +114,7 @@ cdef class Vectors:
RETURNS (int): The number of keys in the table. RETURNS (int): The number of keys in the table.
DOCS: https://nightly.spacy.io/api/vectors#n_keys DOCS: https://spacy.io/api/vectors#n_keys
""" """
return len(self.key2row) return len(self.key2row)
@ -127,7 +127,7 @@ cdef class Vectors:
key (int): The key to get the vector for. key (int): The key to get the vector for.
RETURNS (ndarray): The vector for the key. RETURNS (ndarray): The vector for the key.
DOCS: https://nightly.spacy.io/api/vectors#getitem DOCS: https://spacy.io/api/vectors#getitem
""" """
i = self.key2row[key] i = self.key2row[key]
if i is None: if i is None:
@ -141,7 +141,7 @@ cdef class Vectors:
key (int): The key to set the vector for. key (int): The key to set the vector for.
vector (ndarray): The vector to set. vector (ndarray): The vector to set.
DOCS: https://nightly.spacy.io/api/vectors#setitem DOCS: https://spacy.io/api/vectors#setitem
""" """
i = self.key2row[key] i = self.key2row[key]
self.data[i] = vector self.data[i] = vector
@ -153,7 +153,7 @@ cdef class Vectors:
YIELDS (int): A key in the table. YIELDS (int): A key in the table.
DOCS: https://nightly.spacy.io/api/vectors#iter DOCS: https://spacy.io/api/vectors#iter
""" """
yield from self.key2row yield from self.key2row
@ -162,7 +162,7 @@ cdef class Vectors:
RETURNS (int): The number of vectors in the data. RETURNS (int): The number of vectors in the data.
DOCS: https://nightly.spacy.io/api/vectors#len DOCS: https://spacy.io/api/vectors#len
""" """
return self.data.shape[0] return self.data.shape[0]
@ -172,7 +172,7 @@ cdef class Vectors:
key (int): The key to check. key (int): The key to check.
RETURNS (bool): Whether the key has a vector entry. RETURNS (bool): Whether the key has a vector entry.
DOCS: https://nightly.spacy.io/api/vectors#contains DOCS: https://spacy.io/api/vectors#contains
""" """
return key in self.key2row return key in self.key2row
@ -189,7 +189,7 @@ cdef class Vectors:
inplace (bool): Reallocate the memory. inplace (bool): Reallocate the memory.
RETURNS (list): The removed items as a list of `(key, row)` tuples. RETURNS (list): The removed items as a list of `(key, row)` tuples.
DOCS: https://nightly.spacy.io/api/vectors#resize DOCS: https://spacy.io/api/vectors#resize
""" """
xp = get_array_module(self.data) xp = get_array_module(self.data)
if inplace: if inplace:
@ -224,7 +224,7 @@ cdef class Vectors:
YIELDS (ndarray): A vector in the table. YIELDS (ndarray): A vector in the table.
DOCS: https://nightly.spacy.io/api/vectors#values DOCS: https://spacy.io/api/vectors#values
""" """
for row, vector in enumerate(range(self.data.shape[0])): for row, vector in enumerate(range(self.data.shape[0])):
if not self._unset.count(row): if not self._unset.count(row):
@ -235,7 +235,7 @@ cdef class Vectors:
YIELDS (tuple): A key/vector pair. YIELDS (tuple): A key/vector pair.
DOCS: https://nightly.spacy.io/api/vectors#items DOCS: https://spacy.io/api/vectors#items
""" """
for key, row in self.key2row.items(): for key, row in self.key2row.items():
yield key, self.data[row] yield key, self.data[row]
@ -281,7 +281,7 @@ cdef class Vectors:
row (int / None): The row number of a vector to map the key to. row (int / None): The row number of a vector to map the key to.
RETURNS (int): The row the vector was added to. RETURNS (int): The row the vector was added to.
DOCS: https://nightly.spacy.io/api/vectors#add DOCS: https://spacy.io/api/vectors#add
""" """
# use int for all keys and rows in key2row for more efficient access # use int for all keys and rows in key2row for more efficient access
# and serialization # and serialization
@ -368,7 +368,7 @@ cdef class Vectors:
path (str / Path): A path to a directory, which will be created if path (str / Path): A path to a directory, which will be created if
it doesn't exist. it doesn't exist.
DOCS: https://nightly.spacy.io/api/vectors#to_disk DOCS: https://spacy.io/api/vectors#to_disk
""" """
xp = get_array_module(self.data) xp = get_array_module(self.data)
if xp is numpy: if xp is numpy:
@ -396,7 +396,7 @@ cdef class Vectors:
path (str / Path): Directory path, string or Path-like object. path (str / Path): Directory path, string or Path-like object.
RETURNS (Vectors): The modified object. RETURNS (Vectors): The modified object.
DOCS: https://nightly.spacy.io/api/vectors#from_disk DOCS: https://spacy.io/api/vectors#from_disk
""" """
def load_key2row(path): def load_key2row(path):
if path.exists(): if path.exists():
@ -432,7 +432,7 @@ cdef class Vectors:
exclude (list): String names of serialization fields to exclude. exclude (list): String names of serialization fields to exclude.
RETURNS (bytes): The serialized form of the `Vectors` object. RETURNS (bytes): The serialized form of the `Vectors` object.
DOCS: https://nightly.spacy.io/api/vectors#to_bytes DOCS: https://spacy.io/api/vectors#to_bytes
""" """
def serialize_weights(): def serialize_weights():
if hasattr(self.data, "to_bytes"): if hasattr(self.data, "to_bytes"):
@ -453,7 +453,7 @@ cdef class Vectors:
exclude (list): String names of serialization fields to exclude. exclude (list): String names of serialization fields to exclude.
RETURNS (Vectors): The `Vectors` object. RETURNS (Vectors): The `Vectors` object.
DOCS: https://nightly.spacy.io/api/vectors#from_bytes DOCS: https://spacy.io/api/vectors#from_bytes
""" """
def deserialize_weights(b): def deserialize_weights(b):
if hasattr(self.data, "from_bytes"): if hasattr(self.data, "from_bytes"):

View File

@ -47,7 +47,7 @@ cdef class Vocab:
instance also provides access to the `StringStore`, and owns underlying instance also provides access to the `StringStore`, and owns underlying
C-data that is shared between `Doc` objects. C-data that is shared between `Doc` objects.
DOCS: https://nightly.spacy.io/api/vocab DOCS: https://spacy.io/api/vocab
""" """
def __init__(self, lex_attr_getters=None, strings=tuple(), lookups=None, def __init__(self, lex_attr_getters=None, strings=tuple(), lookups=None,
oov_prob=-20., vectors_name=None, writing_system={}, oov_prob=-20., vectors_name=None, writing_system={},
@ -110,7 +110,7 @@ cdef class Vocab:
available bit will be chosen. available bit will be chosen.
RETURNS (int): The integer ID by which the flag value can be checked. RETURNS (int): The integer ID by which the flag value can be checked.
DOCS: https://nightly.spacy.io/api/vocab#add_flag DOCS: https://spacy.io/api/vocab#add_flag
""" """
if flag_id == -1: if flag_id == -1:
for bit in range(1, 64): for bit in range(1, 64):
@ -202,7 +202,7 @@ cdef class Vocab:
key (unicode): The ID string. key (unicode): The ID string.
RETURNS (bool): Whether the string has an entry in the vocabulary. RETURNS (bool): Whether the string has an entry in the vocabulary.
DOCS: https://nightly.spacy.io/api/vocab#contains DOCS: https://spacy.io/api/vocab#contains
""" """
cdef hash_t int_key cdef hash_t int_key
if isinstance(key, bytes): if isinstance(key, bytes):
@ -219,7 +219,7 @@ cdef class Vocab:
YIELDS (Lexeme): An entry in the vocabulary. YIELDS (Lexeme): An entry in the vocabulary.
DOCS: https://nightly.spacy.io/api/vocab#iter DOCS: https://spacy.io/api/vocab#iter
""" """
cdef attr_t key cdef attr_t key
cdef size_t addr cdef size_t addr
@ -242,7 +242,7 @@ cdef class Vocab:
>>> apple = nlp.vocab.strings["apple"] >>> apple = nlp.vocab.strings["apple"]
>>> assert nlp.vocab[apple] == nlp.vocab[u"apple"] >>> assert nlp.vocab[apple] == nlp.vocab[u"apple"]
DOCS: https://nightly.spacy.io/api/vocab#getitem DOCS: https://spacy.io/api/vocab#getitem
""" """
cdef attr_t orth cdef attr_t orth
if isinstance(id_or_string, unicode): if isinstance(id_or_string, unicode):
@ -310,7 +310,7 @@ cdef class Vocab:
word was mapped to, and `score` the similarity score between the word was mapped to, and `score` the similarity score between the
two words. two words.
DOCS: https://nightly.spacy.io/api/vocab#prune_vectors DOCS: https://spacy.io/api/vocab#prune_vectors
""" """
xp = get_array_module(self.vectors.data) xp = get_array_module(self.vectors.data)
# Make sure all vectors are in the vocab # Make sure all vectors are in the vocab
@ -353,7 +353,7 @@ cdef class Vocab:
and shape determined by the `vocab.vectors` instance. Usually, a and shape determined by the `vocab.vectors` instance. Usually, a
numpy ndarray of shape (300,) and dtype float32. numpy ndarray of shape (300,) and dtype float32.
DOCS: https://nightly.spacy.io/api/vocab#get_vector DOCS: https://spacy.io/api/vocab#get_vector
""" """
if isinstance(orth, str): if isinstance(orth, str):
orth = self.strings.add(orth) orth = self.strings.add(orth)
@ -400,7 +400,7 @@ cdef class Vocab:
orth (int / unicode): The word. orth (int / unicode): The word.
vector (numpy.ndarray[ndim=1, dtype='float32']): The vector to set. vector (numpy.ndarray[ndim=1, dtype='float32']): The vector to set.
DOCS: https://nightly.spacy.io/api/vocab#set_vector DOCS: https://spacy.io/api/vocab#set_vector
""" """
if isinstance(orth, str): if isinstance(orth, str):
orth = self.strings.add(orth) orth = self.strings.add(orth)
@ -422,7 +422,7 @@ cdef class Vocab:
orth (int / unicode): The word. orth (int / unicode): The word.
RETURNS (bool): Whether the word has a vector. RETURNS (bool): Whether the word has a vector.
DOCS: https://nightly.spacy.io/api/vocab#has_vector DOCS: https://spacy.io/api/vocab#has_vector
""" """
if isinstance(orth, str): if isinstance(orth, str):
orth = self.strings.add(orth) orth = self.strings.add(orth)
@ -448,7 +448,7 @@ cdef class Vocab:
it doesn't exist. it doesn't exist.
exclude (list): String names of serialization fields to exclude. exclude (list): String names of serialization fields to exclude.
DOCS: https://nightly.spacy.io/api/vocab#to_disk DOCS: https://spacy.io/api/vocab#to_disk
""" """
path = util.ensure_path(path) path = util.ensure_path(path)
if not path.exists(): if not path.exists():
@ -469,7 +469,7 @@ cdef class Vocab:
exclude (list): String names of serialization fields to exclude. exclude (list): String names of serialization fields to exclude.
RETURNS (Vocab): The modified `Vocab` object. RETURNS (Vocab): The modified `Vocab` object.
DOCS: https://nightly.spacy.io/api/vocab#from_disk DOCS: https://spacy.io/api/vocab#from_disk
""" """
path = util.ensure_path(path) path = util.ensure_path(path)
getters = ["strings", "vectors"] getters = ["strings", "vectors"]
@ -494,7 +494,7 @@ cdef class Vocab:
exclude (list): String names of serialization fields to exclude. exclude (list): String names of serialization fields to exclude.
RETURNS (bytes): The serialized form of the `Vocab` object. RETURNS (bytes): The serialized form of the `Vocab` object.
DOCS: https://nightly.spacy.io/api/vocab#to_bytes DOCS: https://spacy.io/api/vocab#to_bytes
""" """
def deserialize_vectors(): def deserialize_vectors():
if self.vectors is None: if self.vectors is None:
@ -516,7 +516,7 @@ cdef class Vocab:
exclude (list): String names of serialization fields to exclude. exclude (list): String names of serialization fields to exclude.
RETURNS (Vocab): The `Vocab` object. RETURNS (Vocab): The `Vocab` object.
DOCS: https://nightly.spacy.io/api/vocab#from_bytes DOCS: https://spacy.io/api/vocab#from_bytes
""" """
def serialize_vectors(b): def serialize_vectors(b):
if self.vectors is None: if self.vectors is None: