Replace links to nightly docs [ci skip]

This commit is contained in:
Ines Montani 2021-01-30 20:09:38 +11:00
parent b26a3daa9a
commit d0c3775712
57 changed files with 389 additions and 389 deletions

View File

@ -29,7 +29,7 @@ COMMAND = "python -m spacy"
NAME = "spacy"
HELP = """spaCy Command-line Interface
DOCS: https://nightly.spacy.io/api/cli
DOCS: https://spacy.io/api/cli
"""
PROJECT_HELP = f"""Command-line interface for spaCy projects and templates.
You'd typically start by cloning a project template to a local directory and

View File

@ -64,7 +64,7 @@ def convert_cli(
is written to stdout, so you can pipe them forward to a JSON file:
$ spacy convert some_file.conllu --file-type json > some_file.json
DOCS: https://nightly.spacy.io/api/cli#convert
DOCS: https://spacy.io/api/cli#convert
"""
if isinstance(file_type, FileTypes):
# We get an instance of the FileTypes from the CLI so we need its string value
@ -268,6 +268,6 @@ def _get_converter(msg, converter, input_path):
msg.warn(
"Can't automatically detect NER format. "
"Conversion may not succeed. "
"See https://nightly.spacy.io/api/cli#convert"
"See https://spacy.io/api/cli#convert"
)
return converter

View File

@ -34,7 +34,7 @@ def debug_config_cli(
as command line options. For instance, --training.batch_size 128 overrides
the value of "batch_size" in the block "[training]".
DOCS: https://nightly.spacy.io/api/cli#debug-config
DOCS: https://spacy.io/api/cli#debug-config
"""
overrides = parse_config_overrides(ctx.args)
import_code(code_path)

View File

@ -50,7 +50,7 @@ def debug_data_cli(
useful stats, and can help you find problems like invalid entity annotations,
cyclic dependencies, low data labels and more.
DOCS: https://nightly.spacy.io/api/cli#debug-data
DOCS: https://spacy.io/api/cli#debug-data
"""
if ctx.command.name == "debug-data":
msg.warn(

View File

@ -40,7 +40,7 @@ def debug_model_cli(
Analyze a Thinc model implementation. Includes checks for internal structure
and activations during training.
DOCS: https://nightly.spacy.io/api/cli#debug-model
DOCS: https://spacy.io/api/cli#debug-model
"""
setup_gpu(use_gpu)
layers = string_to_list(layers, intify=True)

View File

@ -28,7 +28,7 @@ def download_cli(
additional arguments provided to this command will be passed to `pip install`
on package installation.
DOCS: https://nightly.spacy.io/api/cli#download
DOCS: https://spacy.io/api/cli#download
AVAILABLE PACKAGES: https://spacy.io/models
"""
download(model, direct, *ctx.args)
@ -80,7 +80,7 @@ def get_compatibility() -> dict:
f"Couldn't fetch compatibility table. Please find a package for your spaCy "
f"installation (v{about.__version__}), and download it manually. "
f"For more details, see the documentation: "
f"https://nightly.spacy.io/usage/models",
f"https://spacy.io/usage/models",
exits=1,
)
comp_table = r.json()

View File

@ -36,7 +36,7 @@ def evaluate_cli(
dependency parses in an HTML file, set an output directory as the
displacy_path argument.
DOCS: https://nightly.spacy.io/api/cli#evaluate
DOCS: https://spacy.io/api/cli#evaluate
"""
import_code(code_path)
evaluate(

View File

@ -23,7 +23,7 @@ def info_cli(
print its meta information. Flag --markdown prints details in Markdown for easy
copy-pasting to GitHub issues.
DOCS: https://nightly.spacy.io/api/cli#info
DOCS: https://spacy.io/api/cli#info
"""
exclude = string_to_list(exclude)
info(model, markdown=markdown, silent=silent, exclude=exclude)

View File

@ -41,7 +41,7 @@ def init_config_cli(
optimal settings for your use case. This includes the choice of architecture,
pretrained weights and related hyperparameters.
DOCS: https://nightly.spacy.io/api/cli#init-config
DOCS: https://spacy.io/api/cli#init-config
"""
if isinstance(optimize, Optimizations): # instance of enum from the CLI
optimize = optimize.value
@ -78,9 +78,9 @@ def init_fill_config_cli(
from the default config and will create all objects, check the registered
functions for their default values and update the base config. This command
can be used with a config generated via the training quickstart widget:
https://nightly.spacy.io/usage/training#quickstart
https://spacy.io/usage/training#quickstart
DOCS: https://nightly.spacy.io/api/cli#init-fill-config
DOCS: https://spacy.io/api/cli#init-fill-config
"""
fill_config(output_file, base_path, pretraining=pretraining, diff=diff)

View File

@ -38,7 +38,7 @@ def package_cli(
registered functions like pipeline components), they are copied into the
package and imported in the __init__.py.
DOCS: https://nightly.spacy.io/api/cli#package
DOCS: https://spacy.io/api/cli#package
"""
create_sdist, create_wheel = get_build_formats(string_to_list(build))
code_paths = [Path(p.strip()) for p in string_to_list(code_paths)]

View File

@ -44,7 +44,7 @@ def pretrain_cli(
all settings are the same between pretraining and training. Ideally,
this is done by using the same config file for both commands.
DOCS: https://nightly.spacy.io/api/cli#pretrain
DOCS: https://spacy.io/api/cli#pretrain
"""
config_overrides = parse_config_overrides(ctx.args)
import_code(code_path)

View File

@ -30,7 +30,7 @@ def profile_cli(
It can either be provided as a JSONL file, or be read from sys.stdin.
If no input file is specified, the IMDB dataset is loaded via Thinc.
DOCS: https://nightly.spacy.io/api/cli#debug-profile
DOCS: https://spacy.io/api/cli#debug-profile
"""
if ctx.parent.command.name == NAME: # called as top-level command
msg.warn(

View File

@ -22,7 +22,7 @@ def project_assets_cli(
provided in the project.yml, the file is only downloaded if no local file
with the same checksum exists.
DOCS: https://nightly.spacy.io/api/cli#project-assets
DOCS: https://spacy.io/api/cli#project-assets
"""
project_assets(project_dir, sparse_checkout=sparse_checkout)

View File

@ -25,7 +25,7 @@ def project_clone_cli(
defaults to the official spaCy template repo, but can be customized
(including using a private repo).
DOCS: https://nightly.spacy.io/api/cli#project-clone
DOCS: https://spacy.io/api/cli#project-clone
"""
if dest is None:
dest = Path.cwd() / Path(name).parts[-1]

View File

@ -5,7 +5,7 @@ from ...util import working_dir
from .._util import project_cli, Arg, Opt, PROJECT_FILE, load_project_config
DOCS_URL = "https://nightly.spacy.io"
DOCS_URL = "https://spacy.io"
INTRO = f"""> ⚠️ This project template uses the new [**spaCy v3.0**]({DOCS_URL}), which
> is currently available as a nightly pre-release. You can install it from pip as `spacy-nightly`:
> `pip install spacy-nightly`. Make sure to use a fresh virtual environment."""
@ -44,7 +44,7 @@ def project_document_cli(
auto-generated section and only the auto-generated docs will be replaced
when you re-run the command.
DOCS: https://nightly.spacy.io/api/cli#project-document
DOCS: https://spacy.io/api/cli#project-document
"""
project_document(project_dir, output_file, no_emoji=no_emoji)

View File

@ -34,7 +34,7 @@ def project_update_dvc_cli(
workflow is used. The DVC config will only be updated if the project.yml
changed.
DOCS: https://nightly.spacy.io/api/cli#project-dvc
DOCS: https://spacy.io/api/cli#project-dvc
"""
project_update_dvc(project_dir, workflow, verbose=verbose, force=force)

View File

@ -19,7 +19,7 @@ def project_pull_cli(
A storage can be anything that the smart-open library can upload to, e.g.
AWS, Google Cloud Storage, SSH, local directories etc.
DOCS: https://nightly.spacy.io/api/cli#project-pull
DOCS: https://spacy.io/api/cli#project-pull
"""
for url, output_path in project_pull(project_dir, remote):
if url is not None:

View File

@ -18,7 +18,7 @@ def project_push_cli(
the smart-open library can upload to, e.g. AWS, Google Cloud Storage, SSH,
local directories etc.
DOCS: https://nightly.spacy.io/api/cli#project-push
DOCS: https://spacy.io/api/cli#project-push
"""
for output_path, url in project_push(project_dir, remote):
if url is None:

View File

@ -28,7 +28,7 @@ def project_run_cli(
commands define dependencies and/or outputs, they will only be re-run if
state has changed.
DOCS: https://nightly.spacy.io/api/cli#project-run
DOCS: https://spacy.io/api/cli#project-run
"""
if show_help or not subcommand:
print_run_help(project_dir, subcommand)

View File

@ -37,7 +37,7 @@ def train_cli(
used to register custom functions and architectures that can then be
referenced in the config.
DOCS: https://nightly.spacy.io/api/cli#train
DOCS: https://spacy.io/api/cli#train
"""
util.logger.setLevel(logging.DEBUG if verbose else logging.INFO)
# Make sure all files and paths exists if they are needed

View File

@ -17,7 +17,7 @@ def validate_cli():
if the installed packages are compatible and shows upgrade instructions if
available. Should be run after `pip install -U spacy`.
DOCS: https://nightly.spacy.io/api/cli#validate
DOCS: https://spacy.io/api/cli#validate
"""
validate()

View File

@ -1,8 +1,8 @@
"""
spaCy's built in visualization suite for dependencies and named entities.
DOCS: https://nightly.spacy.io/api/top-level#displacy
USAGE: https://nightly.spacy.io/usage/visualizers
DOCS: https://spacy.io/api/top-level#displacy
USAGE: https://spacy.io/usage/visualizers
"""
from typing import Union, Iterable, Optional, Dict, Any, Callable
import warnings
@ -37,8 +37,8 @@ def render(
manual (bool): Don't parse `Doc` and instead expect a dict/list of dicts.
RETURNS (str): Rendered HTML markup.
DOCS: https://nightly.spacy.io/api/top-level#displacy.render
USAGE: https://nightly.spacy.io/usage/visualizers
DOCS: https://spacy.io/api/top-level#displacy.render
USAGE: https://spacy.io/usage/visualizers
"""
factories = {
"dep": (DependencyRenderer, parse_deps),
@ -88,8 +88,8 @@ def serve(
port (int): Port to serve visualisation.
host (str): Host to serve visualisation.
DOCS: https://nightly.spacy.io/api/top-level#displacy.serve
USAGE: https://nightly.spacy.io/usage/visualizers
DOCS: https://spacy.io/api/top-level#displacy.serve
USAGE: https://spacy.io/usage/visualizers
"""
from wsgiref import simple_server

View File

@ -20,7 +20,7 @@ class Warnings:
"generate a dependency visualization for it. Make sure the Doc "
"was processed with a model that supports dependency parsing, and "
"not just a language class like `English()`. For more info, see "
"the docs:\nhttps://nightly.spacy.io/usage/models")
"the docs:\nhttps://spacy.io/usage/models")
W006 = ("No entities to visualize found in Doc object. If this is "
"surprising to you, make sure the Doc was processed using a model "
"that supports named entity recognition, and check the `doc.ents` "
@ -86,7 +86,7 @@ class Warnings:
"the config block to replace its token-to-vector listener with a copy "
"and make it independent. For example, `replace_listeners = "
"[\"model.tok2vec\"]` See the documentation for details: "
"https://nightly.spacy.io/usage/training#config-components-listeners")
"https://spacy.io/usage/training#config-components-listeners")
W087 = ("Component '{name}' will be (re)trained, but the component '{listener}' "
"depends on it via a listener and is frozen. This means that the "
"performance of '{listener}' will be degraded. You can either freeze "
@ -95,12 +95,12 @@ class Warnings:
"the config block to replace its token-to-vector listener with a copy "
"and make it independent. For example, `replace_listeners = "
"[\"model.tok2vec\"]` See the documentation for details: "
"https://nightly.spacy.io/usage/training#config-components-listeners")
"https://spacy.io/usage/training#config-components-listeners")
W088 = ("The pipeline component {name} implements a `begin_training` "
"method, which won't be called by spaCy. As of v3.0, `begin_training` "
"has been renamed to `initialize`, so you likely want to rename the "
"component method. See the documentation for details: "
"https://nightly.spacy.io/api/language#initialize")
"https://spacy.io/api/language#initialize")
W089 = ("As of spaCy v3.0, the `nlp.begin_training` method has been renamed "
"to `nlp.initialize`.")
W090 = ("Could not locate any {format} files in path '{path}'.")
@ -180,7 +180,7 @@ class Errors:
E010 = ("Word vectors set to length 0. This may be because you don't have "
"a model installed or loaded, or because your model doesn't "
"include word vectors. For more info, see the docs:\n"
"https://nightly.spacy.io/usage/models")
"https://spacy.io/usage/models")
E011 = ("Unknown operator: '{op}'. Options: {opts}")
E012 = ("Cannot add pattern for zero tokens to matcher.\nKey: {key}")
E016 = ("MultitaskObjective target should be function or one of: dep, "
@ -211,7 +211,7 @@ class Errors:
E028 = ("`words` expects a list of unicode strings, but got bytes instance: {value}")
E029 = ("`noun_chunks` requires the dependency parse, which requires a "
"statistical model to be installed and loaded. For more info, see "
"the documentation:\nhttps://nightly.spacy.io/usage/models")
"the documentation:\nhttps://spacy.io/usage/models")
E030 = ("Sentence boundaries unset. You can add the 'sentencizer' "
"component to the pipeline with: `nlp.add_pipe('sentencizer')`. "
"Alternatively, add the dependency parser or sentence recognizer, "
@ -318,7 +318,7 @@ class Errors:
E102 = ("Can't merge non-disjoint spans. '{token}' is already part of "
"tokens to merge. If you want to find the longest non-overlapping "
"spans, you can use the util.filter_spans helper:\n"
"https://nightly.spacy.io/api/top-level#util.filter_spans")
"https://spacy.io/api/top-level#util.filter_spans")
E103 = ("Trying to set conflicting doc.ents: '{span1}' and '{span2}'. A "
"token can only be part of one entity, so make sure the entities "
"you're setting don't overlap.")
@ -536,9 +536,9 @@ class Errors:
"solve this, remove the existing directories in the output directory.")
E902 = ("The sentence-per-line IOB/IOB2 file is not formatted correctly. "
"Try checking whitespace and delimiters. See "
"https://nightly.spacy.io/api/cli#convert")
"https://spacy.io/api/cli#convert")
E903 = ("The token-per-line NER file is not formatted correctly. Try checking "
"whitespace and delimiters. See https://nightly.spacy.io/api/cli#convert")
"whitespace and delimiters. See https://spacy.io/api/cli#convert")
E904 = ("Cannot initialize StaticVectors layer: nO dimension unset. This "
"dimension refers to the output width, after the linear projection "
"has been applied.")

View File

@ -23,7 +23,7 @@ cdef class Candidate:
algorithm which will disambiguate the various candidates to the correct one.
Each candidate (alias, entity) pair is assigned to a certain prior probability.
DOCS: https://nightly.spacy.io/api/kb/#candidate_init
DOCS: https://spacy.io/api/kb/#candidate_init
"""
def __init__(self, KnowledgeBase kb, entity_hash, entity_freq, entity_vector, alias_hash, prior_prob):
@ -81,7 +81,7 @@ cdef class KnowledgeBase:
"""A `KnowledgeBase` instance stores unique identifiers for entities and their textual aliases,
to support entity linking of named entities to real-world concepts.
DOCS: https://nightly.spacy.io/api/kb
DOCS: https://spacy.io/api/kb
"""
def __init__(self, Vocab vocab, entity_vector_length):

View File

@ -104,7 +104,7 @@ class Language:
object and processing pipeline.
lang (str): Two-letter language ID, i.e. ISO code.
DOCS: https://nightly.spacy.io/api/language
DOCS: https://spacy.io/api/language
"""
Defaults = BaseDefaults
@ -141,7 +141,7 @@ class Language:
returns a tokenizer.
batch_size (int): Default batch size for pipe and evaluate.
DOCS: https://nightly.spacy.io/api/language#init
DOCS: https://spacy.io/api/language#init
"""
# We're only calling this to import all factories provided via entry
# points. The factory decorator applied to these functions takes care
@ -194,7 +194,7 @@ class Language:
RETURNS (Dict[str, Any]): The meta.
DOCS: https://nightly.spacy.io/api/language#meta
DOCS: https://spacy.io/api/language#meta
"""
spacy_version = util.get_model_version_range(about.__version__)
if self.vocab.lang:
@ -235,7 +235,7 @@ class Language:
RETURNS (thinc.api.Config): The config.
DOCS: https://nightly.spacy.io/api/language#config
DOCS: https://spacy.io/api/language#config
"""
self._config.setdefault("nlp", {})
self._config.setdefault("training", {})
@ -444,7 +444,7 @@ class Language:
the score won't be shown in the logs or be weighted.
func (Optional[Callable]): Factory function if not used as a decorator.
DOCS: https://nightly.spacy.io/api/language#factory
DOCS: https://spacy.io/api/language#factory
"""
if not isinstance(name, str):
raise ValueError(Errors.E963.format(decorator="factory"))
@ -524,7 +524,7 @@ class Language:
Used for pipeline analysis.
func (Optional[Callable]): Factory function if not used as a decorator.
DOCS: https://nightly.spacy.io/api/language#component
DOCS: https://spacy.io/api/language#component
"""
if name is not None and not isinstance(name, str):
raise ValueError(Errors.E963.format(decorator="component"))
@ -590,7 +590,7 @@ class Language:
name (str): Name of pipeline component to get.
RETURNS (callable): The pipeline component.
DOCS: https://nightly.spacy.io/api/language#get_pipe
DOCS: https://spacy.io/api/language#get_pipe
"""
for pipe_name, component in self._components:
if pipe_name == name:
@ -619,7 +619,7 @@ class Language:
arguments and types expected by the factory.
RETURNS (Callable[[Doc], Doc]): The pipeline component.
DOCS: https://nightly.spacy.io/api/language#create_pipe
DOCS: https://spacy.io/api/language#create_pipe
"""
name = name if name is not None else factory_name
if not isinstance(config, dict):
@ -740,7 +740,7 @@ class Language:
arguments and types expected by the factory.
RETURNS (Callable[[Doc], Doc]): The pipeline component.
DOCS: https://nightly.spacy.io/api/language#add_pipe
DOCS: https://spacy.io/api/language#add_pipe
"""
if not isinstance(factory_name, str):
bad_val = repr(factory_name)
@ -838,7 +838,7 @@ class Language:
name (str): Name of the component.
RETURNS (bool): Whether a component of the name exists in the pipeline.
DOCS: https://nightly.spacy.io/api/language#has_pipe
DOCS: https://spacy.io/api/language#has_pipe
"""
return name in self.pipe_names
@ -860,7 +860,7 @@ class Language:
arguments and types expected by the factory.
RETURNS (Callable[[Doc], Doc]): The new pipeline component.
DOCS: https://nightly.spacy.io/api/language#replace_pipe
DOCS: https://spacy.io/api/language#replace_pipe
"""
if name not in self.pipe_names:
raise ValueError(Errors.E001.format(name=name, opts=self.pipe_names))
@ -891,7 +891,7 @@ class Language:
old_name (str): Name of the component to rename.
new_name (str): New name of the component.
DOCS: https://nightly.spacy.io/api/language#rename_pipe
DOCS: https://spacy.io/api/language#rename_pipe
"""
if old_name not in self.component_names:
raise ValueError(
@ -916,7 +916,7 @@ class Language:
name (str): Name of the component to remove.
RETURNS (tuple): A `(name, component)` tuple of the removed component.
DOCS: https://nightly.spacy.io/api/language#remove_pipe
DOCS: https://spacy.io/api/language#remove_pipe
"""
if name not in self.component_names:
raise ValueError(Errors.E001.format(name=name, opts=self.component_names))
@ -972,7 +972,7 @@ class Language:
keyword arguments for specific components.
RETURNS (Doc): A container for accessing the annotations.
DOCS: https://nightly.spacy.io/api/language#call
DOCS: https://spacy.io/api/language#call
"""
doc = self.make_doc(text)
if component_cfg is None:
@ -1023,7 +1023,7 @@ class Language:
disable (str or iterable): The name(s) of the pipes to disable
enable (str or iterable): The name(s) of the pipes to enable - all others will be disabled
DOCS: https://nightly.spacy.io/api/language#select_pipes
DOCS: https://spacy.io/api/language#select_pipes
"""
if enable is None and disable is None:
raise ValueError(Errors.E991)
@ -1081,7 +1081,7 @@ class Language:
exclude (Iterable[str]): Names of components that shouldn't be updated.
RETURNS (Dict[str, float]): The updated losses dictionary
DOCS: https://nightly.spacy.io/api/language#update
DOCS: https://spacy.io/api/language#update
"""
if _ is not None:
raise ValueError(Errors.E989)
@ -1144,7 +1144,7 @@ class Language:
>>> raw_batch = [Example.from_dict(nlp.make_doc(text), {}) for text in next(raw_text_batches)]
>>> nlp.rehearse(raw_batch)
DOCS: https://nightly.spacy.io/api/language#rehearse
DOCS: https://spacy.io/api/language#rehearse
"""
if len(examples) == 0:
return
@ -1199,7 +1199,7 @@ class Language:
provided, will be created using the .create_optimizer() method.
RETURNS (thinc.api.Optimizer): The optimizer.
DOCS: https://nightly.spacy.io/api/language#initialize
DOCS: https://spacy.io/api/language#initialize
"""
if get_examples is None:
util.logger.debug(
@ -1266,7 +1266,7 @@ class Language:
RETURNS (Optimizer): The optimizer.
DOCS: https://nightly.spacy.io/api/language#resume_training
DOCS: https://spacy.io/api/language#resume_training
"""
ops = get_current_ops()
if self.vocab.vectors.data.shape[1] >= 1:
@ -1293,7 +1293,7 @@ class Language:
Function that deals with a failing batch of documents. This callable function should take in
the component's name, the component itself, the offending batch of documents, and the exception
that was thrown.
DOCS: https://nightly.spacy.io/api/language#set_error_handler
DOCS: https://spacy.io/api/language#set_error_handler
"""
self.default_error_handler = error_handler
for name, pipe in self.pipeline:
@ -1322,7 +1322,7 @@ class Language:
RETURNS (Scorer): The scorer containing the evaluation results.
DOCS: https://nightly.spacy.io/api/language#evaluate
DOCS: https://spacy.io/api/language#evaluate
"""
examples = list(examples)
validate_examples(examples, "Language.evaluate")
@ -1377,7 +1377,7 @@ class Language:
>>> with nlp.use_params(optimizer.averages):
>>> nlp.to_disk("/tmp/checkpoint")
DOCS: https://nightly.spacy.io/api/language#use_params
DOCS: https://spacy.io/api/language#use_params
"""
if not params:
yield
@ -1424,7 +1424,7 @@ class Language:
n_process (int): Number of processors to process texts. If -1, set `multiprocessing.cpu_count()`.
YIELDS (Doc): Documents in the order of the original text.
DOCS: https://nightly.spacy.io/api/language#pipe
DOCS: https://spacy.io/api/language#pipe
"""
if n_process == -1:
n_process = mp.cpu_count()
@ -1568,7 +1568,7 @@ class Language:
the types expected by the factory.
RETURNS (Language): The initialized Language class.
DOCS: https://nightly.spacy.io/api/language#from_config
DOCS: https://spacy.io/api/language#from_config
"""
if auto_fill:
config = Config(
@ -1712,7 +1712,7 @@ class Language:
either be an empty list to not replace any listeners, or a complete
(!) list of the paths to all listener layers used by the model.
DOCS: https://nightly.spacy.io/api/language#replace_listeners
DOCS: https://spacy.io/api/language#replace_listeners
"""
if tok2vec_name not in self.pipe_names:
err = Errors.E889.format(
@ -1782,7 +1782,7 @@ class Language:
it doesn't exist.
exclude (list): Names of components or serialization fields to exclude.
DOCS: https://nightly.spacy.io/api/language#to_disk
DOCS: https://spacy.io/api/language#to_disk
"""
path = util.ensure_path(path)
serializers = {}
@ -1811,7 +1811,7 @@ class Language:
exclude (list): Names of components or serialization fields to exclude.
RETURNS (Language): The modified `Language` object.
DOCS: https://nightly.spacy.io/api/language#from_disk
DOCS: https://spacy.io/api/language#from_disk
"""
def deserialize_meta(path: Path) -> None:
@ -1859,7 +1859,7 @@ class Language:
exclude (list): Names of components or serialization fields to exclude.
RETURNS (bytes): The serialized form of the `Language` object.
DOCS: https://nightly.spacy.io/api/language#to_bytes
DOCS: https://spacy.io/api/language#to_bytes
"""
serializers = {}
serializers["vocab"] = lambda: self.vocab.to_bytes()
@ -1883,7 +1883,7 @@ class Language:
exclude (list): Names of components or serialization fields to exclude.
RETURNS (Language): The `Language` object.
DOCS: https://nightly.spacy.io/api/language#from_bytes
DOCS: https://spacy.io/api/language#from_bytes
"""
def deserialize_meta(b):

View File

@ -30,7 +30,7 @@ cdef class Lexeme:
tag, dependency parse, or lemma (lemmatization depends on the
part-of-speech tag).
DOCS: https://nightly.spacy.io/api/lexeme
DOCS: https://spacy.io/api/lexeme
"""
def __init__(self, Vocab vocab, attr_t orth):
"""Create a Lexeme object.

View File

@ -57,7 +57,7 @@ class Table(OrderedDict):
data (dict): The dictionary.
name (str): Optional table name for reference.
DOCS: https://nightly.spacy.io/api/lookups#table.from_dict
DOCS: https://spacy.io/api/lookups#table.from_dict
"""
self = cls(name=name)
self.update(data)
@ -69,7 +69,7 @@ class Table(OrderedDict):
name (str): Optional table name for reference.
data (dict): Initial data, used to hint Bloom Filter.
DOCS: https://nightly.spacy.io/api/lookups#table.init
DOCS: https://spacy.io/api/lookups#table.init
"""
OrderedDict.__init__(self)
self.name = name
@ -135,7 +135,7 @@ class Table(OrderedDict):
RETURNS (bytes): The serialized table.
DOCS: https://nightly.spacy.io/api/lookups#table.to_bytes
DOCS: https://spacy.io/api/lookups#table.to_bytes
"""
data = {
"name": self.name,
@ -150,7 +150,7 @@ class Table(OrderedDict):
bytes_data (bytes): The data to load.
RETURNS (Table): The loaded table.
DOCS: https://nightly.spacy.io/api/lookups#table.from_bytes
DOCS: https://spacy.io/api/lookups#table.from_bytes
"""
loaded = srsly.msgpack_loads(bytes_data)
data = loaded.get("dict", {})
@ -172,7 +172,7 @@ class Lookups:
def __init__(self) -> None:
"""Initialize the Lookups object.
DOCS: https://nightly.spacy.io/api/lookups#init
DOCS: https://spacy.io/api/lookups#init
"""
self._tables = {}
@ -201,7 +201,7 @@ class Lookups:
data (dict): Optional data to add to the table.
RETURNS (Table): The newly added table.
DOCS: https://nightly.spacy.io/api/lookups#add_table
DOCS: https://spacy.io/api/lookups#add_table
"""
if name in self.tables:
raise ValueError(Errors.E158.format(name=name))
@ -215,7 +215,7 @@ class Lookups:
name (str): Name of the table to set.
table (Table): The Table to set.
DOCS: https://nightly.spacy.io/api/lookups#set_table
DOCS: https://spacy.io/api/lookups#set_table
"""
self._tables[name] = table
@ -227,7 +227,7 @@ class Lookups:
default (Any): Optional default value to return if table doesn't exist.
RETURNS (Table): The table.
DOCS: https://nightly.spacy.io/api/lookups#get_table
DOCS: https://spacy.io/api/lookups#get_table
"""
if name not in self._tables:
if default == UNSET:
@ -241,7 +241,7 @@ class Lookups:
name (str): Name of the table to remove.
RETURNS (Table): The removed table.
DOCS: https://nightly.spacy.io/api/lookups#remove_table
DOCS: https://spacy.io/api/lookups#remove_table
"""
if name not in self._tables:
raise KeyError(Errors.E159.format(name=name, tables=self.tables))
@ -253,7 +253,7 @@ class Lookups:
name (str): Name of the table.
RETURNS (bool): Whether a table of that name exists.
DOCS: https://nightly.spacy.io/api/lookups#has_table
DOCS: https://spacy.io/api/lookups#has_table
"""
return name in self._tables
@ -262,7 +262,7 @@ class Lookups:
RETURNS (bytes): The serialized Lookups.
DOCS: https://nightly.spacy.io/api/lookups#to_bytes
DOCS: https://spacy.io/api/lookups#to_bytes
"""
return srsly.msgpack_dumps(self._tables)
@ -272,7 +272,7 @@ class Lookups:
bytes_data (bytes): The data to load.
RETURNS (Lookups): The loaded Lookups.
DOCS: https://nightly.spacy.io/api/lookups#from_bytes
DOCS: https://spacy.io/api/lookups#from_bytes
"""
self._tables = {}
for key, value in srsly.msgpack_loads(bytes_data).items():
@ -287,7 +287,7 @@ class Lookups:
path (str / Path): The file path.
DOCS: https://nightly.spacy.io/api/lookups#to_disk
DOCS: https://spacy.io/api/lookups#to_disk
"""
path = ensure_path(path)
if not path.exists():
@ -305,7 +305,7 @@ class Lookups:
path (str / Path): The directory path.
RETURNS (Lookups): The loaded lookups.
DOCS: https://nightly.spacy.io/api/lookups#from_disk
DOCS: https://spacy.io/api/lookups#from_disk
"""
path = ensure_path(path)
filepath = path / filename

View File

@ -32,8 +32,8 @@ DEF PADDING = 5
cdef class Matcher:
"""Match sequences of tokens, based on pattern rules.
DOCS: https://nightly.spacy.io/api/matcher
USAGE: https://nightly.spacy.io/usage/rule-based-matching
DOCS: https://spacy.io/api/matcher
USAGE: https://spacy.io/usage/rule-based-matching
"""
def __init__(self, vocab, validate=True):

View File

@ -20,8 +20,8 @@ cdef class PhraseMatcher:
sequences based on lists of token descriptions, the `PhraseMatcher` accepts
match patterns in the form of `Doc` objects.
DOCS: https://nightly.spacy.io/api/phrasematcher
USAGE: https://nightly.spacy.io/usage/rule-based-matching#phrasematcher
DOCS: https://spacy.io/api/phrasematcher
USAGE: https://spacy.io/usage/rule-based-matching#phrasematcher
Adapted from FlashText: https://github.com/vi3k6i5/flashtext
MIT License (see `LICENSE`)
@ -35,7 +35,7 @@ cdef class PhraseMatcher:
attr (int / str): Token attribute to match on.
validate (bool): Perform additional validation when patterns are added.
DOCS: https://nightly.spacy.io/api/phrasematcher#init
DOCS: https://spacy.io/api/phrasematcher#init
"""
self.vocab = vocab
self._callbacks = {}
@ -64,7 +64,7 @@ cdef class PhraseMatcher:
RETURNS (int): The number of rules.
DOCS: https://nightly.spacy.io/api/phrasematcher#len
DOCS: https://spacy.io/api/phrasematcher#len
"""
return len(self._callbacks)
@ -74,7 +74,7 @@ cdef class PhraseMatcher:
key (str): The match ID.
RETURNS (bool): Whether the matcher contains rules for this match ID.
DOCS: https://nightly.spacy.io/api/phrasematcher#contains
DOCS: https://spacy.io/api/phrasematcher#contains
"""
return key in self._callbacks
@ -88,7 +88,7 @@ cdef class PhraseMatcher:
key (str): The match ID.
DOCS: https://nightly.spacy.io/api/phrasematcher#remove
DOCS: https://spacy.io/api/phrasematcher#remove
"""
if key not in self._docs:
raise KeyError(key)
@ -167,7 +167,7 @@ cdef class PhraseMatcher:
as variable arguments. Will be ignored if a list of patterns is
provided as the second argument.
DOCS: https://nightly.spacy.io/api/phrasematcher#add
DOCS: https://spacy.io/api/phrasematcher#add
"""
if docs is None or hasattr(docs, "__call__"): # old API
on_match = docs
@ -241,7 +241,7 @@ cdef class PhraseMatcher:
`doc[start:end]`. The `match_id` is an integer. If as_spans is set
to True, a list of Span objects is returned.
DOCS: https://nightly.spacy.io/api/phrasematcher#call
DOCS: https://spacy.io/api/phrasematcher#call
"""
matches = []
if doc is None or len(doc) == 0:

View File

@ -32,7 +32,7 @@ class AttributeRuler(Pipe):
"""Set token-level attributes for tokens matched by Matcher patterns.
Additionally supports importing patterns from tag maps and morph rules.
DOCS: https://nightly.spacy.io/api/attributeruler
DOCS: https://spacy.io/api/attributeruler
"""
def __init__(
@ -48,7 +48,7 @@ class AttributeRuler(Pipe):
RETURNS (AttributeRuler): The AttributeRuler component.
DOCS: https://nightly.spacy.io/api/attributeruler#init
DOCS: https://spacy.io/api/attributeruler#init
"""
self.name = name
self.vocab = vocab
@ -94,7 +94,7 @@ class AttributeRuler(Pipe):
doc (Doc): The document to process.
RETURNS (Doc): The processed Doc.
DOCS: https://nightly.spacy.io/api/attributeruler#call
DOCS: https://spacy.io/api/attributeruler#call
"""
error_handler = self.get_error_handler()
try:
@ -143,7 +143,7 @@ class AttributeRuler(Pipe):
tag_map (dict): The tag map that maps fine-grained tags to
coarse-grained tags and morphological features.
DOCS: https://nightly.spacy.io/api/attributeruler#load_from_morph_rules
DOCS: https://spacy.io/api/attributeruler#load_from_morph_rules
"""
for tag, attrs in tag_map.items():
pattern = [{"TAG": tag}]
@ -165,7 +165,7 @@ class AttributeRuler(Pipe):
fine-grained tags to coarse-grained tags, lemmas and morphological
features.
DOCS: https://nightly.spacy.io/api/attributeruler#load_from_morph_rules
DOCS: https://spacy.io/api/attributeruler#load_from_morph_rules
"""
for tag in morph_rules:
for word in morph_rules[tag]:
@ -193,7 +193,7 @@ class AttributeRuler(Pipe):
index (int): The index of the token in the matched span to modify. May
be negative to index from the end of the span. Defaults to 0.
DOCS: https://nightly.spacy.io/api/attributeruler#add
DOCS: https://spacy.io/api/attributeruler#add
"""
# We need to make a string here, because otherwise the ID we pass back
# will be interpreted as the hash of a string, rather than an ordinal.
@ -211,7 +211,7 @@ class AttributeRuler(Pipe):
as the arguments to AttributeRuler.add (patterns/attrs/index) to
add as patterns.
DOCS: https://nightly.spacy.io/api/attributeruler#add_patterns
DOCS: https://spacy.io/api/attributeruler#add_patterns
"""
for p in patterns:
self.add(**p)
@ -236,7 +236,7 @@ class AttributeRuler(Pipe):
Scorer.score_token_attr for the attributes "tag", "pos", "morph"
and "lemma" for the target token attributes.
DOCS: https://nightly.spacy.io/api/tagger#score
DOCS: https://spacy.io/api/tagger#score
"""
def morph_key_getter(token, attr):
@ -273,7 +273,7 @@ class AttributeRuler(Pipe):
exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (bytes): The serialized object.
DOCS: https://nightly.spacy.io/api/attributeruler#to_bytes
DOCS: https://spacy.io/api/attributeruler#to_bytes
"""
serialize = {}
serialize["vocab"] = self.vocab.to_bytes
@ -289,7 +289,7 @@ class AttributeRuler(Pipe):
exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (AttributeRuler): The loaded object.
DOCS: https://nightly.spacy.io/api/attributeruler#from_bytes
DOCS: https://spacy.io/api/attributeruler#from_bytes
"""
def load_patterns(b):
@ -310,7 +310,7 @@ class AttributeRuler(Pipe):
path (Union[Path, str]): A path to a directory.
exclude (Iterable[str]): String names of serialization fields to exclude.
DOCS: https://nightly.spacy.io/api/attributeruler#to_disk
DOCS: https://spacy.io/api/attributeruler#to_disk
"""
serialize = {
"vocab": lambda p: self.vocab.to_disk(p),
@ -327,7 +327,7 @@ class AttributeRuler(Pipe):
exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (AttributeRuler): The loaded object.
DOCS: https://nightly.spacy.io/api/attributeruler#from_disk
DOCS: https://spacy.io/api/attributeruler#from_disk
"""
def load_patterns(p):

View File

@ -202,7 +202,7 @@ def make_beam_parser(
cdef class DependencyParser(Parser):
"""Pipeline component for dependency parsing.
DOCS: https://nightly.spacy.io/api/dependencyparser
DOCS: https://spacy.io/api/dependencyparser
"""
TransitionSystem = ArcEager
@ -243,7 +243,7 @@ cdef class DependencyParser(Parser):
RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans
and Scorer.score_deps.
DOCS: https://nightly.spacy.io/api/dependencyparser#score
DOCS: https://spacy.io/api/dependencyparser#score
"""
def has_sents(doc):
return doc.has_annotation("SENT_START")

View File

@ -94,7 +94,7 @@ def make_entity_linker(
class EntityLinker(TrainablePipe):
"""Pipeline component for named entity linking.
DOCS: https://nightly.spacy.io/api/entitylinker
DOCS: https://spacy.io/api/entitylinker
"""
NIL = "NIL" # string used to refer to a non-existing link
@ -124,7 +124,7 @@ class EntityLinker(TrainablePipe):
get_candidates (Callable[[KnowledgeBase, "Span"], Iterable[Candidate]]): Function that
produces a list of candidates, given a certain knowledge base and a textual mention.
DOCS: https://nightly.spacy.io/api/entitylinker#init
DOCS: https://spacy.io/api/entitylinker#init
"""
self.vocab = vocab
self.model = model
@ -171,7 +171,7 @@ class EntityLinker(TrainablePipe):
Note that providing this argument will overwrite all data accumulated in the current KB.
Use this only when loading a KB as-such from file.
DOCS: https://nightly.spacy.io/api/entitylinker#initialize
DOCS: https://spacy.io/api/entitylinker#initialize
"""
validate_get_examples(get_examples, "EntityLinker.initialize")
if kb_loader is not None:
@ -207,7 +207,7 @@ class EntityLinker(TrainablePipe):
Updated using the component name as the key.
RETURNS (Dict[str, float]): The updated losses dictionary.
DOCS: https://nightly.spacy.io/api/entitylinker#update
DOCS: https://spacy.io/api/entitylinker#update
"""
self.validate_kb()
if losses is None:
@ -283,7 +283,7 @@ class EntityLinker(TrainablePipe):
docs (Iterable[Doc]): The documents to predict.
RETURNS (List[int]): The model's prediction for each document.
DOCS: https://nightly.spacy.io/api/entitylinker#predict
DOCS: https://spacy.io/api/entitylinker#predict
"""
self.validate_kb()
entity_count = 0
@ -380,7 +380,7 @@ class EntityLinker(TrainablePipe):
docs (Iterable[Doc]): The documents to modify.
kb_ids (List[str]): The IDs to set, produced by EntityLinker.predict.
DOCS: https://nightly.spacy.io/api/entitylinker#set_annotations
DOCS: https://spacy.io/api/entitylinker#set_annotations
"""
count_ents = len([ent for doc in docs for ent in doc.ents])
if count_ents != len(kb_ids):
@ -399,7 +399,7 @@ class EntityLinker(TrainablePipe):
examples (Iterable[Example]): The examples to score.
RETURNS (Dict[str, Any]): The scores.
DOCS TODO: https://nightly.spacy.io/api/entity_linker#score
DOCS TODO: https://spacy.io/api/entity_linker#score
"""
validate_examples(examples, "EntityLinker.score")
return Scorer.score_links(examples, negative_labels=[self.NIL])
@ -412,7 +412,7 @@ class EntityLinker(TrainablePipe):
path (str / Path): Path to a directory.
exclude (Iterable[str]): String names of serialization fields to exclude.
DOCS: https://nightly.spacy.io/api/entitylinker#to_disk
DOCS: https://spacy.io/api/entitylinker#to_disk
"""
serialize = {}
serialize["vocab"] = lambda p: self.vocab.to_disk(p)
@ -430,7 +430,7 @@ class EntityLinker(TrainablePipe):
exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (EntityLinker): The modified EntityLinker object.
DOCS: https://nightly.spacy.io/api/entitylinker#from_disk
DOCS: https://spacy.io/api/entitylinker#from_disk
"""
def load_model(p):

View File

@ -59,8 +59,8 @@ class EntityRuler(Pipe):
purely rule-based entity recognition system. After initialization, the
component is typically added to the pipeline using `nlp.add_pipe`.
DOCS: https://nightly.spacy.io/api/entityruler
USAGE: https://nightly.spacy.io/usage/rule-based-matching#entityruler
DOCS: https://spacy.io/api/entityruler
USAGE: https://spacy.io/usage/rule-based-matching#entityruler
"""
def __init__(
@ -94,7 +94,7 @@ class EntityRuler(Pipe):
added by the model, overwrite them by matches if necessary.
ent_id_sep (str): Separator used internally for entity IDs.
DOCS: https://nightly.spacy.io/api/entityruler#init
DOCS: https://spacy.io/api/entityruler#init
"""
self.nlp = nlp
self.name = name
@ -133,7 +133,7 @@ class EntityRuler(Pipe):
doc (Doc): The Doc object in the pipeline.
RETURNS (Doc): The Doc with added entities, if available.
DOCS: https://nightly.spacy.io/api/entityruler#call
DOCS: https://spacy.io/api/entityruler#call
"""
error_handler = self.get_error_handler()
try:
@ -183,7 +183,7 @@ class EntityRuler(Pipe):
RETURNS (set): The string labels.
DOCS: https://nightly.spacy.io/api/entityruler#labels
DOCS: https://spacy.io/api/entityruler#labels
"""
keys = set(self.token_patterns.keys())
keys.update(self.phrase_patterns.keys())
@ -211,7 +211,7 @@ class EntityRuler(Pipe):
nlp (Language): The current nlp object the component is part of.
patterns (Optional[Iterable[PatternType]]): The list of patterns.
DOCS: https://nightly.spacy.io/api/entityruler#initialize
DOCS: https://spacy.io/api/entityruler#initialize
"""
self.clear()
if patterns:
@ -223,7 +223,7 @@ class EntityRuler(Pipe):
RETURNS (set): The string entity ids.
DOCS: https://nightly.spacy.io/api/entityruler#ent_ids
DOCS: https://spacy.io/api/entityruler#ent_ids
"""
keys = set(self.token_patterns.keys())
keys.update(self.phrase_patterns.keys())
@ -241,7 +241,7 @@ class EntityRuler(Pipe):
RETURNS (list): The original patterns, one dictionary per pattern.
DOCS: https://nightly.spacy.io/api/entityruler#patterns
DOCS: https://spacy.io/api/entityruler#patterns
"""
all_patterns = []
for label, patterns in self.token_patterns.items():
@ -268,7 +268,7 @@ class EntityRuler(Pipe):
patterns (list): The patterns to add.
DOCS: https://nightly.spacy.io/api/entityruler#add_patterns
DOCS: https://spacy.io/api/entityruler#add_patterns
"""
# disable the nlp components after this one in case they hadn't been initialized / deserialised yet
@ -366,7 +366,7 @@ class EntityRuler(Pipe):
patterns_bytes (bytes): The bytestring to load.
RETURNS (EntityRuler): The loaded entity ruler.
DOCS: https://nightly.spacy.io/api/entityruler#from_bytes
DOCS: https://spacy.io/api/entityruler#from_bytes
"""
cfg = srsly.msgpack_loads(patterns_bytes)
self.clear()
@ -388,7 +388,7 @@ class EntityRuler(Pipe):
RETURNS (bytes): The serialized patterns.
DOCS: https://nightly.spacy.io/api/entityruler#to_bytes
DOCS: https://spacy.io/api/entityruler#to_bytes
"""
serial = {
"overwrite": self.overwrite,
@ -407,7 +407,7 @@ class EntityRuler(Pipe):
path (str / Path): The JSONL file to load.
RETURNS (EntityRuler): The loaded entity ruler.
DOCS: https://nightly.spacy.io/api/entityruler#from_disk
DOCS: https://spacy.io/api/entityruler#from_disk
"""
path = ensure_path(path)
self.clear()
@ -443,7 +443,7 @@ class EntityRuler(Pipe):
path (str / Path): The JSONL file to save.
DOCS: https://nightly.spacy.io/api/entityruler#to_disk
DOCS: https://spacy.io/api/entityruler#to_disk
"""
path = ensure_path(path)
cfg = {

View File

@ -18,7 +18,7 @@ def merge_noun_chunks(doc: Doc) -> Doc:
doc (Doc): The Doc object.
RETURNS (Doc): The Doc object with merged noun chunks.
DOCS: https://nightly.spacy.io/api/pipeline-functions#merge_noun_chunks
DOCS: https://spacy.io/api/pipeline-functions#merge_noun_chunks
"""
if not doc.has_annotation("DEP"):
return doc
@ -40,7 +40,7 @@ def merge_entities(doc: Doc):
doc (Doc): The Doc object.
RETURNS (Doc): The Doc object with merged entities.
DOCS: https://nightly.spacy.io/api/pipeline-functions#merge_entities
DOCS: https://spacy.io/api/pipeline-functions#merge_entities
"""
with doc.retokenize() as retokenizer:
for ent in doc.ents:
@ -57,7 +57,7 @@ def merge_subtokens(doc: Doc, label: str = "subtok") -> Doc:
label (str): The subtoken dependency label.
RETURNS (Doc): The Doc object with merged subtokens.
DOCS: https://nightly.spacy.io/api/pipeline-functions#merge_subtokens
DOCS: https://spacy.io/api/pipeline-functions#merge_subtokens
"""
# TODO: make stateful component with "label" config
merger = Matcher(doc.vocab)

View File

@ -32,7 +32,7 @@ class Lemmatizer(Pipe):
The Lemmatizer supports simple part-of-speech-sensitive suffix rules and
lookup tables.
DOCS: https://nightly.spacy.io/api/lemmatizer
DOCS: https://spacy.io/api/lemmatizer
"""
@classmethod
@ -68,7 +68,7 @@ class Lemmatizer(Pipe):
overwrite (bool): Whether to overwrite existing lemmas. Defaults to
`False`.
DOCS: https://nightly.spacy.io/api/lemmatizer#init
DOCS: https://spacy.io/api/lemmatizer#init
"""
self.vocab = vocab
self.model = model
@ -98,7 +98,7 @@ class Lemmatizer(Pipe):
doc (Doc): The Doc to process.
RETURNS (Doc): The processed Doc.
DOCS: https://nightly.spacy.io/api/lemmatizer#call
DOCS: https://spacy.io/api/lemmatizer#call
"""
if not self._validated:
self._validate_tables(Errors.E1004)
@ -159,7 +159,7 @@ class Lemmatizer(Pipe):
token (Token): The token to lemmatize.
RETURNS (list): The available lemmas for the string.
DOCS: https://nightly.spacy.io/api/lemmatizer#lookup_lemmatize
DOCS: https://spacy.io/api/lemmatizer#lookup_lemmatize
"""
lookup_table = self.lookups.get_table("lemma_lookup", {})
result = lookup_table.get(token.text, token.text)
@ -173,7 +173,7 @@ class Lemmatizer(Pipe):
token (Token): The token to lemmatize.
RETURNS (list): The available lemmas for the string.
DOCS: https://nightly.spacy.io/api/lemmatizer#rule_lemmatize
DOCS: https://spacy.io/api/lemmatizer#rule_lemmatize
"""
cache_key = (token.orth, token.pos, token.morph)
if cache_key in self.cache:
@ -241,7 +241,7 @@ class Lemmatizer(Pipe):
token (Token): The token.
RETURNS (bool): Whether the token is a base form.
DOCS: https://nightly.spacy.io/api/lemmatizer#is_base_form
DOCS: https://spacy.io/api/lemmatizer#is_base_form
"""
return False
@ -251,7 +251,7 @@ class Lemmatizer(Pipe):
examples (Iterable[Example]): The examples to score.
RETURNS (Dict[str, Any]): The scores.
DOCS: https://nightly.spacy.io/api/lemmatizer#score
DOCS: https://spacy.io/api/lemmatizer#score
"""
validate_examples(examples, "Lemmatizer.score")
return Scorer.score_token_attr(examples, "lemma", **kwargs)
@ -264,7 +264,7 @@ class Lemmatizer(Pipe):
path (str / Path): Path to a directory.
exclude (Iterable[str]): String names of serialization fields to exclude.
DOCS: https://nightly.spacy.io/api/lemmatizer#to_disk
DOCS: https://spacy.io/api/lemmatizer#to_disk
"""
serialize = {}
serialize["vocab"] = lambda p: self.vocab.to_disk(p)
@ -280,7 +280,7 @@ class Lemmatizer(Pipe):
exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (Lemmatizer): The modified Lemmatizer object.
DOCS: https://nightly.spacy.io/api/lemmatizer#from_disk
DOCS: https://spacy.io/api/lemmatizer#from_disk
"""
deserialize = {}
deserialize["vocab"] = lambda p: self.vocab.from_disk(p)
@ -295,7 +295,7 @@ class Lemmatizer(Pipe):
exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (bytes): The serialized object.
DOCS: https://nightly.spacy.io/api/lemmatizer#to_bytes
DOCS: https://spacy.io/api/lemmatizer#to_bytes
"""
serialize = {}
serialize["vocab"] = self.vocab.to_bytes
@ -311,7 +311,7 @@ class Lemmatizer(Pipe):
exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (Lemmatizer): The loaded Lemmatizer.
DOCS: https://nightly.spacy.io/api/lemmatizer#from_bytes
DOCS: https://spacy.io/api/lemmatizer#from_bytes
"""
deserialize = {}
deserialize["vocab"] = lambda b: self.vocab.from_bytes(b)

View File

@ -75,7 +75,7 @@ class Morphologizer(Tagger):
name (str): The component instance name, used to add entries to the
losses during training.
DOCS: https://nightly.spacy.io/api/morphologizer#init
DOCS: https://spacy.io/api/morphologizer#init
"""
self.vocab = vocab
self.model = model
@ -104,7 +104,7 @@ class Morphologizer(Tagger):
label (str): The label to add.
RETURNS (int): 0 if label is already present, otherwise 1.
DOCS: https://nightly.spacy.io/api/morphologizer#add_label
DOCS: https://spacy.io/api/morphologizer#add_label
"""
if not isinstance(label, str):
raise ValueError(Errors.E187)
@ -134,7 +134,7 @@ class Morphologizer(Tagger):
returns a representative sample of gold-standard Example objects.
nlp (Language): The current nlp object the component is part of.
DOCS: https://nightly.spacy.io/api/morphologizer#initialize
DOCS: https://spacy.io/api/morphologizer#initialize
"""
validate_get_examples(get_examples, "Morphologizer.initialize")
if labels is not None:
@ -185,7 +185,7 @@ class Morphologizer(Tagger):
docs (Iterable[Doc]): The documents to modify.
batch_tag_ids: The IDs to set, produced by Morphologizer.predict.
DOCS: https://nightly.spacy.io/api/morphologizer#set_annotations
DOCS: https://spacy.io/api/morphologizer#set_annotations
"""
if isinstance(docs, Doc):
docs = [docs]
@ -208,7 +208,7 @@ class Morphologizer(Tagger):
scores: Scores representing the model's predictions.
RETURNS (Tuple[float, float]): The loss and the gradient.
DOCS: https://nightly.spacy.io/api/morphologizer#get_loss
DOCS: https://spacy.io/api/morphologizer#get_loss
"""
validate_examples(examples, "Morphologizer.get_loss")
loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False)
@ -254,7 +254,7 @@ class Morphologizer(Tagger):
Scorer.score_token_attr for the attributes "pos" and "morph" and
Scorer.score_token_attr_per_feat for the attribute "morph".
DOCS: https://nightly.spacy.io/api/morphologizer#score
DOCS: https://spacy.io/api/morphologizer#score
"""
def morph_key_getter(token, attr):
return getattr(token, attr).key

View File

@ -163,7 +163,7 @@ def make_beam_ner(
cdef class EntityRecognizer(Parser):
"""Pipeline component for named entity recognition.
DOCS: https://nightly.spacy.io/api/entityrecognizer
DOCS: https://spacy.io/api/entityrecognizer
"""
TransitionSystem = BiluoPushDown
@ -194,7 +194,7 @@ cdef class EntityRecognizer(Parser):
examples (Iterable[Example]): The examples to score.
RETURNS (Dict[str, Any]): The NER precision, recall and f-scores.
DOCS: https://nightly.spacy.io/api/entityrecognizer#score
DOCS: https://spacy.io/api/entityrecognizer#score
"""
validate_examples(examples, "EntityRecognizer.score")
return get_ner_prf(examples)

View File

@ -16,7 +16,7 @@ cdef class Pipe:
Trainable pipeline components like the EntityRecognizer or TextCategorizer
should inherit from the subclass 'TrainablePipe'.
DOCS: https://nightly.spacy.io/api/pipe
DOCS: https://spacy.io/api/pipe
"""
@classmethod
@ -34,7 +34,7 @@ cdef class Pipe:
docs (Doc): The Doc to process.
RETURNS (Doc): The processed Doc.
DOCS: https://nightly.spacy.io/api/pipe#call
DOCS: https://spacy.io/api/pipe#call
"""
raise NotImplementedError(Errors.E931.format(parent="Pipe", method="__call__", name=self.name))
@ -47,7 +47,7 @@ cdef class Pipe:
batch_size (int): The number of documents to buffer.
YIELDS (Doc): Processed documents in order.
DOCS: https://nightly.spacy.io/api/pipe#pipe
DOCS: https://spacy.io/api/pipe#pipe
"""
error_handler = self.get_error_handler()
for doc in stream:
@ -69,7 +69,7 @@ cdef class Pipe:
returns a representative sample of gold-standard Example objects.
nlp (Language): The current nlp object the component is part of.
DOCS: https://nightly.spacy.io/api/pipe#initialize
DOCS: https://spacy.io/api/pipe#initialize
"""
pass
@ -79,7 +79,7 @@ cdef class Pipe:
examples (Iterable[Example]): The examples to score.
RETURNS (Dict[str, Any]): The scores.
DOCS: https://nightly.spacy.io/api/pipe#score
DOCS: https://spacy.io/api/pipe#score
"""
return {}
@ -111,7 +111,7 @@ cdef class Pipe:
the component's name, the component itself, the offending batch of documents, and the exception
that was thrown.
DOCS: https://nightly.spacy.io/api/pipe#set_error_handler
DOCS: https://spacy.io/api/pipe#set_error_handler
"""
self.error_handler = error_handler
@ -120,7 +120,7 @@ cdef class Pipe:
RETURNS (Callable): The error handler, or if it's not set a default function that just reraises.
DOCS: https://nightly.spacy.io/api/pipe#get_error_handler
DOCS: https://spacy.io/api/pipe#get_error_handler
"""
if hasattr(self, "error_handler"):
return self.error_handler

View File

@ -26,7 +26,7 @@ def make_sentencizer(
class Sentencizer(Pipe):
"""Segment the Doc into sentences using a rule-based strategy.
DOCS: https://nightly.spacy.io/api/sentencizer
DOCS: https://spacy.io/api/sentencizer
"""
default_punct_chars = ['!', '.', '?', '։', '؟', '۔', '܀', '܁', '܂', '߹',
@ -48,7 +48,7 @@ class Sentencizer(Pipe):
serialized with the nlp object.
RETURNS (Sentencizer): The sentencizer component.
DOCS: https://nightly.spacy.io/api/sentencizer#init
DOCS: https://spacy.io/api/sentencizer#init
"""
self.name = name
if punct_chars:
@ -62,7 +62,7 @@ class Sentencizer(Pipe):
doc (Doc): The document to process.
RETURNS (Doc): The processed Doc.
DOCS: https://nightly.spacy.io/api/sentencizer#call
DOCS: https://spacy.io/api/sentencizer#call
"""
error_handler = self.get_error_handler()
try:
@ -142,7 +142,7 @@ class Sentencizer(Pipe):
examples (Iterable[Example]): The examples to score.
RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans.
DOCS: https://nightly.spacy.io/api/sentencizer#score
DOCS: https://spacy.io/api/sentencizer#score
"""
def has_sents(doc):
return doc.has_annotation("SENT_START")
@ -157,7 +157,7 @@ class Sentencizer(Pipe):
RETURNS (bytes): The serialized object.
DOCS: https://nightly.spacy.io/api/sentencizer#to_bytes
DOCS: https://spacy.io/api/sentencizer#to_bytes
"""
return srsly.msgpack_dumps({"punct_chars": list(self.punct_chars)})
@ -167,7 +167,7 @@ class Sentencizer(Pipe):
bytes_data (bytes): The data to load.
RETURNS (Sentencizer): The loaded object.
DOCS: https://nightly.spacy.io/api/sentencizer#from_bytes
DOCS: https://spacy.io/api/sentencizer#from_bytes
"""
cfg = srsly.msgpack_loads(bytes_data)
self.punct_chars = set(cfg.get("punct_chars", self.default_punct_chars))
@ -176,7 +176,7 @@ class Sentencizer(Pipe):
def to_disk(self, path, *, exclude=tuple()):
"""Serialize the sentencizer to disk.
DOCS: https://nightly.spacy.io/api/sentencizer#to_disk
DOCS: https://spacy.io/api/sentencizer#to_disk
"""
path = util.ensure_path(path)
path = path.with_suffix(".json")
@ -186,7 +186,7 @@ class Sentencizer(Pipe):
def from_disk(self, path, *, exclude=tuple()):
"""Load the sentencizer from disk.
DOCS: https://nightly.spacy.io/api/sentencizer#from_disk
DOCS: https://spacy.io/api/sentencizer#from_disk
"""
path = util.ensure_path(path)
path = path.with_suffix(".json")

View File

@ -44,7 +44,7 @@ def make_senter(nlp: Language, name: str, model: Model):
class SentenceRecognizer(Tagger):
"""Pipeline component for sentence segmentation.
DOCS: https://nightly.spacy.io/api/sentencerecognizer
DOCS: https://spacy.io/api/sentencerecognizer
"""
def __init__(self, vocab, model, name="senter"):
"""Initialize a sentence recognizer.
@ -54,7 +54,7 @@ class SentenceRecognizer(Tagger):
name (str): The component instance name, used to add entries to the
losses during training.
DOCS: https://nightly.spacy.io/api/sentencerecognizer#init
DOCS: https://spacy.io/api/sentencerecognizer#init
"""
self.vocab = vocab
self.model = model
@ -80,7 +80,7 @@ class SentenceRecognizer(Tagger):
docs (Iterable[Doc]): The documents to modify.
batch_tag_ids: The IDs to set, produced by SentenceRecognizer.predict.
DOCS: https://nightly.spacy.io/api/sentencerecognizer#set_annotations
DOCS: https://spacy.io/api/sentencerecognizer#set_annotations
"""
if isinstance(docs, Doc):
docs = [docs]
@ -105,7 +105,7 @@ class SentenceRecognizer(Tagger):
scores: Scores representing the model's predictions.
RETURNS (Tuple[float, float]): The loss and the gradient.
DOCS: https://nightly.spacy.io/api/sentencerecognizer#get_loss
DOCS: https://spacy.io/api/sentencerecognizer#get_loss
"""
validate_examples(examples, "SentenceRecognizer.get_loss")
labels = self.labels
@ -135,7 +135,7 @@ class SentenceRecognizer(Tagger):
returns a representative sample of gold-standard Example objects.
nlp (Language): The current nlp object the component is part of.
DOCS: https://nightly.spacy.io/api/sentencerecognizer#initialize
DOCS: https://spacy.io/api/sentencerecognizer#initialize
"""
validate_get_examples(get_examples, "SentenceRecognizer.initialize")
doc_sample = []
@ -158,7 +158,7 @@ class SentenceRecognizer(Tagger):
examples (Iterable[Example]): The examples to score.
RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans.
DOCS: https://nightly.spacy.io/api/sentencerecognizer#score
DOCS: https://spacy.io/api/sentencerecognizer#score
"""
def has_sents(doc):
return doc.has_annotation("SENT_START")

View File

@ -58,7 +58,7 @@ def make_tagger(nlp: Language, name: str, model: Model):
class Tagger(TrainablePipe):
"""Pipeline component for part-of-speech tagging.
DOCS: https://nightly.spacy.io/api/tagger
DOCS: https://spacy.io/api/tagger
"""
def __init__(self, vocab, model, name="tagger"):
"""Initialize a part-of-speech tagger.
@ -68,7 +68,7 @@ class Tagger(TrainablePipe):
name (str): The component instance name, used to add entries to the
losses during training.
DOCS: https://nightly.spacy.io/api/tagger#init
DOCS: https://spacy.io/api/tagger#init
"""
self.vocab = vocab
self.model = model
@ -85,7 +85,7 @@ class Tagger(TrainablePipe):
RETURNS (Tuple[str]): The labels.
DOCS: https://nightly.spacy.io/api/tagger#labels
DOCS: https://spacy.io/api/tagger#labels
"""
return tuple(self.cfg["labels"])
@ -100,7 +100,7 @@ class Tagger(TrainablePipe):
docs (Iterable[Doc]): The documents to predict.
RETURNS: The model's prediction for each document.
DOCS: https://nightly.spacy.io/api/tagger#predict
DOCS: https://spacy.io/api/tagger#predict
"""
if not any(len(doc) for doc in docs):
# Handle cases where there are no tokens in any docs.
@ -129,7 +129,7 @@ class Tagger(TrainablePipe):
docs (Iterable[Doc]): The documents to modify.
batch_tag_ids: The IDs to set, produced by Tagger.predict.
DOCS: https://nightly.spacy.io/api/tagger#set_annotations
DOCS: https://spacy.io/api/tagger#set_annotations
"""
if isinstance(docs, Doc):
docs = [docs]
@ -155,7 +155,7 @@ class Tagger(TrainablePipe):
Updated using the component name as the key.
RETURNS (Dict[str, float]): The updated losses dictionary.
DOCS: https://nightly.spacy.io/api/tagger#update
DOCS: https://spacy.io/api/tagger#update
"""
if losses is None:
losses = {}
@ -190,7 +190,7 @@ class Tagger(TrainablePipe):
Updated using the component name as the key.
RETURNS (Dict[str, float]): The updated losses dictionary.
DOCS: https://nightly.spacy.io/api/tagger#rehearse
DOCS: https://spacy.io/api/tagger#rehearse
"""
if losses is None:
losses = {}
@ -219,7 +219,7 @@ class Tagger(TrainablePipe):
scores: Scores representing the model's predictions.
RETURNS (Tuple[float, float]): The loss and the gradient.
DOCS: https://nightly.spacy.io/api/tagger#get_loss
DOCS: https://spacy.io/api/tagger#get_loss
"""
validate_examples(examples, "Tagger.get_loss")
loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False)
@ -246,7 +246,7 @@ class Tagger(TrainablePipe):
`init labels` command. If no labels are provided, the get_examples
callback is used to extract the labels from the data.
DOCS: https://nightly.spacy.io/api/tagger#initialize
DOCS: https://spacy.io/api/tagger#initialize
"""
validate_get_examples(get_examples, "Tagger.initialize")
if labels is not None:
@ -278,7 +278,7 @@ class Tagger(TrainablePipe):
label (str): The label to add.
RETURNS (int): 0 if label is already present, otherwise 1.
DOCS: https://nightly.spacy.io/api/tagger#add_label
DOCS: https://spacy.io/api/tagger#add_label
"""
if not isinstance(label, str):
raise ValueError(Errors.E187)
@ -296,7 +296,7 @@ class Tagger(TrainablePipe):
RETURNS (Dict[str, Any]): The scores, produced by
Scorer.score_token_attr for the attributes "tag".
DOCS: https://nightly.spacy.io/api/tagger#score
DOCS: https://spacy.io/api/tagger#score
"""
validate_examples(examples, "Tagger.score")
return Scorer.score_token_attr(examples, "tag", **kwargs)

View File

@ -104,7 +104,7 @@ def make_textcat(
class TextCategorizer(TrainablePipe):
"""Pipeline component for single-label text classification.
DOCS: https://nightly.spacy.io/api/textcategorizer
DOCS: https://spacy.io/api/textcategorizer
"""
def __init__(
@ -118,7 +118,7 @@ class TextCategorizer(TrainablePipe):
losses during training.
threshold (float): Cutoff to consider a prediction "positive".
DOCS: https://nightly.spacy.io/api/textcategorizer#init
DOCS: https://spacy.io/api/textcategorizer#init
"""
self.vocab = vocab
self.model = model
@ -131,7 +131,7 @@ class TextCategorizer(TrainablePipe):
def labels(self) -> Tuple[str]:
"""RETURNS (Tuple[str]): The labels currently added to the component.
DOCS: https://nightly.spacy.io/api/textcategorizer#labels
DOCS: https://spacy.io/api/textcategorizer#labels
"""
return tuple(self.cfg["labels"])
@ -139,7 +139,7 @@ class TextCategorizer(TrainablePipe):
def label_data(self) -> List[str]:
"""RETURNS (List[str]): Information about the component's labels.
DOCS: https://nightly.spacy.io/api/textcategorizer#label_data
DOCS: https://spacy.io/api/textcategorizer#label_data
"""
return self.labels
@ -149,7 +149,7 @@ class TextCategorizer(TrainablePipe):
docs (Iterable[Doc]): The documents to predict.
RETURNS: The model's prediction for each document.
DOCS: https://nightly.spacy.io/api/textcategorizer#predict
DOCS: https://spacy.io/api/textcategorizer#predict
"""
if not any(len(doc) for doc in docs):
# Handle cases where there are no tokens in any docs.
@ -167,7 +167,7 @@ class TextCategorizer(TrainablePipe):
docs (Iterable[Doc]): The documents to modify.
scores: The scores to set, produced by TextCategorizer.predict.
DOCS: https://nightly.spacy.io/api/textcategorizer#set_annotations
DOCS: https://spacy.io/api/textcategorizer#set_annotations
"""
for i, doc in enumerate(docs):
for j, label in enumerate(self.labels):
@ -191,7 +191,7 @@ class TextCategorizer(TrainablePipe):
Updated using the component name as the key.
RETURNS (Dict[str, float]): The updated losses dictionary.
DOCS: https://nightly.spacy.io/api/textcategorizer#update
DOCS: https://spacy.io/api/textcategorizer#update
"""
if losses is None:
losses = {}
@ -230,7 +230,7 @@ class TextCategorizer(TrainablePipe):
Updated using the component name as the key.
RETURNS (Dict[str, float]): The updated losses dictionary.
DOCS: https://nightly.spacy.io/api/textcategorizer#rehearse
DOCS: https://spacy.io/api/textcategorizer#rehearse
"""
if losses is not None:
losses.setdefault(self.name, 0.0)
@ -275,7 +275,7 @@ class TextCategorizer(TrainablePipe):
scores: Scores representing the model's predictions.
RETURNS (Tuple[float, float]): The loss and the gradient.
DOCS: https://nightly.spacy.io/api/textcategorizer#get_loss
DOCS: https://spacy.io/api/textcategorizer#get_loss
"""
validate_examples(examples, "TextCategorizer.get_loss")
self._validate_categories(examples)
@ -292,7 +292,7 @@ class TextCategorizer(TrainablePipe):
label (str): The label to add.
RETURNS (int): 0 if label is already present, otherwise 1.
DOCS: https://nightly.spacy.io/api/textcategorizer#add_label
DOCS: https://spacy.io/api/textcategorizer#add_label
"""
if not isinstance(label, str):
raise ValueError(Errors.E187)
@ -321,7 +321,7 @@ class TextCategorizer(TrainablePipe):
`init labels` command. If no labels are provided, the get_examples
callback is used to extract the labels from the data.
DOCS: https://nightly.spacy.io/api/textcategorizer#initialize
DOCS: https://spacy.io/api/textcategorizer#initialize
"""
validate_get_examples(get_examples, "TextCategorizer.initialize")
self._validate_categories(get_examples())
@ -354,7 +354,7 @@ class TextCategorizer(TrainablePipe):
examples (Iterable[Example]): The examples to score.
RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_cats.
DOCS: https://nightly.spacy.io/api/textcategorizer#score
DOCS: https://spacy.io/api/textcategorizer#score
"""
validate_examples(examples, "TextCategorizer.score")
self._validate_categories(examples)

View File

@ -104,7 +104,7 @@ def make_multilabel_textcat(
class MultiLabel_TextCategorizer(TextCategorizer):
"""Pipeline component for multi-label text classification.
DOCS: https://nightly.spacy.io/api/multilabel_textcategorizer
DOCS: https://spacy.io/api/multilabel_textcategorizer
"""
def __init__(
@ -123,7 +123,7 @@ class MultiLabel_TextCategorizer(TextCategorizer):
losses during training.
threshold (float): Cutoff to consider a prediction "positive".
DOCS: https://nightly.spacy.io/api/multilabel_textcategorizer#init
DOCS: https://spacy.io/api/multilabel_textcategorizer#init
"""
self.vocab = vocab
self.model = model
@ -149,7 +149,7 @@ class MultiLabel_TextCategorizer(TextCategorizer):
`init labels` command. If no labels are provided, the get_examples
callback is used to extract the labels from the data.
DOCS: https://nightly.spacy.io/api/multilabel_textcategorizer#initialize
DOCS: https://spacy.io/api/multilabel_textcategorizer#initialize
"""
validate_get_examples(get_examples, "MultiLabel_TextCategorizer.initialize")
if labels is None:
@ -173,7 +173,7 @@ class MultiLabel_TextCategorizer(TextCategorizer):
examples (Iterable[Example]): The examples to score.
RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_cats.
DOCS: https://nightly.spacy.io/api/multilabel_textcategorizer#score
DOCS: https://spacy.io/api/multilabel_textcategorizer#score
"""
validate_examples(examples, "MultiLabel_TextCategorizer.score")
return Scorer.score_cats(

View File

@ -55,7 +55,7 @@ class Tok2Vec(TrainablePipe):
a list of Doc objects as input, and output a list of 2d float arrays.
name (str): The component instance name.
DOCS: https://nightly.spacy.io/api/tok2vec#init
DOCS: https://spacy.io/api/tok2vec#init
"""
self.vocab = vocab
self.model = model
@ -115,7 +115,7 @@ class Tok2Vec(TrainablePipe):
docs (Iterable[Doc]): The documents to predict.
RETURNS: Vector representations for each token in the documents.
DOCS: https://nightly.spacy.io/api/tok2vec#predict
DOCS: https://spacy.io/api/tok2vec#predict
"""
tokvecs = self.model.predict(docs)
batch_id = Tok2VecListener.get_batch_id(docs)
@ -129,7 +129,7 @@ class Tok2Vec(TrainablePipe):
docs (Iterable[Doc]): The documents to modify.
tokvecses: The tensors to set, produced by Tok2Vec.predict.
DOCS: https://nightly.spacy.io/api/tok2vec#set_annotations
DOCS: https://spacy.io/api/tok2vec#set_annotations
"""
for doc, tokvecs in zip(docs, tokvecses):
assert tokvecs.shape[0] == len(doc)
@ -153,7 +153,7 @@ class Tok2Vec(TrainablePipe):
Updated using the component name as the key.
RETURNS (Dict[str, float]): The updated losses dictionary.
DOCS: https://nightly.spacy.io/api/tok2vec#update
DOCS: https://spacy.io/api/tok2vec#update
"""
if losses is None:
losses = {}
@ -204,7 +204,7 @@ class Tok2Vec(TrainablePipe):
returns a representative sample of gold-standard Example objects.
nlp (Language): The current nlp object the component is part of.
DOCS: https://nightly.spacy.io/api/tok2vec#initialize
DOCS: https://spacy.io/api/tok2vec#initialize
"""
validate_get_examples(get_examples, "Tok2Vec.initialize")
doc_sample = []

View File

@ -20,7 +20,7 @@ cdef class TrainablePipe(Pipe):
from it and it defines the interface that components should follow to
function as trainable components in a spaCy pipeline.
DOCS: https://nightly.spacy.io/api/pipe
DOCS: https://spacy.io/api/pipe
"""
def __init__(self, vocab: Vocab, model: Model, name: str, **cfg):
"""Initialize a pipeline component.
@ -30,7 +30,7 @@ cdef class TrainablePipe(Pipe):
name (str): The component instance name.
**cfg: Additional settings and config parameters.
DOCS: https://nightly.spacy.io/api/pipe#init
DOCS: https://spacy.io/api/pipe#init
"""
self.vocab = vocab
self.model = model
@ -45,7 +45,7 @@ cdef class TrainablePipe(Pipe):
docs (Doc): The Doc to process.
RETURNS (Doc): The processed Doc.
DOCS: https://nightly.spacy.io/api/pipe#call
DOCS: https://spacy.io/api/pipe#call
"""
error_handler = self.get_error_handler()
try:
@ -67,7 +67,7 @@ cdef class TrainablePipe(Pipe):
the exception.
YIELDS (Doc): Processed documents in order.
DOCS: https://nightly.spacy.io/api/pipe#pipe
DOCS: https://spacy.io/api/pipe#pipe
"""
error_handler = self.get_error_handler()
for docs in util.minibatch(stream, size=batch_size):
@ -85,7 +85,7 @@ cdef class TrainablePipe(Pipe):
docs (Iterable[Doc]): The documents to predict.
RETURNS: Vector representations of the predictions.
DOCS: https://nightly.spacy.io/api/pipe#predict
DOCS: https://spacy.io/api/pipe#predict
"""
raise NotImplementedError(Errors.E931.format(parent="TrainablePipe", method="predict", name=self.name))
@ -95,7 +95,7 @@ cdef class TrainablePipe(Pipe):
docs (Iterable[Doc]): The documents to modify.
scores: The scores to assign.
DOCS: https://nightly.spacy.io/api/pipe#set_annotations
DOCS: https://spacy.io/api/pipe#set_annotations
"""
raise NotImplementedError(Errors.E931.format(parent="TrainablePipe", method="set_annotations", name=self.name))
@ -114,7 +114,7 @@ cdef class TrainablePipe(Pipe):
Updated using the component name as the key.
RETURNS (Dict[str, float]): The updated losses dictionary.
DOCS: https://nightly.spacy.io/api/pipe#update
DOCS: https://spacy.io/api/pipe#update
"""
if losses is None:
losses = {}
@ -151,7 +151,7 @@ cdef class TrainablePipe(Pipe):
Updated using the component name as the key.
RETURNS (Dict[str, float]): The updated losses dictionary.
DOCS: https://nightly.spacy.io/api/pipe#rehearse
DOCS: https://spacy.io/api/pipe#rehearse
"""
pass
@ -163,7 +163,7 @@ cdef class TrainablePipe(Pipe):
scores: Scores representing the model's predictions.
RETURNS (Tuple[float, float]): The loss and the gradient.
DOCS: https://nightly.spacy.io/api/pipe#get_loss
DOCS: https://spacy.io/api/pipe#get_loss
"""
raise NotImplementedError(Errors.E931.format(parent="TrainablePipe", method="get_loss", name=self.name))
@ -172,7 +172,7 @@ cdef class TrainablePipe(Pipe):
RETURNS (thinc.api.Optimizer): The optimizer.
DOCS: https://nightly.spacy.io/api/pipe#create_optimizer
DOCS: https://spacy.io/api/pipe#create_optimizer
"""
return util.create_default_optimizer()
@ -186,7 +186,7 @@ cdef class TrainablePipe(Pipe):
returns a representative sample of gold-standard Example objects.
nlp (Language): The current nlp object the component is part of.
DOCS: https://nightly.spacy.io/api/pipe#initialize
DOCS: https://spacy.io/api/pipe#initialize
"""
raise NotImplementedError(Errors.E931.format(parent="TrainablePipe", method="initialize", name=self.name))
@ -199,7 +199,7 @@ cdef class TrainablePipe(Pipe):
label (str): The label to add.
RETURNS (int): 0 if label is already present, otherwise 1.
DOCS: https://nightly.spacy.io/api/pipe#add_label
DOCS: https://spacy.io/api/pipe#add_label
"""
raise NotImplementedError(Errors.E931.format(parent="Pipe", method="add_label", name=self.name))
@ -229,7 +229,7 @@ cdef class TrainablePipe(Pipe):
params (dict): The parameter values to use in the model.
DOCS: https://nightly.spacy.io/api/pipe#use_params
DOCS: https://spacy.io/api/pipe#use_params
"""
with self.model.use_params(params):
yield
@ -241,7 +241,7 @@ cdef class TrainablePipe(Pipe):
sgd (thinc.api.Optimizer): The optimizer.
DOCS: https://nightly.spacy.io/api/pipe#finish_update
DOCS: https://spacy.io/api/pipe#finish_update
"""
self.model.finish_update(sgd)
@ -261,7 +261,7 @@ cdef class TrainablePipe(Pipe):
exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (bytes): The serialized object.
DOCS: https://nightly.spacy.io/api/pipe#to_bytes
DOCS: https://spacy.io/api/pipe#to_bytes
"""
self._validate_serialization_attrs()
serialize = {}
@ -277,7 +277,7 @@ cdef class TrainablePipe(Pipe):
exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (TrainablePipe): The loaded object.
DOCS: https://nightly.spacy.io/api/pipe#from_bytes
DOCS: https://spacy.io/api/pipe#from_bytes
"""
self._validate_serialization_attrs()
@ -301,7 +301,7 @@ cdef class TrainablePipe(Pipe):
path (str / Path): Path to a directory.
exclude (Iterable[str]): String names of serialization fields to exclude.
DOCS: https://nightly.spacy.io/api/pipe#to_disk
DOCS: https://spacy.io/api/pipe#to_disk
"""
self._validate_serialization_attrs()
serialize = {}
@ -318,7 +318,7 @@ cdef class TrainablePipe(Pipe):
exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (TrainablePipe): The loaded object.
DOCS: https://nightly.spacy.io/api/pipe#from_disk
DOCS: https://spacy.io/api/pipe#from_disk
"""
self._validate_serialization_attrs()

View File

@ -103,7 +103,7 @@ class Scorer:
) -> None:
"""Initialize the Scorer.
DOCS: https://nightly.spacy.io/api/scorer#init
DOCS: https://spacy.io/api/scorer#init
"""
self.nlp = nlp
self.cfg = cfg
@ -119,7 +119,7 @@ class Scorer:
examples (Iterable[Example]): The predicted annotations + correct annotations.
RETURNS (Dict): A dictionary of scores.
DOCS: https://nightly.spacy.io/api/scorer#score
DOCS: https://spacy.io/api/scorer#score
"""
scores = {}
if hasattr(self.nlp.tokenizer, "score"):
@ -139,7 +139,7 @@ class Scorer:
RETURNS (Dict[str, Any]): A dictionary containing the scores
token_acc/p/r/f.
DOCS: https://nightly.spacy.io/api/scorer#score_tokenization
DOCS: https://spacy.io/api/scorer#score_tokenization
"""
acc_score = PRFScore()
prf_score = PRFScore()
@ -198,7 +198,7 @@ class Scorer:
RETURNS (Dict[str, Any]): A dictionary containing the accuracy score
under the key attr_acc.
DOCS: https://nightly.spacy.io/api/scorer#score_token_attr
DOCS: https://spacy.io/api/scorer#score_token_attr
"""
tag_score = PRFScore()
for example in examples:
@ -317,7 +317,7 @@ class Scorer:
RETURNS (Dict[str, Any]): A dictionary containing the PRF scores under
the keys attr_p/r/f and the per-type PRF scores under attr_per_type.
DOCS: https://nightly.spacy.io/api/scorer#score_spans
DOCS: https://spacy.io/api/scorer#score_spans
"""
score = PRFScore()
score_per_type = dict()
@ -413,7 +413,7 @@ class Scorer:
attr_f_per_type,
attr_auc_per_type
DOCS: https://nightly.spacy.io/api/scorer#score_cats
DOCS: https://spacy.io/api/scorer#score_cats
"""
if threshold is None:
threshold = 0.5 if multi_label else 0.0
@ -519,7 +519,7 @@ class Scorer:
negative_labels (Iterable[str]): The string values that refer to no annotation (e.g. "NIL")
RETURNS (Dict[str, Any]): A dictionary containing the scores.
DOCS: https://nightly.spacy.io/api/scorer#score_links
DOCS: https://spacy.io/api/scorer#score_links
"""
f_per_type = {}
for example in examples:
@ -603,7 +603,7 @@ class Scorer:
RETURNS (Dict[str, Any]): A dictionary containing the scores:
attr_uas, attr_las, and attr_las_per_type.
DOCS: https://nightly.spacy.io/api/scorer#score_deps
DOCS: https://spacy.io/api/scorer#score_deps
"""
unlabelled = PRFScore()
labelled = PRFScore()

View File

@ -91,7 +91,7 @@ cdef Utf8Str* _allocate(Pool mem, const unsigned char* chars, uint32_t length) e
cdef class StringStore:
"""Look up strings by 64-bit hashes.
DOCS: https://nightly.spacy.io/api/stringstore
DOCS: https://spacy.io/api/stringstore
"""
def __init__(self, strings=None, freeze=False):
"""Create the StringStore.

View File

@ -31,7 +31,7 @@ cdef class Tokenizer:
"""Segment text, and create Doc objects with the discovered segment
boundaries.
DOCS: https://nightly.spacy.io/api/tokenizer
DOCS: https://spacy.io/api/tokenizer
"""
def __init__(self, Vocab vocab, rules=None, prefix_search=None,
suffix_search=None, infix_finditer=None, token_match=None,
@ -54,7 +54,7 @@ cdef class Tokenizer:
EXAMPLE:
>>> tokenizer = Tokenizer(nlp.vocab)
DOCS: https://nightly.spacy.io/api/tokenizer#init
DOCS: https://spacy.io/api/tokenizer#init
"""
self.mem = Pool()
self._cache = PreshMap()
@ -147,7 +147,7 @@ cdef class Tokenizer:
string (str): The string to tokenize.
RETURNS (Doc): A container for linguistic annotations.
DOCS: https://nightly.spacy.io/api/tokenizer#call
DOCS: https://spacy.io/api/tokenizer#call
"""
doc = self._tokenize_affixes(string, True)
self._apply_special_cases(doc)
@ -209,7 +209,7 @@ cdef class Tokenizer:
Defaults to 1000.
YIELDS (Doc): A sequence of Doc objects, in order.
DOCS: https://nightly.spacy.io/api/tokenizer#pipe
DOCS: https://spacy.io/api/tokenizer#pipe
"""
for text in texts:
yield self(text)
@ -529,7 +529,7 @@ cdef class Tokenizer:
and `.end()` methods, denoting the placement of internal segment
separators, e.g. hyphens.
DOCS: https://nightly.spacy.io/api/tokenizer#find_infix
DOCS: https://spacy.io/api/tokenizer#find_infix
"""
if self.infix_finditer is None:
return 0
@ -542,7 +542,7 @@ cdef class Tokenizer:
string (str): The string to segment.
RETURNS (int): The length of the prefix if present, otherwise `None`.
DOCS: https://nightly.spacy.io/api/tokenizer#find_prefix
DOCS: https://spacy.io/api/tokenizer#find_prefix
"""
if self.prefix_search is None:
return 0
@ -556,7 +556,7 @@ cdef class Tokenizer:
string (str): The string to segment.
RETURNS (int): The length of the suffix if present, otherwise `None`.
DOCS: https://nightly.spacy.io/api/tokenizer#find_suffix
DOCS: https://spacy.io/api/tokenizer#find_suffix
"""
if self.suffix_search is None:
return 0
@ -596,7 +596,7 @@ cdef class Tokenizer:
a token and its attributes. The `ORTH` fields of the attributes
must exactly match the string when they are concatenated.
DOCS: https://nightly.spacy.io/api/tokenizer#add_special_case
DOCS: https://spacy.io/api/tokenizer#add_special_case
"""
self._validate_special_case(string, substrings)
substrings = list(substrings)
@ -635,7 +635,7 @@ cdef class Tokenizer:
string (str): The string to tokenize.
RETURNS (list): A list of (pattern_string, token_string) tuples
DOCS: https://nightly.spacy.io/api/tokenizer#explain
DOCS: https://spacy.io/api/tokenizer#explain
"""
prefix_search = self.prefix_search
suffix_search = self.suffix_search
@ -718,7 +718,7 @@ cdef class Tokenizer:
it doesn't exist.
exclude (list): String names of serialization fields to exclude.
DOCS: https://nightly.spacy.io/api/tokenizer#to_disk
DOCS: https://spacy.io/api/tokenizer#to_disk
"""
path = util.ensure_path(path)
with path.open("wb") as file_:
@ -732,7 +732,7 @@ cdef class Tokenizer:
exclude (list): String names of serialization fields to exclude.
RETURNS (Tokenizer): The modified `Tokenizer` object.
DOCS: https://nightly.spacy.io/api/tokenizer#from_disk
DOCS: https://spacy.io/api/tokenizer#from_disk
"""
path = util.ensure_path(path)
with path.open("rb") as file_:
@ -746,7 +746,7 @@ cdef class Tokenizer:
exclude (list): String names of serialization fields to exclude.
RETURNS (bytes): The serialized form of the `Tokenizer` object.
DOCS: https://nightly.spacy.io/api/tokenizer#to_bytes
DOCS: https://spacy.io/api/tokenizer#to_bytes
"""
serializers = {
"vocab": lambda: self.vocab.to_bytes(),
@ -766,7 +766,7 @@ cdef class Tokenizer:
exclude (list): String names of serialization fields to exclude.
RETURNS (Tokenizer): The `Tokenizer` object.
DOCS: https://nightly.spacy.io/api/tokenizer#from_bytes
DOCS: https://spacy.io/api/tokenizer#from_bytes
"""
data = {}
deserializers = {

View File

@ -24,8 +24,8 @@ from ..strings import get_string_id
cdef class Retokenizer:
"""Helper class for doc.retokenize() context manager.
DOCS: https://nightly.spacy.io/api/doc#retokenize
USAGE: https://nightly.spacy.io/usage/linguistic-features#retokenization
DOCS: https://spacy.io/api/doc#retokenize
USAGE: https://spacy.io/usage/linguistic-features#retokenization
"""
cdef Doc doc
cdef list merges
@ -47,7 +47,7 @@ cdef class Retokenizer:
span (Span): The span to merge.
attrs (dict): Attributes to set on the merged token.
DOCS: https://nightly.spacy.io/api/doc#retokenizer.merge
DOCS: https://spacy.io/api/doc#retokenizer.merge
"""
if (span.start, span.end) in self._spans_to_merge:
return
@ -73,7 +73,7 @@ cdef class Retokenizer:
attrs (dict): Attributes to set on all split tokens. Attribute names
mapped to list of per-token attribute values.
DOCS: https://nightly.spacy.io/api/doc#retokenizer.split
DOCS: https://spacy.io/api/doc#retokenizer.split
"""
if ''.join(orths) != token.text:
raise ValueError(Errors.E117.format(new=''.join(orths), old=token.text))

View File

@ -62,7 +62,7 @@ class DocBin:
store_user_data (bool): Whether to write the `Doc.user_data` to bytes/file.
docs (Iterable[Doc]): Docs to add.
DOCS: https://nightly.spacy.io/api/docbin#init
DOCS: https://spacy.io/api/docbin#init
"""
attrs = sorted([intify_attr(attr) for attr in attrs])
self.version = "0.1"
@ -88,7 +88,7 @@ class DocBin:
doc (Doc): The Doc object to add.
DOCS: https://nightly.spacy.io/api/docbin#add
DOCS: https://spacy.io/api/docbin#add
"""
array = doc.to_array(self.attrs)
if len(array.shape) == 1:
@ -122,7 +122,7 @@ class DocBin:
vocab (Vocab): The shared vocab.
YIELDS (Doc): The Doc objects.
DOCS: https://nightly.spacy.io/api/docbin#get_docs
DOCS: https://spacy.io/api/docbin#get_docs
"""
for string in self.strings:
vocab[string]
@ -153,7 +153,7 @@ class DocBin:
other (DocBin): The DocBin to merge into the current bin.
DOCS: https://nightly.spacy.io/api/docbin#merge
DOCS: https://spacy.io/api/docbin#merge
"""
if self.attrs != other.attrs:
raise ValueError(
@ -180,7 +180,7 @@ class DocBin:
RETURNS (bytes): The serialized DocBin.
DOCS: https://nightly.spacy.io/api/docbin#to_bytes
DOCS: https://spacy.io/api/docbin#to_bytes
"""
for tokens in self.tokens:
assert len(tokens.shape) == 2, tokens.shape # this should never happen
@ -208,7 +208,7 @@ class DocBin:
bytes_data (bytes): The data to load from.
RETURNS (DocBin): The loaded DocBin.
DOCS: https://nightly.spacy.io/api/docbin#from_bytes
DOCS: https://spacy.io/api/docbin#from_bytes
"""
try:
msg = srsly.msgpack_loads(zlib.decompress(bytes_data))
@ -240,7 +240,7 @@ class DocBin:
path (str / Path): The file path.
DOCS: https://nightly.spacy.io/api/docbin#to_disk
DOCS: https://spacy.io/api/docbin#to_disk
"""
path = ensure_path(path)
with path.open("wb") as file_:
@ -252,7 +252,7 @@ class DocBin:
path (str / Path): The file path.
RETURNS (DocBin): The loaded DocBin.
DOCS: https://nightly.spacy.io/api/docbin#to_disk
DOCS: https://spacy.io/api/docbin#from_disk
"""
path = ensure_path(path)
with path.open("rb") as file_:

View File

@ -116,7 +116,7 @@ cdef class Doc:
>>> from spacy.tokens import Doc
>>> doc = Doc(nlp.vocab, words=["hello", "world", "!"], spaces=[True, False, False])
DOCS: https://nightly.spacy.io/api/doc
DOCS: https://spacy.io/api/doc
"""
@classmethod
@ -130,8 +130,8 @@ cdef class Doc:
method (callable): Optional method for method extension.
force (bool): Force overwriting existing attribute.
DOCS: https://nightly.spacy.io/api/doc#set_extension
USAGE: https://nightly.spacy.io/usage/processing-pipelines#custom-components-attributes
DOCS: https://spacy.io/api/doc#set_extension
USAGE: https://spacy.io/usage/processing-pipelines#custom-components-attributes
"""
if cls.has_extension(name) and not kwargs.get("force", False):
raise ValueError(Errors.E090.format(name=name, obj="Doc"))
@ -144,7 +144,7 @@ cdef class Doc:
name (str): Name of the extension.
RETURNS (tuple): A `(default, method, getter, setter)` tuple.
DOCS: https://nightly.spacy.io/api/doc#get_extension
DOCS: https://spacy.io/api/doc#get_extension
"""
return Underscore.doc_extensions.get(name)
@ -155,7 +155,7 @@ cdef class Doc:
name (str): Name of the extension.
RETURNS (bool): Whether the extension has been registered.
DOCS: https://nightly.spacy.io/api/doc#has_extension
DOCS: https://spacy.io/api/doc#has_extension
"""
return name in Underscore.doc_extensions
@ -167,7 +167,7 @@ cdef class Doc:
RETURNS (tuple): A `(default, method, getter, setter)` tuple of the
removed extension.
DOCS: https://nightly.spacy.io/api/doc#remove_extension
DOCS: https://spacy.io/api/doc#remove_extension
"""
if not cls.has_extension(name):
raise ValueError(Errors.E046.format(name=name))
@ -219,7 +219,7 @@ cdef class Doc:
length as words, as IOB tags to assign as token.ent_iob and
token.ent_type. Defaults to None.
DOCS: https://nightly.spacy.io/api/doc#init
DOCS: https://spacy.io/api/doc#init
"""
self.vocab = vocab
size = max(20, (len(words) if words is not None else 0))
@ -399,7 +399,7 @@ cdef class Doc:
every token in the doc.
RETURNS (bool): Whether annotation is present.
DOCS: https://nightly.spacy.io/api/doc#has_annotation
DOCS: https://spacy.io/api/doc#has_annotation
"""
# empty docs are always annotated
@ -450,7 +450,7 @@ cdef class Doc:
You can use negative indices and open-ended ranges, which have
their normal Python semantics.
DOCS: https://nightly.spacy.io/api/doc#getitem
DOCS: https://spacy.io/api/doc#getitem
"""
if isinstance(i, slice):
start, stop = util.normalize_slice(len(self), i.start, i.stop, i.step)
@ -467,7 +467,7 @@ cdef class Doc:
than-Python speeds are required, you can instead access the annotations
as a numpy array, or access the underlying C data directly from Cython.
DOCS: https://nightly.spacy.io/api/doc#iter
DOCS: https://spacy.io/api/doc#iter
"""
cdef int i
for i in range(self.length):
@ -478,7 +478,7 @@ cdef class Doc:
RETURNS (int): The number of tokens in the document.
DOCS: https://nightly.spacy.io/api/doc#len
DOCS: https://spacy.io/api/doc#len
"""
return self.length
@ -519,7 +519,7 @@ cdef class Doc:
partially covered by the character span). Defaults to "strict".
RETURNS (Span): The newly constructed object.
DOCS: https://nightly.spacy.io/api/doc#char_span
DOCS: https://spacy.io/api/doc#char_span
"""
if not isinstance(label, int):
label = self.vocab.strings.add(label)
@ -562,7 +562,7 @@ cdef class Doc:
`Span`, `Token` and `Lexeme` objects.
RETURNS (float): A scalar similarity score. Higher is more similar.
DOCS: https://nightly.spacy.io/api/doc#similarity
DOCS: https://spacy.io/api/doc#similarity
"""
if "similarity" in self.user_hooks:
return self.user_hooks["similarity"](self, other)
@ -595,7 +595,7 @@ cdef class Doc:
RETURNS (bool): Whether a word vector is associated with the object.
DOCS: https://nightly.spacy.io/api/doc#has_vector
DOCS: https://spacy.io/api/doc#has_vector
"""
if "has_vector" in self.user_hooks:
return self.user_hooks["has_vector"](self)
@ -613,7 +613,7 @@ cdef class Doc:
RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
representing the document's semantics.
DOCS: https://nightly.spacy.io/api/doc#vector
DOCS: https://spacy.io/api/doc#vector
"""
def __get__(self):
if "vector" in self.user_hooks:
@ -641,7 +641,7 @@ cdef class Doc:
RETURNS (float): The L2 norm of the vector representation.
DOCS: https://nightly.spacy.io/api/doc#vector_norm
DOCS: https://spacy.io/api/doc#vector_norm
"""
def __get__(self):
if "vector_norm" in self.user_hooks:
@ -681,7 +681,7 @@ cdef class Doc:
RETURNS (tuple): Entities in the document, one `Span` per entity.
DOCS: https://nightly.spacy.io/api/doc#ents
DOCS: https://spacy.io/api/doc#ents
"""
def __get__(self):
cdef int i
@ -827,7 +827,7 @@ cdef class Doc:
YIELDS (Span): Noun chunks in the document.
DOCS: https://nightly.spacy.io/api/doc#noun_chunks
DOCS: https://spacy.io/api/doc#noun_chunks
"""
if self.noun_chunks_iterator is None:
raise NotImplementedError(Errors.E894.format(lang=self.vocab.lang))
@ -850,7 +850,7 @@ cdef class Doc:
YIELDS (Span): Sentences in the document.
DOCS: https://nightly.spacy.io/api/doc#sents
DOCS: https://spacy.io/api/doc#sents
"""
if not self.has_annotation("SENT_START"):
raise ValueError(Errors.E030)
@ -959,7 +959,7 @@ cdef class Doc:
attr_id (int): The attribute ID to key the counts.
RETURNS (dict): A dictionary mapping attributes to integer counts.
DOCS: https://nightly.spacy.io/api/doc#count_by
DOCS: https://spacy.io/api/doc#count_by
"""
cdef int i
cdef attr_t attr
@ -1006,7 +1006,7 @@ cdef class Doc:
array (numpy.ndarray[ndim=2, dtype='int32']): The attribute values.
RETURNS (Doc): Itself.
DOCS: https://nightly.spacy.io/api/doc#from_array
DOCS: https://spacy.io/api/doc#from_array
"""
# Handle scalar/list inputs of strings/ints for py_attr_ids
# See also #3064
@ -1098,7 +1098,7 @@ cdef class Doc:
attrs (list): Optional list of attribute ID ints or attribute name strings.
RETURNS (Doc): A doc that contains the concatenated docs, or None if no docs were given.
DOCS: https://nightly.spacy.io/api/doc#from_docs
DOCS: https://spacy.io/api/doc#from_docs
"""
if not docs:
return None
@ -1170,7 +1170,7 @@ cdef class Doc:
RETURNS (np.array[ndim=2, dtype=numpy.int32]): LCA matrix with shape
(n, n), where n = len(self).
DOCS: https://nightly.spacy.io/api/doc#get_lca_matrix
DOCS: https://spacy.io/api/doc#get_lca_matrix
"""
return numpy.asarray(_get_lca_matrix(self, 0, len(self)))
@ -1203,7 +1203,7 @@ cdef class Doc:
it doesn't exist. Paths may be either strings or Path-like objects.
exclude (Iterable[str]): String names of serialization fields to exclude.
DOCS: https://nightly.spacy.io/api/doc#to_disk
DOCS: https://spacy.io/api/doc#to_disk
"""
path = util.ensure_path(path)
with path.open("wb") as file_:
@ -1218,7 +1218,7 @@ cdef class Doc:
exclude (list): String names of serialization fields to exclude.
RETURNS (Doc): The modified `Doc` object.
DOCS: https://nightly.spacy.io/api/doc#from_disk
DOCS: https://spacy.io/api/doc#from_disk
"""
path = util.ensure_path(path)
with path.open("rb") as file_:
@ -1232,7 +1232,7 @@ cdef class Doc:
RETURNS (bytes): A losslessly serialized copy of the `Doc`, including
all annotations.
DOCS: https://nightly.spacy.io/api/doc#to_bytes
DOCS: https://spacy.io/api/doc#to_bytes
"""
return srsly.msgpack_dumps(self.to_dict(exclude=exclude))
@ -1243,7 +1243,7 @@ cdef class Doc:
exclude (list): String names of serialization fields to exclude.
RETURNS (Doc): Itself.
DOCS: https://nightly.spacy.io/api/doc#from_bytes
DOCS: https://spacy.io/api/doc#from_bytes
"""
return self.from_dict(srsly.msgpack_loads(bytes_data), exclude=exclude)
@ -1254,7 +1254,7 @@ cdef class Doc:
RETURNS (bytes): A losslessly serialized copy of the `Doc`, including
all annotations.
DOCS: https://nightly.spacy.io/api/doc#to_bytes
DOCS: https://spacy.io/api/doc#to_bytes
"""
array_head = Doc._get_array_attrs()
strings = set()
@ -1302,7 +1302,7 @@ cdef class Doc:
exclude (list): String names of serialization fields to exclude.
RETURNS (Doc): Itself.
DOCS: https://nightly.spacy.io/api/doc#from_dict
DOCS: https://spacy.io/api/doc#from_dict
"""
if self.length != 0:
raise ValueError(Errors.E033.format(length=self.length))
@ -1373,8 +1373,8 @@ cdef class Doc:
retokenization are invalidated, although they may accidentally
continue to work.
DOCS: https://nightly.spacy.io/api/doc#retokenize
USAGE: https://nightly.spacy.io/usage/linguistic-features#retokenization
DOCS: https://spacy.io/api/doc#retokenize
USAGE: https://spacy.io/usage/linguistic-features#retokenization
"""
return Retokenizer(self)

View File

@ -24,7 +24,7 @@ from .underscore import Underscore, get_ext_args
cdef class Span:
"""A slice from a Doc object.
DOCS: https://nightly.spacy.io/api/span
DOCS: https://spacy.io/api/span
"""
@classmethod
def set_extension(cls, name, **kwargs):
@ -37,8 +37,8 @@ cdef class Span:
method (callable): Optional method for method extension.
force (bool): Force overwriting existing attribute.
DOCS: https://nightly.spacy.io/api/span#set_extension
USAGE: https://nightly.spacy.io/usage/processing-pipelines#custom-components-attributes
DOCS: https://spacy.io/api/span#set_extension
USAGE: https://spacy.io/usage/processing-pipelines#custom-components-attributes
"""
if cls.has_extension(name) and not kwargs.get("force", False):
raise ValueError(Errors.E090.format(name=name, obj="Span"))
@ -51,7 +51,7 @@ cdef class Span:
name (str): Name of the extension.
RETURNS (tuple): A `(default, method, getter, setter)` tuple.
DOCS: https://nightly.spacy.io/api/span#get_extension
DOCS: https://spacy.io/api/span#get_extension
"""
return Underscore.span_extensions.get(name)
@ -62,7 +62,7 @@ cdef class Span:
name (str): Name of the extension.
RETURNS (bool): Whether the extension has been registered.
DOCS: https://nightly.spacy.io/api/span#has_extension
DOCS: https://spacy.io/api/span#has_extension
"""
return name in Underscore.span_extensions
@ -74,7 +74,7 @@ cdef class Span:
RETURNS (tuple): A `(default, method, getter, setter)` tuple of the
removed extension.
DOCS: https://nightly.spacy.io/api/span#remove_extension
DOCS: https://spacy.io/api/span#remove_extension
"""
if not cls.has_extension(name):
raise ValueError(Errors.E046.format(name=name))
@ -92,7 +92,7 @@ cdef class Span:
vector (ndarray[ndim=1, dtype='float32']): A meaning representation
of the span.
DOCS: https://nightly.spacy.io/api/span#init
DOCS: https://spacy.io/api/span#init
"""
if not (0 <= start <= end <= len(doc)):
raise IndexError(Errors.E035.format(start=start, end=end, length=len(doc)))
@ -162,7 +162,7 @@ cdef class Span:
RETURNS (int): The number of tokens in the span.
DOCS: https://nightly.spacy.io/api/span#len
DOCS: https://spacy.io/api/span#len
"""
if self.c.end < self.c.start:
return 0
@ -178,7 +178,7 @@ cdef class Span:
the span to get.
RETURNS (Token or Span): The token at `span[i]`.
DOCS: https://nightly.spacy.io/api/span#getitem
DOCS: https://spacy.io/api/span#getitem
"""
if isinstance(i, slice):
start, end = normalize_slice(len(self), i.start, i.stop, i.step)
@ -198,7 +198,7 @@ cdef class Span:
YIELDS (Token): A `Token` object.
DOCS: https://nightly.spacy.io/api/span#iter
DOCS: https://spacy.io/api/span#iter
"""
for i in range(self.c.start, self.c.end):
yield self.doc[i]
@ -218,7 +218,7 @@ cdef class Span:
copy_user_data (bool): Whether or not to copy the original doc's user data.
RETURNS (Doc): The `Doc` copy of the span.
DOCS: https://nightly.spacy.io/api/span#as_doc
DOCS: https://spacy.io/api/span#as_doc
"""
words = [t.text for t in self]
spaces = [bool(t.whitespace_) for t in self]
@ -291,7 +291,7 @@ cdef class Span:
RETURNS (np.array[ndim=2, dtype=numpy.int32]): LCA matrix with shape
(n, n), where n = len(self).
DOCS: https://nightly.spacy.io/api/span#get_lca_matrix
DOCS: https://spacy.io/api/span#get_lca_matrix
"""
return numpy.asarray(_get_lca_matrix(self.doc, self.c.start, self.c.end))
@ -303,7 +303,7 @@ cdef class Span:
`Span`, `Token` and `Lexeme` objects.
RETURNS (float): A scalar similarity score. Higher is more similar.
DOCS: https://nightly.spacy.io/api/span#similarity
DOCS: https://spacy.io/api/span#similarity
"""
if "similarity" in self.doc.user_span_hooks:
return self.doc.user_span_hooks["similarity"](self, other)
@ -385,7 +385,7 @@ cdef class Span:
RETURNS (tuple): Entities in the span, one `Span` per entity.
DOCS: https://nightly.spacy.io/api/span#ents
DOCS: https://spacy.io/api/span#ents
"""
cdef Span ent
ents = []
@ -404,7 +404,7 @@ cdef class Span:
RETURNS (bool): Whether a word vector is associated with the object.
DOCS: https://nightly.spacy.io/api/span#has_vector
DOCS: https://spacy.io/api/span#has_vector
"""
if "has_vector" in self.doc.user_span_hooks:
return self.doc.user_span_hooks["has_vector"](self)
@ -423,7 +423,7 @@ cdef class Span:
RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
representing the span's semantics.
DOCS: https://nightly.spacy.io/api/span#vector
DOCS: https://spacy.io/api/span#vector
"""
if "vector" in self.doc.user_span_hooks:
return self.doc.user_span_hooks["vector"](self)
@ -437,7 +437,7 @@ cdef class Span:
RETURNS (float): The L2 norm of the vector representation.
DOCS: https://nightly.spacy.io/api/span#vector_norm
DOCS: https://spacy.io/api/span#vector_norm
"""
if "vector_norm" in self.doc.user_span_hooks:
return self.doc.user_span_hooks["vector"](self)
@ -501,7 +501,7 @@ cdef class Span:
YIELDS (Span): Noun chunks in the span.
DOCS: https://nightly.spacy.io/api/span#noun_chunks
DOCS: https://spacy.io/api/span#noun_chunks
"""
for span in self.doc.noun_chunks:
if span.start >= self.start and span.end <= self.end:
@ -515,7 +515,7 @@ cdef class Span:
RETURNS (Token): The root token.
DOCS: https://nightly.spacy.io/api/span#root
DOCS: https://spacy.io/api/span#root
"""
if "root" in self.doc.user_span_hooks:
return self.doc.user_span_hooks["root"](self)
@ -571,7 +571,7 @@ cdef class Span:
RETURNS (tuple): A tuple of Token objects.
DOCS: https://nightly.spacy.io/api/span#lefts
DOCS: https://spacy.io/api/span#conjuncts
"""
return self.root.conjuncts
@ -582,7 +582,7 @@ cdef class Span:
YIELDS (Token): A left-child of a token of the span.
DOCS: https://nightly.spacy.io/api/span#lefts
DOCS: https://spacy.io/api/span#lefts
"""
for token in reversed(self): # Reverse, so we get tokens in order
for left in token.lefts:
@ -596,7 +596,7 @@ cdef class Span:
YIELDS (Token): A right-child of a token of the span.
DOCS: https://nightly.spacy.io/api/span#rights
DOCS: https://spacy.io/api/span#rights
"""
for token in self:
for right in token.rights:
@ -611,7 +611,7 @@ cdef class Span:
RETURNS (int): The number of leftward immediate children of the
span, in the syntactic dependency parse.
DOCS: https://nightly.spacy.io/api/span#n_lefts
DOCS: https://spacy.io/api/span#n_lefts
"""
return len(list(self.lefts))
@ -623,7 +623,7 @@ cdef class Span:
RETURNS (int): The number of rightward immediate children of the
span, in the syntactic dependency parse.
DOCS: https://nightly.spacy.io/api/span#n_rights
DOCS: https://spacy.io/api/span#n_rights
"""
return len(list(self.rights))
@ -633,7 +633,7 @@ cdef class Span:
YIELDS (Token): A token within the span, or a descendant from it.
DOCS: https://nightly.spacy.io/api/span#subtree
DOCS: https://spacy.io/api/span#subtree
"""
for word in self.lefts:
yield from word.subtree

View File

@ -27,7 +27,7 @@ cdef class SpanGroup:
>>> doc.spans["errors"] = [doc[0:1], doc[2:4]]
>>> assert isinstance(doc.spans["errors"], SpanGroup)
DOCS: https://nightly.spacy.io/api/spangroup
DOCS: https://spacy.io/api/spangroup
"""
def __init__(self, doc, *, name="", attrs={}, spans=[]):
"""Create a SpanGroup.
@ -37,7 +37,7 @@ cdef class SpanGroup:
attrs (Dict[str, Any]): Optional JSON-serializable attributes to attach.
spans (Iterable[Span]): The spans to add to the group.
DOCS: https://nightly.spacy.io/api/spangroup#init
DOCS: https://spacy.io/api/spangroup#init
"""
# We need to make this a weak reference, so that the Doc object can
# own the SpanGroup without circular references. We do want to get
@ -56,7 +56,7 @@ cdef class SpanGroup:
def doc(self):
"""RETURNS (Doc): The reference document.
DOCS: https://nightly.spacy.io/api/spangroup#doc
DOCS: https://spacy.io/api/spangroup#doc
"""
return self._doc_ref()
@ -64,7 +64,7 @@ cdef class SpanGroup:
def has_overlap(self):
"""RETURNS (bool): Whether the group contains overlapping spans.
DOCS: https://nightly.spacy.io/api/spangroup#has_overlap
DOCS: https://spacy.io/api/spangroup#has_overlap
"""
if not len(self):
return False
@ -79,7 +79,7 @@ cdef class SpanGroup:
def __len__(self):
"""RETURNS (int): The number of spans in the group.
DOCS: https://nightly.spacy.io/api/spangroup#len
DOCS: https://spacy.io/api/spangroup#len
"""
return self.c.size()
@ -89,7 +89,7 @@ cdef class SpanGroup:
span (Span): The span to append.
DOCS: https://nightly.spacy.io/api/spangroup#append
DOCS: https://spacy.io/api/spangroup#append
"""
if span.doc is not self.doc:
raise ValueError("Cannot add span to group: refers to different Doc.")
@ -101,7 +101,7 @@ cdef class SpanGroup:
spans (Iterable[Span]): The spans to add.
DOCS: https://nightly.spacy.io/api/spangroup#extend
DOCS: https://spacy.io/api/spangroup#extend
"""
cdef Span span
for span in spans:
@ -113,7 +113,7 @@ cdef class SpanGroup:
i (int): The item index.
RETURNS (Span): The span at the given index.
DOCS: https://nightly.spacy.io/api/spangroup#getitem
DOCS: https://spacy.io/api/spangroup#getitem
"""
cdef int size = self.c.size()
if i < -size or i >= size:
@ -127,7 +127,7 @@ cdef class SpanGroup:
RETURNS (bytes): The serialized span group.
DOCS: https://nightly.spacy.io/api/spangroup#to_bytes
DOCS: https://spacy.io/api/spangroup#to_bytes
"""
output = {"name": self.name, "attrs": self.attrs, "spans": []}
for i in range(self.c.size()):
@ -159,7 +159,7 @@ cdef class SpanGroup:
bytes_data (bytes): The span group to load.
RETURNS (SpanGroup): The deserialized span group.
DOCS: https://nightly.spacy.io/api/spangroup#from_bytes
DOCS: https://spacy.io/api/spangroup#from_bytes
"""
msg = srsly.msgpack_loads(bytes_data)
self.name = msg["name"]

View File

@ -27,7 +27,7 @@ cdef class Token:
"""An individual token i.e. a word, punctuation symbol, whitespace,
etc.
DOCS: https://nightly.spacy.io/api/token
DOCS: https://spacy.io/api/token
"""
@classmethod
def set_extension(cls, name, **kwargs):
@ -40,8 +40,8 @@ cdef class Token:
method (callable): Optional method for method extension.
force (bool): Force overwriting existing attribute.
DOCS: https://nightly.spacy.io/api/token#set_extension
USAGE: https://nightly.spacy.io/usage/processing-pipelines#custom-components-attributes
DOCS: https://spacy.io/api/token#set_extension
USAGE: https://spacy.io/usage/processing-pipelines#custom-components-attributes
"""
if cls.has_extension(name) and not kwargs.get("force", False):
raise ValueError(Errors.E090.format(name=name, obj="Token"))
@ -54,7 +54,7 @@ cdef class Token:
name (str): Name of the extension.
RETURNS (tuple): A `(default, method, getter, setter)` tuple.
DOCS: https://nightly.spacy.io/api/token#get_extension
DOCS: https://spacy.io/api/token#get_extension
"""
return Underscore.token_extensions.get(name)
@ -65,7 +65,7 @@ cdef class Token:
name (str): Name of the extension.
RETURNS (bool): Whether the extension has been registered.
DOCS: https://nightly.spacy.io/api/token#has_extension
DOCS: https://spacy.io/api/token#has_extension
"""
return name in Underscore.token_extensions
@ -77,7 +77,7 @@ cdef class Token:
RETURNS (tuple): A `(default, method, getter, setter)` tuple of the
removed extension.
DOCS: https://nightly.spacy.io/api/token#remove_extension
DOCS: https://spacy.io/api/token#remove_extension
"""
if not cls.has_extension(name):
raise ValueError(Errors.E046.format(name=name))
@ -90,7 +90,7 @@ cdef class Token:
doc (Doc): The parent document.
offset (int): The index of the token within the document.
DOCS: https://nightly.spacy.io/api/token#init
DOCS: https://spacy.io/api/token#init
"""
self.vocab = vocab
self.doc = doc
@ -105,7 +105,7 @@ cdef class Token:
RETURNS (int): The number of unicode characters in the token.
DOCS: https://nightly.spacy.io/api/token#len
DOCS: https://spacy.io/api/token#len
"""
return self.c.lex.length
@ -168,7 +168,7 @@ cdef class Token:
flag_id (int): The ID of the flag attribute.
RETURNS (bool): Whether the flag is set.
DOCS: https://nightly.spacy.io/api/token#check_flag
DOCS: https://spacy.io/api/token#check_flag
"""
return Lexeme.c_check_flag(self.c.lex, flag_id)
@ -178,7 +178,7 @@ cdef class Token:
i (int): The relative position of the token to get. Defaults to 1.
RETURNS (Token): The token at position `self.doc[self.i+i]`.
DOCS: https://nightly.spacy.io/api/token#nbor
DOCS: https://spacy.io/api/token#nbor
"""
if self.i+i < 0 or (self.i+i >= len(self.doc)):
raise IndexError(Errors.E042.format(i=self.i, j=i, length=len(self.doc)))
@ -192,7 +192,7 @@ cdef class Token:
`Span`, `Token` and `Lexeme` objects.
RETURNS (float): A scalar similarity score. Higher is more similar.
DOCS: https://nightly.spacy.io/api/token#similarity
DOCS: https://spacy.io/api/token#similarity
"""
if "similarity" in self.doc.user_token_hooks:
return self.doc.user_token_hooks["similarity"](self, other)
@ -388,7 +388,7 @@ cdef class Token:
RETURNS (bool): Whether a word vector is associated with the object.
DOCS: https://nightly.spacy.io/api/token#has_vector
DOCS: https://spacy.io/api/token#has_vector
"""
if "has_vector" in self.doc.user_token_hooks:
return self.doc.user_token_hooks["has_vector"](self)
@ -403,7 +403,7 @@ cdef class Token:
RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
representing the token's semantics.
DOCS: https://nightly.spacy.io/api/token#vector
DOCS: https://spacy.io/api/token#vector
"""
if "vector" in self.doc.user_token_hooks:
return self.doc.user_token_hooks["vector"](self)
@ -418,7 +418,7 @@ cdef class Token:
RETURNS (float): The L2 norm of the vector representation.
DOCS: https://nightly.spacy.io/api/token#vector_norm
DOCS: https://spacy.io/api/token#vector_norm
"""
if "vector_norm" in self.doc.user_token_hooks:
return self.doc.user_token_hooks["vector_norm"](self)
@ -441,7 +441,7 @@ cdef class Token:
RETURNS (int): The number of leftward immediate children of the
word, in the syntactic dependency parse.
DOCS: https://nightly.spacy.io/api/token#n_lefts
DOCS: https://spacy.io/api/token#n_lefts
"""
return self.c.l_kids
@ -453,7 +453,7 @@ cdef class Token:
RETURNS (int): The number of rightward immediate children of the
word, in the syntactic dependency parse.
DOCS: https://nightly.spacy.io/api/token#n_rights
DOCS: https://spacy.io/api/token#n_rights
"""
return self.c.r_kids
@ -485,7 +485,7 @@ cdef class Token:
RETURNS (bool / None): Whether the token starts a sentence.
None if unknown.
DOCS: https://nightly.spacy.io/api/token#is_sent_start
DOCS: https://spacy.io/api/token#is_sent_start
"""
def __get__(self):
if self.c.sent_start == 0:
@ -514,7 +514,7 @@ cdef class Token:
RETURNS (bool / None): Whether the token ends a sentence.
None if unknown.
DOCS: https://nightly.spacy.io/api/token#is_sent_end
DOCS: https://spacy.io/api/token#is_sent_end
"""
def __get__(self):
if self.i + 1 == len(self.doc):
@ -536,7 +536,7 @@ cdef class Token:
YIELDS (Token): A left-child of the token.
DOCS: https://nightly.spacy.io/api/token#lefts
DOCS: https://spacy.io/api/token#lefts
"""
cdef int nr_iter = 0
cdef const TokenC* ptr = self.c - (self.i - self.c.l_edge)
@ -556,7 +556,7 @@ cdef class Token:
YIELDS (Token): A right-child of the token.
DOCS: https://nightly.spacy.io/api/token#rights
DOCS: https://spacy.io/api/token#rights
"""
cdef const TokenC* ptr = self.c + (self.c.r_edge - self.i)
tokens = []
@ -578,7 +578,7 @@ cdef class Token:
YIELDS (Token): A child token such that `child.head==self`.
DOCS: https://nightly.spacy.io/api/token#children
DOCS: https://spacy.io/api/token#children
"""
yield from self.lefts
yield from self.rights
@ -591,7 +591,7 @@ cdef class Token:
YIELDS (Token): A descendent token such that
`self.is_ancestor(descendent) or token == self`.
DOCS: https://nightly.spacy.io/api/token#subtree
DOCS: https://spacy.io/api/token#subtree
"""
for word in self.lefts:
yield from word.subtree
@ -622,7 +622,7 @@ cdef class Token:
YIELDS (Token): A sequence of ancestor tokens such that
`ancestor.is_ancestor(self)`.
DOCS: https://nightly.spacy.io/api/token#ancestors
DOCS: https://spacy.io/api/token#ancestors
"""
cdef const TokenC* head_ptr = self.c
# Guard against infinite loop, no token can have
@ -640,7 +640,7 @@ cdef class Token:
descendant (Token): Another token.
RETURNS (bool): Whether this token is the ancestor of the descendant.
DOCS: https://nightly.spacy.io/api/token#is_ancestor
DOCS: https://spacy.io/api/token#is_ancestor
"""
if self.doc is not descendant.doc:
return False
@ -696,7 +696,7 @@ cdef class Token:
RETURNS (tuple): The coordinated tokens.
DOCS: https://nightly.spacy.io/api/token#conjuncts
DOCS: https://spacy.io/api/token#conjuncts
"""
cdef Token word, child
if "conjuncts" in self.doc.user_token_hooks:

View File

@ -97,7 +97,7 @@ class Corpus:
augment (Callable[Example, Iterable[Example]]): Optional data augmentation
function, to extrapolate additional examples from your annotations.
DOCS: https://nightly.spacy.io/api/corpus
DOCS: https://spacy.io/api/corpus
"""
def __init__(
@ -121,7 +121,7 @@ class Corpus:
nlp (Language): The current nlp object.
YIELDS (Example): The examples.
DOCS: https://nightly.spacy.io/api/corpus#call
DOCS: https://spacy.io/api/corpus#call
"""
ref_docs = self.read_docbin(nlp.vocab, walk_corpus(self.path, FILE_TYPE))
if self.gold_preproc:
@ -206,7 +206,7 @@ class JsonlCorpus:
limit (int): Limit corpus to a subset of examples, e.g. for debugging.
Defaults to 0, which indicates no limit.
DOCS: https://nightly.spacy.io/api/corpus#jsonlcorpus
DOCS: https://spacy.io/api/corpus#jsonlcorpus
"""
file_type = "jsonl"
@ -230,7 +230,7 @@ class JsonlCorpus:
nlp (Language): The current nlp object.
YIELDS (Example): The example objects.
DOCS: https://nightly.spacy.io/api/corpus#jsonlcorpus-call
DOCS: https://spacy.io/api/corpus#jsonlcorpus-call
"""
for loc in walk_corpus(self.path, ".jsonl"):
records = srsly.read_jsonl(loc)

View File

@ -44,7 +44,7 @@ cdef class Vectors:
the table need to be assigned - so len(list(vectors.keys())) may be
greater or smaller than vectors.shape[0].
DOCS: https://nightly.spacy.io/api/vectors
DOCS: https://spacy.io/api/vectors
"""
cdef public object name
cdef public object data
@ -59,7 +59,7 @@ cdef class Vectors:
keys (iterable): A sequence of keys, aligned with the data.
name (str): A name to identify the vectors table.
DOCS: https://nightly.spacy.io/api/vectors#init
DOCS: https://spacy.io/api/vectors#init
"""
self.name = name
if data is None:
@ -83,7 +83,7 @@ cdef class Vectors:
RETURNS (tuple): A `(rows, dims)` pair.
DOCS: https://nightly.spacy.io/api/vectors#shape
DOCS: https://spacy.io/api/vectors#shape
"""
return self.data.shape
@ -93,7 +93,7 @@ cdef class Vectors:
RETURNS (int): The vector size.
DOCS: https://nightly.spacy.io/api/vectors#size
DOCS: https://spacy.io/api/vectors#size
"""
return self.data.shape[0] * self.data.shape[1]
@ -103,7 +103,7 @@ cdef class Vectors:
RETURNS (bool): `True` if no slots are available for new keys.
DOCS: https://nightly.spacy.io/api/vectors#is_full
DOCS: https://spacy.io/api/vectors#is_full
"""
return self._unset.size() == 0
@ -114,7 +114,7 @@ cdef class Vectors:
RETURNS (int): The number of keys in the table.
DOCS: https://nightly.spacy.io/api/vectors#n_keys
DOCS: https://spacy.io/api/vectors#n_keys
"""
return len(self.key2row)
@ -127,7 +127,7 @@ cdef class Vectors:
key (int): The key to get the vector for.
RETURNS (ndarray): The vector for the key.
DOCS: https://nightly.spacy.io/api/vectors#getitem
DOCS: https://spacy.io/api/vectors#getitem
"""
i = self.key2row[key]
if i is None:
@ -141,7 +141,7 @@ cdef class Vectors:
key (int): The key to set the vector for.
vector (ndarray): The vector to set.
DOCS: https://nightly.spacy.io/api/vectors#setitem
DOCS: https://spacy.io/api/vectors#setitem
"""
i = self.key2row[key]
self.data[i] = vector
@ -153,7 +153,7 @@ cdef class Vectors:
YIELDS (int): A key in the table.
DOCS: https://nightly.spacy.io/api/vectors#iter
DOCS: https://spacy.io/api/vectors#iter
"""
yield from self.key2row
@ -162,7 +162,7 @@ cdef class Vectors:
RETURNS (int): The number of vectors in the data.
DOCS: https://nightly.spacy.io/api/vectors#len
DOCS: https://spacy.io/api/vectors#len
"""
return self.data.shape[0]
@ -172,7 +172,7 @@ cdef class Vectors:
key (int): The key to check.
RETURNS (bool): Whether the key has a vector entry.
DOCS: https://nightly.spacy.io/api/vectors#contains
DOCS: https://spacy.io/api/vectors#contains
"""
return key in self.key2row
@ -189,7 +189,7 @@ cdef class Vectors:
inplace (bool): Reallocate the memory.
RETURNS (list): The removed items as a list of `(key, row)` tuples.
DOCS: https://nightly.spacy.io/api/vectors#resize
DOCS: https://spacy.io/api/vectors#resize
"""
xp = get_array_module(self.data)
if inplace:
@ -224,7 +224,7 @@ cdef class Vectors:
YIELDS (ndarray): A vector in the table.
DOCS: https://nightly.spacy.io/api/vectors#values
DOCS: https://spacy.io/api/vectors#values
"""
for row, vector in enumerate(range(self.data.shape[0])):
if not self._unset.count(row):
@ -235,7 +235,7 @@ cdef class Vectors:
YIELDS (tuple): A key/vector pair.
DOCS: https://nightly.spacy.io/api/vectors#items
DOCS: https://spacy.io/api/vectors#items
"""
for key, row in self.key2row.items():
yield key, self.data[row]
@ -281,7 +281,7 @@ cdef class Vectors:
row (int / None): The row number of a vector to map the key to.
RETURNS (int): The row the vector was added to.
DOCS: https://nightly.spacy.io/api/vectors#add
DOCS: https://spacy.io/api/vectors#add
"""
# use int for all keys and rows in key2row for more efficient access
# and serialization
@ -368,7 +368,7 @@ cdef class Vectors:
path (str / Path): A path to a directory, which will be created if
it doesn't exist.
DOCS: https://nightly.spacy.io/api/vectors#to_disk
DOCS: https://spacy.io/api/vectors#to_disk
"""
xp = get_array_module(self.data)
if xp is numpy:
@ -396,7 +396,7 @@ cdef class Vectors:
path (str / Path): Directory path, string or Path-like object.
RETURNS (Vectors): The modified object.
DOCS: https://nightly.spacy.io/api/vectors#from_disk
DOCS: https://spacy.io/api/vectors#from_disk
"""
def load_key2row(path):
if path.exists():
@ -432,7 +432,7 @@ cdef class Vectors:
exclude (list): String names of serialization fields to exclude.
RETURNS (bytes): The serialized form of the `Vectors` object.
DOCS: https://nightly.spacy.io/api/vectors#to_bytes
DOCS: https://spacy.io/api/vectors#to_bytes
"""
def serialize_weights():
if hasattr(self.data, "to_bytes"):
@ -453,7 +453,7 @@ cdef class Vectors:
exclude (list): String names of serialization fields to exclude.
RETURNS (Vectors): The `Vectors` object.
DOCS: https://nightly.spacy.io/api/vectors#from_bytes
DOCS: https://spacy.io/api/vectors#from_bytes
"""
def deserialize_weights(b):
if hasattr(self.data, "from_bytes"):

View File

@ -47,7 +47,7 @@ cdef class Vocab:
instance also provides access to the `StringStore`, and owns underlying
C-data that is shared between `Doc` objects.
DOCS: https://nightly.spacy.io/api/vocab
DOCS: https://spacy.io/api/vocab
"""
def __init__(self, lex_attr_getters=None, strings=tuple(), lookups=None,
oov_prob=-20., vectors_name=None, writing_system={},
@ -110,7 +110,7 @@ cdef class Vocab:
available bit will be chosen.
RETURNS (int): The integer ID by which the flag value can be checked.
DOCS: https://nightly.spacy.io/api/vocab#add_flag
DOCS: https://spacy.io/api/vocab#add_flag
"""
if flag_id == -1:
for bit in range(1, 64):
@ -202,7 +202,7 @@ cdef class Vocab:
string (unicode): The ID string.
RETURNS (bool): Whether the string has an entry in the vocabulary.
DOCS: https://nightly.spacy.io/api/vocab#contains
DOCS: https://spacy.io/api/vocab#contains
"""
cdef hash_t int_key
if isinstance(key, bytes):
@ -219,7 +219,7 @@ cdef class Vocab:
YIELDS (Lexeme): An entry in the vocabulary.
DOCS: https://nightly.spacy.io/api/vocab#iter
DOCS: https://spacy.io/api/vocab#iter
"""
cdef attr_t key
cdef size_t addr
@ -242,7 +242,7 @@ cdef class Vocab:
>>> apple = nlp.vocab.strings["apple"]
>>> assert nlp.vocab[apple] == nlp.vocab[u"apple"]
DOCS: https://nightly.spacy.io/api/vocab#getitem
DOCS: https://spacy.io/api/vocab#getitem
"""
cdef attr_t orth
if isinstance(id_or_string, unicode):
@ -310,7 +310,7 @@ cdef class Vocab:
word was mapped to, and `score` the similarity score between the
two words.
DOCS: https://nightly.spacy.io/api/vocab#prune_vectors
DOCS: https://spacy.io/api/vocab#prune_vectors
"""
xp = get_array_module(self.vectors.data)
# Make sure all vectors are in the vocab
@ -353,7 +353,7 @@ cdef class Vocab:
and shape determined by the `vocab.vectors` instance. Usually, a
numpy ndarray of shape (300,) and dtype float32.
DOCS: https://nightly.spacy.io/api/vocab#get_vector
DOCS: https://spacy.io/api/vocab#get_vector
"""
if isinstance(orth, str):
orth = self.strings.add(orth)
@ -400,7 +400,7 @@ cdef class Vocab:
orth (int / unicode): The word.
vector (numpy.ndarray[ndim=1, dtype='float32']): The vector to set.
DOCS: https://nightly.spacy.io/api/vocab#set_vector
DOCS: https://spacy.io/api/vocab#set_vector
"""
if isinstance(orth, str):
orth = self.strings.add(orth)
@ -422,7 +422,7 @@ cdef class Vocab:
orth (int / unicode): The word.
RETURNS (bool): Whether the word has a vector.
DOCS: https://nightly.spacy.io/api/vocab#has_vector
DOCS: https://spacy.io/api/vocab#has_vector
"""
if isinstance(orth, str):
orth = self.strings.add(orth)
@ -448,7 +448,7 @@ cdef class Vocab:
it doesn't exist.
exclude (list): String names of serialization fields to exclude.
DOCS: https://nightly.spacy.io/api/vocab#to_disk
DOCS: https://spacy.io/api/vocab#to_disk
"""
path = util.ensure_path(path)
if not path.exists():
@ -469,7 +469,7 @@ cdef class Vocab:
exclude (list): String names of serialization fields to exclude.
RETURNS (Vocab): The modified `Vocab` object.
DOCS: https://nightly.spacy.io/api/vocab#to_disk
DOCS: https://spacy.io/api/vocab#from_disk
"""
path = util.ensure_path(path)
getters = ["strings", "vectors"]
@ -494,7 +494,7 @@ cdef class Vocab:
exclude (list): String names of serialization fields to exclude.
RETURNS (bytes): The serialized form of the `Vocab` object.
DOCS: https://nightly.spacy.io/api/vocab#to_bytes
DOCS: https://spacy.io/api/vocab#to_bytes
"""
def deserialize_vectors():
if self.vectors is None:
@ -516,7 +516,7 @@ cdef class Vocab:
exclude (list): String names of serialization fields to exclude.
RETURNS (Vocab): The `Vocab` object.
DOCS: https://nightly.spacy.io/api/vocab#from_bytes
DOCS: https://spacy.io/api/vocab#from_bytes
"""
def serialize_vectors(b):
if self.vectors is None: