Update docs links in codebase

This commit is contained in:
Ines Montani 2020-09-04 12:58:50 +02:00
parent 2189046869
commit ab1bb421ed
58 changed files with 416 additions and 371 deletions

View File

@ -25,7 +25,7 @@ COMMAND = "python -m spacy"
NAME = "spacy"
HELP = """spaCy Command-line Interface
DOCS: https://spacy.io/api/cli
DOCS: https://nightly.spacy.io/api/cli
"""
PROJECT_HELP = f"""Command-line interface for spaCy projects and templates.
You'd typically start by cloning a project template to a local directory and

View File

@ -61,6 +61,8 @@ def convert_cli(
If no output_dir is specified and the output format is JSON, the data
is written to stdout, so you can pipe it forward to a JSON file:
$ spacy convert some_file.conllu --file-type json > some_file.json
DOCS: https://nightly.spacy.io/api/cli#convert
"""
if isinstance(file_type, FileTypes):
# We get an instance of the FileTypes from the CLI so we need its string value
@ -261,6 +263,6 @@ def _get_converter(msg, converter, input_path):
msg.warn(
"Can't automatically detect NER format. "
"Conversion may not succeed. "
"See https://spacy.io/api/cli#convert"
"See https://nightly.spacy.io/api/cli#convert"
)
return converter

View File

@ -31,6 +31,8 @@ def debug_config_cli(
As with the 'train' command, you can override settings from the config
as command line options. For instance, --training.batch_size 128 overrides
the value of "batch_size" in the block "[training]".
DOCS: https://nightly.spacy.io/api/cli#debug-config
"""
overrides = parse_config_overrides(ctx.args)
import_code(code_path)

View File

@ -47,6 +47,8 @@ def debug_data_cli(
Analyze, debug and validate your training and development data. Outputs
useful stats, and can help you find problems like invalid entity annotations,
cyclic dependencies, low data labels and more.
DOCS: https://nightly.spacy.io/api/cli#debug-data
"""
if ctx.command.name == "debug-data":
msg.warn(

View File

@ -30,6 +30,8 @@ def debug_model_cli(
"""
Analyze a Thinc model implementation. Includes checks for internal structure
and activations during training.
DOCS: https://nightly.spacy.io/api/cli#debug-model
"""
if use_gpu >= 0:
msg.info("Using GPU")

View File

@ -28,7 +28,7 @@ def download_cli(
additional arguments provided to this command will be passed to `pip install`
on package installation.
DOCS: https://spacy.io/api/cli#download
DOCS: https://nightly.spacy.io/api/cli#download
AVAILABLE PACKAGES: https://spacy.io/models
"""
download(model, direct, *ctx.args)
@ -77,7 +77,7 @@ def get_compatibility() -> dict:
f"Couldn't fetch compatibility table. Please find a package for your spaCy "
f"installation (v{about.__version__}), and download it manually. "
f"For more details, see the documentation: "
f"https://spacy.io/usage/models",
f"https://nightly.spacy.io/usage/models",
exits=1,
)
comp_table = r.json()
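For example, downloading a trained pipeline package by name:

$ python -m spacy download en_core_web_sm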

View File

@ -27,12 +27,15 @@ def evaluate_cli(
):
"""
Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation
data in the binary .spacy format. The --gold-preproc option sets up the evaluation
examples with gold-standard sentences and tokens for the predictions. Gold
preprocessing helps the annotations align to the tokenization, and may
result in sequences of more consistent length. However, it may reduce
runtime accuracy due to train/test skew. To render a sample of dependency
parses in a HTML file, set as output directory as the displacy_path argument.
data in the binary .spacy format. The --gold-preproc option sets up the
evaluation examples with gold-standard sentences and tokens for the
predictions. Gold preprocessing helps the annotations align to the
tokenization, and may result in sequences of more consistent length. However,
it may reduce runtime accuracy due to train/test skew. To render a sample of
dependency parses in an HTML file, pass the output directory as the
displacy_path argument.
DOCS: https://nightly.spacy.io/api/cli#evaluate
"""
evaluate(
model,
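As a sketch (pipeline and paths are illustrative), an evaluation that also renders a sample of dependency parses might look like:

$ python -m spacy evaluate ./my_pipeline ./dev.spacy --displacy-path ./parses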

View File

@ -21,6 +21,8 @@ def info_cli(
Print info about spaCy installation. If a pipeline is specified as an argument,
print its meta information. Flag --markdown prints details in Markdown for easy
copy-pasting to GitHub issues.
DOCS: https://nightly.spacy.io/api/cli#info
"""
info(model, markdown=markdown, silent=silent)
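For example, printing the meta of an installed pipeline in Markdown:

$ python -m spacy info en_core_web_sm --markdown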

View File

@ -37,6 +37,8 @@ def init_config_cli(
specified via the CLI arguments, this command generates a config with the
optimal settings for your use case. This includes the choice of architecture,
pretrained weights and related hyperparameters.
DOCS: https://nightly.spacy.io/api/cli#init-config
"""
if isinstance(optimize, Optimizations): # instance of enum from the CLI
optimize = optimize.value
@ -59,6 +61,8 @@ def init_fill_config_cli(
functions for their default values and update the base config. This command
can be used with a config generated via the training quickstart widget:
https://nightly.spacy.io/usage/training#quickstart
DOCS: https://nightly.spacy.io/api/cli#init-fill-config
"""
fill_config(output_file, base_path, pretraining=pretraining, diff=diff)
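As a sketch (file names and options are illustrative), generating a config and filling a partial base config might look like:

$ python -m spacy init config ./config.cfg --lang en --pipeline ner --optimize efficiency
$ python -m spacy init fill-config ./base_config.cfg ./config.cfg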

View File

@ -28,7 +28,7 @@ except ImportError:
DEFAULT_OOV_PROB = -20
@init_cli.command("vectors")
@init_cli.command("vocab")
@app.command(
"init-model",
context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
@ -54,6 +54,8 @@ def init_model_cli(
Create a new blank pipeline directory with vocab and vectors from raw data.
If vectors are provided in Word2Vec format, they can be either a plain .txt
file or zipped as a .zip or .tar.gz archive.
DOCS: https://nightly.spacy.io/api/cli#init-vocab
"""
if ctx.command.name == "init-model":
msg.warn(

View File

@ -31,6 +31,8 @@ def package_cli(
the existing values will be used as the defaults in the command-line prompt.
After packaging, "python setup.py sdist" is run in the package directory,
which will create a .tar.gz archive that can be installed via "pip install".
DOCS: https://nightly.spacy.io/api/cli#package
"""
package(
input_dir,
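A minimal end-to-end sketch, with illustrative directory and package names:

$ python -m spacy package ./my_pipeline ./packages
$ cd ./packages/en_my_pipeline-0.0.0
$ python setup.py sdist
$ pip install dist/en_my_pipeline-0.0.0.tar.gz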

View File

@ -57,6 +57,8 @@ def pretrain_cli(
To load the weights back in during 'spacy train', you need to ensure
all settings are the same between pretraining and training. Ideally,
this is done by using the same config file for both commands.
DOCS: https://nightly.spacy.io/api/cli#pretrain
"""
overrides = parse_config_overrides(ctx.args)
import_code(code_path)

View File

@ -29,6 +29,8 @@ def profile_cli(
Input should be formatted as one JSON object per line with a key "text".
It can either be provided as a JSONL file, or be read from sys.stdin.
If no input file is specified, the IMDB dataset is loaded via Thinc.
DOCS: https://nightly.spacy.io/api/cli#debug-profile
"""
if ctx.parent.command.name == NAME: # called as top-level command
msg.warn(

View File

@ -20,6 +20,8 @@ def project_assets_cli(
defined in the "assets" section of the project.yml. If a checksum is
provided in the project.yml, the file is only downloaded if no local file
with the same checksum exists.
DOCS: https://nightly.spacy.io/api/cli#project-assets
"""
project_assets(project_dir)

View File

@ -22,6 +22,8 @@ def project_clone_cli(
only download the files from the given subdirectory. The GitHub repo
defaults to the official spaCy template repo, but can be customized
(including using a private repo).
DOCS: https://nightly.spacy.io/api/cli#project-clone
"""
if dest is None:
dest = Path.cwd() / name

View File

@ -43,6 +43,8 @@ def project_document_cli(
hidden markers are added so you can add custom content before or after the
auto-generated section and only the auto-generated docs will be replaced
when you re-run the command.
DOCS: https://nightly.spacy.io/api/cli#project-document
"""
project_document(project_dir, output_file, no_emoji=no_emoji)

View File

@ -31,7 +31,10 @@ def project_update_dvc_cli(
"""Auto-generate Data Version Control (DVC) config. A DVC
project can only define one pipeline, so you need to specify one workflow
defined in the project.yml. If no workflow is specified, the first defined
workflow is used. The DVC config will only be updated if the project.yml changed.
workflow is used. The DVC config will only be updated if the project.yml
changed.
DOCS: https://nightly.spacy.io/api/cli#project-dvc
"""
project_update_dvc(project_dir, workflow, verbose=verbose, force=force)

View File

@ -17,7 +17,9 @@ def project_pull_cli(
"""Retrieve available precomputed outputs from a remote storage.
You can alias remotes in your project.yml by mapping them to storage paths.
A storage can be anything that the smart-open library can upload to, e.g.
gcs, aws, ssh, local directories etc
AWS, Google Cloud Storage, SSH, local directories etc.
DOCS: https://nightly.spacy.io/api/cli#project-pull
"""
for url, output_path in project_pull(project_dir, remote):
if url is not None:

View File

@ -13,9 +13,12 @@ def project_push_cli(
project_dir: Path = Arg(Path.cwd(), help="Location of project directory. Defaults to current working directory.", exists=True, file_okay=False),
# fmt: on
):
"""Persist outputs to a remote storage. You can alias remotes in your project.yml
by mapping them to storage paths. A storage can be anything that the smart-open
library can upload to, e.g. gcs, aws, ssh, local directories etc
"""Persist outputs to a remote storage. You can alias remotes in your
project.yml by mapping them to storage paths. A storage can be anything that
the smart-open library can upload to, e.g. AWS, Google Cloud Storage, SSH,
local directories etc.
DOCS: https://nightly.spacy.io/api/cli#project-push
"""
for output_path, url in project_push(project_dir, remote):
if url is None:

View File

@ -24,6 +24,8 @@ def project_run_cli(
name is specified, all commands in the workflow are run, in order. If
commands define dependencies and/or outputs, they will only be re-run if
state has changed.
DOCS: https://nightly.spacy.io/api/cli#project-run
"""
if show_help or not subcommand:
print_run_help(project_dir, subcommand)
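Taken together, a typical project session might look like the following sketch (the template name is illustrative):

$ python -m spacy project clone some_example_project
$ cd some_example_project
$ python -m spacy project assets
$ python -m spacy project run all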

View File

@ -44,6 +44,8 @@ def train_cli(
lets you pass in a Python file that's imported before training. It can be
used to register custom functions and architectures that can then be
referenced in the config.
DOCS: https://nightly.spacy.io/api/cli#train
"""
util.logger.setLevel(logging.DEBUG if verbose else logging.ERROR)
verify_cli_args(config_path, output_path)
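For instance, training with a custom code file and a config override (paths and values are illustrative):

$ python -m spacy train ./config.cfg --output ./output --code ./functions.py --training.max_epochs 10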

View File

@ -16,6 +16,8 @@ def validate_cli():
Validate the currently installed pipeline packages and spaCy version. Checks
if the installed packages are compatible and shows upgrade instructions if
available. Should be run after `pip install -U spacy`.
DOCS: https://nightly.spacy.io/api/cli#validate
"""
validate()
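For example:

$ pip install -U spacy
$ python -m spacy validate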

View File

@ -1,8 +1,8 @@
"""
spaCy's built-in visualization suite for dependencies and named entities.
DOCS: https://spacy.io/api/top-level#displacy
USAGE: https://spacy.io/usage/visualizers
DOCS: https://nightly.spacy.io/api/top-level#displacy
USAGE: https://nightly.spacy.io/usage/visualizers
"""
from typing import Union, Iterable, Optional, Dict, Any, Callable
import warnings
@ -37,8 +37,8 @@ def render(
manual (bool): Don't parse `Doc` and instead expect a dict/list of dicts.
RETURNS (str): Rendered HTML markup.
DOCS: https://spacy.io/api/top-level#displacy.render
USAGE: https://spacy.io/usage/visualizers
DOCS: https://nightly.spacy.io/api/top-level#displacy.render
USAGE: https://nightly.spacy.io/usage/visualizers
"""
factories = {
"dep": (DependencyRenderer, parse_deps),
@ -88,8 +88,8 @@ def serve(
port (int): Port to serve visualisation.
host (str): Host to serve visualisation.
DOCS: https://spacy.io/api/top-level#displacy.serve
USAGE: https://spacy.io/usage/visualizers
DOCS: https://nightly.spacy.io/api/top-level#displacy.serve
USAGE: https://nightly.spacy.io/usage/visualizers
"""
from wsgiref import simple_server
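A minimal usage sketch for the displacy API above; it assumes the en_core_web_sm pipeline is installed:

import spacy
from spacy import displacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("Apple is looking at buying a U.K. startup.")
html = displacy.render(doc, style="ent", page=True)  # returns the rendered HTML markup
# displacy.serve(doc, style="dep")  # serves the visualization on a local web server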

View File

@ -22,7 +22,7 @@ class Warnings:
"generate a dependency visualization for it. Make sure the Doc "
"was processed with a model that supports dependency parsing, and "
"not just a language class like `English()`. For more info, see "
"the docs:\nhttps://spacy.io/usage/models")
"the docs:\nhttps://nightly.spacy.io/usage/models")
W006 = ("No entities to visualize found in Doc object. If this is "
"surprising to you, make sure the Doc was processed using a model "
"that supports named entity recognition, and check the `doc.ents` "
@ -147,7 +147,7 @@ class Errors:
E010 = ("Word vectors set to length 0. This may be because you don't have "
"a model installed or loaded, or because your model doesn't "
"include word vectors. For more info, see the docs:\n"
"https://spacy.io/usage/models")
"https://nightly.spacy.io/usage/models")
E011 = ("Unknown operator: '{op}'. Options: {opts}")
E012 = ("Cannot add pattern for zero tokens to matcher.\nKey: {key}")
E014 = ("Unknown tag ID: {tag}")
@ -181,7 +181,7 @@ class Errors:
"list of (unicode, bool) tuples. Got bytes instance: {value}")
E029 = ("noun_chunks requires the dependency parse, which requires a "
"statistical model to be installed and loaded. For more info, see "
"the documentation:\nhttps://spacy.io/usage/models")
"the documentation:\nhttps://nightly.spacy.io/usage/models")
E030 = ("Sentence boundaries unset. You can add the 'sentencizer' "
"component to the pipeline with: "
"nlp.add_pipe('sentencizer'). "
@ -294,7 +294,7 @@ class Errors:
E102 = ("Can't merge non-disjoint spans. '{token}' is already part of "
"tokens to merge. If you want to find the longest non-overlapping "
"spans, you can use the util.filter_spans helper:\n"
"https://spacy.io/api/top-level#util.filter_spans")
"https://nightly.spacy.io/api/top-level#util.filter_spans")
E103 = ("Trying to set conflicting doc.ents: '{span1}' and '{span2}'. A "
"token can only be part of one entity, so make sure the entities "
"you're setting don't overlap.")
@ -364,10 +364,10 @@ class Errors:
E137 = ("Expected 'dict' type, but got '{type}' from '{line}'. Make sure "
"to provide a valid JSON object as input with either the `text` "
"or `tokens` key. For more info, see the docs:\n"
"https://spacy.io/api/cli#pretrain-jsonl")
"https://nightly.spacy.io/api/cli#pretrain-jsonl")
E138 = ("Invalid JSONL format for raw text '{text}'. Make sure the input "
"includes either the `text` or `tokens` key. For more info, see "
"the docs:\nhttps://spacy.io/api/cli#pretrain-jsonl")
"the docs:\nhttps://nightly.spacy.io/api/cli#pretrain-jsonl")
E139 = ("Knowledge Base for component '{name}' is empty. Use the methods "
"kb.add_entity and kb.add_alias to add entries.")
E140 = ("The list of entities, prior probabilities and entity vectors "
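The util.filter_spans helper referenced in E102 keeps the longest non-overlapping spans; a minimal sketch:

import spacy
from spacy.util import filter_spans

nlp = spacy.blank("en")
doc = nlp("New York City is very busy")
spans = [doc[0:2], doc[0:3], doc[4:6]]
print(filter_spans(spans))  # [New York City, very busy]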

View File

@ -106,7 +106,7 @@ def conll_ner2docs(
raise ValueError(
"The token-per-line NER file is not formatted correctly. "
"Try checking whitespace and delimiters. See "
"https://spacy.io/api/cli#convert"
"https://nightly.spacy.io/api/cli#convert"
)
length = len(cols[0])
words.extend(cols[0])

View File

@ -44,7 +44,7 @@ def read_iob(raw_sents, vocab, n_sents):
sent_tags = ["-"] * len(sent_words)
else:
raise ValueError(
"The sentence-per-line IOB/IOB2 file is not formatted correctly. Try checking whitespace and delimiters. See https://spacy.io/api/cli#convert"
"The sentence-per-line IOB/IOB2 file is not formatted correctly. Try checking whitespace and delimiters. See https://nightly.spacy.io/api/cli#convert"
)
words.extend(sent_words)
tags.extend(sent_tags)

View File

@ -38,7 +38,7 @@ class Corpus:
limit (int): Limit corpus to a subset of examples, e.g. for debugging.
Defaults to 0, which indicates no limit.
DOCS: https://spacy.io/api/corpus
DOCS: https://nightly.spacy.io/api/corpus
"""
def __init__(
@ -83,7 +83,7 @@ class Corpus:
nlp (Language): The current nlp object.
YIELDS (Example): The examples.
DOCS: https://spacy.io/api/corpus#call
DOCS: https://nightly.spacy.io/api/corpus#call
"""
ref_docs = self.read_docbin(nlp.vocab, self.walk_corpus(self.path))
if self.gold_preproc:
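A minimal sketch of reading evaluation examples with the class above; the import path is an assumption for the v3 API and the file name is illustrative:

import spacy
from spacy.training import Corpus  # import path is an assumption for the v3 API

nlp = spacy.blank("en")
corpus = Corpus("./dev.spacy")  # gold_preproc and limit can be passed as keyword args
examples = list(corpus(nlp))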

View File

@ -21,7 +21,7 @@ cdef class Candidate:
algorithm which will disambiguate the various candidates to the correct one.
Each candidate (alias, entity) pair is assigned a prior probability.
DOCS: https://spacy.io/api/kb/#candidate_init
DOCS: https://nightly.spacy.io/api/kb/#candidate_init
"""
def __init__(self, KnowledgeBase kb, entity_hash, entity_freq, entity_vector, alias_hash, prior_prob):
@ -79,7 +79,7 @@ cdef class KnowledgeBase:
"""A `KnowledgeBase` instance stores unique identifiers for entities and their textual aliases,
to support entity linking of named entities to real-world concepts.
DOCS: https://spacy.io/api/kb
DOCS: https://nightly.spacy.io/api/kb
"""
def __init__(self, Vocab vocab, entity_vector_length):
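A small sketch of building a knowledge base with the constructor shown above; the entity ID, frequency and vector values are illustrative:

from spacy.kb import KnowledgeBase
from spacy.vocab import Vocab

vocab = Vocab()
kb = KnowledgeBase(vocab, entity_vector_length=3)
kb.add_entity("Q42", freq=12, entity_vector=[1.0, 2.0, 3.0])
kb.add_alias("Douglas", entities=["Q42"], probabilities=[1.0])
print(kb.get_size_entities())  # 1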

View File

@ -95,7 +95,7 @@ class Language:
object and processing pipeline.
lang (str): Two-letter language ID, i.e. ISO code.
DOCS: https://spacy.io/api/language
DOCS: https://nightly.spacy.io/api/language
"""
Defaults = BaseDefaults
@ -130,7 +130,7 @@ class Language:
create_tokenizer (Callable): Function that takes the nlp object and
returns a tokenizer.
DOCS: https://spacy.io/api/language#init
DOCS: https://nightly.spacy.io/api/language#init
"""
# We're only calling this to import all factories provided via entry
# points. The factory decorator applied to these functions takes care
@ -185,7 +185,7 @@ class Language:
RETURNS (Dict[str, Any]): The meta.
DOCS: https://spacy.io/api/language#meta
DOCS: https://nightly.spacy.io/api/language#meta
"""
spacy_version = util.get_model_version_range(about.__version__)
if self.vocab.lang:
@ -225,7 +225,7 @@ class Language:
RETURNS (thinc.api.Config): The config.
DOCS: https://spacy.io/api/language#config
DOCS: https://nightly.spacy.io/api/language#config
"""
self._config.setdefault("nlp", {})
self._config.setdefault("training", {})
@ -433,7 +433,7 @@ class Language:
will be combined and normalized for the whole pipeline.
func (Optional[Callable]): Factory function if not used as a decorator.
DOCS: https://spacy.io/api/language#factory
DOCS: https://nightly.spacy.io/api/language#factory
"""
if not isinstance(name, str):
raise ValueError(Errors.E963.format(decorator="factory"))
@ -513,7 +513,7 @@ class Language:
Used for pipeline analysis.
func (Optional[Callable]): Factory function if not used as a decorator.
DOCS: https://spacy.io/api/language#component
DOCS: https://nightly.spacy.io/api/language#component
"""
if name is not None and not isinstance(name, str):
raise ValueError(Errors.E963.format(decorator="component"))
@ -579,7 +579,7 @@ class Language:
name (str): Name of pipeline component to get.
RETURNS (callable): The pipeline component.
DOCS: https://spacy.io/api/language#get_pipe
DOCS: https://nightly.spacy.io/api/language#get_pipe
"""
for pipe_name, component in self._components:
if pipe_name == name:
@ -608,7 +608,7 @@ class Language:
arguments and types expected by the factory.
RETURNS (Callable[[Doc], Doc]): The pipeline component.
DOCS: https://spacy.io/api/language#create_pipe
DOCS: https://nightly.spacy.io/api/language#create_pipe
"""
name = name if name is not None else factory_name
if not isinstance(config, dict):
@ -722,7 +722,7 @@ class Language:
arguments and types expected by the factory.
RETURNS (Callable[[Doc], Doc]): The pipeline component.
DOCS: https://spacy.io/api/language#add_pipe
DOCS: https://nightly.spacy.io/api/language#add_pipe
"""
if not isinstance(factory_name, str):
bad_val = repr(factory_name)
@ -820,7 +820,7 @@ class Language:
name (str): Name of the component.
RETURNS (bool): Whether a component of the name exists in the pipeline.
DOCS: https://spacy.io/api/language#has_pipe
DOCS: https://nightly.spacy.io/api/language#has_pipe
"""
return name in self.pipe_names
@ -841,7 +841,7 @@ class Language:
validate (bool): Whether to validate the component config against the
arguments and types expected by the factory.
DOCS: https://spacy.io/api/language#replace_pipe
DOCS: https://nightly.spacy.io/api/language#replace_pipe
"""
if name not in self.pipe_names:
raise ValueError(Errors.E001.format(name=name, opts=self.pipe_names))
@ -870,7 +870,7 @@ class Language:
old_name (str): Name of the component to rename.
new_name (str): New name of the component.
DOCS: https://spacy.io/api/language#rename_pipe
DOCS: https://nightly.spacy.io/api/language#rename_pipe
"""
if old_name not in self.component_names:
raise ValueError(
@ -891,7 +891,7 @@ class Language:
name (str): Name of the component to remove.
RETURNS (tuple): A `(name, component)` tuple of the removed component.
DOCS: https://spacy.io/api/language#remove_pipe
DOCS: https://nightly.spacy.io/api/language#remove_pipe
"""
if name not in self.component_names:
raise ValueError(Errors.E001.format(name=name, opts=self.component_names))
@ -944,7 +944,7 @@ class Language:
keyword arguments for specific components.
RETURNS (Doc): A container for accessing the annotations.
DOCS: https://spacy.io/api/language#call
DOCS: https://nightly.spacy.io/api/language#call
"""
if len(text) > self.max_length:
raise ValueError(
@ -993,7 +993,7 @@ class Language:
disable (str or iterable): The name(s) of the pipes to disable
enable (str or iterable): The name(s) of the pipes to enable - all others will be disabled
DOCS: https://spacy.io/api/language#select_pipes
DOCS: https://nightly.spacy.io/api/language#select_pipes
"""
if enable is None and disable is None:
raise ValueError(Errors.E991)
@ -1044,7 +1044,7 @@ class Language:
exclude (Iterable[str]): Names of components that shouldn't be updated.
RETURNS (Dict[str, float]): The updated losses dictionary
DOCS: https://spacy.io/api/language#update
DOCS: https://nightly.spacy.io/api/language#update
"""
if _ is not None:
raise ValueError(Errors.E989)
@ -1106,7 +1106,7 @@ class Language:
>>> raw_batch = [Example.from_dict(nlp.make_doc(text), {}) for text in next(raw_text_batches)]
>>> nlp.rehearse(raw_batch)
DOCS: https://spacy.io/api/language#rehearse
DOCS: https://nightly.spacy.io/api/language#rehearse
"""
if len(examples) == 0:
return
@ -1153,7 +1153,7 @@ class Language:
create_optimizer if it doesn't exist.
RETURNS (thinc.api.Optimizer): The optimizer.
DOCS: https://spacy.io/api/language#begin_training
DOCS: https://nightly.spacy.io/api/language#begin_training
"""
# TODO: throw warning when get_gold_tuples is provided instead of get_examples
if get_examples is None:
@ -1200,7 +1200,7 @@ class Language:
sgd (Optional[Optimizer]): An optimizer.
RETURNS (Optimizer): The optimizer.
DOCS: https://spacy.io/api/language#resume_training
DOCS: https://nightly.spacy.io/api/language#resume_training
"""
if device >= 0: # TODO: do we need this here?
require_gpu(device)
@ -1236,7 +1236,7 @@ class Language:
for the scorer.
RETURNS (Scorer): The scorer containing the evaluation results.
DOCS: https://spacy.io/api/language#evaluate
DOCS: https://nightly.spacy.io/api/language#evaluate
"""
validate_examples(examples, "Language.evaluate")
if component_cfg is None:
@ -1286,7 +1286,7 @@ class Language:
>>> with nlp.use_params(optimizer.averages):
>>> nlp.to_disk("/tmp/checkpoint")
DOCS: https://spacy.io/api/language#use_params
DOCS: https://nightly.spacy.io/api/language#use_params
"""
if not params:
yield
@ -1333,7 +1333,7 @@ class Language:
n_process (int): Number of processors to process texts. If -1, set `multiprocessing.cpu_count()`.
YIELDS (Doc): Documents in the order of the original text.
DOCS: https://spacy.io/api/language#pipe
DOCS: https://nightly.spacy.io/api/language#pipe
"""
if n_process == -1:
n_process = mp.cpu_count()
@ -1469,7 +1469,7 @@ class Language:
the types expected by the factory.
RETURNS (Language): The initialized Language class.
DOCS: https://spacy.io/api/language#from_config
DOCS: https://nightly.spacy.io/api/language#from_config
"""
if auto_fill:
config = Config(
@ -1582,7 +1582,7 @@ class Language:
it doesn't exist.
exclude (list): Names of components or serialization fields to exclude.
DOCS: https://spacy.io/api/language#to_disk
DOCS: https://nightly.spacy.io/api/language#to_disk
"""
path = util.ensure_path(path)
serializers = {}
@ -1611,7 +1611,7 @@ class Language:
exclude (list): Names of components or serialization fields to exclude.
RETURNS (Language): The modified `Language` object.
DOCS: https://spacy.io/api/language#from_disk
DOCS: https://nightly.spacy.io/api/language#from_disk
"""
def deserialize_meta(path: Path) -> None:
@ -1659,7 +1659,7 @@ class Language:
exclude (list): Names of components or serialization fields to exclude.
RETURNS (bytes): The serialized form of the `Language` object.
DOCS: https://spacy.io/api/language#to_bytes
DOCS: https://nightly.spacy.io/api/language#to_bytes
"""
serializers = {}
serializers["vocab"] = lambda: self.vocab.to_bytes()
@ -1683,7 +1683,7 @@ class Language:
exclude (list): Names of components or serialization fields to exclude.
RETURNS (Language): The `Language` object.
DOCS: https://spacy.io/api/language#from_bytes
DOCS: https://nightly.spacy.io/api/language#from_bytes
"""
def deserialize_meta(b):
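A short sketch tying a few of the Language methods above together:

import spacy

nlp = spacy.blank("en")
nlp.add_pipe("sentencizer")
doc = nlp("This is a sentence. This is another one.")
print([sent.text for sent in doc.sents])

with nlp.select_pipes(disable=["sentencizer"]):
    doc = nlp("Processed without the sentencizer")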

View File

@ -30,7 +30,7 @@ cdef class Lexeme:
tag, dependency parse, or lemma (lemmatization depends on the
part-of-speech tag).
DOCS: https://spacy.io/api/lexeme
DOCS: https://nightly.spacy.io/api/lexeme
"""
def __init__(self, Vocab vocab, attr_t orth):
"""Create a Lexeme object.

View File

@ -57,7 +57,7 @@ class Table(OrderedDict):
data (dict): The dictionary.
name (str): Optional table name for reference.
DOCS: https://spacy.io/api/lookups#table.from_dict
DOCS: https://nightly.spacy.io/api/lookups#table.from_dict
"""
self = cls(name=name)
self.update(data)
@ -69,7 +69,7 @@ class Table(OrderedDict):
name (str): Optional table name for reference.
data (dict): Initial data, used to hint Bloom Filter.
DOCS: https://spacy.io/api/lookups#table.init
DOCS: https://nightly.spacy.io/api/lookups#table.init
"""
OrderedDict.__init__(self)
self.name = name
@ -135,7 +135,7 @@ class Table(OrderedDict):
RETURNS (bytes): The serialized table.
DOCS: https://spacy.io/api/lookups#table.to_bytes
DOCS: https://nightly.spacy.io/api/lookups#table.to_bytes
"""
data = {
"name": self.name,
@ -150,7 +150,7 @@ class Table(OrderedDict):
bytes_data (bytes): The data to load.
RETURNS (Table): The loaded table.
DOCS: https://spacy.io/api/lookups#table.from_bytes
DOCS: https://nightly.spacy.io/api/lookups#table.from_bytes
"""
loaded = srsly.msgpack_loads(bytes_data)
data = loaded.get("dict", {})
@ -172,7 +172,7 @@ class Lookups:
def __init__(self) -> None:
"""Initialize the Lookups object.
DOCS: https://spacy.io/api/lookups#init
DOCS: https://nightly.spacy.io/api/lookups#init
"""
self._tables = {}
@ -201,7 +201,7 @@ class Lookups:
data (dict): Optional data to add to the table.
RETURNS (Table): The newly added table.
DOCS: https://spacy.io/api/lookups#add_table
DOCS: https://nightly.spacy.io/api/lookups#add_table
"""
if name in self.tables:
raise ValueError(Errors.E158.format(name=name))
@ -215,7 +215,7 @@ class Lookups:
name (str): Name of the table to set.
table (Table): The Table to set.
DOCS: https://spacy.io/api/lookups#set_table
DOCS: https://nightly.spacy.io/api/lookups#set_table
"""
self._tables[name] = table
@ -227,7 +227,7 @@ class Lookups:
default (Any): Optional default value to return if table doesn't exist.
RETURNS (Table): The table.
DOCS: https://spacy.io/api/lookups#get_table
DOCS: https://nightly.spacy.io/api/lookups#get_table
"""
if name not in self._tables:
if default == UNSET:
@ -241,7 +241,7 @@ class Lookups:
name (str): Name of the table to remove.
RETURNS (Table): The removed table.
DOCS: https://spacy.io/api/lookups#remove_table
DOCS: https://nightly.spacy.io/api/lookups#remove_table
"""
if name not in self._tables:
raise KeyError(Errors.E159.format(name=name, tables=self.tables))
@ -253,7 +253,7 @@ class Lookups:
name (str): Name of the table.
RETURNS (bool): Whether a table of that name exists.
DOCS: https://spacy.io/api/lookups#has_table
DOCS: https://nightly.spacy.io/api/lookups#has_table
"""
return name in self._tables
@ -262,7 +262,7 @@ class Lookups:
RETURNS (bytes): The serialized Lookups.
DOCS: https://spacy.io/api/lookups#to_bytes
DOCS: https://nightly.spacy.io/api/lookups#to_bytes
"""
return srsly.msgpack_dumps(self._tables)
@ -272,7 +272,7 @@ class Lookups:
bytes_data (bytes): The data to load.
RETURNS (Lookups): The loaded Lookups.
DOCS: https://spacy.io/api/lookups#from_bytes
DOCS: https://nightly.spacy.io/api/lookups#from_bytes
"""
self._tables = {}
for key, value in srsly.msgpack_loads(bytes_data).items():
@ -287,7 +287,7 @@ class Lookups:
path (str / Path): The file path.
DOCS: https://spacy.io/api/lookups#to_disk
DOCS: https://nightly.spacy.io/api/lookups#to_disk
"""
if len(self._tables):
path = ensure_path(path)
@ -306,7 +306,7 @@ class Lookups:
path (str / Path): The directory path.
RETURNS (Lookups): The loaded lookups.
DOCS: https://spacy.io/api/lookups#from_disk
DOCS: https://nightly.spacy.io/api/lookups#from_disk
"""
path = ensure_path(path)
filepath = path / filename
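A minimal sketch of the Lookups/Table API documented above; the table name and data are illustrative:

from spacy.lookups import Lookups

lookups = Lookups()
lookups.add_table("lemma_lookup", {"going": "go"})
print(lookups.has_table("lemma_lookup"))  # True
print(lookups.get_table("lemma_lookup")["going"])  # "go"
data = lookups.to_bytes()  # serialize all tables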

View File

@ -31,8 +31,8 @@ DEF PADDING = 5
cdef class Matcher:
"""Match sequences of tokens, based on pattern rules.
DOCS: https://spacy.io/api/matcher
USAGE: https://spacy.io/usage/rule-based-matching
DOCS: https://nightly.spacy.io/api/matcher
USAGE: https://nightly.spacy.io/usage/rule-based-matching
"""
def __init__(self, vocab, validate=True):
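For example, matching a simple two-token pattern (the key and pattern are illustrative):

import spacy
from spacy.matcher import Matcher

nlp = spacy.blank("en")
matcher = Matcher(nlp.vocab)
matcher.add("HELLO_WORLD", [[{"LOWER": "hello"}, {"LOWER": "world"}]])
doc = nlp("Hello world!")
for match_id, start, end in matcher(doc):
    print(doc[start:end].text)  # "Hello world"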

View File

@ -19,8 +19,8 @@ cdef class PhraseMatcher:
sequences based on lists of token descriptions, the `PhraseMatcher` accepts
match patterns in the form of `Doc` objects.
DOCS: https://spacy.io/api/phrasematcher
USAGE: https://spacy.io/usage/rule-based-matching#phrasematcher
DOCS: https://nightly.spacy.io/api/phrasematcher
USAGE: https://nightly.spacy.io/usage/rule-based-matching#phrasematcher
Adapted from FlashText: https://github.com/vi3k6i5/flashtext
MIT License (see `LICENSE`)
@ -34,7 +34,7 @@ cdef class PhraseMatcher:
attr (int / str): Token attribute to match on.
validate (bool): Perform additional validation when patterns are added.
DOCS: https://spacy.io/api/phrasematcher#init
DOCS: https://nightly.spacy.io/api/phrasematcher#init
"""
self.vocab = vocab
self._callbacks = {}
@ -61,7 +61,7 @@ cdef class PhraseMatcher:
RETURNS (int): The number of rules.
DOCS: https://spacy.io/api/phrasematcher#len
DOCS: https://nightly.spacy.io/api/phrasematcher#len
"""
return len(self._callbacks)
@ -71,7 +71,7 @@ cdef class PhraseMatcher:
key (str): The match ID.
RETURNS (bool): Whether the matcher contains rules for this match ID.
DOCS: https://spacy.io/api/phrasematcher#contains
DOCS: https://nightly.spacy.io/api/phrasematcher#contains
"""
return key in self._callbacks
@ -85,7 +85,7 @@ cdef class PhraseMatcher:
key (str): The match ID.
DOCS: https://spacy.io/api/phrasematcher#remove
DOCS: https://nightly.spacy.io/api/phrasematcher#remove
"""
if key not in self._docs:
raise KeyError(key)
@ -164,7 +164,7 @@ cdef class PhraseMatcher:
as variable arguments. Will be ignored if a list of patterns is
provided as the second argument.
DOCS: https://spacy.io/api/phrasematcher#add
DOCS: https://nightly.spacy.io/api/phrasematcher#add
"""
if docs is None or hasattr(docs, "__call__"): # old API
on_match = docs
@ -228,7 +228,7 @@ cdef class PhraseMatcher:
`doc[start:end]`. The `match_id` is an integer. If as_spans is set
to True, a list of Span objects is returned.
DOCS: https://spacy.io/api/phrasematcher#call
DOCS: https://nightly.spacy.io/api/phrasematcher#call
"""
matches = []
if doc is None or len(doc) == 0:
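A usage sketch with an illustrative pattern:

import spacy
from spacy.matcher import PhraseMatcher

nlp = spacy.blank("en")
matcher = PhraseMatcher(nlp.vocab)
matcher.add("OBAMA", [nlp.make_doc("Barack Obama")])
doc = nlp("Barack Obama visited the city.")
for match_id, start, end in matcher(doc):
    print(doc[start:end].text)  # "Barack Obama"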

View File

@ -38,7 +38,7 @@ class AttributeRuler(Pipe):
"""Set token-level attributes for tokens matched by Matcher patterns.
Additionally supports importing patterns from tag maps and morph rules.
DOCS: https://spacy.io/api/attributeruler
DOCS: https://nightly.spacy.io/api/attributeruler
"""
def __init__(
@ -59,7 +59,7 @@ class AttributeRuler(Pipe):
RETURNS (AttributeRuler): The AttributeRuler component.
DOCS: https://spacy.io/api/attributeruler#init
DOCS: https://nightly.spacy.io/api/attributeruler#init
"""
self.name = name
self.vocab = vocab
@ -77,7 +77,7 @@ class AttributeRuler(Pipe):
doc (Doc): The document to process.
RETURNS (Doc): The processed Doc.
DOCS: https://spacy.io/api/attributeruler#call
DOCS: https://nightly.spacy.io/api/attributeruler#call
"""
matches = sorted(self.matcher(doc))
@ -121,7 +121,7 @@ class AttributeRuler(Pipe):
tag_map (dict): The tag map that maps fine-grained tags to
coarse-grained tags and morphological features.
DOCS: https://spacy.io/api/attributeruler#load_from_morph_rules
DOCS: https://nightly.spacy.io/api/attributeruler#load_from_tag_map
"""
for tag, attrs in tag_map.items():
pattern = [{"TAG": tag}]
@ -139,7 +139,7 @@ class AttributeRuler(Pipe):
fine-grained tags to coarse-grained tags, lemmas and morphological
features.
DOCS: https://spacy.io/api/attributeruler#load_from_morph_rules
DOCS: https://nightly.spacy.io/api/attributeruler#load_from_morph_rules
"""
for tag in morph_rules:
for word in morph_rules[tag]:
@ -163,7 +163,7 @@ class AttributeRuler(Pipe):
index (int): The index of the token in the matched span to modify. May
be negative to index from the end of the span. Defaults to 0.
DOCS: https://spacy.io/api/attributeruler#add
DOCS: https://nightly.spacy.io/api/attributeruler#add
"""
self.matcher.add(len(self.attrs), patterns)
self._attrs_unnormed.append(attrs)
@ -178,7 +178,7 @@ class AttributeRuler(Pipe):
as the arguments to AttributeRuler.add (patterns/attrs/index) to
add as patterns.
DOCS: https://spacy.io/api/attributeruler#add_patterns
DOCS: https://nightly.spacy.io/api/attributeruler#add_patterns
"""
for p in pattern_dicts:
self.add(**p)
@ -203,7 +203,7 @@ class AttributeRuler(Pipe):
Scorer.score_token_attr for the attributes "tag", "pos", "morph"
and "lemma" for the target token attributes.
DOCS: https://spacy.io/api/tagger#score
DOCS: https://nightly.spacy.io/api/attributeruler#score
"""
validate_examples(examples, "AttributeRuler.score")
results = {}
@ -227,7 +227,7 @@ class AttributeRuler(Pipe):
exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (bytes): The serialized object.
DOCS: https://spacy.io/api/attributeruler#to_bytes
DOCS: https://nightly.spacy.io/api/attributeruler#to_bytes
"""
serialize = {}
serialize["vocab"] = self.vocab.to_bytes
@ -243,7 +243,7 @@ class AttributeRuler(Pipe):
exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (AttributeRuler): The loaded object.
DOCS: https://spacy.io/api/attributeruler#from_bytes
DOCS: https://nightly.spacy.io/api/attributeruler#from_bytes
"""
def load_patterns(b):
@ -264,7 +264,7 @@ class AttributeRuler(Pipe):
path (Union[Path, str]): A path to a directory.
exclude (Iterable[str]): String names of serialization fields to exclude.
DOCS: https://spacy.io/api/attributeruler#to_disk
DOCS: https://nightly.spacy.io/api/attributeruler#to_disk
"""
serialize = {
"vocab": lambda p: self.vocab.to_disk(p),
@ -279,7 +279,7 @@ class AttributeRuler(Pipe):
path (Union[Path, str]): A path to a directory.
exclude (Iterable[str]): String names of serialization fields to exclude.
DOCS: https://spacy.io/api/attributeruler#from_disk
DOCS: https://nightly.spacy.io/api/attributeruler#from_disk
"""
def load_patterns(p):
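A small sketch of adding a pattern via the add method above; the pattern and attributes are illustrative:

import spacy

nlp = spacy.blank("en")
ruler = nlp.add_pipe("attribute_ruler")
ruler.add(patterns=[[{"ORTH": "the"}]], attrs={"LEMMA": "the", "POS": "DET"})
doc = nlp("the cat sat")
print(doc[0].lemma_, doc[0].pos_)  # the DET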

View File

@ -105,7 +105,7 @@ def make_parser(
cdef class DependencyParser(Parser):
"""Pipeline component for dependency parsing.
DOCS: https://spacy.io/api/dependencyparser
DOCS: https://nightly.spacy.io/api/dependencyparser
"""
TransitionSystem = ArcEager
@ -146,7 +146,7 @@ cdef class DependencyParser(Parser):
RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans
and Scorer.score_deps.
DOCS: https://spacy.io/api/dependencyparser#score
DOCS: https://nightly.spacy.io/api/dependencyparser#score
"""
validate_examples(examples, "DependencyParser.score")
def dep_getter(token, attr):

View File

@ -83,7 +83,7 @@ def make_entity_linker(
class EntityLinker(Pipe):
"""Pipeline component for named entity linking.
DOCS: https://spacy.io/api/entitylinker
DOCS: https://nightly.spacy.io/api/entitylinker
"""
NIL = "NIL" # string used to refer to a non-existing link
@ -111,7 +111,7 @@ class EntityLinker(Pipe):
incl_prior (bool): Whether or not to include prior probabilities from the KB in the model.
incl_context (bool): Whether or not to include the local context in the model.
DOCS: https://spacy.io/api/entitylinker#init
DOCS: https://nightly.spacy.io/api/entitylinker#init
"""
self.vocab = vocab
self.model = model
@ -151,7 +151,7 @@ class EntityLinker(Pipe):
create_optimizer if it doesn't exist.
RETURNS (thinc.api.Optimizer): The optimizer.
DOCS: https://spacy.io/api/entitylinker#begin_training
DOCS: https://nightly.spacy.io/api/entitylinker#begin_training
"""
self.require_kb()
nO = self.kb.entity_vector_length
@ -182,7 +182,7 @@ class EntityLinker(Pipe):
Updated using the component name as the key.
RETURNS (Dict[str, float]): The updated losses dictionary.
DOCS: https://spacy.io/api/entitylinker#update
DOCS: https://nightly.spacy.io/api/entitylinker#update
"""
self.require_kb()
if losses is None:
@ -264,7 +264,7 @@ class EntityLinker(Pipe):
doc (Doc): The document to process.
RETURNS (Doc): The processed Doc.
DOCS: https://spacy.io/api/entitylinker#call
DOCS: https://nightly.spacy.io/api/entitylinker#call
"""
kb_ids = self.predict([doc])
self.set_annotations([doc], kb_ids)
@ -279,7 +279,7 @@ class EntityLinker(Pipe):
batch_size (int): The number of documents to buffer.
YIELDS (Doc): Processed documents in order.
DOCS: https://spacy.io/api/entitylinker#pipe
DOCS: https://nightly.spacy.io/api/entitylinker#pipe
"""
for docs in util.minibatch(stream, size=batch_size):
kb_ids = self.predict(docs)
@ -294,7 +294,7 @@ class EntityLinker(Pipe):
docs (Iterable[Doc]): The documents to predict.
RETURNS (List[int]): The model's prediction for each document.
DOCS: https://spacy.io/api/entitylinker#predict
DOCS: https://nightly.spacy.io/api/entitylinker#predict
"""
self.require_kb()
entity_count = 0
@ -391,7 +391,7 @@ class EntityLinker(Pipe):
docs (Iterable[Doc]): The documents to modify.
kb_ids (List[str]): The IDs to set, produced by EntityLinker.predict.
DOCS: https://spacy.io/api/entitylinker#set_annotations
DOCS: https://nightly.spacy.io/api/entitylinker#set_annotations
"""
count_ents = len([ent for doc in docs for ent in doc.ents])
if count_ents != len(kb_ids):
@ -412,7 +412,7 @@ class EntityLinker(Pipe):
path (str / Path): Path to a directory.
exclude (Iterable[str]): String names of serialization fields to exclude.
DOCS: https://spacy.io/api/entitylinker#to_disk
DOCS: https://nightly.spacy.io/api/entitylinker#to_disk
"""
serialize = {}
serialize["cfg"] = lambda p: srsly.write_json(p, self.cfg)
@ -430,7 +430,7 @@ class EntityLinker(Pipe):
exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (EntityLinker): The modified EntityLinker object.
DOCS: https://spacy.io/api/entitylinker#from_disk
DOCS: https://nightly.spacy.io/api/entitylinker#from_disk
"""
def load_model(p):

View File

@ -53,8 +53,8 @@ class EntityRuler:
purely rule-based entity recognition system. After initialization, the
component is typically added to the pipeline using `nlp.add_pipe`.
DOCS: https://spacy.io/api/entityruler
USAGE: https://spacy.io/usage/rule-based-matching#entityruler
DOCS: https://nightly.spacy.io/api/entityruler
USAGE: https://nightly.spacy.io/usage/rule-based-matching#entityruler
"""
def __init__(
@ -88,7 +88,7 @@ class EntityRuler:
added by the model, overwrite them by matches if necessary.
ent_id_sep (str): Separator used internally for entity IDs.
DOCS: https://spacy.io/api/entityruler#init
DOCS: https://nightly.spacy.io/api/entityruler#init
"""
self.nlp = nlp
self.name = name
@ -127,7 +127,7 @@ class EntityRuler:
doc (Doc): The Doc object in the pipeline.
RETURNS (Doc): The Doc with added entities, if available.
DOCS: https://spacy.io/api/entityruler#call
DOCS: https://nightly.spacy.io/api/entityruler#call
"""
matches = list(self.matcher(doc)) + list(self.phrase_matcher(doc))
matches = set(
@ -165,7 +165,7 @@ class EntityRuler:
RETURNS (set): The string labels.
DOCS: https://spacy.io/api/entityruler#labels
DOCS: https://nightly.spacy.io/api/entityruler#labels
"""
keys = set(self.token_patterns.keys())
keys.update(self.phrase_patterns.keys())
@ -185,7 +185,7 @@ class EntityRuler:
RETURNS (set): The string entity ids.
DOCS: https://spacy.io/api/entityruler#ent_ids
DOCS: https://nightly.spacy.io/api/entityruler#ent_ids
"""
keys = set(self.token_patterns.keys())
keys.update(self.phrase_patterns.keys())
@ -203,7 +203,7 @@ class EntityRuler:
RETURNS (list): The original patterns, one dictionary per pattern.
DOCS: https://spacy.io/api/entityruler#patterns
DOCS: https://nightly.spacy.io/api/entityruler#patterns
"""
all_patterns = []
for label, patterns in self.token_patterns.items():
@ -230,7 +230,7 @@ class EntityRuler:
patterns (list): The patterns to add.
DOCS: https://spacy.io/api/entityruler#add_patterns
DOCS: https://nightly.spacy.io/api/entityruler#add_patterns
"""
# disable the nlp components after this one in case they hadn't been initialized / deserialised yet
@ -324,7 +324,7 @@ class EntityRuler:
patterns_bytes (bytes): The bytestring to load.
RETURNS (EntityRuler): The loaded entity ruler.
DOCS: https://spacy.io/api/entityruler#from_bytes
DOCS: https://nightly.spacy.io/api/entityruler#from_bytes
"""
cfg = srsly.msgpack_loads(patterns_bytes)
self.clear()
@ -346,7 +346,7 @@ class EntityRuler:
RETURNS (bytes): The serialized patterns.
DOCS: https://spacy.io/api/entityruler#to_bytes
DOCS: https://nightly.spacy.io/api/entityruler#to_bytes
"""
serial = {
"overwrite": self.overwrite,
@ -365,7 +365,7 @@ class EntityRuler:
path (str / Path): The JSONL file to load.
RETURNS (EntityRuler): The loaded entity ruler.
DOCS: https://spacy.io/api/entityruler#from_disk
DOCS: https://nightly.spacy.io/api/entityruler#from_disk
"""
path = ensure_path(path)
self.clear()
@ -401,7 +401,7 @@ class EntityRuler:
path (str / Path): The JSONL file to save.
DOCS: https://spacy.io/api/entityruler#to_disk
DOCS: https://nightly.spacy.io/api/entityruler#to_disk
"""
path = ensure_path(path)
cfg = {
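A minimal sketch with an illustrative pattern:

import spacy

nlp = spacy.blank("en")
ruler = nlp.add_pipe("entity_ruler")
ruler.add_patterns([{"label": "ORG", "pattern": "Apple"}])
doc = nlp("Apple is opening a new store.")
print([(ent.text, ent.label_) for ent in doc.ents])  # [('Apple', 'ORG')]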

View File

@ -15,7 +15,7 @@ def merge_noun_chunks(doc: Doc) -> Doc:
doc (Doc): The Doc object.
RETURNS (Doc): The Doc object with merged noun chunks.
DOCS: https://spacy.io/api/pipeline-functions#merge_noun_chunks
DOCS: https://nightly.spacy.io/api/pipeline-functions#merge_noun_chunks
"""
if not doc.is_parsed:
return doc
@ -37,7 +37,7 @@ def merge_entities(doc: Doc):
doc (Doc): The Doc object.
RETURNS (Doc): The Doc object with merged entities.
DOCS: https://spacy.io/api/pipeline-functions#merge_entities
DOCS: https://nightly.spacy.io/api/pipeline-functions#merge_entities
"""
with doc.retokenize() as retokenizer:
for ent in doc.ents:
@ -54,7 +54,7 @@ def merge_subtokens(doc: Doc, label: str = "subtok") -> Doc:
label (str): The subtoken dependency label.
RETURNS (Doc): The Doc object with merged subtokens.
DOCS: https://spacy.io/api/pipeline-functions#merge_subtokens
DOCS: https://nightly.spacy.io/api/pipeline-functions#merge_subtokens
"""
# TODO: make stateful component with "label" config
merger = Matcher(doc.vocab)
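For instance, merge_entities can be added to a trained pipeline so that multi-token entities become single tokens (assumes en_core_web_sm is installed):

import spacy

nlp = spacy.load("en_core_web_sm")
nlp.add_pipe("merge_entities")
doc = nlp("San Francisco is often foggy.")
print([token.text for token in doc])  # "San Francisco" is a single token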

View File

@ -43,7 +43,7 @@ class Lemmatizer(Pipe):
The Lemmatizer supports simple part-of-speech-sensitive suffix rules and
lookup tables.
DOCS: https://spacy.io/api/lemmatizer
DOCS: https://nightly.spacy.io/api/lemmatizer
"""
@classmethod
@ -54,7 +54,7 @@ class Lemmatizer(Pipe):
mode (str): The lemmatizer mode.
RETURNS (dict): The lookups configuration settings for this mode.
DOCS: https://spacy.io/api/lemmatizer#get_lookups_config
DOCS: https://nightly.spacy.io/api/lemmatizer#get_lookups_config
"""
if mode == "lookup":
return {
@ -80,7 +80,7 @@ class Lemmatizer(Pipe):
lookups should be loaded.
RETURNS (Lookups): The Lookups object.
DOCS: https://spacy.io/api/lemmatizer#get_lookups_config
DOCS: https://nightly.spacy.io/api/lemmatizer#get_lookups_config
"""
config = cls.get_lookups_config(mode)
required_tables = config.get("required_tables", [])
@ -123,7 +123,7 @@ class Lemmatizer(Pipe):
overwrite (bool): Whether to overwrite existing lemmas. Defaults to
`False`.
DOCS: https://spacy.io/api/lemmatizer#init
DOCS: https://nightly.spacy.io/api/lemmatizer#init
"""
self.vocab = vocab
self.model = model
@ -152,7 +152,7 @@ class Lemmatizer(Pipe):
doc (Doc): The Doc to process.
RETURNS (Doc): The processed Doc.
DOCS: https://spacy.io/api/lemmatizer#call
DOCS: https://nightly.spacy.io/api/lemmatizer#call
"""
for token in doc:
if self.overwrite or token.lemma == 0:
@ -168,7 +168,7 @@ class Lemmatizer(Pipe):
batch_size (int): The number of documents to buffer.
YIELDS (Doc): Processed documents in order.
DOCS: https://spacy.io/api/lemmatizer#pipe
DOCS: https://nightly.spacy.io/api/lemmatizer#pipe
"""
for doc in stream:
doc = self(doc)
@ -180,7 +180,7 @@ class Lemmatizer(Pipe):
token (Token): The token to lemmatize.
RETURNS (list): The available lemmas for the string.
DOCS: https://spacy.io/api/lemmatizer#lookup_lemmatize
DOCS: https://nightly.spacy.io/api/lemmatizer#lookup_lemmatize
"""
lookup_table = self.lookups.get_table("lemma_lookup", {})
result = lookup_table.get(token.text, token.text)
@ -194,7 +194,7 @@ class Lemmatizer(Pipe):
token (Token): The token to lemmatize.
RETURNS (list): The available lemmas for the string.
DOCS: https://spacy.io/api/lemmatizer#rule_lemmatize
DOCS: https://nightly.spacy.io/api/lemmatizer#rule_lemmatize
"""
cache_key = (token.orth, token.pos, token.morph)
if cache_key in self.cache:
@ -260,7 +260,7 @@ class Lemmatizer(Pipe):
token (Token): The token.
RETURNS (bool): Whether the token is a base form.
DOCS: https://spacy.io/api/lemmatizer#is_base_form
DOCS: https://nightly.spacy.io/api/lemmatizer#is_base_form
"""
return False
@ -270,7 +270,7 @@ class Lemmatizer(Pipe):
examples (Iterable[Example]): The examples to score.
RETURNS (Dict[str, Any]): The scores.
DOCS: https://spacy.io/api/lemmatizer#score
DOCS: https://nightly.spacy.io/api/lemmatizer#score
"""
validate_examples(examples, "Lemmatizer.score")
return Scorer.score_token_attr(examples, "lemma", **kwargs)
@ -282,7 +282,7 @@ class Lemmatizer(Pipe):
it doesn't exist.
exclude (list): String names of serialization fields to exclude.
DOCS: https://spacy.io/api/vocab#to_disk
DOCS: https://nightly.spacy.io/api/lemmatizer#to_disk
"""
serialize = {}
serialize["vocab"] = lambda p: self.vocab.to_disk(p)
@ -297,7 +297,7 @@ class Lemmatizer(Pipe):
exclude (list): String names of serialization fields to exclude.
RETURNS (Lemmatizer): The modified `Lemmatizer` object.
DOCS: https://spacy.io/api/vocab#to_disk
DOCS: https://nightly.spacy.io/api/lemmatizer#from_disk
"""
deserialize = {}
deserialize["vocab"] = lambda p: self.vocab.from_disk(p)
@ -310,7 +310,7 @@ class Lemmatizer(Pipe):
exclude (list): String names of serialization fields to exclude.
RETURNS (bytes): The serialized form of the `Lemmatizer` object.
DOCS: https://spacy.io/api/vocab#to_bytes
DOCS: https://nightly.spacy.io/api/lemmatizer#to_bytes
"""
serialize = {}
serialize["vocab"] = self.vocab.to_bytes
@ -324,7 +324,7 @@ class Lemmatizer(Pipe):
exclude (list): String names of serialization fields to exclude.
RETURNS (Lemmatizer): The `Lemmatizer` object.
DOCS: https://spacy.io/api/vocab#from_bytes
DOCS: https://nightly.spacy.io/api/lemmatizer#from_bytes
"""
deserialize = {}
deserialize["vocab"] = lambda b: self.vocab.from_bytes(b)

View File

@ -79,7 +79,7 @@ class Morphologizer(Tagger):
labels_morph (dict): Mapping of morph + POS tags to morph labels.
labels_pos (dict): Mapping of morph + POS tags to POS tags.
DOCS: https://spacy.io/api/morphologizer#init
DOCS: https://nightly.spacy.io/api/morphologizer#init
"""
self.vocab = vocab
self.model = model
@ -106,7 +106,7 @@ class Morphologizer(Tagger):
label (str): The label to add.
RETURNS (int): 0 if label is already present, otherwise 1.
DOCS: https://spacy.io/api/morphologizer#add_label
DOCS: https://nightly.spacy.io/api/morphologizer#add_label
"""
if not isinstance(label, str):
raise ValueError(Errors.E187)
@ -139,7 +139,7 @@ class Morphologizer(Tagger):
create_optimizer if it doesn't exist.
RETURNS (thinc.api.Optimizer): The optimizer.
DOCS: https://spacy.io/api/morphologizer#begin_training
DOCS: https://nightly.spacy.io/api/morphologizer#begin_training
"""
if not hasattr(get_examples, "__call__"):
err = Errors.E930.format(name="Morphologizer", obj=type(get_examples))
@ -169,7 +169,7 @@ class Morphologizer(Tagger):
docs (Iterable[Doc]): The documents to modify.
batch_tag_ids: The IDs to set, produced by Morphologizer.predict.
DOCS: https://spacy.io/api/morphologizer#set_annotations
DOCS: https://nightly.spacy.io/api/morphologizer#set_annotations
"""
if isinstance(docs, Doc):
docs = [docs]
@ -194,7 +194,7 @@ class Morphologizer(Tagger):
scores: Scores representing the model's predictions.
RETURNS (Tuple[float, float]): The loss and the gradient.
DOCS: https://spacy.io/api/morphologizer#get_loss
DOCS: https://nightly.spacy.io/api/morphologizer#get_loss
"""
validate_examples(examples, "Morphologizer.get_loss")
loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False)
@ -231,7 +231,7 @@ class Morphologizer(Tagger):
Scorer.score_token_attr for the attributes "pos" and "morph" and
Scorer.score_token_attr_per_feat for the attribute "morph".
DOCS: https://spacy.io/api/morphologizer#score
DOCS: https://nightly.spacy.io/api/morphologizer#score
"""
validate_examples(examples, "Morphologizer.score")
results = {}
@ -247,7 +247,7 @@ class Morphologizer(Tagger):
exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (bytes): The serialized object.
DOCS: https://spacy.io/api/morphologizer#to_bytes
DOCS: https://nightly.spacy.io/api/morphologizer#to_bytes
"""
serialize = {}
serialize["model"] = self.model.to_bytes
@ -262,7 +262,7 @@ class Morphologizer(Tagger):
exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (Morphologizer): The loaded Morphologizer.
DOCS: https://spacy.io/api/morphologizer#from_bytes
DOCS: https://nightly.spacy.io/api/morphologizer#from_bytes
"""
def load_model(b):
try:
@ -284,7 +284,7 @@ class Morphologizer(Tagger):
path (str / Path): Path to a directory.
exclude (Iterable[str]): String names of serialization fields to exclude.
DOCS: https://spacy.io/api/morphologizer#to_disk
DOCS: https://nightly.spacy.io/api/morphologizer#to_disk
"""
serialize = {
"vocab": lambda p: self.vocab.to_disk(p),
@ -300,7 +300,7 @@ class Morphologizer(Tagger):
exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (Morphologizer): The modified Morphologizer object.
DOCS: https://spacy.io/api/morphologizer#from_disk
DOCS: https://nightly.spacy.io/api/morphologizer#from_disk
"""
def load_model(p):
with p.open("rb") as file_:

View File

@ -88,7 +88,7 @@ def make_ner(
cdef class EntityRecognizer(Parser):
"""Pipeline component for named entity recognition.
DOCS: https://spacy.io/api/entityrecognizer
DOCS: https://nightly.spacy.io/api/entityrecognizer
"""
TransitionSystem = BiluoPushDown
@ -119,7 +119,7 @@ cdef class EntityRecognizer(Parser):
examples (Iterable[Example]): The examples to score.
RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans.
DOCS: https://spacy.io/api/entityrecognizer#score
DOCS: https://nightly.spacy.io/api/entityrecognizer#score
"""
validate_examples(examples, "EntityRecognizer.score")
return Scorer.score_spans(examples, "ents", **kwargs)
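A usage sketch with a trained pipeline (assumes en_core_web_sm is installed):

import spacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("Apple is looking at buying a U.K. startup for $1 billion.")
print([(ent.text, ent.label_) for ent in doc.ents])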

View File

@ -15,7 +15,7 @@ cdef class Pipe:
from it and it defines the interface that components should follow to
function as trainable components in a spaCy pipeline.
DOCS: https://spacy.io/api/pipe
DOCS: https://nightly.spacy.io/api/pipe
"""
def __init__(self, vocab, model, name, **cfg):
"""Initialize a pipeline component.
@ -25,7 +25,7 @@ cdef class Pipe:
name (str): The component instance name.
**cfg: Additional settings and config parameters.
DOCS: https://spacy.io/api/pipe#init
DOCS: https://nightly.spacy.io/api/pipe#init
"""
self.vocab = vocab
self.model = model
@ -40,7 +40,7 @@ cdef class Pipe:
doc (Doc): The Doc to process.
RETURNS (Doc): The processed Doc.
DOCS: https://spacy.io/api/pipe#call
DOCS: https://nightly.spacy.io/api/pipe#call
"""
scores = self.predict([doc])
self.set_annotations([doc], scores)
@ -55,7 +55,7 @@ cdef class Pipe:
batch_size (int): The number of documents to buffer.
YIELDS (Doc): Processed documents in order.
DOCS: https://spacy.io/api/pipe#pipe
DOCS: https://nightly.spacy.io/api/pipe#pipe
"""
for docs in util.minibatch(stream, size=batch_size):
scores = self.predict(docs)
@ -69,7 +69,7 @@ cdef class Pipe:
docs (Iterable[Doc]): The documents to predict.
RETURNS: Vector representations for each token in the documents.
DOCS: https://spacy.io/api/pipe#predict
DOCS: https://nightly.spacy.io/api/pipe#predict
"""
raise NotImplementedError(Errors.E931.format(method="predict", name=self.name))
@ -79,7 +79,7 @@ cdef class Pipe:
docs (Iterable[Doc]): The documents to modify.
scores: The scores to assign.
DOCS: https://spacy.io/api/pipe#set_annotations
DOCS: https://nightly.spacy.io/api/pipe#set_annotations
"""
raise NotImplementedError(Errors.E931.format(method="set_annotations", name=self.name))
@ -96,7 +96,7 @@ cdef class Pipe:
Updated using the component name as the key.
RETURNS (Dict[str, float]): The updated losses dictionary.
DOCS: https://spacy.io/api/pipe#update
DOCS: https://nightly.spacy.io/api/pipe#update
"""
if losses is None:
losses = {}
@ -132,7 +132,7 @@ cdef class Pipe:
Updated using the component name as the key.
RETURNS (Dict[str, float]): The updated losses dictionary.
DOCS: https://spacy.io/api/pipe#rehearse
DOCS: https://nightly.spacy.io/api/pipe#rehearse
"""
pass
@ -144,7 +144,7 @@ cdef class Pipe:
scores: Scores representing the model's predictions.
RETURNS (Tuple[float, float]): The loss and the gradient.
DOCS: https://spacy.io/api/pipe#get_loss
DOCS: https://nightly.spacy.io/api/pipe#get_loss
"""
raise NotImplementedError(Errors.E931.format(method="get_loss", name=self.name))
@ -156,7 +156,7 @@ cdef class Pipe:
label (str): The label to add.
RETURNS (int): 0 if label is already present, otherwise 1.
DOCS: https://spacy.io/api/pipe#add_label
DOCS: https://nightly.spacy.io/api/pipe#add_label
"""
raise NotImplementedError(Errors.E931.format(method="add_label", name=self.name))
@ -165,7 +165,7 @@ cdef class Pipe:
RETURNS (thinc.api.Optimizer): The optimizer.
DOCS: https://spacy.io/api/pipe#create_optimizer
DOCS: https://nightly.spacy.io/api/pipe#create_optimizer
"""
return util.create_default_optimizer()
@ -181,7 +181,7 @@ cdef class Pipe:
create_optimizer if it doesn't exist.
RETURNS (thinc.api.Optimizer): The optimizer.
DOCS: https://spacy.io/api/pipe#begin_training
DOCS: https://nightly.spacy.io/api/pipe#begin_training
"""
self.model.initialize()
if sgd is None:
@ -200,7 +200,7 @@ cdef class Pipe:
params (dict): The parameter values to use in the model.
DOCS: https://spacy.io/api/pipe#use_params
DOCS: https://nightly.spacy.io/api/pipe#use_params
"""
with self.model.use_params(params):
yield
@ -211,7 +211,7 @@ cdef class Pipe:
examples (Iterable[Example]): The examples to score.
RETURNS (Dict[str, Any]): The scores.
DOCS: https://spacy.io/api/pipe#score
DOCS: https://nightly.spacy.io/api/pipe#score
"""
return {}
@ -221,7 +221,7 @@ cdef class Pipe:
exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (bytes): The serialized object.
DOCS: https://spacy.io/api/pipe#to_bytes
DOCS: https://nightly.spacy.io/api/pipe#to_bytes
"""
serialize = {}
serialize["cfg"] = lambda: srsly.json_dumps(self.cfg)
@ -236,7 +236,7 @@ cdef class Pipe:
exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (Pipe): The loaded object.
DOCS: https://spacy.io/api/pipe#from_bytes
DOCS: https://nightly.spacy.io/api/pipe#from_bytes
"""
def load_model(b):
@ -259,7 +259,7 @@ cdef class Pipe:
path (str / Path): Path to a directory.
exclude (Iterable[str]): String names of serialization fields to exclude.
DOCS: https://spacy.io/api/pipe#to_disk
DOCS: https://nightly.spacy.io/api/pipe#to_disk
"""
serialize = {}
serialize["cfg"] = lambda p: srsly.write_json(p, self.cfg)
@ -274,7 +274,7 @@ cdef class Pipe:
exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (Pipe): The loaded object.
DOCS: https://spacy.io/api/pipe#from_disk
DOCS: https://nightly.spacy.io/api/pipe#from_disk
"""
def load_model(p):

View File

@ -29,7 +29,7 @@ def make_sentencizer(
class Sentencizer(Pipe):
"""Segment the Doc into sentences using a rule-based strategy.
DOCS: https://spacy.io/api/sentencizer
DOCS: https://nightly.spacy.io/api/sentencizer
"""
default_punct_chars = ['!', '.', '?', '։', '؟', '۔', '܀', '܁', '܂', '߹',
@ -51,7 +51,7 @@ class Sentencizer(Pipe):
serialized with the nlp object.
RETURNS (Sentencizer): The sentencizer component.
DOCS: https://spacy.io/api/sentencizer#init
DOCS: https://nightly.spacy.io/api/sentencizer#init
"""
self.name = name
if punct_chars:
@ -68,7 +68,7 @@ class Sentencizer(Pipe):
doc (Doc): The document to process.
RETURNS (Doc): The processed Doc.
DOCS: https://spacy.io/api/sentencizer#call
DOCS: https://nightly.spacy.io/api/sentencizer#call
"""
start = 0
seen_period = False
@ -94,7 +94,7 @@ class Sentencizer(Pipe):
batch_size (int): The number of documents to buffer.
YIELDS (Doc): Processed documents in order.
DOCS: https://spacy.io/api/sentencizer#pipe
DOCS: https://nightly.spacy.io/api/sentencizer#pipe
"""
for docs in util.minibatch(stream, size=batch_size):
predictions = self.predict(docs)
@ -157,7 +157,7 @@ class Sentencizer(Pipe):
examples (Iterable[Example]): The examples to score.
RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans.
DOCS: https://spacy.io/api/sentencizer#score
DOCS: https://nightly.spacy.io/api/sentencizer#score
"""
validate_examples(examples, "Sentencizer.score")
results = Scorer.score_spans(examples, "sents", **kwargs)
@ -169,7 +169,7 @@ class Sentencizer(Pipe):
RETURNS (bytes): The serialized object.
DOCS: https://spacy.io/api/sentencizer#to_bytes
DOCS: https://nightly.spacy.io/api/sentencizer#to_bytes
"""
return srsly.msgpack_dumps({"punct_chars": list(self.punct_chars)})
@ -179,7 +179,7 @@ class Sentencizer(Pipe):
bytes_data (bytes): The data to load.
RETURNS (Sentencizer): The loaded object.
DOCS: https://spacy.io/api/sentencizer#from_bytes
DOCS: https://nightly.spacy.io/api/sentencizer#from_bytes
"""
cfg = srsly.msgpack_loads(bytes_data)
self.punct_chars = set(cfg.get("punct_chars", self.default_punct_chars))
@ -188,7 +188,7 @@ class Sentencizer(Pipe):
def to_disk(self, path, *, exclude=tuple()):
"""Serialize the sentencizer to disk.
DOCS: https://spacy.io/api/sentencizer#to_disk
DOCS: https://nightly.spacy.io/api/sentencizer#to_disk
"""
path = util.ensure_path(path)
path = path.with_suffix(".json")
@ -198,7 +198,7 @@ class Sentencizer(Pipe):
def from_disk(self, path, *, exclude=tuple()):
"""Load the sentencizer from disk.
DOCS: https://spacy.io/api/sentencizer#from_disk
DOCS: https://nightly.spacy.io/api/sentencizer#from_disk
"""
path = util.ensure_path(path)
path = path.with_suffix(".json")
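A minimal usage sketch for the rule-based Sentencizer above (assuming a blank English pipeline; the sample text is illustrative):

>>> import spacy
>>> nlp = spacy.blank("en")
>>> sentencizer = nlp.add_pipe("sentencizer")
>>> doc = nlp("Hello world. This is a test!")
>>> [sent.text for sent in doc.sents]
['Hello world.', 'This is a test!']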

View File

@ -44,7 +44,7 @@ def make_senter(nlp: Language, name: str, model: Model):
class SentenceRecognizer(Tagger):
"""Pipeline component for sentence segmentation.
DOCS: https://spacy.io/api/sentencerecognizer
DOCS: https://nightly.spacy.io/api/sentencerecognizer
"""
def __init__(self, vocab, model, name="senter"):
"""Initialize a sentence recognizer.
@ -54,7 +54,7 @@ class SentenceRecognizer(Tagger):
name (str): The component instance name, used to add entries to the
losses during training.
DOCS: https://spacy.io/api/sentencerecognizer#init
DOCS: https://nightly.spacy.io/api/sentencerecognizer#init
"""
self.vocab = vocab
self.model = model
@ -76,7 +76,7 @@ class SentenceRecognizer(Tagger):
docs (Iterable[Doc]): The documents to modify.
batch_tag_ids: The IDs to set, produced by SentenceRecognizer.predict.
DOCS: https://spacy.io/api/sentencerecognizer#set_annotations
DOCS: https://nightly.spacy.io/api/sentencerecognizer#set_annotations
"""
if isinstance(docs, Doc):
docs = [docs]
@ -101,7 +101,7 @@ class SentenceRecognizer(Tagger):
scores: Scores representing the model's predictions.
RETURNS (Tuple[float, float]): The loss and the gradient.
DOCS: https://spacy.io/api/sentencerecognizer#get_loss
DOCS: https://nightly.spacy.io/api/sentencerecognizer#get_loss
"""
validate_examples(examples, "SentenceRecognizer.get_loss")
labels = self.labels
@ -135,7 +135,7 @@ class SentenceRecognizer(Tagger):
create_optimizer if it doesn't exist.
RETURNS (thinc.api.Optimizer): The optimizer.
DOCS: https://spacy.io/api/sentencerecognizer#begin_training
DOCS: https://nightly.spacy.io/api/sentencerecognizer#begin_training
"""
self.set_output(len(self.labels))
self.model.initialize()
@ -151,7 +151,7 @@ class SentenceRecognizer(Tagger):
examples (Iterable[Example]): The examples to score.
RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans.
DOCS: https://spacy.io/api/sentencerecognizer#score
DOCS: https://nightly.spacy.io/api/sentencerecognizer#score
"""
validate_examples(examples, "SentenceRecognizer.score")
results = Scorer.score_spans(examples, "sents", **kwargs)
@ -164,7 +164,7 @@ class SentenceRecognizer(Tagger):
exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (bytes): The serialized object.
DOCS: https://spacy.io/api/sentencerecognizer#to_bytes
DOCS: https://nightly.spacy.io/api/sentencerecognizer#to_bytes
"""
serialize = {}
serialize["model"] = self.model.to_bytes
@ -179,7 +179,7 @@ class SentenceRecognizer(Tagger):
exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (Tagger): The loaded SentenceRecognizer.
DOCS: https://spacy.io/api/sentencerecognizer#from_bytes
DOCS: https://nightly.spacy.io/api/sentencerecognizer#from_bytes
"""
def load_model(b):
try:
@ -201,7 +201,7 @@ class SentenceRecognizer(Tagger):
path (str / Path): Path to a directory.
exclude (Iterable[str]): String names of serialization fields to exclude.
DOCS: https://spacy.io/api/sentencerecognizer#to_disk
DOCS: https://nightly.spacy.io/api/sentencerecognizer#to_disk
"""
serialize = {
"vocab": lambda p: self.vocab.to_disk(p),
@ -217,7 +217,7 @@ class SentenceRecognizer(Tagger):
exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (Tagger): The modified SentenceRecognizer object.
DOCS: https://spacy.io/api/sentencerecognizer#from_disk
DOCS: https://nightly.spacy.io/api/sentencerecognizer#from_disk
"""
def load_model(p):
with p.open("rb") as file_:
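The SentenceRecognizer is the trainable counterpart of the rule-based Sentencizer; a minimal sketch of wiring it up (a blank pipeline is assumed, and predictions only become meaningful after training):

>>> import spacy
>>> nlp = spacy.blank("en")
>>> senter = nlp.add_pipe("senter")
>>> senter.labels   # a fixed two-label scheme, ('I', 'S')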

View File

@ -78,7 +78,7 @@ class SimpleNER(Pipe):
def add_label(self, label: str) -> None:
"""Add a new label to the pipe.
label (str): The label to add.
DOCS: https://spacy.io/api/simplener#add_label
DOCS: https://nightly.spacy.io/api/simplener#add_label
"""
if not isinstance(label, str):
raise ValueError(Errors.E187)

View File

@ -58,7 +58,7 @@ def make_tagger(nlp: Language, name: str, model: Model):
class Tagger(Pipe):
"""Pipeline component for part-of-speech tagging.
DOCS: https://spacy.io/api/tagger
DOCS: https://nightly.spacy.io/api/tagger
"""
def __init__(self, vocab, model, name="tagger", *, labels=None):
"""Initialize a part-of-speech tagger.
@ -69,7 +69,7 @@ class Tagger(Pipe):
losses during training.
labels (List): The set of labels. Defaults to None.
DOCS: https://spacy.io/api/tagger#init
DOCS: https://nightly.spacy.io/api/tagger#init
"""
self.vocab = vocab
self.model = model
@ -86,7 +86,7 @@ class Tagger(Pipe):
RETURNS (Tuple[str]): The labels.
DOCS: https://spacy.io/api/tagger#labels
DOCS: https://nightly.spacy.io/api/tagger#labels
"""
return tuple(self.cfg["labels"])
@ -96,7 +96,7 @@ class Tagger(Pipe):
doc (Doc): The document to process.
RETURNS (Doc): The processed Doc.
DOCS: https://spacy.io/api/tagger#call
DOCS: https://nightly.spacy.io/api/tagger#call
"""
tags = self.predict([doc])
self.set_annotations([doc], tags)
@ -111,7 +111,7 @@ class Tagger(Pipe):
batch_size (int): The number of documents to buffer.
YIELDS (Doc): Processed documents in order.
DOCS: https://spacy.io/api/tagger#pipe
DOCS: https://nightly.spacy.io/api/tagger#pipe
"""
for docs in util.minibatch(stream, size=batch_size):
tag_ids = self.predict(docs)
@ -124,7 +124,7 @@ class Tagger(Pipe):
docs (Iterable[Doc]): The documents to predict.
RETURNS: The model's prediction for each document.
DOCS: https://spacy.io/api/tagger#predict
DOCS: https://nightly.spacy.io/api/tagger#predict
"""
if not any(len(doc) for doc in docs):
# Handle cases where there are no tokens in any docs.
@ -153,7 +153,7 @@ class Tagger(Pipe):
docs (Iterable[Doc]): The documents to modify.
batch_tag_ids: The IDs to set, produced by Tagger.predict.
DOCS: https://spacy.io/api/tagger#set_annotations
DOCS: https://nightly.spacy.io/api/tagger#set_annotations
"""
if isinstance(docs, Doc):
docs = [docs]
@ -182,7 +182,7 @@ class Tagger(Pipe):
Updated using the component name as the key.
RETURNS (Dict[str, float]): The updated losses dictionary.
DOCS: https://spacy.io/api/tagger#update
DOCS: https://nightly.spacy.io/api/tagger#update
"""
if losses is None:
losses = {}
@ -220,7 +220,7 @@ class Tagger(Pipe):
Updated using the component name as the key.
RETURNS (Dict[str, float]): The updated losses dictionary.
DOCS: https://spacy.io/api/tagger#rehearse
DOCS: https://nightly.spacy.io/api/tagger#rehearse
"""
validate_examples(examples, "Tagger.rehearse")
docs = [eg.predicted for eg in examples]
@ -247,7 +247,7 @@ class Tagger(Pipe):
scores: Scores representing the model's predictions.
RETURNS (Tuple[float, float]): The loss and the gradient.
DOCS: https://spacy.io/api/tagger#get_loss
DOCS: https://nightly.spacy.io/api/tagger#get_loss
"""
validate_examples(examples, "Tagger.get_loss")
loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False)
@ -269,7 +269,7 @@ class Tagger(Pipe):
create_optimizer if it doesn't exist.
RETURNS (thinc.api.Optimizer): The optimizer.
DOCS: https://spacy.io/api/tagger#begin_training
DOCS: https://nightly.spacy.io/api/tagger#begin_training
"""
if not hasattr(get_examples, "__call__"):
err = Errors.E930.format(name="Tagger", obj=type(get_examples))
@ -307,7 +307,7 @@ class Tagger(Pipe):
label (str): The label to add.
RETURNS (int): 0 if label is already present, otherwise 1.
DOCS: https://spacy.io/api/tagger#add_label
DOCS: https://nightly.spacy.io/api/tagger#add_label
"""
if not isinstance(label, str):
raise ValueError(Errors.E187)
@ -324,7 +324,7 @@ class Tagger(Pipe):
RETURNS (Dict[str, Any]): The scores, produced by
Scorer.score_token_attr for the attributes "tag".
DOCS: https://spacy.io/api/tagger#score
DOCS: https://nightly.spacy.io/api/tagger#score
"""
validate_examples(examples, "Tagger.score")
return Scorer.score_token_attr(examples, "tag", **kwargs)
@ -335,7 +335,7 @@ class Tagger(Pipe):
exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (bytes): The serialized object.
DOCS: https://spacy.io/api/tagger#to_bytes
DOCS: https://nightly.spacy.io/api/tagger#to_bytes
"""
serialize = {}
serialize["model"] = self.model.to_bytes
@ -350,7 +350,7 @@ class Tagger(Pipe):
exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (Tagger): The loaded Tagger.
DOCS: https://spacy.io/api/tagger#from_bytes
DOCS: https://nightly.spacy.io/api/tagger#from_bytes
"""
def load_model(b):
try:
@ -372,7 +372,7 @@ class Tagger(Pipe):
path (str / Path): Path to a directory.
exclude (Iterable[str]): String names of serialization fields to exclude.
DOCS: https://spacy.io/api/tagger#to_disk
DOCS: https://nightly.spacy.io/api/tagger#to_disk
"""
serialize = {
"vocab": lambda p: self.vocab.to_disk(p),
@ -388,7 +388,7 @@ class Tagger(Pipe):
exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (Tagger): The modified Tagger object.
DOCS: https://spacy.io/api/tagger#from_disk
DOCS: https://nightly.spacy.io/api/tagger#from_disk
"""
def load_model(p):
with p.open("rb") as file_:
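A typical update step ties together `add_label`, `begin_training` and `update` (a sketch assuming the v3 nightly training API with `spacy.training.Example`; the toy text and tag are assumptions):

>>> import spacy
>>> from spacy.training import Example
>>> nlp = spacy.blank("en")
>>> tagger = nlp.add_pipe("tagger")
>>> tagger.add_label("NOUN")
1
>>> optimizer = nlp.begin_training()
>>> example = Example.from_dict(nlp.make_doc("cats"), {"tags": ["NOUN"]})
>>> losses = tagger.update([example], sgd=optimizer)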

View File

@ -92,7 +92,7 @@ def make_textcat(
class TextCategorizer(Pipe):
"""Pipeline component for text classification.
DOCS: https://spacy.io/api/textcategorizer
DOCS: https://nightly.spacy.io/api/textcategorizer
"""
def __init__(
@ -111,7 +111,7 @@ class TextCategorizer(Pipe):
losses during training.
labels (Iterable[str]): The labels to use.
DOCS: https://spacy.io/api/textcategorizer#init
DOCS: https://nightly.spacy.io/api/textcategorizer#init
"""
self.vocab = vocab
self.model = model
@ -124,7 +124,7 @@ class TextCategorizer(Pipe):
def labels(self) -> Tuple[str]:
"""RETURNS (Tuple[str]): The labels currently added to the component.
DOCS: https://spacy.io/api/textcategorizer#labels
DOCS: https://nightly.spacy.io/api/textcategorizer#labels
"""
return tuple(self.cfg.setdefault("labels", []))
@ -146,7 +146,7 @@ class TextCategorizer(Pipe):
batch_size (int): The number of documents to buffer.
YIELDS (Doc): Processed documents in order.
DOCS: https://spacy.io/api/textcategorizer#pipe
DOCS: https://nightly.spacy.io/api/textcategorizer#pipe
"""
for docs in util.minibatch(stream, size=batch_size):
scores = self.predict(docs)
@ -159,7 +159,7 @@ class TextCategorizer(Pipe):
docs (Iterable[Doc]): The documents to predict.
RETURNS: The model's prediction for each document.
DOCS: https://spacy.io/api/textcategorizer#predict
DOCS: https://nightly.spacy.io/api/textcategorizer#predict
"""
tensors = [doc.tensor for doc in docs]
if not any(len(doc) for doc in docs):
@ -177,7 +177,7 @@ class TextCategorizer(Pipe):
docs (Iterable[Doc]): The documents to modify.
scores: The scores to set, produced by TextCategorizer.predict.
DOCS: https://spacy.io/api/textcategorizer#set_annotations
DOCS: https://nightly.spacy.io/api/textcategorizer#set_annotations
"""
for i, doc in enumerate(docs):
for j, label in enumerate(self.labels):
@ -204,7 +204,7 @@ class TextCategorizer(Pipe):
Updated using the component name as the key.
RETURNS (Dict[str, float]): The updated losses dictionary.
DOCS: https://spacy.io/api/textcategorizer#update
DOCS: https://nightly.spacy.io/api/textcategorizer#update
"""
if losses is None:
losses = {}
@ -245,7 +245,7 @@ class TextCategorizer(Pipe):
Updated using the component name as the key.
RETURNS (Dict[str, float]): The updated losses dictionary.
DOCS: https://spacy.io/api/textcategorizer#rehearse
DOCS: https://nightly.spacy.io/api/textcategorizer#rehearse
"""
if losses is not None:
losses.setdefault(self.name, 0.0)
@ -289,7 +289,7 @@ class TextCategorizer(Pipe):
scores: Scores representing the model's predictions.
RETURNS (Tuple[float, float]): The loss and the gradient.
DOCS: https://spacy.io/api/textcategorizer#get_loss
DOCS: https://nightly.spacy.io/api/textcategorizer#get_loss
"""
validate_examples(examples, "TextCategorizer.get_loss")
truths, not_missing = self._examples_to_truth(examples)
@ -305,7 +305,7 @@ class TextCategorizer(Pipe):
label (str): The label to add.
RETURNS (int): 0 if label is already present, otherwise 1.
DOCS: https://spacy.io/api/textcategorizer#add_label
DOCS: https://nightly.spacy.io/api/textcategorizer#add_label
"""
if not isinstance(label, str):
raise ValueError(Errors.E187)
@ -343,7 +343,7 @@ class TextCategorizer(Pipe):
create_optimizer if it doesn't exist.
RETURNS (thinc.api.Optimizer): The optimizer.
DOCS: https://spacy.io/api/textcategorizer#begin_training
DOCS: https://nightly.spacy.io/api/textcategorizer#begin_training
"""
if not hasattr(get_examples, "__call__"):
err = Errors.E930.format(name="TextCategorizer", obj=type(get_examples))
@ -378,7 +378,7 @@ class TextCategorizer(Pipe):
positive_label (str): Optional positive label.
RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_cats.
DOCS: https://spacy.io/api/textcategorizer#score
DOCS: https://nightly.spacy.io/api/textcategorizer#score
"""
validate_examples(examples, "TextCategorizer.score")
return Scorer.score_cats(
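A minimal sketch of setting up the TextCategorizer (the labels are illustrative; after training, per-label scores appear in `doc.cats`):

>>> import spacy
>>> nlp = spacy.blank("en")
>>> textcat = nlp.add_pipe("textcat")
>>> textcat.add_label("POSITIVE")
1
>>> textcat.add_label("NEGATIVE")
1
>>> textcat.labels
('POSITIVE', 'NEGATIVE')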

View File

@ -56,7 +56,7 @@ class Tok2Vec(Pipe):
a list of Doc objects as input, and output a list of 2d float arrays.
name (str): The component instance name.
DOCS: https://spacy.io/api/tok2vec#init
DOCS: https://nightly.spacy.io/api/tok2vec#init
"""
self.vocab = vocab
self.model = model
@ -91,7 +91,7 @@ class Tok2Vec(Pipe):
docs (Doc): The Doc to process.
RETURNS (Doc): The processed Doc.
DOCS: https://spacy.io/api/tok2vec#call
DOCS: https://nightly.spacy.io/api/tok2vec#call
"""
tokvecses = self.predict([doc])
self.set_annotations([doc], tokvecses)
@ -106,7 +106,7 @@ class Tok2Vec(Pipe):
batch_size (int): The number of documents to buffer.
YIELDS (Doc): Processed documents in order.
DOCS: https://spacy.io/api/tok2vec#pipe
DOCS: https://nightly.spacy.io/api/tok2vec#pipe
"""
for docs in minibatch(stream, batch_size):
docs = list(docs)
@ -121,7 +121,7 @@ class Tok2Vec(Pipe):
docs (Iterable[Doc]): The documents to predict.
RETURNS: Vector representations for each token in the documents.
DOCS: https://spacy.io/api/tok2vec#predict
DOCS: https://nightly.spacy.io/api/tok2vec#predict
"""
tokvecs = self.model.predict(docs)
batch_id = Tok2VecListener.get_batch_id(docs)
@ -135,7 +135,7 @@ class Tok2Vec(Pipe):
docs (Iterable[Doc]): The documents to modify.
tokvecses: The tensors to set, produced by Tok2Vec.predict.
DOCS: https://spacy.io/api/tok2vec#set_annotations
DOCS: https://nightly.spacy.io/api/tok2vec#set_annotations
"""
for doc, tokvecs in zip(docs, tokvecses):
assert tokvecs.shape[0] == len(doc)
@ -162,7 +162,7 @@ class Tok2Vec(Pipe):
Updated using the component name as the key.
RETURNS (Dict[str, float]): The updated losses dictionary.
DOCS: https://spacy.io/api/tok2vec#update
DOCS: https://nightly.spacy.io/api/tok2vec#update
"""
if losses is None:
losses = {}
@ -220,7 +220,7 @@ class Tok2Vec(Pipe):
create_optimizer if it doesn't exist.
RETURNS (thinc.api.Optimizer): The optimizer.
DOCS: https://spacy.io/api/tok2vec#begin_training
DOCS: https://nightly.spacy.io/api/tok2vec#begin_training
"""
docs = [Doc(self.vocab, words=["hello"])]
self.model.initialize(X=docs)
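A sketch of how the Tok2Vec component feeds `doc.tensor` (assuming a blank pipeline and the default model config; the weights are untrained here, so the vectors are random but correctly shaped):

>>> import spacy
>>> nlp = spacy.blank("en")
>>> tok2vec = nlp.add_pipe("tok2vec")
>>> optimizer = nlp.begin_training()   # initializes the model on sample docs, as above
>>> doc = nlp("Give it back")
>>> doc.tensor.shape                   # one row per token, width set by the model config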

View File

@ -85,7 +85,7 @@ class Scorer:
) -> None:
"""Initialize the Scorer.
DOCS: https://spacy.io/api/scorer#init
DOCS: https://nightly.spacy.io/api/scorer#init
"""
self.nlp = nlp
self.cfg = cfg
@ -101,7 +101,7 @@ class Scorer:
examples (Iterable[Example]): The predicted annotations + correct annotations.
RETURNS (Dict): A dictionary of scores.
DOCS: https://spacy.io/api/scorer#score
DOCS: https://nightly.spacy.io/api/scorer#score
"""
scores = {}
if hasattr(self.nlp.tokenizer, "score"):
@ -121,7 +121,7 @@ class Scorer:
RETURNS (Dict[str, float]): A dictionary containing the scores
token_acc/p/r/f.
DOCS: https://spacy.io/api/scorer#score_tokenization
DOCS: https://nightly.spacy.io/api/scorer#score_tokenization
"""
acc_score = PRFScore()
prf_score = PRFScore()
@ -169,7 +169,7 @@ class Scorer:
RETURNS (Dict[str, float]): A dictionary containing the accuracy score
under the key attr_acc.
DOCS: https://spacy.io/api/scorer#score_token_attr
DOCS: https://nightly.spacy.io/api/scorer#score_token_attr
"""
tag_score = PRFScore()
for example in examples:
@ -263,7 +263,7 @@ class Scorer:
RETURNS (Dict[str, Any]): A dictionary containing the PRF scores under
the keys attr_p/r/f and the per-type PRF scores under attr_per_type.
DOCS: https://spacy.io/api/scorer#score_spans
DOCS: https://nightly.spacy.io/api/scorer#score_spans
"""
score = PRFScore()
score_per_type = dict()
@ -350,7 +350,7 @@ class Scorer:
attr_f_per_type,
attr_auc_per_type
DOCS: https://spacy.io/api/scorer#score_cats
DOCS: https://nightly.spacy.io/api/scorer#score_cats
"""
if threshold is None:
threshold = 0.5 if multi_label else 0.0
@ -467,7 +467,7 @@ class Scorer:
RETURNS (Dict[str, Any]): A dictionary containing the scores:
attr_uas, attr_las, and attr_las_per_type.
DOCS: https://spacy.io/api/scorer#score_deps
DOCS: https://nightly.spacy.io/api/scorer#score_deps
"""
unlabelled = PRFScore()
labelled = PRFScore()
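The individual score_* helpers above are usually reached through `Scorer.score`, which dispatches to each component's `score` method (a sketch assuming the v3 `Example` API; the one-example dataset is a toy assumption):

>>> import spacy
>>> from spacy.scorer import Scorer
>>> from spacy.training import Example
>>> nlp = spacy.blank("en")
>>> scorer = Scorer(nlp)
>>> examples = [Example.from_dict(nlp.make_doc("a b"), {"words": ["a", "b"]})]
>>> scores = scorer.score(examples)   # e.g. tokenization scores for a blank pipeline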

View File

@ -91,7 +91,7 @@ cdef Utf8Str* _allocate(Pool mem, const unsigned char* chars, uint32_t length) e
cdef class StringStore:
"""Look up strings by 64-bit hashes.
DOCS: https://spacy.io/api/stringstore
DOCS: https://nightly.spacy.io/api/stringstore
"""
def __init__(self, strings=None, freeze=False):
"""Create the StringStore.

View File

@ -31,7 +31,7 @@ cdef class Tokenizer:
"""Segment text, and create Doc objects with the discovered segment
boundaries.
DOCS: https://spacy.io/api/tokenizer
DOCS: https://nightly.spacy.io/api/tokenizer
"""
def __init__(self, Vocab vocab, rules=None, prefix_search=None,
suffix_search=None, infix_finditer=None, token_match=None,
@ -54,7 +54,7 @@ cdef class Tokenizer:
EXAMPLE:
>>> tokenizer = Tokenizer(nlp.vocab)
DOCS: https://spacy.io/api/tokenizer#init
DOCS: https://nightly.spacy.io/api/tokenizer#init
"""
self.mem = Pool()
self._cache = PreshMap()
@ -147,7 +147,7 @@ cdef class Tokenizer:
string (str): The string to tokenize.
RETURNS (Doc): A container for linguistic annotations.
DOCS: https://spacy.io/api/tokenizer#call
DOCS: https://nightly.spacy.io/api/tokenizer#call
"""
doc = self._tokenize_affixes(string, True)
self._apply_special_cases(doc)
@ -209,7 +209,7 @@ cdef class Tokenizer:
Defaults to 1000.
YIELDS (Doc): A sequence of Doc objects, in order.
DOCS: https://spacy.io/api/tokenizer#pipe
DOCS: https://nightly.spacy.io/api/tokenizer#pipe
"""
for text in texts:
yield self(text)
@ -529,7 +529,7 @@ cdef class Tokenizer:
and `.end()` methods, denoting the placement of internal segment
separators, e.g. hyphens.
DOCS: https://spacy.io/api/tokenizer#find_infix
DOCS: https://nightly.spacy.io/api/tokenizer#find_infix
"""
if self.infix_finditer is None:
return 0
@ -542,7 +542,7 @@ cdef class Tokenizer:
string (str): The string to segment.
RETURNS (int): The length of the prefix if present, otherwise `None`.
DOCS: https://spacy.io/api/tokenizer#find_prefix
DOCS: https://nightly.spacy.io/api/tokenizer#find_prefix
"""
if self.prefix_search is None:
return 0
@ -556,7 +556,7 @@ cdef class Tokenizer:
string (str): The string to segment.
RETURNS (int): The length of the suffix if present, otherwise `None`.
DOCS: https://spacy.io/api/tokenizer#find_suffix
DOCS: https://nightly.spacy.io/api/tokenizer#find_suffix
"""
if self.suffix_search is None:
return 0
@ -596,7 +596,7 @@ cdef class Tokenizer:
a token and its attributes. The `ORTH` fields of the attributes
must exactly match the string when they are concatenated.
DOCS: https://spacy.io/api/tokenizer#add_special_case
DOCS: https://nightly.spacy.io/api/tokenizer#add_special_case
"""
self._validate_special_case(string, substrings)
substrings = list(substrings)
@ -635,7 +635,7 @@ cdef class Tokenizer:
string (str): The string to tokenize.
RETURNS (list): A list of (pattern_string, token_string) tuples
DOCS: https://spacy.io/api/tokenizer#explain
DOCS: https://nightly.spacy.io/api/tokenizer#explain
"""
prefix_search = self.prefix_search
suffix_search = self.suffix_search
@ -716,7 +716,7 @@ cdef class Tokenizer:
it doesn't exist.
exclude (list): String names of serialization fields to exclude.
DOCS: https://spacy.io/api/tokenizer#to_disk
DOCS: https://nightly.spacy.io/api/tokenizer#to_disk
"""
path = util.ensure_path(path)
with path.open("wb") as file_:
@ -730,7 +730,7 @@ cdef class Tokenizer:
exclude (list): String names of serialization fields to exclude.
RETURNS (Tokenizer): The modified `Tokenizer` object.
DOCS: https://spacy.io/api/tokenizer#from_disk
DOCS: https://nightly.spacy.io/api/tokenizer#from_disk
"""
path = util.ensure_path(path)
with path.open("rb") as file_:
@ -744,7 +744,7 @@ cdef class Tokenizer:
exclude (list): String names of serialization fields to exclude.
RETURNS (bytes): The serialized form of the `Tokenizer` object.
DOCS: https://spacy.io/api/tokenizer#to_bytes
DOCS: https://nightly.spacy.io/api/tokenizer#to_bytes
"""
serializers = {
"vocab": lambda: self.vocab.to_bytes(),
@ -764,7 +764,7 @@ cdef class Tokenizer:
exclude (list): String names of serialization fields to exclude.
RETURNS (Tokenizer): The `Tokenizer` object.
DOCS: https://spacy.io/api/tokenizer#from_bytes
DOCS: https://nightly.spacy.io/api/tokenizer#from_bytes
"""
data = {}
deserializers = {
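A short sketch of `add_special_case` and `explain` from above (the "gimme" split is illustrative; the ORTH values must concatenate back to the original string):

>>> import spacy
>>> from spacy.attrs import ORTH
>>> nlp = spacy.blank("en")
>>> nlp.tokenizer.add_special_case("gimme", [{ORTH: "gim"}, {ORTH: "me"}])
>>> [t.text for t in nlp("gimme that")]
['gim', 'me', 'that']
>>> rules = nlp.tokenizer.explain("gimme!")   # (pattern, token) tuples, e.g. ('SUFFIX', '!')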

View File

@ -24,8 +24,8 @@ from ..strings import get_string_id
cdef class Retokenizer:
"""Helper class for doc.retokenize() context manager.
DOCS: https://spacy.io/api/doc#retokenize
USAGE: https://spacy.io/usage/linguistic-features#retokenization
DOCS: https://nightly.spacy.io/api/doc#retokenize
USAGE: https://nightly.spacy.io/usage/linguistic-features#retokenization
"""
cdef Doc doc
cdef list merges
@ -47,7 +47,7 @@ cdef class Retokenizer:
span (Span): The span to merge.
attrs (dict): Attributes to set on the merged token.
DOCS: https://spacy.io/api/doc#retokenizer.merge
DOCS: https://nightly.spacy.io/api/doc#retokenizer.merge
"""
if (span.start, span.end) in self._spans_to_merge:
return
@ -73,7 +73,7 @@ cdef class Retokenizer:
attrs (dict): Attributes to set on all split tokens. Attribute names
mapped to list of per-token attribute values.
DOCS: https://spacy.io/api/doc#retokenizer.split
DOCS: https://nightly.spacy.io/api/doc#retokenizer.split
"""
if ''.join(orths) != token.text:
raise ValueError(Errors.E117.format(new=''.join(orths), old=token.text))
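A minimal sketch of the retokenization context manager described above (blank pipeline; the attrs are illustrative):

>>> import spacy
>>> nlp = spacy.blank("en")
>>> doc = nlp("I live in New York")
>>> with doc.retokenize() as retokenizer:
...     retokenizer.merge(doc[3:5], attrs={"LEMMA": "New York"})
...
>>> [t.text for t in doc]
['I', 'live', 'in', 'New York']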

View File

@ -61,7 +61,7 @@ class DocBin:
store_user_data (bool): Whether to include the `Doc.user_data`.
docs (Iterable[Doc]): Docs to add.
DOCS: https://spacy.io/api/docbin#init
DOCS: https://nightly.spacy.io/api/docbin#init
"""
attrs = sorted([intify_attr(attr) for attr in attrs])
self.version = "0.1"
@ -86,7 +86,7 @@ class DocBin:
doc (Doc): The Doc object to add.
DOCS: https://spacy.io/api/docbin#add
DOCS: https://nightly.spacy.io/api/docbin#add
"""
array = doc.to_array(self.attrs)
if len(array.shape) == 1:
@ -115,7 +115,7 @@ class DocBin:
vocab (Vocab): The shared vocab.
YIELDS (Doc): The Doc objects.
DOCS: https://spacy.io/api/docbin#get_docs
DOCS: https://nightly.spacy.io/api/docbin#get_docs
"""
for string in self.strings:
vocab[string]
@ -141,7 +141,7 @@ class DocBin:
other (DocBin): The DocBin to merge into the current bin.
DOCS: https://spacy.io/api/docbin#merge
DOCS: https://nightly.spacy.io/api/docbin#merge
"""
if self.attrs != other.attrs:
raise ValueError(Errors.E166.format(current=self.attrs, other=other.attrs))
@ -158,7 +158,7 @@ class DocBin:
RETURNS (bytes): The serialized DocBin.
DOCS: https://spacy.io/api/docbin#to_bytes
DOCS: https://nightly.spacy.io/api/docbin#to_bytes
"""
for tokens in self.tokens:
assert len(tokens.shape) == 2, tokens.shape # this should never happen
@ -185,7 +185,7 @@ class DocBin:
bytes_data (bytes): The data to load from.
RETURNS (DocBin): The loaded DocBin.
DOCS: https://spacy.io/api/docbin#from_bytes
DOCS: https://nightly.spacy.io/api/docbin#from_bytes
"""
msg = srsly.msgpack_loads(zlib.decompress(bytes_data))
self.attrs = msg["attrs"]
@ -211,7 +211,7 @@ class DocBin:
path (str / Path): The file path.
DOCS: https://spacy.io/api/docbin#to_disk
DOCS: https://nightly.spacy.io/api/docbin#to_disk
"""
path = ensure_path(path)
with path.open("wb") as file_:
@ -223,7 +223,7 @@ class DocBin:
path (str / Path): The file path.
RETURNS (DocBin): The loaded DocBin.
DOCS: https://spacy.io/api/docbin#to_disk
DOCS: https://nightly.spacy.io/api/docbin#from_disk
"""
path = ensure_path(path)
with path.open("rb") as file_:

View File

@ -104,7 +104,7 @@ cdef class Doc:
>>> from spacy.tokens import Doc
>>> doc = Doc(nlp.vocab, words=["hello", "world", "!"], spaces=[True, False, False])
DOCS: https://spacy.io/api/doc
DOCS: https://nightly.spacy.io/api/doc
"""
@classmethod
@ -118,8 +118,8 @@ cdef class Doc:
method (callable): Optional method for method extension.
force (bool): Force overwriting existing attribute.
DOCS: https://spacy.io/api/doc#set_extension
USAGE: https://spacy.io/usage/processing-pipelines#custom-components-attributes
DOCS: https://nightly.spacy.io/api/doc#set_extension
USAGE: https://nightly.spacy.io/usage/processing-pipelines#custom-components-attributes
"""
if cls.has_extension(name) and not kwargs.get("force", False):
raise ValueError(Errors.E090.format(name=name, obj="Doc"))
@ -132,7 +132,7 @@ cdef class Doc:
name (str): Name of the extension.
RETURNS (tuple): A `(default, method, getter, setter)` tuple.
DOCS: https://spacy.io/api/doc#get_extension
DOCS: https://nightly.spacy.io/api/doc#get_extension
"""
return Underscore.doc_extensions.get(name)
@ -143,7 +143,7 @@ cdef class Doc:
name (str): Name of the extension.
RETURNS (bool): Whether the extension has been registered.
DOCS: https://spacy.io/api/doc#has_extension
DOCS: https://nightly.spacy.io/api/doc#has_extension
"""
return name in Underscore.doc_extensions
@ -155,7 +155,7 @@ cdef class Doc:
RETURNS (tuple): A `(default, method, getter, setter)` tuple of the
removed extension.
DOCS: https://spacy.io/api/doc#remove_extension
DOCS: https://nightly.spacy.io/api/doc#remove_extension
"""
if not cls.has_extension(name):
raise ValueError(Errors.E046.format(name=name))
@ -173,7 +173,7 @@ cdef class Doc:
it is not. If `None`, defaults to `[True]*len(words)`
user_data (dict or None): Optional extra data to attach to the Doc.
DOCS: https://spacy.io/api/doc#init
DOCS: https://nightly.spacy.io/api/doc#init
"""
self.vocab = vocab
size = max(20, (len(words) if words is not None else 0))
@ -288,7 +288,7 @@ cdef class Doc:
You can use negative indices and open-ended ranges, which have
their normal Python semantics.
DOCS: https://spacy.io/api/doc#getitem
DOCS: https://nightly.spacy.io/api/doc#getitem
"""
if isinstance(i, slice):
start, stop = normalize_slice(len(self), i.start, i.stop, i.step)
@ -305,7 +305,7 @@ cdef class Doc:
than-Python speeds are required, you can instead access the annotations
as a numpy array, or access the underlying C data directly from Cython.
DOCS: https://spacy.io/api/doc#iter
DOCS: https://nightly.spacy.io/api/doc#iter
"""
cdef int i
for i in range(self.length):
@ -316,7 +316,7 @@ cdef class Doc:
RETURNS (int): The number of tokens in the document.
DOCS: https://spacy.io/api/doc#len
DOCS: https://nightly.spacy.io/api/doc#len
"""
return self.length
@ -349,7 +349,7 @@ cdef class Doc:
the span.
RETURNS (Span): The newly constructed object.
DOCS: https://spacy.io/api/doc#char_span
DOCS: https://nightly.spacy.io/api/doc#char_span
"""
if not isinstance(label, int):
label = self.vocab.strings.add(label)
@ -374,7 +374,7 @@ cdef class Doc:
`Span`, `Token` and `Lexeme` objects.
RETURNS (float): A scalar similarity score. Higher is more similar.
DOCS: https://spacy.io/api/doc#similarity
DOCS: https://nightly.spacy.io/api/doc#similarity
"""
if "similarity" in self.user_hooks:
return self.user_hooks["similarity"](self, other)
@ -407,7 +407,7 @@ cdef class Doc:
RETURNS (bool): Whether a word vector is associated with the object.
DOCS: https://spacy.io/api/doc#has_vector
DOCS: https://nightly.spacy.io/api/doc#has_vector
"""
if "has_vector" in self.user_hooks:
return self.user_hooks["has_vector"](self)
@ -425,7 +425,7 @@ cdef class Doc:
RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
representing the document's semantics.
DOCS: https://spacy.io/api/doc#vector
DOCS: https://nightly.spacy.io/api/doc#vector
"""
def __get__(self):
if "vector" in self.user_hooks:
@ -453,7 +453,7 @@ cdef class Doc:
RETURNS (float): The L2 norm of the vector representation.
DOCS: https://spacy.io/api/doc#vector_norm
DOCS: https://nightly.spacy.io/api/doc#vector_norm
"""
def __get__(self):
if "vector_norm" in self.user_hooks:
@ -493,7 +493,7 @@ cdef class Doc:
RETURNS (tuple): Entities in the document, one `Span` per entity.
DOCS: https://spacy.io/api/doc#ents
DOCS: https://nightly.spacy.io/api/doc#ents
"""
def __get__(self):
cdef int i
@ -584,7 +584,7 @@ cdef class Doc:
YIELDS (Span): Noun chunks in the document.
DOCS: https://spacy.io/api/doc#noun_chunks
DOCS: https://nightly.spacy.io/api/doc#noun_chunks
"""
# Accumulate the result before beginning to iterate over it. This
@ -609,7 +609,7 @@ cdef class Doc:
YIELDS (Span): Sentences in the document.
DOCS: https://spacy.io/api/doc#sents
DOCS: https://nightly.spacy.io/api/doc#sents
"""
if not self.is_sentenced:
raise ValueError(Errors.E030)
@ -722,7 +722,7 @@ cdef class Doc:
attr_id (int): The attribute ID to key the counts.
RETURNS (dict): A dictionary mapping attributes to integer counts.
DOCS: https://spacy.io/api/doc#count_by
DOCS: https://nightly.spacy.io/api/doc#count_by
"""
cdef int i
cdef attr_t attr
@ -777,7 +777,7 @@ cdef class Doc:
array (numpy.ndarray[ndim=2, dtype='int32']): The attribute values.
RETURNS (Doc): Itself.
DOCS: https://spacy.io/api/doc#from_array
DOCS: https://nightly.spacy.io/api/doc#from_array
"""
# Handle scalar/list inputs of strings/ints for py_attr_ids
# See also #3064
@ -872,7 +872,7 @@ cdef class Doc:
attrs (list): Optional list of attribute ID ints or attribute name strings.
RETURNS (Doc): A doc that contains the concatenated docs, or None if no docs were given.
DOCS: https://spacy.io/api/doc#from_docs
DOCS: https://nightly.spacy.io/api/doc#from_docs
"""
if not docs:
return None
@ -953,7 +953,7 @@ cdef class Doc:
RETURNS (np.array[ndim=2, dtype=numpy.int32]): LCA matrix with shape
(n, n), where n = len(self).
DOCS: https://spacy.io/api/doc#get_lca_matrix
DOCS: https://nightly.spacy.io/api/doc#get_lca_matrix
"""
return numpy.asarray(_get_lca_matrix(self, 0, len(self)))
@ -987,7 +987,7 @@ cdef class Doc:
it doesn't exist. Paths may be either strings or Path-like objects.
exclude (Iterable[str]): String names of serialization fields to exclude.
DOCS: https://spacy.io/api/doc#to_disk
DOCS: https://nightly.spacy.io/api/doc#to_disk
"""
path = util.ensure_path(path)
with path.open("wb") as file_:
@ -1002,7 +1002,7 @@ cdef class Doc:
exclude (list): String names of serialization fields to exclude.
RETURNS (Doc): The modified `Doc` object.
DOCS: https://spacy.io/api/doc#from_disk
DOCS: https://nightly.spacy.io/api/doc#from_disk
"""
path = util.ensure_path(path)
with path.open("rb") as file_:
@ -1016,7 +1016,7 @@ cdef class Doc:
RETURNS (bytes): A losslessly serialized copy of the `Doc`, including
all annotations.
DOCS: https://spacy.io/api/doc#to_bytes
DOCS: https://nightly.spacy.io/api/doc#to_bytes
"""
return srsly.msgpack_dumps(self.to_dict(exclude=exclude))
@ -1027,7 +1027,7 @@ cdef class Doc:
exclude (list): String names of serialization fields to exclude.
RETURNS (Doc): Itself.
DOCS: https://spacy.io/api/doc#from_bytes
DOCS: https://nightly.spacy.io/api/doc#from_bytes
"""
return self.from_dict(srsly.msgpack_loads(bytes_data), exclude=exclude)
@ -1038,7 +1038,7 @@ cdef class Doc:
RETURNS (bytes): A losslessly serialized copy of the `Doc`, including
all annotations.
DOCS: https://spacy.io/api/doc#to_bytes
DOCS: https://nightly.spacy.io/api/doc#to_bytes
"""
array_head = [LENGTH, SPACY, LEMMA, ENT_IOB, ENT_TYPE, ENT_ID, NORM, ENT_KB_ID]
if self.is_tagged:
@ -1086,7 +1086,7 @@ cdef class Doc:
exclude (list): String names of serialization fields to exclude.
RETURNS (Doc): Itself.
DOCS: https://spacy.io/api/doc#from_dict
DOCS: https://nightly.spacy.io/api/doc#from_dict
"""
if self.length != 0:
raise ValueError(Errors.E033.format(length=self.length))
@ -1166,8 +1166,8 @@ cdef class Doc:
retokenization are invalidated, although they may accidentally
continue to work.
DOCS: https://spacy.io/api/doc#retokenize
USAGE: https://spacy.io/usage/linguistic-features#retokenization
DOCS: https://nightly.spacy.io/api/doc#retokenize
USAGE: https://nightly.spacy.io/usage/linguistic-features#retokenization
"""
return Retokenizer(self)
@ -1202,7 +1202,7 @@ cdef class Doc:
be added to an "_" key in the data, e.g. "_": {"foo": "bar"}.
RETURNS (dict): The data in spaCy's JSON format.
DOCS: https://spacy.io/api/doc#to_json
DOCS: https://nightly.spacy.io/api/doc#to_json
"""
data = {"text": self.text}
if self.is_nered:
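Two of the Doc APIs above in miniature, `char_span` and the extension mechanism (the extension name "is_greeting" is illustrative):

>>> import spacy
>>> from spacy.tokens import Doc
>>> nlp = spacy.blank("en")
>>> doc = nlp("I like New York")
>>> doc.char_span(7, 15, label="GPE").text
'New York'
>>> Doc.set_extension("is_greeting", default=False)
>>> doc._.is_greeting
False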

View File

@ -27,7 +27,7 @@ from .underscore import Underscore, get_ext_args
cdef class Span:
"""A slice from a Doc object.
DOCS: https://spacy.io/api/span
DOCS: https://nightly.spacy.io/api/span
"""
@classmethod
def set_extension(cls, name, **kwargs):
@ -40,8 +40,8 @@ cdef class Span:
method (callable): Optional method for method extension.
force (bool): Force overwriting existing attribute.
DOCS: https://spacy.io/api/span#set_extension
USAGE: https://spacy.io/usage/processing-pipelines#custom-components-attributes
DOCS: https://nightly.spacy.io/api/span#set_extension
USAGE: https://nightly.spacy.io/usage/processing-pipelines#custom-components-attributes
"""
if cls.has_extension(name) and not kwargs.get("force", False):
raise ValueError(Errors.E090.format(name=name, obj="Span"))
@ -54,7 +54,7 @@ cdef class Span:
name (str): Name of the extension.
RETURNS (tuple): A `(default, method, getter, setter)` tuple.
DOCS: https://spacy.io/api/span#get_extension
DOCS: https://nightly.spacy.io/api/span#get_extension
"""
return Underscore.span_extensions.get(name)
@ -65,7 +65,7 @@ cdef class Span:
name (str): Name of the extension.
RETURNS (bool): Whether the extension has been registered.
DOCS: https://spacy.io/api/span#has_extension
DOCS: https://nightly.spacy.io/api/span#has_extension
"""
return name in Underscore.span_extensions
@ -77,7 +77,7 @@ cdef class Span:
RETURNS (tuple): A `(default, method, getter, setter)` tuple of the
removed extension.
DOCS: https://spacy.io/api/span#remove_extension
DOCS: https://nightly.spacy.io/api/span#remove_extension
"""
if not cls.has_extension(name):
raise ValueError(Errors.E046.format(name=name))
@ -95,7 +95,7 @@ cdef class Span:
vector (ndarray[ndim=1, dtype='float32']): A meaning representation
of the span.
DOCS: https://spacy.io/api/span#init
DOCS: https://nightly.spacy.io/api/span#init
"""
if not (0 <= start <= end <= len(doc)):
raise IndexError(Errors.E035.format(start=start, end=end, length=len(doc)))
@ -151,7 +151,7 @@ cdef class Span:
RETURNS (int): The number of tokens in the span.
DOCS: https://spacy.io/api/span#len
DOCS: https://nightly.spacy.io/api/span#len
"""
self._recalculate_indices()
if self.end < self.start:
@ -168,7 +168,7 @@ cdef class Span:
the span to get.
RETURNS (Token or Span): The token at `span[i]`.
DOCS: https://spacy.io/api/span#getitem
DOCS: https://nightly.spacy.io/api/span#getitem
"""
self._recalculate_indices()
if isinstance(i, slice):
@ -189,7 +189,7 @@ cdef class Span:
YIELDS (Token): A `Token` object.
DOCS: https://spacy.io/api/span#iter
DOCS: https://nightly.spacy.io/api/span#iter
"""
self._recalculate_indices()
for i in range(self.start, self.end):
@ -210,7 +210,7 @@ cdef class Span:
copy_user_data (bool): Whether or not to copy the original doc's user data.
RETURNS (Doc): The `Doc` copy of the span.
DOCS: https://spacy.io/api/span#as_doc
DOCS: https://nightly.spacy.io/api/span#as_doc
"""
# TODO: make copy_user_data a keyword-only argument (Python 3 only)
words = [t.text for t in self]
@ -292,7 +292,7 @@ cdef class Span:
RETURNS (np.array[ndim=2, dtype=numpy.int32]): LCA matrix with shape
(n, n), where n = len(self).
DOCS: https://spacy.io/api/span#get_lca_matrix
DOCS: https://nightly.spacy.io/api/span#get_lca_matrix
"""
return numpy.asarray(_get_lca_matrix(self.doc, self.start, self.end))
@ -304,7 +304,7 @@ cdef class Span:
`Span`, `Token` and `Lexeme` objects.
RETURNS (float): A scalar similarity score. Higher is more similar.
DOCS: https://spacy.io/api/span#similarity
DOCS: https://nightly.spacy.io/api/span#similarity
"""
if "similarity" in self.doc.user_span_hooks:
return self.doc.user_span_hooks["similarity"](self, other)
@ -400,7 +400,7 @@ cdef class Span:
RETURNS (tuple): Entities in the span, one `Span` per entity.
DOCS: https://spacy.io/api/span#ents
DOCS: https://nightly.spacy.io/api/span#ents
"""
ents = []
for ent in self.doc.ents:
@ -415,7 +415,7 @@ cdef class Span:
RETURNS (bool): Whether a word vector is associated with the object.
DOCS: https://spacy.io/api/span#has_vector
DOCS: https://nightly.spacy.io/api/span#has_vector
"""
if "has_vector" in self.doc.user_span_hooks:
return self.doc.user_span_hooks["has_vector"](self)
@ -434,7 +434,7 @@ cdef class Span:
RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
representing the span's semantics.
DOCS: https://spacy.io/api/span#vector
DOCS: https://nightly.spacy.io/api/span#vector
"""
if "vector" in self.doc.user_span_hooks:
return self.doc.user_span_hooks["vector"](self)
@ -448,7 +448,7 @@ cdef class Span:
RETURNS (float): The L2 norm of the vector representation.
DOCS: https://spacy.io/api/span#vector_norm
DOCS: https://nightly.spacy.io/api/span#vector_norm
"""
if "vector_norm" in self.doc.user_span_hooks:
return self.doc.user_span_hooks["vector"](self)
@ -508,7 +508,7 @@ cdef class Span:
YIELDS (Span): Base noun-phrase `Span` objects.
DOCS: https://spacy.io/api/span#noun_chunks
DOCS: https://nightly.spacy.io/api/span#noun_chunks
"""
if not self.doc.is_parsed:
raise ValueError(Errors.E029)
@ -533,7 +533,7 @@ cdef class Span:
RETURNS (Token): The root token.
DOCS: https://spacy.io/api/span#root
DOCS: https://nightly.spacy.io/api/span#root
"""
self._recalculate_indices()
if "root" in self.doc.user_span_hooks:
@ -590,7 +590,7 @@ cdef class Span:
RETURNS (tuple): A tuple of Token objects.
DOCS: https://spacy.io/api/span#lefts
DOCS: https://nightly.spacy.io/api/span#lefts
"""
return self.root.conjuncts
@ -601,7 +601,7 @@ cdef class Span:
YIELDS (Token): A left-child of a token of the span.
DOCS: https://spacy.io/api/span#lefts
DOCS: https://nightly.spacy.io/api/span#lefts
"""
for token in reversed(self): # Reverse, so we get tokens in order
for left in token.lefts:
@ -615,7 +615,7 @@ cdef class Span:
YIELDS (Token): A right-child of a token of the span.
DOCS: https://spacy.io/api/span#rights
DOCS: https://nightly.spacy.io/api/span#rights
"""
for token in self:
for right in token.rights:
@ -630,7 +630,7 @@ cdef class Span:
RETURNS (int): The number of leftward immediate children of the
span, in the syntactic dependency parse.
DOCS: https://spacy.io/api/span#n_lefts
DOCS: https://nightly.spacy.io/api/span#n_lefts
"""
return len(list(self.lefts))
@ -642,7 +642,7 @@ cdef class Span:
RETURNS (int): The number of rightward immediate children of the
span, in the syntactic dependency parse.
DOCS: https://spacy.io/api/span#n_rights
DOCS: https://nightly.spacy.io/api/span#n_rights
"""
return len(list(self.rights))
@ -652,7 +652,7 @@ cdef class Span:
YIELDS (Token): A token within the span, or a descendant from it.
DOCS: https://spacy.io/api/span#subtree
DOCS: https://nightly.spacy.io/api/span#subtree
"""
for word in self.lefts:
yield from word.subtree
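A small sketch of slicing and `as_doc` (parse-dependent properties such as `root`, `lefts` and `rights` additionally require a dependency parse, which a blank pipeline does not provide):

>>> import spacy
>>> nlp = spacy.blank("en")
>>> doc = nlp("I like New York in Autumn")
>>> span = doc[2:4]
>>> span.text
'New York'
>>> span.as_doc().text
'New York'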

View File

@ -30,7 +30,7 @@ cdef class Token:
"""An individual token i.e. a word, punctuation symbol, whitespace,
etc.
DOCS: https://spacy.io/api/token
DOCS: https://nightly.spacy.io/api/token
"""
@classmethod
def set_extension(cls, name, **kwargs):
@ -43,8 +43,8 @@ cdef class Token:
method (callable): Optional method for method extension.
force (bool): Force overwriting existing attribute.
DOCS: https://spacy.io/api/token#set_extension
USAGE: https://spacy.io/usage/processing-pipelines#custom-components-attributes
DOCS: https://nightly.spacy.io/api/token#set_extension
USAGE: https://nightly.spacy.io/usage/processing-pipelines#custom-components-attributes
"""
if cls.has_extension(name) and not kwargs.get("force", False):
raise ValueError(Errors.E090.format(name=name, obj="Token"))
@ -57,7 +57,7 @@ cdef class Token:
name (str): Name of the extension.
RETURNS (tuple): A `(default, method, getter, setter)` tuple.
DOCS: https://spacy.io/api/token#get_extension
DOCS: https://nightly.spacy.io/api/token#get_extension
"""
return Underscore.token_extensions.get(name)
@ -68,7 +68,7 @@ cdef class Token:
name (str): Name of the extension.
RETURNS (bool): Whether the extension has been registered.
DOCS: https://spacy.io/api/token#has_extension
DOCS: https://nightly.spacy.io/api/token#has_extension
"""
return name in Underscore.token_extensions
@ -80,7 +80,7 @@ cdef class Token:
RETURNS (tuple): A `(default, method, getter, setter)` tuple of the
removed extension.
DOCS: https://spacy.io/api/token#remove_extension
DOCS: https://nightly.spacy.io/api/token#remove_extension
"""
if not cls.has_extension(name):
raise ValueError(Errors.E046.format(name=name))
@ -93,7 +93,7 @@ cdef class Token:
doc (Doc): The parent document.
offset (int): The index of the token within the document.
DOCS: https://spacy.io/api/token#init
DOCS: https://nightly.spacy.io/api/token#init
"""
self.vocab = vocab
self.doc = doc
@ -108,7 +108,7 @@ cdef class Token:
RETURNS (int): The number of unicode characters in the token.
DOCS: https://spacy.io/api/token#len
DOCS: https://nightly.spacy.io/api/token#len
"""
return self.c.lex.length
@ -171,7 +171,7 @@ cdef class Token:
flag_id (int): The ID of the flag attribute.
RETURNS (bool): Whether the flag is set.
DOCS: https://spacy.io/api/token#check_flag
DOCS: https://nightly.spacy.io/api/token#check_flag
"""
return Lexeme.c_check_flag(self.c.lex, flag_id)
@ -181,7 +181,7 @@ cdef class Token:
i (int): The relative position of the token to get. Defaults to 1.
RETURNS (Token): The token at position `self.doc[self.i+i]`.
DOCS: https://spacy.io/api/token#nbor
DOCS: https://nightly.spacy.io/api/token#nbor
"""
if self.i+i < 0 or (self.i+i >= len(self.doc)):
raise IndexError(Errors.E042.format(i=self.i, j=i, length=len(self.doc)))
@ -195,7 +195,7 @@ cdef class Token:
`Span`, `Token` and `Lexeme` objects.
RETURNS (float): A scalar similarity score. Higher is more similar.
DOCS: https://spacy.io/api/token#similarity
DOCS: https://nightly.spacy.io/api/token#similarity
"""
if "similarity" in self.doc.user_token_hooks:
return self.doc.user_token_hooks["similarity"](self, other)
@ -373,7 +373,7 @@ cdef class Token:
RETURNS (bool): Whether a word vector is associated with the object.
DOCS: https://spacy.io/api/token#has_vector
DOCS: https://nightly.spacy.io/api/token#has_vector
"""
if "has_vector" in self.doc.user_token_hooks:
return self.doc.user_token_hooks["has_vector"](self)
@ -388,7 +388,7 @@ cdef class Token:
RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
representing the token's semantics.
DOCS: https://spacy.io/api/token#vector
DOCS: https://nightly.spacy.io/api/token#vector
"""
if "vector" in self.doc.user_token_hooks:
return self.doc.user_token_hooks["vector"](self)
@ -403,7 +403,7 @@ cdef class Token:
RETURNS (float): The L2 norm of the vector representation.
DOCS: https://spacy.io/api/token#vector_norm
DOCS: https://nightly.spacy.io/api/token#vector_norm
"""
if "vector_norm" in self.doc.user_token_hooks:
return self.doc.user_token_hooks["vector_norm"](self)
@ -426,7 +426,7 @@ cdef class Token:
RETURNS (int): The number of leftward immediate children of the
word, in the syntactic dependency parse.
DOCS: https://spacy.io/api/token#n_lefts
DOCS: https://nightly.spacy.io/api/token#n_lefts
"""
return self.c.l_kids
@ -438,7 +438,7 @@ cdef class Token:
RETURNS (int): The number of rightward immediate children of the
word, in the syntactic dependency parse.
DOCS: https://spacy.io/api/token#n_rights
DOCS: https://nightly.spacy.io/api/token#n_rights
"""
return self.c.r_kids
@ -470,7 +470,7 @@ cdef class Token:
RETURNS (bool / None): Whether the token starts a sentence.
None if unknown.
DOCS: https://spacy.io/api/token#is_sent_start
DOCS: https://nightly.spacy.io/api/token#is_sent_start
"""
def __get__(self):
if self.c.sent_start == 0:
@ -499,7 +499,7 @@ cdef class Token:
RETURNS (bool / None): Whether the token ends a sentence.
None if unknown.
DOCS: https://spacy.io/api/token#is_sent_end
DOCS: https://nightly.spacy.io/api/token#is_sent_end
"""
def __get__(self):
if self.i + 1 == len(self.doc):
@ -521,7 +521,7 @@ cdef class Token:
YIELDS (Token): A left-child of the token.
DOCS: https://spacy.io/api/token#lefts
DOCS: https://nightly.spacy.io/api/token#lefts
"""
cdef int nr_iter = 0
cdef const TokenC* ptr = self.c - (self.i - self.c.l_edge)
@ -541,7 +541,7 @@ cdef class Token:
YIELDS (Token): A right-child of the token.
DOCS: https://spacy.io/api/token#rights
DOCS: https://nightly.spacy.io/api/token#rights
"""
cdef const TokenC* ptr = self.c + (self.c.r_edge - self.i)
tokens = []
@ -563,7 +563,7 @@ cdef class Token:
YIELDS (Token): A child token such that `child.head==self`.
DOCS: https://spacy.io/api/token#children
DOCS: https://nightly.spacy.io/api/token#children
"""
yield from self.lefts
yield from self.rights
@ -576,7 +576,7 @@ cdef class Token:
YIELDS (Token): A descendant token such that
`self.is_ancestor(descendant) or token == self`.
DOCS: https://spacy.io/api/token#subtree
DOCS: https://nightly.spacy.io/api/token#subtree
"""
for word in self.lefts:
yield from word.subtree
@ -607,7 +607,7 @@ cdef class Token:
YIELDS (Token): A sequence of ancestor tokens such that
`ancestor.is_ancestor(self)`.
DOCS: https://spacy.io/api/token#ancestors
DOCS: https://nightly.spacy.io/api/token#ancestors
"""
cdef const TokenC* head_ptr = self.c
# Guard against infinite loop, no token can have
@ -625,7 +625,7 @@ cdef class Token:
descendant (Token): Another token.
RETURNS (bool): Whether this token is the ancestor of the descendant.
DOCS: https://spacy.io/api/token#is_ancestor
DOCS: https://nightly.spacy.io/api/token#is_ancestor
"""
if self.doc is not descendant.doc:
return False
@ -729,7 +729,7 @@ cdef class Token:
RETURNS (tuple): The coordinated tokens.
DOCS: https://spacy.io/api/token#conjuncts
DOCS: https://nightly.spacy.io/api/token#conjuncts
"""
cdef Token word, child
if "conjuncts" in self.doc.user_token_hooks:

View File

@ -44,7 +44,7 @@ cdef class Vectors:
the table need to be assigned - so len(list(vectors.keys())) may be
greater or smaller than vectors.shape[0].
DOCS: https://spacy.io/api/vectors
DOCS: https://nightly.spacy.io/api/vectors
"""
cdef public object name
cdef public object data
@ -59,7 +59,7 @@ cdef class Vectors:
keys (iterable): A sequence of keys, aligned with the data.
name (str): A name to identify the vectors table.
DOCS: https://spacy.io/api/vectors#init
DOCS: https://nightly.spacy.io/api/vectors#init
"""
self.name = name
if data is None:
@ -83,7 +83,7 @@ cdef class Vectors:
RETURNS (tuple): A `(rows, dims)` pair.
DOCS: https://spacy.io/api/vectors#shape
DOCS: https://nightly.spacy.io/api/vectors#shape
"""
return self.data.shape
@ -93,7 +93,7 @@ cdef class Vectors:
RETURNS (int): The vector size.
DOCS: https://spacy.io/api/vectors#size
DOCS: https://nightly.spacy.io/api/vectors#size
"""
return self.data.shape[0] * self.data.shape[1]
@ -103,7 +103,7 @@ cdef class Vectors:
RETURNS (bool): `True` if no slots are available for new keys.
DOCS: https://spacy.io/api/vectors#is_full
DOCS: https://nightly.spacy.io/api/vectors#is_full
"""
return self._unset.size() == 0
@ -114,7 +114,7 @@ cdef class Vectors:
RETURNS (int): The number of keys in the table.
DOCS: https://spacy.io/api/vectors#n_keys
DOCS: https://nightly.spacy.io/api/vectors#n_keys
"""
return len(self.key2row)
@ -127,7 +127,7 @@ cdef class Vectors:
key (int): The key to get the vector for.
RETURNS (ndarray): The vector for the key.
DOCS: https://spacy.io/api/vectors#getitem
DOCS: https://nightly.spacy.io/api/vectors#getitem
"""
i = self.key2row[key]
if i is None:
@ -141,7 +141,7 @@ cdef class Vectors:
key (int): The key to set the vector for.
vector (ndarray): The vector to set.
DOCS: https://spacy.io/api/vectors#setitem
DOCS: https://nightly.spacy.io/api/vectors#setitem
"""
i = self.key2row[key]
self.data[i] = vector
@ -153,7 +153,7 @@ cdef class Vectors:
YIELDS (int): A key in the table.
DOCS: https://spacy.io/api/vectors#iter
DOCS: https://nightly.spacy.io/api/vectors#iter
"""
yield from self.key2row
@ -162,7 +162,7 @@ cdef class Vectors:
RETURNS (int): The number of vectors in the data.
DOCS: https://spacy.io/api/vectors#len
DOCS: https://nightly.spacy.io/api/vectors#len
"""
return self.data.shape[0]
@ -172,7 +172,7 @@ cdef class Vectors:
key (int): The key to check.
RETURNS (bool): Whether the key has a vector entry.
DOCS: https://spacy.io/api/vectors#contains
DOCS: https://nightly.spacy.io/api/vectors#contains
"""
return key in self.key2row
@ -189,7 +189,7 @@ cdef class Vectors:
inplace (bool): Reallocate the memory.
RETURNS (list): The removed items as a list of `(key, row)` tuples.
DOCS: https://spacy.io/api/vectors#resize
DOCS: https://nightly.spacy.io/api/vectors#resize
"""
xp = get_array_module(self.data)
if inplace:
@ -224,7 +224,7 @@ cdef class Vectors:
YIELDS (ndarray): A vector in the table.
DOCS: https://spacy.io/api/vectors#values
DOCS: https://nightly.spacy.io/api/vectors#values
"""
for row, vector in enumerate(range(self.data.shape[0])):
if not self._unset.count(row):
@ -235,7 +235,7 @@ cdef class Vectors:
YIELDS (tuple): A key/vector pair.
DOCS: https://spacy.io/api/vectors#items
DOCS: https://nightly.spacy.io/api/vectors#items
"""
for key, row in self.key2row.items():
yield key, self.data[row]
@ -281,7 +281,7 @@ cdef class Vectors:
row (int / None): The row number of a vector to map the key to.
RETURNS (int): The row the vector was added to.
DOCS: https://spacy.io/api/vectors#add
DOCS: https://nightly.spacy.io/api/vectors#add
"""
# use int for all keys and rows in key2row for more efficient access
# and serialization
@ -368,7 +368,7 @@ cdef class Vectors:
path (str / Path): A path to a directory, which will be created if
it doesn't exist.
DOCS: https://spacy.io/api/vectors#to_disk
DOCS: https://nightly.spacy.io/api/vectors#to_disk
"""
xp = get_array_module(self.data)
if xp is numpy:
@ -396,7 +396,7 @@ cdef class Vectors:
path (str / Path): Directory path, string or Path-like object.
RETURNS (Vectors): The modified object.
DOCS: https://spacy.io/api/vectors#from_disk
DOCS: https://nightly.spacy.io/api/vectors#from_disk
"""
def load_key2row(path):
if path.exists():
@ -432,7 +432,7 @@ cdef class Vectors:
exclude (list): String names of serialization fields to exclude.
RETURNS (bytes): The serialized form of the `Vectors` object.
DOCS: https://spacy.io/api/vectors#to_bytes
DOCS: https://nightly.spacy.io/api/vectors#to_bytes
"""
def serialize_weights():
if hasattr(self.data, "to_bytes"):
@ -453,7 +453,7 @@ cdef class Vectors:
exclude (list): String names of serialization fields to exclude.
RETURNS (Vectors): The `Vectors` object.
DOCS: https://spacy.io/api/vectors#from_bytes
DOCS: https://nightly.spacy.io/api/vectors#from_bytes
"""
def deserialize_weights(b):
if hasattr(self.data, "from_bytes"):

View File

@ -54,7 +54,7 @@ cdef class Vocab:
instance also provides access to the `StringStore`, and owns underlying
C-data that is shared between `Doc` objects.
DOCS: https://spacy.io/api/vocab
DOCS: https://nightly.spacy.io/api/vocab
"""
def __init__(self, lex_attr_getters=None, strings=tuple(), lookups=None,
oov_prob=-20., vectors_name=None, writing_system={},
@ -117,7 +117,7 @@ cdef class Vocab:
available bit will be chosen.
RETURNS (int): The integer ID by which the flag value can be checked.
DOCS: https://spacy.io/api/vocab#add_flag
DOCS: https://nightly.spacy.io/api/vocab#add_flag
"""
if flag_id == -1:
for bit in range(1, 64):
@ -201,7 +201,7 @@ cdef class Vocab:
string (unicode): The ID string.
RETURNS (bool): Whether the string has an entry in the vocabulary.
DOCS: https://spacy.io/api/vocab#contains
DOCS: https://nightly.spacy.io/api/vocab#contains
"""
cdef hash_t int_key
if isinstance(key, bytes):
@ -218,7 +218,7 @@ cdef class Vocab:
YIELDS (Lexeme): An entry in the vocabulary.
DOCS: https://spacy.io/api/vocab#iter
DOCS: https://nightly.spacy.io/api/vocab#iter
"""
cdef attr_t key
cdef size_t addr
@ -241,7 +241,7 @@ cdef class Vocab:
>>> apple = nlp.vocab.strings["apple"]
>>> assert nlp.vocab[apple] == nlp.vocab[u"apple"]
DOCS: https://spacy.io/api/vocab#getitem
DOCS: https://nightly.spacy.io/api/vocab#getitem
"""
cdef attr_t orth
if isinstance(id_or_string, unicode):
@ -309,7 +309,7 @@ cdef class Vocab:
word was mapped to, and `score` the similarity score between the
two words.
DOCS: https://spacy.io/api/vocab#prune_vectors
DOCS: https://nightly.spacy.io/api/vocab#prune_vectors
"""
xp = get_array_module(self.vectors.data)
# Make prob negative so it sorts by rank ascending
@ -349,7 +349,7 @@ cdef class Vocab:
and shape determined by the `vocab.vectors` instance. Usually, a
numpy ndarray of shape (300,) and dtype float32.
DOCS: https://spacy.io/api/vocab#get_vector
DOCS: https://nightly.spacy.io/api/vocab#get_vector
"""
if isinstance(orth, str):
orth = self.strings.add(orth)
@ -396,7 +396,7 @@ cdef class Vocab:
orth (int / unicode): The word.
vector (numpy.ndarray[ndim=1, dtype='float32']): The vector to set.
DOCS: https://spacy.io/api/vocab#set_vector
DOCS: https://nightly.spacy.io/api/vocab#set_vector
"""
if isinstance(orth, str):
orth = self.strings.add(orth)
@ -418,7 +418,7 @@ cdef class Vocab:
orth (int / unicode): The word.
RETURNS (bool): Whether the word has a vector.
DOCS: https://spacy.io/api/vocab#has_vector
DOCS: https://nightly.spacy.io/api/vocab#has_vector
"""
if isinstance(orth, str):
orth = self.strings.add(orth)
@ -431,7 +431,7 @@ cdef class Vocab:
it doesn't exist.
exclude (list): String names of serialization fields to exclude.
DOCS: https://spacy.io/api/vocab#to_disk
DOCS: https://nightly.spacy.io/api/vocab#to_disk
"""
path = util.ensure_path(path)
if not path.exists():
@ -452,7 +452,7 @@ cdef class Vocab:
exclude (list): String names of serialization fields to exclude.
RETURNS (Vocab): The modified `Vocab` object.
DOCS: https://spacy.io/api/vocab#to_disk
DOCS: https://nightly.spacy.io/api/vocab#from_disk
"""
path = util.ensure_path(path)
getters = ["strings", "vectors"]
@ -477,7 +477,7 @@ cdef class Vocab:
exclude (list): String names of serialization fields to exclude.
RETURNS (bytes): The serialized form of the `Vocab` object.
DOCS: https://spacy.io/api/vocab#to_bytes
DOCS: https://nightly.spacy.io/api/vocab#to_bytes
"""
def serialize_vectors():
if self.vectors is None:
@ -499,7 +499,7 @@ cdef class Vocab:
exclude (list): String names of serialization fields to exclude.
RETURNS (Vocab): The `Vocab` object.
DOCS: https://spacy.io/api/vocab#from_bytes
DOCS: https://nightly.spacy.io/api/vocab#from_bytes
"""
def deserialize_vectors(b):
if self.vectors is None:
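A short sketch of the vector accessors on the Vocab (the word and vector width are illustrative; `set_vector` grows the underlying table as needed):

>>> import numpy
>>> import spacy
>>> nlp = spacy.blank("en")
>>> nlp.vocab.set_vector("apple", numpy.random.uniform(-1, 1, (300,)))
>>> nlp.vocab.has_vector("apple")
True
>>> vector = nlp.vocab.get_vector("apple")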