mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-11 17:56:30 +03:00
Replace links to nightly docs [ci skip]
This commit is contained in:
parent
b26a3daa9a
commit
d0c3775712
|
@ -29,7 +29,7 @@ COMMAND = "python -m spacy"
|
|||
NAME = "spacy"
|
||||
HELP = """spaCy Command-line Interface
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/cli
|
||||
DOCS: https://spacy.io/api/cli
|
||||
"""
|
||||
PROJECT_HELP = f"""Command-line interface for spaCy projects and templates.
|
||||
You'd typically start by cloning a project template to a local directory and
|
||||
|
|
|
@ -64,7 +64,7 @@ def convert_cli(
|
|||
is written to stdout, so you can pipe them forward to a JSON file:
|
||||
$ spacy convert some_file.conllu --file-type json > some_file.json
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/cli#convert
|
||||
DOCS: https://spacy.io/api/cli#convert
|
||||
"""
|
||||
if isinstance(file_type, FileTypes):
|
||||
# We get an instance of the FileTypes from the CLI so we need its string value
|
||||
|
@ -268,6 +268,6 @@ def _get_converter(msg, converter, input_path):
|
|||
msg.warn(
|
||||
"Can't automatically detect NER format. "
|
||||
"Conversion may not succeed. "
|
||||
"See https://nightly.spacy.io/api/cli#convert"
|
||||
"See https://spacy.io/api/cli#convert"
|
||||
)
|
||||
return converter
|
||||
|
|
|
@ -34,7 +34,7 @@ def debug_config_cli(
|
|||
as command line options. For instance, --training.batch_size 128 overrides
|
||||
the value of "batch_size" in the block "[training]".
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/cli#debug-config
|
||||
DOCS: https://spacy.io/api/cli#debug-config
|
||||
"""
|
||||
overrides = parse_config_overrides(ctx.args)
|
||||
import_code(code_path)
|
||||
|
|
|
@ -50,7 +50,7 @@ def debug_data_cli(
|
|||
useful stats, and can help you find problems like invalid entity annotations,
|
||||
cyclic dependencies, low data labels and more.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/cli#debug-data
|
||||
DOCS: https://spacy.io/api/cli#debug-data
|
||||
"""
|
||||
if ctx.command.name == "debug-data":
|
||||
msg.warn(
|
||||
|
|
|
@ -40,7 +40,7 @@ def debug_model_cli(
|
|||
Analyze a Thinc model implementation. Includes checks for internal structure
|
||||
and activations during training.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/cli#debug-model
|
||||
DOCS: https://spacy.io/api/cli#debug-model
|
||||
"""
|
||||
setup_gpu(use_gpu)
|
||||
layers = string_to_list(layers, intify=True)
|
||||
|
|
|
@ -28,7 +28,7 @@ def download_cli(
|
|||
additional arguments provided to this command will be passed to `pip install`
|
||||
on package installation.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/cli#download
|
||||
DOCS: https://spacy.io/api/cli#download
|
||||
AVAILABLE PACKAGES: https://spacy.io/models
|
||||
"""
|
||||
download(model, direct, *ctx.args)
|
||||
|
@ -80,7 +80,7 @@ def get_compatibility() -> dict:
|
|||
f"Couldn't fetch compatibility table. Please find a package for your spaCy "
|
||||
f"installation (v{about.__version__}), and download it manually. "
|
||||
f"For more details, see the documentation: "
|
||||
f"https://nightly.spacy.io/usage/models",
|
||||
f"https://spacy.io/usage/models",
|
||||
exits=1,
|
||||
)
|
||||
comp_table = r.json()
|
||||
|
|
|
@ -36,7 +36,7 @@ def evaluate_cli(
|
|||
dependency parses in an HTML file, set the output directory as the
|
||||
displacy_path argument.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/cli#evaluate
|
||||
DOCS: https://spacy.io/api/cli#evaluate
|
||||
"""
|
||||
import_code(code_path)
|
||||
evaluate(
|
||||
|
|
|
@ -23,7 +23,7 @@ def info_cli(
|
|||
print its meta information. Flag --markdown prints details in Markdown for easy
|
||||
copy-pasting to GitHub issues.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/cli#info
|
||||
DOCS: https://spacy.io/api/cli#info
|
||||
"""
|
||||
exclude = string_to_list(exclude)
|
||||
info(model, markdown=markdown, silent=silent, exclude=exclude)
|
||||
|
|
|
@ -41,7 +41,7 @@ def init_config_cli(
|
|||
optimal settings for your use case. This includes the choice of architecture,
|
||||
pretrained weights and related hyperparameters.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/cli#init-config
|
||||
DOCS: https://spacy.io/api/cli#init-config
|
||||
"""
|
||||
if isinstance(optimize, Optimizations): # instance of enum from the CLI
|
||||
optimize = optimize.value
|
||||
|
@ -78,9 +78,9 @@ def init_fill_config_cli(
|
|||
from the default config and will create all objects, check the registered
|
||||
functions for their default values and update the base config. This command
|
||||
can be used with a config generated via the training quickstart widget:
|
||||
https://nightly.spacy.io/usage/training#quickstart
|
||||
https://spacy.io/usage/training#quickstart
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/cli#init-fill-config
|
||||
DOCS: https://spacy.io/api/cli#init-fill-config
|
||||
"""
|
||||
fill_config(output_file, base_path, pretraining=pretraining, diff=diff)
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@ def package_cli(
|
|||
registered functions like pipeline components), they are copied into the
|
||||
package and imported in the __init__.py.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/cli#package
|
||||
DOCS: https://spacy.io/api/cli#package
|
||||
"""
|
||||
create_sdist, create_wheel = get_build_formats(string_to_list(build))
|
||||
code_paths = [Path(p.strip()) for p in string_to_list(code_paths)]
|
||||
|
|
|
@ -44,7 +44,7 @@ def pretrain_cli(
|
|||
all settings are the same between pretraining and training. Ideally,
|
||||
this is done by using the same config file for both commands.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/cli#pretrain
|
||||
DOCS: https://spacy.io/api/cli#pretrain
|
||||
"""
|
||||
config_overrides = parse_config_overrides(ctx.args)
|
||||
import_code(code_path)
|
||||
|
|
|
@ -30,7 +30,7 @@ def profile_cli(
|
|||
It can either be provided as a JSONL file, or be read from sys.stdin.
|
||||
If no input file is specified, the IMDB dataset is loaded via Thinc.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/cli#debug-profile
|
||||
DOCS: https://spacy.io/api/cli#debug-profile
|
||||
"""
|
||||
if ctx.parent.command.name == NAME: # called as top-level command
|
||||
msg.warn(
|
||||
|
|
|
@ -22,7 +22,7 @@ def project_assets_cli(
|
|||
provided in the project.yml, the file is only downloaded if no local file
|
||||
with the same checksum exists.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/cli#project-assets
|
||||
DOCS: https://spacy.io/api/cli#project-assets
|
||||
"""
|
||||
project_assets(project_dir, sparse_checkout=sparse_checkout)
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@ def project_clone_cli(
|
|||
defaults to the official spaCy template repo, but can be customized
|
||||
(including using a private repo).
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/cli#project-clone
|
||||
DOCS: https://spacy.io/api/cli#project-clone
|
||||
"""
|
||||
if dest is None:
|
||||
dest = Path.cwd() / Path(name).parts[-1]
|
||||
|
|
|
@ -5,7 +5,7 @@ from ...util import working_dir
|
|||
from .._util import project_cli, Arg, Opt, PROJECT_FILE, load_project_config
|
||||
|
||||
|
||||
DOCS_URL = "https://nightly.spacy.io"
|
||||
DOCS_URL = "https://spacy.io"
|
||||
INTRO = f"""> ⚠️ This project template uses the new [**spaCy v3.0**]({DOCS_URL}), which
|
||||
> is currently available as a nightly pre-release. You can install it from pip as `spacy-nightly`:
|
||||
> `pip install spacy-nightly`. Make sure to use a fresh virtual environment."""
|
||||
|
@ -44,7 +44,7 @@ def project_document_cli(
|
|||
auto-generated section and only the auto-generated docs will be replaced
|
||||
when you re-run the command.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/cli#project-document
|
||||
DOCS: https://spacy.io/api/cli#project-document
|
||||
"""
|
||||
project_document(project_dir, output_file, no_emoji=no_emoji)
|
||||
|
||||
|
|
|
@ -34,7 +34,7 @@ def project_update_dvc_cli(
|
|||
workflow is used. The DVC config will only be updated if the project.yml
|
||||
changed.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/cli#project-dvc
|
||||
DOCS: https://spacy.io/api/cli#project-dvc
|
||||
"""
|
||||
project_update_dvc(project_dir, workflow, verbose=verbose, force=force)
|
||||
|
||||
|
|
|
@ -19,7 +19,7 @@ def project_pull_cli(
|
|||
A storage can be anything that the smart-open library can upload to, e.g.
|
||||
AWS, Google Cloud Storage, SSH, local directories etc.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/cli#project-pull
|
||||
DOCS: https://spacy.io/api/cli#project-pull
|
||||
"""
|
||||
for url, output_path in project_pull(project_dir, remote):
|
||||
if url is not None:
|
||||
|
|
|
@ -18,7 +18,7 @@ def project_push_cli(
|
|||
the smart-open library can upload to, e.g. AWS, Google Cloud Storage, SSH,
|
||||
local directories etc.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/cli#project-push
|
||||
DOCS: https://spacy.io/api/cli#project-push
|
||||
"""
|
||||
for output_path, url in project_push(project_dir, remote):
|
||||
if url is None:
|
||||
|
|
|
@ -28,7 +28,7 @@ def project_run_cli(
|
|||
commands define dependencies and/or outputs, they will only be re-run if
|
||||
state has changed.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/cli#project-run
|
||||
DOCS: https://spacy.io/api/cli#project-run
|
||||
"""
|
||||
if show_help or not subcommand:
|
||||
print_run_help(project_dir, subcommand)
|
||||
|
|
|
@ -37,7 +37,7 @@ def train_cli(
|
|||
used to register custom functions and architectures that can then be
|
||||
referenced in the config.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/cli#train
|
||||
DOCS: https://spacy.io/api/cli#train
|
||||
"""
|
||||
util.logger.setLevel(logging.DEBUG if verbose else logging.INFO)
|
||||
# Make sure all files and paths exist if they are needed
|
||||
|
|
|
@ -17,7 +17,7 @@ def validate_cli():
|
|||
if the installed packages are compatible and shows upgrade instructions if
|
||||
available. Should be run after `pip install -U spacy`.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/cli#validate
|
||||
DOCS: https://spacy.io/api/cli#validate
|
||||
"""
|
||||
validate()
|
||||
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
"""
|
||||
spaCy's built in visualization suite for dependencies and named entities.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/top-level#displacy
|
||||
USAGE: https://nightly.spacy.io/usage/visualizers
|
||||
DOCS: https://spacy.io/api/top-level#displacy
|
||||
USAGE: https://spacy.io/usage/visualizers
|
||||
"""
|
||||
from typing import Union, Iterable, Optional, Dict, Any, Callable
|
||||
import warnings
|
||||
|
@ -37,8 +37,8 @@ def render(
|
|||
manual (bool): Don't parse `Doc` and instead expect a dict/list of dicts.
|
||||
RETURNS (str): Rendered HTML markup.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/top-level#displacy.render
|
||||
USAGE: https://nightly.spacy.io/usage/visualizers
|
||||
DOCS: https://spacy.io/api/top-level#displacy.render
|
||||
USAGE: https://spacy.io/usage/visualizers
|
||||
"""
|
||||
factories = {
|
||||
"dep": (DependencyRenderer, parse_deps),
|
||||
|
@ -88,8 +88,8 @@ def serve(
|
|||
port (int): Port to serve visualisation.
|
||||
host (str): Host to serve visualisation.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/top-level#displacy.serve
|
||||
USAGE: https://nightly.spacy.io/usage/visualizers
|
||||
DOCS: https://spacy.io/api/top-level#displacy.serve
|
||||
USAGE: https://spacy.io/usage/visualizers
|
||||
"""
|
||||
from wsgiref import simple_server
|
||||
|
||||
|
|
|
@ -20,7 +20,7 @@ class Warnings:
|
|||
"generate a dependency visualization for it. Make sure the Doc "
|
||||
"was processed with a model that supports dependency parsing, and "
|
||||
"not just a language class like `English()`. For more info, see "
|
||||
"the docs:\nhttps://nightly.spacy.io/usage/models")
|
||||
"the docs:\nhttps://spacy.io/usage/models")
|
||||
W006 = ("No entities to visualize found in Doc object. If this is "
|
||||
"surprising to you, make sure the Doc was processed using a model "
|
||||
"that supports named entity recognition, and check the `doc.ents` "
|
||||
|
@ -86,7 +86,7 @@ class Warnings:
|
|||
"the config block to replace its token-to-vector listener with a copy "
|
||||
"and make it independent. For example, `replace_listeners = "
|
||||
"[\"model.tok2vec\"]` See the documentation for details: "
|
||||
"https://nightly.spacy.io/usage/training#config-components-listeners")
|
||||
"https://spacy.io/usage/training#config-components-listeners")
|
||||
W087 = ("Component '{name}' will be (re)trained, but the component '{listener}' "
|
||||
"depends on it via a listener and is frozen. This means that the "
|
||||
"performance of '{listener}' will be degraded. You can either freeze "
|
||||
|
@ -95,12 +95,12 @@ class Warnings:
|
|||
"the config block to replace its token-to-vector listener with a copy "
|
||||
"and make it independent. For example, `replace_listeners = "
|
||||
"[\"model.tok2vec\"]` See the documentation for details: "
|
||||
"https://nightly.spacy.io/usage/training#config-components-listeners")
|
||||
"https://spacy.io/usage/training#config-components-listeners")
|
||||
W088 = ("The pipeline component {name} implements a `begin_training` "
|
||||
"method, which won't be called by spaCy. As of v3.0, `begin_training` "
|
||||
"has been renamed to `initialize`, so you likely want to rename the "
|
||||
"component method. See the documentation for details: "
|
||||
"https://nightly.spacy.io/api/language#initialize")
|
||||
"https://spacy.io/api/language#initialize")
|
||||
W089 = ("As of spaCy v3.0, the `nlp.begin_training` method has been renamed "
|
||||
"to `nlp.initialize`.")
|
||||
W090 = ("Could not locate any {format} files in path '{path}'.")
|
||||
|
@ -180,7 +180,7 @@ class Errors:
|
|||
E010 = ("Word vectors set to length 0. This may be because you don't have "
|
||||
"a model installed or loaded, or because your model doesn't "
|
||||
"include word vectors. For more info, see the docs:\n"
|
||||
"https://nightly.spacy.io/usage/models")
|
||||
"https://spacy.io/usage/models")
|
||||
E011 = ("Unknown operator: '{op}'. Options: {opts}")
|
||||
E012 = ("Cannot add pattern for zero tokens to matcher.\nKey: {key}")
|
||||
E016 = ("MultitaskObjective target should be function or one of: dep, "
|
||||
|
@ -211,7 +211,7 @@ class Errors:
|
|||
E028 = ("`words` expects a list of unicode strings, but got bytes instance: {value}")
|
||||
E029 = ("`noun_chunks` requires the dependency parse, which requires a "
|
||||
"statistical model to be installed and loaded. For more info, see "
|
||||
"the documentation:\nhttps://nightly.spacy.io/usage/models")
|
||||
"the documentation:\nhttps://spacy.io/usage/models")
|
||||
E030 = ("Sentence boundaries unset. You can add the 'sentencizer' "
|
||||
"component to the pipeline with: `nlp.add_pipe('sentencizer')`. "
|
||||
"Alternatively, add the dependency parser or sentence recognizer, "
|
||||
|
@ -318,7 +318,7 @@ class Errors:
|
|||
E102 = ("Can't merge non-disjoint spans. '{token}' is already part of "
|
||||
"tokens to merge. If you want to find the longest non-overlapping "
|
||||
"spans, you can use the util.filter_spans helper:\n"
|
||||
"https://nightly.spacy.io/api/top-level#util.filter_spans")
|
||||
"https://spacy.io/api/top-level#util.filter_spans")
|
||||
E103 = ("Trying to set conflicting doc.ents: '{span1}' and '{span2}'. A "
|
||||
"token can only be part of one entity, so make sure the entities "
|
||||
"you're setting don't overlap.")
|
||||
|
@ -536,9 +536,9 @@ class Errors:
|
|||
"solve this, remove the existing directories in the output directory.")
|
||||
E902 = ("The sentence-per-line IOB/IOB2 file is not formatted correctly. "
|
||||
"Try checking whitespace and delimiters. See "
|
||||
"https://nightly.spacy.io/api/cli#convert")
|
||||
"https://spacy.io/api/cli#convert")
|
||||
E903 = ("The token-per-line NER file is not formatted correctly. Try checking "
|
||||
"whitespace and delimiters. See https://nightly.spacy.io/api/cli#convert")
|
||||
"whitespace and delimiters. See https://spacy.io/api/cli#convert")
|
||||
E904 = ("Cannot initialize StaticVectors layer: nO dimension unset. This "
|
||||
"dimension refers to the output width, after the linear projection "
|
||||
"has been applied.")
|
||||
|
|
|
@ -23,7 +23,7 @@ cdef class Candidate:
|
|||
algorithm which will disambiguate the various candidates to the correct one.
|
||||
Each candidate (alias, entity) pair is assigned to a certain prior probability.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/kb/#candidate_init
|
||||
DOCS: https://spacy.io/api/kb/#candidate_init
|
||||
"""
|
||||
|
||||
def __init__(self, KnowledgeBase kb, entity_hash, entity_freq, entity_vector, alias_hash, prior_prob):
|
||||
|
@ -81,7 +81,7 @@ cdef class KnowledgeBase:
|
|||
"""A `KnowledgeBase` instance stores unique identifiers for entities and their textual aliases,
|
||||
to support entity linking of named entities to real-world concepts.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/kb
|
||||
DOCS: https://spacy.io/api/kb
|
||||
"""
|
||||
|
||||
def __init__(self, Vocab vocab, entity_vector_length):
|
||||
|
|
|
@ -104,7 +104,7 @@ class Language:
|
|||
object and processing pipeline.
|
||||
lang (str): Two-letter language ID, i.e. ISO code.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/language
|
||||
DOCS: https://spacy.io/api/language
|
||||
"""
|
||||
|
||||
Defaults = BaseDefaults
|
||||
|
@ -141,7 +141,7 @@ class Language:
|
|||
returns a tokenizer.
|
||||
batch_size (int): Default batch size for pipe and evaluate.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/language#init
|
||||
DOCS: https://spacy.io/api/language#init
|
||||
"""
|
||||
# We're only calling this to import all factories provided via entry
|
||||
# points. The factory decorator applied to these functions takes care
|
||||
|
@ -194,7 +194,7 @@ class Language:
|
|||
|
||||
RETURNS (Dict[str, Any]): The meta.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/language#meta
|
||||
DOCS: https://spacy.io/api/language#meta
|
||||
"""
|
||||
spacy_version = util.get_model_version_range(about.__version__)
|
||||
if self.vocab.lang:
|
||||
|
@ -235,7 +235,7 @@ class Language:
|
|||
|
||||
RETURNS (thinc.api.Config): The config.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/language#config
|
||||
DOCS: https://spacy.io/api/language#config
|
||||
"""
|
||||
self._config.setdefault("nlp", {})
|
||||
self._config.setdefault("training", {})
|
||||
|
@ -444,7 +444,7 @@ class Language:
|
|||
the score won't be shown in the logs or be weighted.
|
||||
func (Optional[Callable]): Factory function if not used as a decorator.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/language#factory
|
||||
DOCS: https://spacy.io/api/language#factory
|
||||
"""
|
||||
if not isinstance(name, str):
|
||||
raise ValueError(Errors.E963.format(decorator="factory"))
|
||||
|
@ -524,7 +524,7 @@ class Language:
|
|||
Used for pipeline analysis.
|
||||
func (Optional[Callable]): Factory function if not used as a decorator.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/language#component
|
||||
DOCS: https://spacy.io/api/language#component
|
||||
"""
|
||||
if name is not None and not isinstance(name, str):
|
||||
raise ValueError(Errors.E963.format(decorator="component"))
|
||||
|
@ -590,7 +590,7 @@ class Language:
|
|||
name (str): Name of pipeline component to get.
|
||||
RETURNS (callable): The pipeline component.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/language#get_pipe
|
||||
DOCS: https://spacy.io/api/language#get_pipe
|
||||
"""
|
||||
for pipe_name, component in self._components:
|
||||
if pipe_name == name:
|
||||
|
@ -619,7 +619,7 @@ class Language:
|
|||
arguments and types expected by the factory.
|
||||
RETURNS (Callable[[Doc], Doc]): The pipeline component.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/language#create_pipe
|
||||
DOCS: https://spacy.io/api/language#create_pipe
|
||||
"""
|
||||
name = name if name is not None else factory_name
|
||||
if not isinstance(config, dict):
|
||||
|
@ -740,7 +740,7 @@ class Language:
|
|||
arguments and types expected by the factory.
|
||||
RETURNS (Callable[[Doc], Doc]): The pipeline component.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/language#add_pipe
|
||||
DOCS: https://spacy.io/api/language#add_pipe
|
||||
"""
|
||||
if not isinstance(factory_name, str):
|
||||
bad_val = repr(factory_name)
|
||||
|
@ -838,7 +838,7 @@ class Language:
|
|||
name (str): Name of the component.
|
||||
RETURNS (bool): Whether a component of the name exists in the pipeline.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/language#has_pipe
|
||||
DOCS: https://spacy.io/api/language#has_pipe
|
||||
"""
|
||||
return name in self.pipe_names
|
||||
|
||||
|
@ -860,7 +860,7 @@ class Language:
|
|||
arguments and types expected by the factory.
|
||||
RETURNS (Callable[[Doc], Doc]): The new pipeline component.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/language#replace_pipe
|
||||
DOCS: https://spacy.io/api/language#replace_pipe
|
||||
"""
|
||||
if name not in self.pipe_names:
|
||||
raise ValueError(Errors.E001.format(name=name, opts=self.pipe_names))
|
||||
|
@ -891,7 +891,7 @@ class Language:
|
|||
old_name (str): Name of the component to rename.
|
||||
new_name (str): New name of the component.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/language#rename_pipe
|
||||
DOCS: https://spacy.io/api/language#rename_pipe
|
||||
"""
|
||||
if old_name not in self.component_names:
|
||||
raise ValueError(
|
||||
|
@ -916,7 +916,7 @@ class Language:
|
|||
name (str): Name of the component to remove.
|
||||
RETURNS (tuple): A `(name, component)` tuple of the removed component.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/language#remove_pipe
|
||||
DOCS: https://spacy.io/api/language#remove_pipe
|
||||
"""
|
||||
if name not in self.component_names:
|
||||
raise ValueError(Errors.E001.format(name=name, opts=self.component_names))
|
||||
|
@ -972,7 +972,7 @@ class Language:
|
|||
keyword arguments for specific components.
|
||||
RETURNS (Doc): A container for accessing the annotations.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/language#call
|
||||
DOCS: https://spacy.io/api/language#call
|
||||
"""
|
||||
doc = self.make_doc(text)
|
||||
if component_cfg is None:
|
||||
|
@ -1023,7 +1023,7 @@ class Language:
|
|||
disable (str or iterable): The name(s) of the pipes to disable
|
||||
enable (str or iterable): The name(s) of the pipes to enable - all others will be disabled
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/language#select_pipes
|
||||
DOCS: https://spacy.io/api/language#select_pipes
|
||||
"""
|
||||
if enable is None and disable is None:
|
||||
raise ValueError(Errors.E991)
|
||||
|
@ -1081,7 +1081,7 @@ class Language:
|
|||
exclude (Iterable[str]): Names of components that shouldn't be updated.
|
||||
RETURNS (Dict[str, float]): The updated losses dictionary
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/language#update
|
||||
DOCS: https://spacy.io/api/language#update
|
||||
"""
|
||||
if _ is not None:
|
||||
raise ValueError(Errors.E989)
|
||||
|
@ -1144,7 +1144,7 @@ class Language:
|
|||
>>> raw_batch = [Example.from_dict(nlp.make_doc(text), {}) for text in next(raw_text_batches)]
|
||||
>>> nlp.rehearse(raw_batch)
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/language#rehearse
|
||||
DOCS: https://spacy.io/api/language#rehearse
|
||||
"""
|
||||
if len(examples) == 0:
|
||||
return
|
||||
|
@ -1199,7 +1199,7 @@ class Language:
|
|||
provided, will be created using the .create_optimizer() method.
|
||||
RETURNS (thinc.api.Optimizer): The optimizer.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/language#initialize
|
||||
DOCS: https://spacy.io/api/language#initialize
|
||||
"""
|
||||
if get_examples is None:
|
||||
util.logger.debug(
|
||||
|
@ -1266,7 +1266,7 @@ class Language:
|
|||
|
||||
RETURNS (Optimizer): The optimizer.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/language#resume_training
|
||||
DOCS: https://spacy.io/api/language#resume_training
|
||||
"""
|
||||
ops = get_current_ops()
|
||||
if self.vocab.vectors.data.shape[1] >= 1:
|
||||
|
@ -1293,7 +1293,7 @@ class Language:
|
|||
Function that deals with a failing batch of documents. This callable function should take in
|
||||
the component's name, the component itself, the offending batch of documents, and the exception
|
||||
that was thrown.
|
||||
DOCS: https://nightly.spacy.io/api/language#set_error_handler
|
||||
DOCS: https://spacy.io/api/language#set_error_handler
|
||||
"""
|
||||
self.default_error_handler = error_handler
|
||||
for name, pipe in self.pipeline:
|
||||
|
@ -1322,7 +1322,7 @@ class Language:
|
|||
|
||||
RETURNS (Scorer): The scorer containing the evaluation results.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/language#evaluate
|
||||
DOCS: https://spacy.io/api/language#evaluate
|
||||
"""
|
||||
examples = list(examples)
|
||||
validate_examples(examples, "Language.evaluate")
|
||||
|
@ -1377,7 +1377,7 @@ class Language:
|
|||
>>> with nlp.use_params(optimizer.averages):
|
||||
>>> nlp.to_disk("/tmp/checkpoint")
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/language#use_params
|
||||
DOCS: https://spacy.io/api/language#use_params
|
||||
"""
|
||||
if not params:
|
||||
yield
|
||||
|
@ -1424,7 +1424,7 @@ class Language:
|
|||
n_process (int): Number of processors to process texts. If -1, set to `multiprocessing.cpu_count()`.
|
||||
YIELDS (Doc): Documents in the order of the original text.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/language#pipe
|
||||
DOCS: https://spacy.io/api/language#pipe
|
||||
"""
|
||||
if n_process == -1:
|
||||
n_process = mp.cpu_count()
|
||||
|
@ -1568,7 +1568,7 @@ class Language:
|
|||
the types expected by the factory.
|
||||
RETURNS (Language): The initialized Language class.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/language#from_config
|
||||
DOCS: https://spacy.io/api/language#from_config
|
||||
"""
|
||||
if auto_fill:
|
||||
config = Config(
|
||||
|
@ -1712,7 +1712,7 @@ class Language:
|
|||
either be an empty list to not replace any listeners, or a complete
|
||||
(!) list of the paths to all listener layers used by the model.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/language#replace_listeners
|
||||
DOCS: https://spacy.io/api/language#replace_listeners
|
||||
"""
|
||||
if tok2vec_name not in self.pipe_names:
|
||||
err = Errors.E889.format(
|
||||
|
@ -1782,7 +1782,7 @@ class Language:
|
|||
it doesn't exist.
|
||||
exclude (list): Names of components or serialization fields to exclude.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/language#to_disk
|
||||
DOCS: https://spacy.io/api/language#to_disk
|
||||
"""
|
||||
path = util.ensure_path(path)
|
||||
serializers = {}
|
||||
|
@ -1811,7 +1811,7 @@ class Language:
|
|||
exclude (list): Names of components or serialization fields to exclude.
|
||||
RETURNS (Language): The modified `Language` object.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/language#from_disk
|
||||
DOCS: https://spacy.io/api/language#from_disk
|
||||
"""
|
||||
|
||||
def deserialize_meta(path: Path) -> None:
|
||||
|
@ -1859,7 +1859,7 @@ class Language:
|
|||
exclude (list): Names of components or serialization fields to exclude.
|
||||
RETURNS (bytes): The serialized form of the `Language` object.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/language#to_bytes
|
||||
DOCS: https://spacy.io/api/language#to_bytes
|
||||
"""
|
||||
serializers = {}
|
||||
serializers["vocab"] = lambda: self.vocab.to_bytes()
|
||||
|
@ -1883,7 +1883,7 @@ class Language:
|
|||
exclude (list): Names of components or serialization fields to exclude.
|
||||
RETURNS (Language): The `Language` object.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/language#from_bytes
|
||||
DOCS: https://spacy.io/api/language#from_bytes
|
||||
"""
|
||||
|
||||
def deserialize_meta(b):
|
||||
|
|
|
@ -30,7 +30,7 @@ cdef class Lexeme:
|
|||
tag, dependency parse, or lemma (lemmatization depends on the
|
||||
part-of-speech tag).
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/lexeme
|
||||
DOCS: https://spacy.io/api/lexeme
|
||||
"""
|
||||
def __init__(self, Vocab vocab, attr_t orth):
|
||||
"""Create a Lexeme object.
|
||||
|
|
|
@ -57,7 +57,7 @@ class Table(OrderedDict):
|
|||
data (dict): The dictionary.
|
||||
name (str): Optional table name for reference.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/lookups#table.from_dict
|
||||
DOCS: https://spacy.io/api/lookups#table.from_dict
|
||||
"""
|
||||
self = cls(name=name)
|
||||
self.update(data)
|
||||
|
@ -69,7 +69,7 @@ class Table(OrderedDict):
|
|||
name (str): Optional table name for reference.
|
||||
data (dict): Initial data, used to hint Bloom Filter.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/lookups#table.init
|
||||
DOCS: https://spacy.io/api/lookups#table.init
|
||||
"""
|
||||
OrderedDict.__init__(self)
|
||||
self.name = name
|
||||
|
@ -135,7 +135,7 @@ class Table(OrderedDict):
|
|||
|
||||
RETURNS (bytes): The serialized table.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/lookups#table.to_bytes
|
||||
DOCS: https://spacy.io/api/lookups#table.to_bytes
|
||||
"""
|
||||
data = {
|
||||
"name": self.name,
|
||||
|
@ -150,7 +150,7 @@ class Table(OrderedDict):
|
|||
bytes_data (bytes): The data to load.
|
||||
RETURNS (Table): The loaded table.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/lookups#table.from_bytes
|
||||
DOCS: https://spacy.io/api/lookups#table.from_bytes
|
||||
"""
|
||||
loaded = srsly.msgpack_loads(bytes_data)
|
||||
data = loaded.get("dict", {})
|
||||
|
@ -172,7 +172,7 @@ class Lookups:
|
|||
def __init__(self) -> None:
|
||||
"""Initialize the Lookups object.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/lookups#init
|
||||
DOCS: https://spacy.io/api/lookups#init
|
||||
"""
|
||||
self._tables = {}
|
||||
|
||||
|
@ -201,7 +201,7 @@ class Lookups:
|
|||
data (dict): Optional data to add to the table.
|
||||
RETURNS (Table): The newly added table.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/lookups#add_table
|
||||
DOCS: https://spacy.io/api/lookups#add_table
|
||||
"""
|
||||
if name in self.tables:
|
||||
raise ValueError(Errors.E158.format(name=name))
|
||||
|
@ -215,7 +215,7 @@ class Lookups:
|
|||
name (str): Name of the table to set.
|
||||
table (Table): The Table to set.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/lookups#set_table
|
||||
DOCS: https://spacy.io/api/lookups#set_table
|
||||
"""
|
||||
self._tables[name] = table
|
||||
|
||||
|
@ -227,7 +227,7 @@ class Lookups:
|
|||
default (Any): Optional default value to return if table doesn't exist.
|
||||
RETURNS (Table): The table.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/lookups#get_table
|
||||
DOCS: https://spacy.io/api/lookups#get_table
|
||||
"""
|
||||
if name not in self._tables:
|
||||
if default == UNSET:
|
||||
|
@ -241,7 +241,7 @@ class Lookups:
|
|||
name (str): Name of the table to remove.
|
||||
RETURNS (Table): The removed table.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/lookups#remove_table
|
||||
DOCS: https://spacy.io/api/lookups#remove_table
|
||||
"""
|
||||
if name not in self._tables:
|
||||
raise KeyError(Errors.E159.format(name=name, tables=self.tables))
|
||||
|
@ -253,7 +253,7 @@ class Lookups:
|
|||
name (str): Name of the table.
|
||||
RETURNS (bool): Whether a table of that name exists.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/lookups#has_table
|
||||
DOCS: https://spacy.io/api/lookups#has_table
|
||||
"""
|
||||
return name in self._tables
|
||||
|
||||
|
@ -262,7 +262,7 @@ class Lookups:
|
|||
|
||||
RETURNS (bytes): The serialized Lookups.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/lookups#to_bytes
|
||||
DOCS: https://spacy.io/api/lookups#to_bytes
|
||||
"""
|
||||
return srsly.msgpack_dumps(self._tables)
|
||||
|
||||
|
@ -272,7 +272,7 @@ class Lookups:
|
|||
bytes_data (bytes): The data to load.
|
||||
RETURNS (Lookups): The loaded Lookups.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/lookups#from_bytes
|
||||
DOCS: https://spacy.io/api/lookups#from_bytes
|
||||
"""
|
||||
self._tables = {}
|
||||
for key, value in srsly.msgpack_loads(bytes_data).items():
|
||||
|
@ -287,7 +287,7 @@ class Lookups:
|
|||
|
||||
path (str / Path): The file path.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/lookups#to_disk
|
||||
DOCS: https://spacy.io/api/lookups#to_disk
|
||||
"""
|
||||
path = ensure_path(path)
|
||||
if not path.exists():
|
||||
|
@ -305,7 +305,7 @@ class Lookups:
|
|||
path (str / Path): The directory path.
|
||||
RETURNS (Lookups): The loaded lookups.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/lookups#from_disk
|
||||
DOCS: https://spacy.io/api/lookups#from_disk
|
||||
"""
|
||||
path = ensure_path(path)
|
||||
filepath = path / filename
|
||||
|
|
|
@ -32,8 +32,8 @@ DEF PADDING = 5
|
|||
cdef class Matcher:
|
||||
"""Match sequences of tokens, based on pattern rules.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/matcher
|
||||
USAGE: https://nightly.spacy.io/usage/rule-based-matching
|
||||
DOCS: https://spacy.io/api/matcher
|
||||
USAGE: https://spacy.io/usage/rule-based-matching
|
||||
"""
|
||||
|
||||
def __init__(self, vocab, validate=True):
|
||||
|
|
|
@ -20,8 +20,8 @@ cdef class PhraseMatcher:
|
|||
sequences based on lists of token descriptions, the `PhraseMatcher` accepts
|
||||
match patterns in the form of `Doc` objects.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/phrasematcher
|
||||
USAGE: https://nightly.spacy.io/usage/rule-based-matching#phrasematcher
|
||||
DOCS: https://spacy.io/api/phrasematcher
|
||||
USAGE: https://spacy.io/usage/rule-based-matching#phrasematcher
|
||||
|
||||
Adapted from FlashText: https://github.com/vi3k6i5/flashtext
|
||||
MIT License (see `LICENSE`)
|
||||
|
@ -35,7 +35,7 @@ cdef class PhraseMatcher:
|
|||
attr (int / str): Token attribute to match on.
|
||||
validate (bool): Perform additional validation when patterns are added.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/phrasematcher#init
|
||||
DOCS: https://spacy.io/api/phrasematcher#init
|
||||
"""
|
||||
self.vocab = vocab
|
||||
self._callbacks = {}
|
||||
|
@ -64,7 +64,7 @@ cdef class PhraseMatcher:
|
|||
|
||||
RETURNS (int): The number of rules.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/phrasematcher#len
|
||||
DOCS: https://spacy.io/api/phrasematcher#len
|
||||
"""
|
||||
return len(self._callbacks)
|
||||
|
||||
|
@ -74,7 +74,7 @@ cdef class PhraseMatcher:
|
|||
key (str): The match ID.
|
||||
RETURNS (bool): Whether the matcher contains rules for this match ID.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/phrasematcher#contains
|
||||
DOCS: https://spacy.io/api/phrasematcher#contains
|
||||
"""
|
||||
return key in self._callbacks
|
||||
|
||||
|
@ -88,7 +88,7 @@ cdef class PhraseMatcher:
|
|||
|
||||
key (str): The match ID.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/phrasematcher#remove
|
||||
DOCS: https://spacy.io/api/phrasematcher#remove
|
||||
"""
|
||||
if key not in self._docs:
|
||||
raise KeyError(key)
|
||||
|
@ -167,7 +167,7 @@ cdef class PhraseMatcher:
|
|||
as variable arguments. Will be ignored if a list of patterns is
|
||||
provided as the second argument.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/phrasematcher#add
|
||||
DOCS: https://spacy.io/api/phrasematcher#add
|
||||
"""
|
||||
if docs is None or hasattr(docs, "__call__"): # old API
|
||||
on_match = docs
|
||||
|
@ -241,7 +241,7 @@ cdef class PhraseMatcher:
|
|||
`doc[start:end]`. The `match_id` is an integer. If as_spans is set
|
||||
to True, a list of Span objects is returned.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/phrasematcher#call
|
||||
DOCS: https://spacy.io/api/phrasematcher#call
|
||||
"""
|
||||
matches = []
|
||||
if doc is None or len(doc) == 0:
|
||||
|
|
|
@ -32,7 +32,7 @@ class AttributeRuler(Pipe):
|
|||
"""Set token-level attributes for tokens matched by Matcher patterns.
|
||||
Additionally supports importing patterns from tag maps and morph rules.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/attributeruler
|
||||
DOCS: https://spacy.io/api/attributeruler
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
|
@ -48,7 +48,7 @@ class AttributeRuler(Pipe):
|
|||
|
||||
RETURNS (AttributeRuler): The AttributeRuler component.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/attributeruler#init
|
||||
DOCS: https://spacy.io/api/attributeruler#init
|
||||
"""
|
||||
self.name = name
|
||||
self.vocab = vocab
|
||||
|
@ -94,7 +94,7 @@ class AttributeRuler(Pipe):
|
|||
doc (Doc): The document to process.
|
||||
RETURNS (Doc): The processed Doc.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/attributeruler#call
|
||||
DOCS: https://spacy.io/api/attributeruler#call
|
||||
"""
|
||||
error_handler = self.get_error_handler()
|
||||
try:
|
||||
|
@ -143,7 +143,7 @@ class AttributeRuler(Pipe):
|
|||
tag_map (dict): The tag map that maps fine-grained tags to
|
||||
coarse-grained tags and morphological features.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/attributeruler#load_from_morph_rules
|
||||
DOCS: https://spacy.io/api/attributeruler#load_from_tag_map
|
||||
"""
|
||||
for tag, attrs in tag_map.items():
|
||||
pattern = [{"TAG": tag}]
|
||||
|
@ -165,7 +165,7 @@ class AttributeRuler(Pipe):
|
|||
fine-grained tags to coarse-grained tags, lemmas and morphological
|
||||
features.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/attributeruler#load_from_morph_rules
|
||||
DOCS: https://spacy.io/api/attributeruler#load_from_morph_rules
|
||||
"""
|
||||
for tag in morph_rules:
|
||||
for word in morph_rules[tag]:
|
||||
|
@ -193,7 +193,7 @@ class AttributeRuler(Pipe):
|
|||
index (int): The index of the token in the matched span to modify. May
|
||||
be negative to index from the end of the span. Defaults to 0.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/attributeruler#add
|
||||
DOCS: https://spacy.io/api/attributeruler#add
|
||||
"""
|
||||
# We need to make a string here, because otherwise the ID we pass back
|
||||
# will be interpreted as the hash of a string, rather than an ordinal.
|
||||
|
@ -211,7 +211,7 @@ class AttributeRuler(Pipe):
|
|||
as the arguments to AttributeRuler.add (patterns/attrs/index) to
|
||||
add as patterns.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/attributeruler#add_patterns
|
||||
DOCS: https://spacy.io/api/attributeruler#add_patterns
|
||||
"""
|
||||
for p in patterns:
|
||||
self.add(**p)
|
||||
|
@ -236,7 +236,7 @@ class AttributeRuler(Pipe):
|
|||
Scorer.score_token_attr for the attributes "tag", "pos", "morph"
|
||||
and "lemma" for the target token attributes.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/tagger#score
|
||||
DOCS: https://spacy.io/api/tagger#score
|
||||
"""
|
||||
|
||||
def morph_key_getter(token, attr):
|
||||
|
@ -273,7 +273,7 @@ class AttributeRuler(Pipe):
|
|||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||
RETURNS (bytes): The serialized object.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/attributeruler#to_bytes
|
||||
DOCS: https://spacy.io/api/attributeruler#to_bytes
|
||||
"""
|
||||
serialize = {}
|
||||
serialize["vocab"] = self.vocab.to_bytes
|
||||
|
@ -289,7 +289,7 @@ class AttributeRuler(Pipe):
|
|||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||
RETURNS (AttributeRuler): The loaded object.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/attributeruler#from_bytes
|
||||
DOCS: https://spacy.io/api/attributeruler#from_bytes
|
||||
"""
|
||||
|
||||
def load_patterns(b):
|
||||
|
@ -310,7 +310,7 @@ class AttributeRuler(Pipe):
|
|||
path (Union[Path, str]): A path to a directory.
|
||||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/attributeruler#to_disk
|
||||
DOCS: https://spacy.io/api/attributeruler#to_disk
|
||||
"""
|
||||
serialize = {
|
||||
"vocab": lambda p: self.vocab.to_disk(p),
|
||||
|
@ -327,7 +327,7 @@ class AttributeRuler(Pipe):
|
|||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||
RETURNS (AttributeRuler): The loaded object.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/attributeruler#from_disk
|
||||
DOCS: https://spacy.io/api/attributeruler#from_disk
|
||||
"""
|
||||
|
||||
def load_patterns(p):
|
||||
|
|
|
@ -202,7 +202,7 @@ def make_beam_parser(
|
|||
cdef class DependencyParser(Parser):
|
||||
"""Pipeline component for dependency parsing.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/dependencyparser
|
||||
DOCS: https://spacy.io/api/dependencyparser
|
||||
"""
|
||||
TransitionSystem = ArcEager
|
||||
|
||||
|
@ -243,7 +243,7 @@ cdef class DependencyParser(Parser):
|
|||
RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans
|
||||
and Scorer.score_deps.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/dependencyparser#score
|
||||
DOCS: https://spacy.io/api/dependencyparser#score
|
||||
"""
|
||||
def has_sents(doc):
|
||||
return doc.has_annotation("SENT_START")
|
||||
|
|
|
@ -94,7 +94,7 @@ def make_entity_linker(
|
|||
class EntityLinker(TrainablePipe):
|
||||
"""Pipeline component for named entity linking.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/entitylinker
|
||||
DOCS: https://spacy.io/api/entitylinker
|
||||
"""
|
||||
|
||||
NIL = "NIL" # string used to refer to a non-existing link
|
||||
|
@ -124,7 +124,7 @@ class EntityLinker(TrainablePipe):
|
|||
get_candidates (Callable[[KnowledgeBase, "Span"], Iterable[Candidate]]): Function that
|
||||
produces a list of candidates, given a certain knowledge base and a textual mention.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/entitylinker#init
|
||||
DOCS: https://spacy.io/api/entitylinker#init
|
||||
"""
|
||||
self.vocab = vocab
|
||||
self.model = model
|
||||
|
@ -171,7 +171,7 @@ class EntityLinker(TrainablePipe):
|
|||
Note that providing this argument will overwrite all data accumulated in the current KB.
|
||||
Use this only when loading a KB as-such from file.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/entitylinker#initialize
|
||||
DOCS: https://spacy.io/api/entitylinker#initialize
|
||||
"""
|
||||
validate_get_examples(get_examples, "EntityLinker.initialize")
|
||||
if kb_loader is not None:
|
||||
|
@ -207,7 +207,7 @@ class EntityLinker(TrainablePipe):
|
|||
Updated using the component name as the key.
|
||||
RETURNS (Dict[str, float]): The updated losses dictionary.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/entitylinker#update
|
||||
DOCS: https://spacy.io/api/entitylinker#update
|
||||
"""
|
||||
self.validate_kb()
|
||||
if losses is None:
|
||||
|
@ -283,7 +283,7 @@ class EntityLinker(TrainablePipe):
|
|||
docs (Iterable[Doc]): The documents to predict.
|
||||
RETURNS (List[int]): The model's prediction for each document.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/entitylinker#predict
|
||||
DOCS: https://spacy.io/api/entitylinker#predict
|
||||
"""
|
||||
self.validate_kb()
|
||||
entity_count = 0
|
||||
|
@ -380,7 +380,7 @@ class EntityLinker(TrainablePipe):
|
|||
docs (Iterable[Doc]): The documents to modify.
|
||||
kb_ids (List[str]): The IDs to set, produced by EntityLinker.predict.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/entitylinker#set_annotations
|
||||
DOCS: https://spacy.io/api/entitylinker#set_annotations
|
||||
"""
|
||||
count_ents = len([ent for doc in docs for ent in doc.ents])
|
||||
if count_ents != len(kb_ids):
|
||||
|
@ -399,7 +399,7 @@ class EntityLinker(TrainablePipe):
|
|||
examples (Iterable[Example]): The examples to score.
|
||||
RETURNS (Dict[str, Any]): The scores.
|
||||
|
||||
DOCS TODO: https://nightly.spacy.io/api/entity_linker#score
|
||||
DOCS TODO: https://spacy.io/api/entitylinker#score
|
||||
"""
|
||||
validate_examples(examples, "EntityLinker.score")
|
||||
return Scorer.score_links(examples, negative_labels=[self.NIL])
|
||||
|
@ -412,7 +412,7 @@ class EntityLinker(TrainablePipe):
|
|||
path (str / Path): Path to a directory.
|
||||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/entitylinker#to_disk
|
||||
DOCS: https://spacy.io/api/entitylinker#to_disk
|
||||
"""
|
||||
serialize = {}
|
||||
serialize["vocab"] = lambda p: self.vocab.to_disk(p)
|
||||
|
@ -430,7 +430,7 @@ class EntityLinker(TrainablePipe):
|
|||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||
RETURNS (EntityLinker): The modified EntityLinker object.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/entitylinker#from_disk
|
||||
DOCS: https://spacy.io/api/entitylinker#from_disk
|
||||
"""
|
||||
|
||||
def load_model(p):
|
||||
|
|
|
@ -59,8 +59,8 @@ class EntityRuler(Pipe):
|
|||
purely rule-based entity recognition system. After initialization, the
|
||||
component is typically added to the pipeline using `nlp.add_pipe`.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/entityruler
|
||||
USAGE: https://nightly.spacy.io/usage/rule-based-matching#entityruler
|
||||
DOCS: https://spacy.io/api/entityruler
|
||||
USAGE: https://spacy.io/usage/rule-based-matching#entityruler
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
|
@ -94,7 +94,7 @@ class EntityRuler(Pipe):
|
|||
added by the model, overwrite them by matches if necessary.
|
||||
ent_id_sep (str): Separator used internally for entity IDs.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/entityruler#init
|
||||
DOCS: https://spacy.io/api/entityruler#init
|
||||
"""
|
||||
self.nlp = nlp
|
||||
self.name = name
|
||||
|
@ -133,7 +133,7 @@ class EntityRuler(Pipe):
|
|||
doc (Doc): The Doc object in the pipeline.
|
||||
RETURNS (Doc): The Doc with added entities, if available.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/entityruler#call
|
||||
DOCS: https://spacy.io/api/entityruler#call
|
||||
"""
|
||||
error_handler = self.get_error_handler()
|
||||
try:
|
||||
|
@ -183,7 +183,7 @@ class EntityRuler(Pipe):
|
|||
|
||||
RETURNS (set): The string labels.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/entityruler#labels
|
||||
DOCS: https://spacy.io/api/entityruler#labels
|
||||
"""
|
||||
keys = set(self.token_patterns.keys())
|
||||
keys.update(self.phrase_patterns.keys())
|
||||
|
@ -211,7 +211,7 @@ class EntityRuler(Pipe):
|
|||
nlp (Language): The current nlp object the component is part of.
|
||||
patterns (Optional[Iterable[PatternType]]): The list of patterns.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/entityruler#initialize
|
||||
DOCS: https://spacy.io/api/entityruler#initialize
|
||||
"""
|
||||
self.clear()
|
||||
if patterns:
|
||||
|
@ -223,7 +223,7 @@ class EntityRuler(Pipe):
|
|||
|
||||
RETURNS (set): The string entity ids.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/entityruler#ent_ids
|
||||
DOCS: https://spacy.io/api/entityruler#ent_ids
|
||||
"""
|
||||
keys = set(self.token_patterns.keys())
|
||||
keys.update(self.phrase_patterns.keys())
|
||||
|
@ -241,7 +241,7 @@ class EntityRuler(Pipe):
|
|||
|
||||
RETURNS (list): The original patterns, one dictionary per pattern.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/entityruler#patterns
|
||||
DOCS: https://spacy.io/api/entityruler#patterns
|
||||
"""
|
||||
all_patterns = []
|
||||
for label, patterns in self.token_patterns.items():
|
||||
|
@ -268,7 +268,7 @@ class EntityRuler(Pipe):
|
|||
|
||||
patterns (list): The patterns to add.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/entityruler#add_patterns
|
||||
DOCS: https://spacy.io/api/entityruler#add_patterns
|
||||
"""
|
||||
|
||||
# disable the nlp components after this one in case they hadn't been initialized / deserialised yet
|
||||
|
@ -366,7 +366,7 @@ class EntityRuler(Pipe):
|
|||
patterns_bytes (bytes): The bytestring to load.
|
||||
RETURNS (EntityRuler): The loaded entity ruler.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/entityruler#from_bytes
|
||||
DOCS: https://spacy.io/api/entityruler#from_bytes
|
||||
"""
|
||||
cfg = srsly.msgpack_loads(patterns_bytes)
|
||||
self.clear()
|
||||
|
@ -388,7 +388,7 @@ class EntityRuler(Pipe):
|
|||
|
||||
RETURNS (bytes): The serialized patterns.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/entityruler#to_bytes
|
||||
DOCS: https://spacy.io/api/entityruler#to_bytes
|
||||
"""
|
||||
serial = {
|
||||
"overwrite": self.overwrite,
|
||||
|
@ -407,7 +407,7 @@ class EntityRuler(Pipe):
|
|||
path (str / Path): The JSONL file to load.
|
||||
RETURNS (EntityRuler): The loaded entity ruler.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/entityruler#from_disk
|
||||
DOCS: https://spacy.io/api/entityruler#from_disk
|
||||
"""
|
||||
path = ensure_path(path)
|
||||
self.clear()
|
||||
|
@ -443,7 +443,7 @@ class EntityRuler(Pipe):
|
|||
|
||||
path (str / Path): The JSONL file to save.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/entityruler#to_disk
|
||||
DOCS: https://spacy.io/api/entityruler#to_disk
|
||||
"""
|
||||
path = ensure_path(path)
|
||||
cfg = {
|
||||
|
|
|
@ -18,7 +18,7 @@ def merge_noun_chunks(doc: Doc) -> Doc:
|
|||
doc (Doc): The Doc object.
|
||||
RETURNS (Doc): The Doc object with merged noun chunks.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/pipeline-functions#merge_noun_chunks
|
||||
DOCS: https://spacy.io/api/pipeline-functions#merge_noun_chunks
|
||||
"""
|
||||
if not doc.has_annotation("DEP"):
|
||||
return doc
|
||||
|
@ -40,7 +40,7 @@ def merge_entities(doc: Doc):
|
|||
doc (Doc): The Doc object.
|
||||
RETURNS (Doc): The Doc object with merged entities.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/pipeline-functions#merge_entities
|
||||
DOCS: https://spacy.io/api/pipeline-functions#merge_entities
|
||||
"""
|
||||
with doc.retokenize() as retokenizer:
|
||||
for ent in doc.ents:
|
||||
|
@ -57,7 +57,7 @@ def merge_subtokens(doc: Doc, label: str = "subtok") -> Doc:
|
|||
label (str): The subtoken dependency label.
|
||||
RETURNS (Doc): The Doc object with merged subtokens.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/pipeline-functions#merge_subtokens
|
||||
DOCS: https://spacy.io/api/pipeline-functions#merge_subtokens
|
||||
"""
|
||||
# TODO: make stateful component with "label" config
|
||||
merger = Matcher(doc.vocab)
|
||||
|
|
|
@ -32,7 +32,7 @@ class Lemmatizer(Pipe):
|
|||
The Lemmatizer supports simple part-of-speech-sensitive suffix rules and
|
||||
lookup tables.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/lemmatizer
|
||||
DOCS: https://spacy.io/api/lemmatizer
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
|
@ -68,7 +68,7 @@ class Lemmatizer(Pipe):
|
|||
overwrite (bool): Whether to overwrite existing lemmas. Defaults to
|
||||
`False`.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/lemmatizer#init
|
||||
DOCS: https://spacy.io/api/lemmatizer#init
|
||||
"""
|
||||
self.vocab = vocab
|
||||
self.model = model
|
||||
|
@ -98,7 +98,7 @@ class Lemmatizer(Pipe):
|
|||
doc (Doc): The Doc to process.
|
||||
RETURNS (Doc): The processed Doc.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/lemmatizer#call
|
||||
DOCS: https://spacy.io/api/lemmatizer#call
|
||||
"""
|
||||
if not self._validated:
|
||||
self._validate_tables(Errors.E1004)
|
||||
|
@ -159,7 +159,7 @@ class Lemmatizer(Pipe):
|
|||
token (Token): The token to lemmatize.
|
||||
RETURNS (list): The available lemmas for the string.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/lemmatizer#lookup_lemmatize
|
||||
DOCS: https://spacy.io/api/lemmatizer#lookup_lemmatize
|
||||
"""
|
||||
lookup_table = self.lookups.get_table("lemma_lookup", {})
|
||||
result = lookup_table.get(token.text, token.text)
|
||||
|
@ -173,7 +173,7 @@ class Lemmatizer(Pipe):
|
|||
token (Token): The token to lemmatize.
|
||||
RETURNS (list): The available lemmas for the string.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/lemmatizer#rule_lemmatize
|
||||
DOCS: https://spacy.io/api/lemmatizer#rule_lemmatize
|
||||
"""
|
||||
cache_key = (token.orth, token.pos, token.morph)
|
||||
if cache_key in self.cache:
|
||||
|
@ -241,7 +241,7 @@ class Lemmatizer(Pipe):
|
|||
token (Token): The token.
|
||||
RETURNS (bool): Whether the token is a base form.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/lemmatizer#is_base_form
|
||||
DOCS: https://spacy.io/api/lemmatizer#is_base_form
|
||||
"""
|
||||
return False
|
||||
|
||||
|
@ -251,7 +251,7 @@ class Lemmatizer(Pipe):
|
|||
examples (Iterable[Example]): The examples to score.
|
||||
RETURNS (Dict[str, Any]): The scores.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/lemmatizer#score
|
||||
DOCS: https://spacy.io/api/lemmatizer#score
|
||||
"""
|
||||
validate_examples(examples, "Lemmatizer.score")
|
||||
return Scorer.score_token_attr(examples, "lemma", **kwargs)
|
||||
|
@ -264,7 +264,7 @@ class Lemmatizer(Pipe):
|
|||
path (str / Path): Path to a directory.
|
||||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/lemmatizer#to_disk
|
||||
DOCS: https://spacy.io/api/lemmatizer#to_disk
|
||||
"""
|
||||
serialize = {}
|
||||
serialize["vocab"] = lambda p: self.vocab.to_disk(p)
|
||||
|
@ -280,7 +280,7 @@ class Lemmatizer(Pipe):
|
|||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||
RETURNS (Lemmatizer): The modified Lemmatizer object.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/lemmatizer#from_disk
|
||||
DOCS: https://spacy.io/api/lemmatizer#from_disk
|
||||
"""
|
||||
deserialize = {}
|
||||
deserialize["vocab"] = lambda p: self.vocab.from_disk(p)
|
||||
|
@ -295,7 +295,7 @@ class Lemmatizer(Pipe):
|
|||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||
RETURNS (bytes): The serialized object.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/lemmatizer#to_bytes
|
||||
DOCS: https://spacy.io/api/lemmatizer#to_bytes
|
||||
"""
|
||||
serialize = {}
|
||||
serialize["vocab"] = self.vocab.to_bytes
|
||||
|
@ -311,7 +311,7 @@ class Lemmatizer(Pipe):
|
|||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||
RETURNS (Lemmatizer): The loaded Lemmatizer.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/lemmatizer#from_bytes
|
||||
DOCS: https://spacy.io/api/lemmatizer#from_bytes
|
||||
"""
|
||||
deserialize = {}
|
||||
deserialize["vocab"] = lambda b: self.vocab.from_bytes(b)
|
||||
|
|
|
@ -75,7 +75,7 @@ class Morphologizer(Tagger):
|
|||
name (str): The component instance name, used to add entries to the
|
||||
losses during training.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/morphologizer#init
|
||||
DOCS: https://spacy.io/api/morphologizer#init
|
||||
"""
|
||||
self.vocab = vocab
|
||||
self.model = model
|
||||
|
@ -104,7 +104,7 @@ class Morphologizer(Tagger):
|
|||
label (str): The label to add.
|
||||
RETURNS (int): 0 if label is already present, otherwise 1.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/morphologizer#add_label
|
||||
DOCS: https://spacy.io/api/morphologizer#add_label
|
||||
"""
|
||||
if not isinstance(label, str):
|
||||
raise ValueError(Errors.E187)
|
||||
|
@ -134,7 +134,7 @@ class Morphologizer(Tagger):
|
|||
returns a representative sample of gold-standard Example objects.
|
||||
nlp (Language): The current nlp object the component is part of.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/morphologizer#initialize
|
||||
DOCS: https://spacy.io/api/morphologizer#initialize
|
||||
"""
|
||||
validate_get_examples(get_examples, "Morphologizer.initialize")
|
||||
if labels is not None:
|
||||
|
@ -185,7 +185,7 @@ class Morphologizer(Tagger):
|
|||
docs (Iterable[Doc]): The documents to modify.
|
||||
batch_tag_ids: The IDs to set, produced by Morphologizer.predict.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/morphologizer#set_annotations
|
||||
DOCS: https://spacy.io/api/morphologizer#set_annotations
|
||||
"""
|
||||
if isinstance(docs, Doc):
|
||||
docs = [docs]
|
||||
|
@ -208,7 +208,7 @@ class Morphologizer(Tagger):
|
|||
scores: Scores representing the model's predictions.
|
||||
RETURNS (Tuple[float, float]): The loss and the gradient.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/morphologizer#get_loss
|
||||
DOCS: https://spacy.io/api/morphologizer#get_loss
|
||||
"""
|
||||
validate_examples(examples, "Morphologizer.get_loss")
|
||||
loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False)
|
||||
|
@ -254,7 +254,7 @@ class Morphologizer(Tagger):
|
|||
Scorer.score_token_attr for the attributes "pos" and "morph" and
|
||||
Scorer.score_token_attr_per_feat for the attribute "morph".
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/morphologizer#score
|
||||
DOCS: https://spacy.io/api/morphologizer#score
|
||||
"""
|
||||
def morph_key_getter(token, attr):
|
||||
return getattr(token, attr).key
|
||||
|
|
|
@ -163,7 +163,7 @@ def make_beam_ner(
|
|||
cdef class EntityRecognizer(Parser):
|
||||
"""Pipeline component for named entity recognition.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/entityrecognizer
|
||||
DOCS: https://spacy.io/api/entityrecognizer
|
||||
"""
|
||||
TransitionSystem = BiluoPushDown
|
||||
|
||||
|
@ -194,7 +194,7 @@ cdef class EntityRecognizer(Parser):
|
|||
examples (Iterable[Example]): The examples to score.
|
||||
RETURNS (Dict[str, Any]): The NER precision, recall and f-scores.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/entityrecognizer#score
|
||||
DOCS: https://spacy.io/api/entityrecognizer#score
|
||||
"""
|
||||
validate_examples(examples, "EntityRecognizer.score")
|
||||
return get_ner_prf(examples)
|
||||
|
|
|
@ -16,7 +16,7 @@ cdef class Pipe:
|
|||
Trainable pipeline components like the EntityRecognizer or TextCategorizer
|
||||
should inherit from the subclass 'TrainablePipe'.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/pipe
|
||||
DOCS: https://spacy.io/api/pipe
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
|
@ -34,7 +34,7 @@ cdef class Pipe:
|
|||
docs (Doc): The Doc to process.
|
||||
RETURNS (Doc): The processed Doc.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/pipe#call
|
||||
DOCS: https://spacy.io/api/pipe#call
|
||||
"""
|
||||
raise NotImplementedError(Errors.E931.format(parent="Pipe", method="__call__", name=self.name))
|
||||
|
||||
|
@ -47,7 +47,7 @@ cdef class Pipe:
|
|||
batch_size (int): The number of documents to buffer.
|
||||
YIELDS (Doc): Processed documents in order.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/pipe#pipe
|
||||
DOCS: https://spacy.io/api/pipe#pipe
|
||||
"""
|
||||
error_handler = self.get_error_handler()
|
||||
for doc in stream:
|
||||
|
@ -69,7 +69,7 @@ cdef class Pipe:
|
|||
returns a representative sample of gold-standard Example objects.
|
||||
nlp (Language): The current nlp object the component is part of.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/pipe#initialize
|
||||
DOCS: https://spacy.io/api/pipe#initialize
|
||||
"""
|
||||
pass
|
||||
|
||||
|
@ -79,7 +79,7 @@ cdef class Pipe:
|
|||
examples (Iterable[Example]): The examples to score.
|
||||
RETURNS (Dict[str, Any]): The scores.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/pipe#score
|
||||
DOCS: https://spacy.io/api/pipe#score
|
||||
"""
|
||||
return {}
|
||||
|
||||
|
@ -111,7 +111,7 @@ cdef class Pipe:
|
|||
the component's name, the component itself, the offending batch of documents, and the exception
|
||||
that was thrown.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/pipe#set_error_handler
|
||||
DOCS: https://spacy.io/api/pipe#set_error_handler
|
||||
"""
|
||||
self.error_handler = error_handler
|
||||
|
||||
|
@ -120,7 +120,7 @@ cdef class Pipe:
|
|||
|
||||
RETURNS (Callable): The error handler, or if it's not set a default function that just reraises.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/pipe#get_error_handler
|
||||
DOCS: https://spacy.io/api/pipe#get_error_handler
|
||||
"""
|
||||
if hasattr(self, "error_handler"):
|
||||
return self.error_handler
|
||||
|
|
|
@ -26,7 +26,7 @@ def make_sentencizer(
|
|||
class Sentencizer(Pipe):
|
||||
"""Segment the Doc into sentences using a rule-based strategy.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/sentencizer
|
||||
DOCS: https://spacy.io/api/sentencizer
|
||||
"""
|
||||
|
||||
default_punct_chars = ['!', '.', '?', '։', '؟', '۔', '܀', '܁', '܂', '߹',
|
||||
|
@ -48,7 +48,7 @@ class Sentencizer(Pipe):
|
|||
serialized with the nlp object.
|
||||
RETURNS (Sentencizer): The sentencizer component.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/sentencizer#init
|
||||
DOCS: https://spacy.io/api/sentencizer#init
|
||||
"""
|
||||
self.name = name
|
||||
if punct_chars:
|
||||
|
@ -62,7 +62,7 @@ class Sentencizer(Pipe):
|
|||
doc (Doc): The document to process.
|
||||
RETURNS (Doc): The processed Doc.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/sentencizer#call
|
||||
DOCS: https://spacy.io/api/sentencizer#call
|
||||
"""
|
||||
error_handler = self.get_error_handler()
|
||||
try:
|
||||
|
@ -142,7 +142,7 @@ class Sentencizer(Pipe):
|
|||
examples (Iterable[Example]): The examples to score.
|
||||
RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/sentencizer#score
|
||||
DOCS: https://spacy.io/api/sentencizer#score
|
||||
"""
|
||||
def has_sents(doc):
|
||||
return doc.has_annotation("SENT_START")
|
||||
|
@ -157,7 +157,7 @@ class Sentencizer(Pipe):
|
|||
|
||||
RETURNS (bytes): The serialized object.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/sentencizer#to_bytes
|
||||
DOCS: https://spacy.io/api/sentencizer#to_bytes
|
||||
"""
|
||||
return srsly.msgpack_dumps({"punct_chars": list(self.punct_chars)})
|
||||
|
||||
|
@ -167,7 +167,7 @@ class Sentencizer(Pipe):
|
|||
bytes_data (bytes): The data to load.
|
||||
RETURNS (Sentencizer): The loaded object.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/sentencizer#from_bytes
|
||||
DOCS: https://spacy.io/api/sentencizer#from_bytes
|
||||
"""
|
||||
cfg = srsly.msgpack_loads(bytes_data)
|
||||
self.punct_chars = set(cfg.get("punct_chars", self.default_punct_chars))
|
||||
|
@ -176,7 +176,7 @@ class Sentencizer(Pipe):
|
|||
def to_disk(self, path, *, exclude=tuple()):
|
||||
"""Serialize the sentencizer to disk.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/sentencizer#to_disk
|
||||
DOCS: https://spacy.io/api/sentencizer#to_disk
|
||||
"""
|
||||
path = util.ensure_path(path)
|
||||
path = path.with_suffix(".json")
|
||||
|
@ -186,7 +186,7 @@ class Sentencizer(Pipe):
|
|||
def from_disk(self, path, *, exclude=tuple()):
|
||||
"""Load the sentencizer from disk.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/sentencizer#from_disk
|
||||
DOCS: https://spacy.io/api/sentencizer#from_disk
|
||||
"""
|
||||
path = util.ensure_path(path)
|
||||
path = path.with_suffix(".json")
|
||||
|
|
|
@ -44,7 +44,7 @@ def make_senter(nlp: Language, name: str, model: Model):
|
|||
class SentenceRecognizer(Tagger):
|
||||
"""Pipeline component for sentence segmentation.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/sentencerecognizer
|
||||
DOCS: https://spacy.io/api/sentencerecognizer
|
||||
"""
|
||||
def __init__(self, vocab, model, name="senter"):
|
||||
"""Initialize a sentence recognizer.
|
||||
|
@ -54,7 +54,7 @@ class SentenceRecognizer(Tagger):
|
|||
name (str): The component instance name, used to add entries to the
|
||||
losses during training.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/sentencerecognizer#init
|
||||
DOCS: https://spacy.io/api/sentencerecognizer#init
|
||||
"""
|
||||
self.vocab = vocab
|
||||
self.model = model
|
||||
|
@ -80,7 +80,7 @@ class SentenceRecognizer(Tagger):
|
|||
docs (Iterable[Doc]): The documents to modify.
|
||||
batch_tag_ids: The IDs to set, produced by SentenceRecognizer.predict.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/sentencerecognizer#set_annotations
|
||||
DOCS: https://spacy.io/api/sentencerecognizer#set_annotations
|
||||
"""
|
||||
if isinstance(docs, Doc):
|
||||
docs = [docs]
|
||||
|
@ -105,7 +105,7 @@ class SentenceRecognizer(Tagger):
|
|||
scores: Scores representing the model's predictions.
|
||||
RETURNS (Tuple[float, float]): The loss and the gradient.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/sentencerecognizer#get_loss
|
||||
DOCS: https://spacy.io/api/sentencerecognizer#get_loss
|
||||
"""
|
||||
validate_examples(examples, "SentenceRecognizer.get_loss")
|
||||
labels = self.labels
|
||||
|
@ -135,7 +135,7 @@ class SentenceRecognizer(Tagger):
|
|||
returns a representative sample of gold-standard Example objects.
|
||||
nlp (Language): The current nlp object the component is part of.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/sentencerecognizer#initialize
|
||||
DOCS: https://spacy.io/api/sentencerecognizer#initialize
|
||||
"""
|
||||
validate_get_examples(get_examples, "SentenceRecognizer.initialize")
|
||||
doc_sample = []
|
||||
|
@ -158,7 +158,7 @@ class SentenceRecognizer(Tagger):
|
|||
|
||||
examples (Iterable[Example]): The examples to score.
|
||||
RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans.
|
||||
DOCS: https://nightly.spacy.io/api/sentencerecognizer#score
|
||||
DOCS: https://spacy.io/api/sentencerecognizer#score
|
||||
"""
|
||||
def has_sents(doc):
|
||||
return doc.has_annotation("SENT_START")
|
||||
|
|
|
@ -58,7 +58,7 @@ def make_tagger(nlp: Language, name: str, model: Model):
|
|||
class Tagger(TrainablePipe):
|
||||
"""Pipeline component for part-of-speech tagging.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/tagger
|
||||
DOCS: https://spacy.io/api/tagger
|
||||
"""
|
||||
def __init__(self, vocab, model, name="tagger"):
|
||||
"""Initialize a part-of-speech tagger.
|
||||
|
@ -68,7 +68,7 @@ class Tagger(TrainablePipe):
|
|||
name (str): The component instance name, used to add entries to the
|
||||
losses during training.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/tagger#init
|
||||
DOCS: https://spacy.io/api/tagger#init
|
||||
"""
|
||||
self.vocab = vocab
|
||||
self.model = model
|
||||
|
@ -85,7 +85,7 @@ class Tagger(TrainablePipe):
|
|||
|
||||
RETURNS (Tuple[str]): The labels.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/tagger#labels
|
||||
DOCS: https://spacy.io/api/tagger#labels
|
||||
"""
|
||||
return tuple(self.cfg["labels"])
|
||||
|
||||
|
@ -100,7 +100,7 @@ class Tagger(TrainablePipe):
|
|||
docs (Iterable[Doc]): The documents to predict.
|
||||
RETURNS: The model's prediction for each document.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/tagger#predict
|
||||
DOCS: https://spacy.io/api/tagger#predict
|
||||
"""
|
||||
if not any(len(doc) for doc in docs):
|
||||
# Handle cases where there are no tokens in any docs.
|
||||
|
@ -129,7 +129,7 @@ class Tagger(TrainablePipe):
|
|||
docs (Iterable[Doc]): The documents to modify.
|
||||
batch_tag_ids: The IDs to set, produced by Tagger.predict.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/tagger#set_annotations
|
||||
DOCS: https://spacy.io/api/tagger#set_annotations
|
||||
"""
|
||||
if isinstance(docs, Doc):
|
||||
docs = [docs]
|
||||
|
@ -155,7 +155,7 @@ class Tagger(TrainablePipe):
|
|||
Updated using the component name as the key.
|
||||
RETURNS (Dict[str, float]): The updated losses dictionary.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/tagger#update
|
||||
DOCS: https://spacy.io/api/tagger#update
|
||||
"""
|
||||
if losses is None:
|
||||
losses = {}
|
||||
|
@ -190,7 +190,7 @@ class Tagger(TrainablePipe):
|
|||
Updated using the component name as the key.
|
||||
RETURNS (Dict[str, float]): The updated losses dictionary.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/tagger#rehearse
|
||||
DOCS: https://spacy.io/api/tagger#rehearse
|
||||
"""
|
||||
if losses is None:
|
||||
losses = {}
|
||||
|
@ -219,7 +219,7 @@ class Tagger(TrainablePipe):
|
|||
scores: Scores representing the model's predictions.
|
||||
RETURNS (Tuple[float, float]): The loss and the gradient.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/tagger#get_loss
|
||||
DOCS: https://spacy.io/api/tagger#get_loss
|
||||
"""
|
||||
validate_examples(examples, "Tagger.get_loss")
|
||||
loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False)
|
||||
|
@ -246,7 +246,7 @@ class Tagger(TrainablePipe):
|
|||
`init labels` command. If no labels are provided, the get_examples
|
||||
callback is used to extract the labels from the data.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/tagger#initialize
|
||||
DOCS: https://spacy.io/api/tagger#initialize
|
||||
"""
|
||||
validate_get_examples(get_examples, "Tagger.initialize")
|
||||
if labels is not None:
|
||||
|
@ -278,7 +278,7 @@ class Tagger(TrainablePipe):
|
|||
label (str): The label to add.
|
||||
RETURNS (int): 0 if label is already present, otherwise 1.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/tagger#add_label
|
||||
DOCS: https://spacy.io/api/tagger#add_label
|
||||
"""
|
||||
if not isinstance(label, str):
|
||||
raise ValueError(Errors.E187)
|
||||
|
@ -296,7 +296,7 @@ class Tagger(TrainablePipe):
|
|||
RETURNS (Dict[str, Any]): The scores, produced by
|
||||
Scorer.score_token_attr for the attributes "tag".
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/tagger#score
|
||||
DOCS: https://spacy.io/api/tagger#score
|
||||
"""
|
||||
validate_examples(examples, "Tagger.score")
|
||||
return Scorer.score_token_attr(examples, "tag", **kwargs)
|
||||
|
|
|
@ -104,7 +104,7 @@ def make_textcat(
|
|||
class TextCategorizer(TrainablePipe):
|
||||
"""Pipeline component for single-label text classification.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/textcategorizer
|
||||
DOCS: https://spacy.io/api/textcategorizer
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
|
@ -118,7 +118,7 @@ class TextCategorizer(TrainablePipe):
|
|||
losses during training.
|
||||
threshold (float): Cutoff to consider a prediction "positive".
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/textcategorizer#init
|
||||
DOCS: https://spacy.io/api/textcategorizer#init
|
||||
"""
|
||||
self.vocab = vocab
|
||||
self.model = model
|
||||
|
@ -131,7 +131,7 @@ class TextCategorizer(TrainablePipe):
|
|||
def labels(self) -> Tuple[str]:
|
||||
"""RETURNS (Tuple[str]): The labels currently added to the component.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/textcategorizer#labels
|
||||
DOCS: https://spacy.io/api/textcategorizer#labels
|
||||
"""
|
||||
return tuple(self.cfg["labels"])
|
||||
|
||||
|
@ -139,7 +139,7 @@ class TextCategorizer(TrainablePipe):
|
|||
def label_data(self) -> List[str]:
|
||||
"""RETURNS (List[str]): Information about the component's labels.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/textcategorizer#label_data
|
||||
DOCS: https://spacy.io/api/textcategorizer#label_data
|
||||
"""
|
||||
return self.labels
|
||||
|
||||
|
@ -149,7 +149,7 @@ class TextCategorizer(TrainablePipe):
|
|||
docs (Iterable[Doc]): The documents to predict.
|
||||
RETURNS: The model's prediction for each document.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/textcategorizer#predict
|
||||
DOCS: https://spacy.io/api/textcategorizer#predict
|
||||
"""
|
||||
if not any(len(doc) for doc in docs):
|
||||
# Handle cases where there are no tokens in any docs.
|
||||
|
@ -167,7 +167,7 @@ class TextCategorizer(TrainablePipe):
|
|||
docs (Iterable[Doc]): The documents to modify.
|
||||
scores: The scores to set, produced by TextCategorizer.predict.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/textcategorizer#set_annotations
|
||||
DOCS: https://spacy.io/api/textcategorizer#set_annotations
|
||||
"""
|
||||
for i, doc in enumerate(docs):
|
||||
for j, label in enumerate(self.labels):
|
||||
|
@ -191,7 +191,7 @@ class TextCategorizer(TrainablePipe):
|
|||
Updated using the component name as the key.
|
||||
RETURNS (Dict[str, float]): The updated losses dictionary.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/textcategorizer#update
|
||||
DOCS: https://spacy.io/api/textcategorizer#update
|
||||
"""
|
||||
if losses is None:
|
||||
losses = {}
|
||||
|
@ -230,7 +230,7 @@ class TextCategorizer(TrainablePipe):
|
|||
Updated using the component name as the key.
|
||||
RETURNS (Dict[str, float]): The updated losses dictionary.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/textcategorizer#rehearse
|
||||
DOCS: https://spacy.io/api/textcategorizer#rehearse
|
||||
"""
|
||||
if losses is not None:
|
||||
losses.setdefault(self.name, 0.0)
|
||||
|
@ -275,7 +275,7 @@ class TextCategorizer(TrainablePipe):
|
|||
scores: Scores representing the model's predictions.
|
||||
RETURNS (Tuple[float, float]): The loss and the gradient.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/textcategorizer#get_loss
|
||||
DOCS: https://spacy.io/api/textcategorizer#get_loss
|
||||
"""
|
||||
validate_examples(examples, "TextCategorizer.get_loss")
|
||||
self._validate_categories(examples)
|
||||
|
@ -292,7 +292,7 @@ class TextCategorizer(TrainablePipe):
|
|||
label (str): The label to add.
|
||||
RETURNS (int): 0 if label is already present, otherwise 1.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/textcategorizer#add_label
|
||||
DOCS: https://spacy.io/api/textcategorizer#add_label
|
||||
"""
|
||||
if not isinstance(label, str):
|
||||
raise ValueError(Errors.E187)
|
||||
|
@ -321,7 +321,7 @@ class TextCategorizer(TrainablePipe):
|
|||
`init labels` command. If no labels are provided, the get_examples
|
||||
callback is used to extract the labels from the data.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/textcategorizer#initialize
|
||||
DOCS: https://spacy.io/api/textcategorizer#initialize
|
||||
"""
|
||||
validate_get_examples(get_examples, "TextCategorizer.initialize")
|
||||
self._validate_categories(get_examples())
|
||||
|
@ -354,7 +354,7 @@ class TextCategorizer(TrainablePipe):
|
|||
examples (Iterable[Example]): The examples to score.
|
||||
RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_cats.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/textcategorizer#score
|
||||
DOCS: https://spacy.io/api/textcategorizer#score
|
||||
"""
|
||||
validate_examples(examples, "TextCategorizer.score")
|
||||
self._validate_categories(examples)
|
||||
|
|
|
@ -104,7 +104,7 @@ def make_multilabel_textcat(
|
|||
class MultiLabel_TextCategorizer(TextCategorizer):
|
||||
"""Pipeline component for multi-label text classification.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/multilabel_textcategorizer
|
||||
DOCS: https://spacy.io/api/multilabel_textcategorizer
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
|
@ -123,7 +123,7 @@ class MultiLabel_TextCategorizer(TextCategorizer):
|
|||
losses during training.
|
||||
threshold (float): Cutoff to consider a prediction "positive".
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/multilabel_textcategorizer#init
|
||||
DOCS: https://spacy.io/api/multilabel_textcategorizer#init
|
||||
"""
|
||||
self.vocab = vocab
|
||||
self.model = model
|
||||
|
@ -149,7 +149,7 @@ class MultiLabel_TextCategorizer(TextCategorizer):
|
|||
`init labels` command. If no labels are provided, the get_examples
|
||||
callback is used to extract the labels from the data.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/multilabel_textcategorizer#initialize
|
||||
DOCS: https://spacy.io/api/multilabel_textcategorizer#initialize
|
||||
"""
|
||||
validate_get_examples(get_examples, "MultiLabel_TextCategorizer.initialize")
|
||||
if labels is None:
|
||||
|
@ -173,7 +173,7 @@ class MultiLabel_TextCategorizer(TextCategorizer):
|
|||
examples (Iterable[Example]): The examples to score.
|
||||
RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_cats.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/multilabel_textcategorizer#score
|
||||
DOCS: https://spacy.io/api/multilabel_textcategorizer#score
|
||||
"""
|
||||
validate_examples(examples, "MultiLabel_TextCategorizer.score")
|
||||
return Scorer.score_cats(
|
||||
|
|
|
@ -55,7 +55,7 @@ class Tok2Vec(TrainablePipe):
|
|||
a list of Doc objects as input, and output a list of 2d float arrays.
|
||||
name (str): The component instance name.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/tok2vec#init
|
||||
DOCS: https://spacy.io/api/tok2vec#init
|
||||
"""
|
||||
self.vocab = vocab
|
||||
self.model = model
|
||||
|
@ -115,7 +115,7 @@ class Tok2Vec(TrainablePipe):
|
|||
docs (Iterable[Doc]): The documents to predict.
|
||||
RETURNS: Vector representations for each token in the documents.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/tok2vec#predict
|
||||
DOCS: https://spacy.io/api/tok2vec#predict
|
||||
"""
|
||||
tokvecs = self.model.predict(docs)
|
||||
batch_id = Tok2VecListener.get_batch_id(docs)
|
||||
|
@ -129,7 +129,7 @@ class Tok2Vec(TrainablePipe):
|
|||
docs (Iterable[Doc]): The documents to modify.
|
||||
tokvecses: The tensors to set, produced by Tok2Vec.predict.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/tok2vec#set_annotations
|
||||
DOCS: https://spacy.io/api/tok2vec#set_annotations
|
||||
"""
|
||||
for doc, tokvecs in zip(docs, tokvecses):
|
||||
assert tokvecs.shape[0] == len(doc)
|
||||
|
@ -153,7 +153,7 @@ class Tok2Vec(TrainablePipe):
|
|||
Updated using the component name as the key.
|
||||
RETURNS (Dict[str, float]): The updated losses dictionary.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/tok2vec#update
|
||||
DOCS: https://spacy.io/api/tok2vec#update
|
||||
"""
|
||||
if losses is None:
|
||||
losses = {}
|
||||
|
@ -204,7 +204,7 @@ class Tok2Vec(TrainablePipe):
|
|||
returns a representative sample of gold-standard Example objects.
|
||||
nlp (Language): The current nlp object the component is part of.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/tok2vec#initialize
|
||||
DOCS: https://spacy.io/api/tok2vec#initialize
|
||||
"""
|
||||
validate_get_examples(get_examples, "Tok2Vec.initialize")
|
||||
doc_sample = []
|
||||
|
|
|
@ -20,7 +20,7 @@ cdef class TrainablePipe(Pipe):
|
|||
from it and it defines the interface that components should follow to
|
||||
function as trainable components in a spaCy pipeline.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/pipe
|
||||
DOCS: https://spacy.io/api/pipe
|
||||
"""
|
||||
def __init__(self, vocab: Vocab, model: Model, name: str, **cfg):
|
||||
"""Initialize a pipeline component.
|
||||
|
@ -30,7 +30,7 @@ cdef class TrainablePipe(Pipe):
|
|||
name (str): The component instance name.
|
||||
**cfg: Additional settings and config parameters.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/pipe#init
|
||||
DOCS: https://spacy.io/api/pipe#init
|
||||
"""
|
||||
self.vocab = vocab
|
||||
self.model = model
|
||||
|
@ -45,7 +45,7 @@ cdef class TrainablePipe(Pipe):
|
|||
docs (Doc): The Doc to process.
|
||||
RETURNS (Doc): The processed Doc.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/pipe#call
|
||||
DOCS: https://spacy.io/api/pipe#call
|
||||
"""
|
||||
error_handler = self.get_error_handler()
|
||||
try:
|
||||
|
@ -67,7 +67,7 @@ cdef class TrainablePipe(Pipe):
|
|||
the exception.
|
||||
YIELDS (Doc): Processed documents in order.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/pipe#pipe
|
||||
DOCS: https://spacy.io/api/pipe#pipe
|
||||
"""
|
||||
error_handler = self.get_error_handler()
|
||||
for docs in util.minibatch(stream, size=batch_size):
|
||||
|
@ -85,7 +85,7 @@ cdef class TrainablePipe(Pipe):
|
|||
docs (Iterable[Doc]): The documents to predict.
|
||||
RETURNS: Vector representations of the predictions.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/pipe#predict
|
||||
DOCS: https://spacy.io/api/pipe#predict
|
||||
"""
|
||||
raise NotImplementedError(Errors.E931.format(parent="TrainablePipe", method="predict", name=self.name))
|
||||
|
||||
|
@ -95,7 +95,7 @@ cdef class TrainablePipe(Pipe):
|
|||
docs (Iterable[Doc]): The documents to modify.
|
||||
scores: The scores to assign.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/pipe#set_annotations
|
||||
DOCS: https://spacy.io/api/pipe#set_annotations
|
||||
"""
|
||||
raise NotImplementedError(Errors.E931.format(parent="TrainablePipe", method="set_annotations", name=self.name))
|
||||
|
||||
|
@ -114,7 +114,7 @@ cdef class TrainablePipe(Pipe):
|
|||
Updated using the component name as the key.
|
||||
RETURNS (Dict[str, float]): The updated losses dictionary.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/pipe#update
|
||||
DOCS: https://spacy.io/api/pipe#update
|
||||
"""
|
||||
if losses is None:
|
||||
losses = {}
|
||||
|
@ -151,7 +151,7 @@ cdef class TrainablePipe(Pipe):
|
|||
Updated using the component name as the key.
|
||||
RETURNS (Dict[str, float]): The updated losses dictionary.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/pipe#rehearse
|
||||
DOCS: https://spacy.io/api/pipe#rehearse
|
||||
"""
|
||||
pass
|
||||
|
||||
|
@ -163,7 +163,7 @@ cdef class TrainablePipe(Pipe):
|
|||
scores: Scores representing the model's predictions.
|
||||
RETURNS (Tuple[float, float]): The loss and the gradient.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/pipe#get_loss
|
||||
DOCS: https://spacy.io/api/pipe#get_loss
|
||||
"""
|
||||
raise NotImplementedError(Errors.E931.format(parent="TrainablePipe", method="get_loss", name=self.name))
|
||||
|
||||
|
@ -172,7 +172,7 @@ cdef class TrainablePipe(Pipe):
|
|||
|
||||
RETURNS (thinc.api.Optimizer): The optimizer.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/pipe#create_optimizer
|
||||
DOCS: https://spacy.io/api/pipe#create_optimizer
|
||||
"""
|
||||
return util.create_default_optimizer()
|
||||
|
||||
|
@ -186,7 +186,7 @@ cdef class TrainablePipe(Pipe):
|
|||
returns a representative sample of gold-standard Example objects.
|
||||
nlp (Language): The current nlp object the component is part of.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/pipe#initialize
|
||||
DOCS: https://spacy.io/api/pipe#initialize
|
||||
"""
|
||||
raise NotImplementedError(Errors.E931.format(parent="TrainablePipe", method="initialize", name=self.name))
|
||||
|
||||
|
@ -199,7 +199,7 @@ cdef class TrainablePipe(Pipe):
|
|||
label (str): The label to add.
|
||||
RETURNS (int): 0 if label is already present, otherwise 1.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/pipe#add_label
|
||||
DOCS: https://spacy.io/api/pipe#add_label
|
||||
"""
|
||||
raise NotImplementedError(Errors.E931.format(parent="Pipe", method="add_label", name=self.name))
|
||||
|
||||
|
@ -229,7 +229,7 @@ cdef class TrainablePipe(Pipe):
|
|||
|
||||
params (dict): The parameter values to use in the model.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/pipe#use_params
|
||||
DOCS: https://spacy.io/api/pipe#use_params
|
||||
"""
|
||||
with self.model.use_params(params):
|
||||
yield
|
||||
|
@ -241,7 +241,7 @@ cdef class TrainablePipe(Pipe):
|
|||
|
||||
sgd (thinc.api.Optimizer): The optimizer.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/pipe#finish_update
|
||||
DOCS: https://spacy.io/api/pipe#finish_update
|
||||
"""
|
||||
self.model.finish_update(sgd)
|
||||
|
||||
|
@ -261,7 +261,7 @@ cdef class TrainablePipe(Pipe):
|
|||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||
RETURNS (bytes): The serialized object.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/pipe#to_bytes
|
||||
DOCS: https://spacy.io/api/pipe#to_bytes
|
||||
"""
|
||||
self._validate_serialization_attrs()
|
||||
serialize = {}
|
||||
|
@ -277,7 +277,7 @@ cdef class TrainablePipe(Pipe):
|
|||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||
RETURNS (TrainablePipe): The loaded object.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/pipe#from_bytes
|
||||
DOCS: https://spacy.io/api/pipe#from_bytes
|
||||
"""
|
||||
self._validate_serialization_attrs()
|
||||
|
||||
|
@ -301,7 +301,7 @@ cdef class TrainablePipe(Pipe):
|
|||
path (str / Path): Path to a directory.
|
||||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/pipe#to_disk
|
||||
DOCS: https://spacy.io/api/pipe#to_disk
|
||||
"""
|
||||
self._validate_serialization_attrs()
|
||||
serialize = {}
|
||||
|
@ -318,7 +318,7 @@ cdef class TrainablePipe(Pipe):
|
|||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||
RETURNS (TrainablePipe): The loaded object.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/pipe#from_disk
|
||||
DOCS: https://spacy.io/api/pipe#from_disk
|
||||
"""
|
||||
self._validate_serialization_attrs()
|
||||
|
||||
|
|
|
@ -103,7 +103,7 @@ class Scorer:
|
|||
) -> None:
|
||||
"""Initialize the Scorer.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/scorer#init
|
||||
DOCS: https://spacy.io/api/scorer#init
|
||||
"""
|
||||
self.nlp = nlp
|
||||
self.cfg = cfg
|
||||
|
@ -119,7 +119,7 @@ class Scorer:
|
|||
examples (Iterable[Example]): The predicted annotations + correct annotations.
|
||||
RETURNS (Dict): A dictionary of scores.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/scorer#score
|
||||
DOCS: https://spacy.io/api/scorer#score
|
||||
"""
|
||||
scores = {}
|
||||
if hasattr(self.nlp.tokenizer, "score"):
|
||||
|
@ -139,7 +139,7 @@ class Scorer:
|
|||
RETURNS (Dict[str, Any]): A dictionary containing the scores
|
||||
token_acc/p/r/f.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/scorer#score_tokenization
|
||||
DOCS: https://spacy.io/api/scorer#score_tokenization
|
||||
"""
|
||||
acc_score = PRFScore()
|
||||
prf_score = PRFScore()
|
||||
|
@ -198,7 +198,7 @@ class Scorer:
|
|||
RETURNS (Dict[str, Any]): A dictionary containing the accuracy score
|
||||
under the key attr_acc.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/scorer#score_token_attr
|
||||
DOCS: https://spacy.io/api/scorer#score_token_attr
|
||||
"""
|
||||
tag_score = PRFScore()
|
||||
for example in examples:
|
||||
|
@ -317,7 +317,7 @@ class Scorer:
|
|||
RETURNS (Dict[str, Any]): A dictionary containing the PRF scores under
|
||||
the keys attr_p/r/f and the per-type PRF scores under attr_per_type.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/scorer#score_spans
|
||||
DOCS: https://spacy.io/api/scorer#score_spans
|
||||
"""
|
||||
score = PRFScore()
|
||||
score_per_type = dict()
|
||||
|
@ -413,7 +413,7 @@ class Scorer:
|
|||
attr_f_per_type,
|
||||
attr_auc_per_type
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/scorer#score_cats
|
||||
DOCS: https://spacy.io/api/scorer#score_cats
|
||||
"""
|
||||
if threshold is None:
|
||||
threshold = 0.5 if multi_label else 0.0
|
||||
|
@ -519,7 +519,7 @@ class Scorer:
|
|||
negative_labels (Iterable[str]): The string values that refer to no annotation (e.g. "NIL")
|
||||
RETURNS (Dict[str, Any]): A dictionary containing the scores.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/scorer#score_links
|
||||
DOCS: https://spacy.io/api/scorer#score_links
|
||||
"""
|
||||
f_per_type = {}
|
||||
for example in examples:
|
||||
|
@ -603,7 +603,7 @@ class Scorer:
|
|||
RETURNS (Dict[str, Any]): A dictionary containing the scores:
|
||||
attr_uas, attr_las, and attr_las_per_type.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/scorer#score_deps
|
||||
DOCS: https://spacy.io/api/scorer#score_deps
|
||||
"""
|
||||
unlabelled = PRFScore()
|
||||
labelled = PRFScore()
|
||||
|
|
|
@ -91,7 +91,7 @@ cdef Utf8Str* _allocate(Pool mem, const unsigned char* chars, uint32_t length) e
|
|||
cdef class StringStore:
|
||||
"""Look up strings by 64-bit hashes.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/stringstore
|
||||
DOCS: https://spacy.io/api/stringstore
|
||||
"""
|
||||
def __init__(self, strings=None, freeze=False):
|
||||
"""Create the StringStore.
|
||||
|
|
|
@ -31,7 +31,7 @@ cdef class Tokenizer:
|
|||
"""Segment text, and create Doc objects with the discovered segment
|
||||
boundaries.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/tokenizer
|
||||
DOCS: https://spacy.io/api/tokenizer
|
||||
"""
|
||||
def __init__(self, Vocab vocab, rules=None, prefix_search=None,
|
||||
suffix_search=None, infix_finditer=None, token_match=None,
|
||||
|
@ -54,7 +54,7 @@ cdef class Tokenizer:
|
|||
EXAMPLE:
|
||||
>>> tokenizer = Tokenizer(nlp.vocab)
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/tokenizer#init
|
||||
DOCS: https://spacy.io/api/tokenizer#init
|
||||
"""
|
||||
self.mem = Pool()
|
||||
self._cache = PreshMap()
|
||||
|
@ -147,7 +147,7 @@ cdef class Tokenizer:
|
|||
string (str): The string to tokenize.
|
||||
RETURNS (Doc): A container for linguistic annotations.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/tokenizer#call
|
||||
DOCS: https://spacy.io/api/tokenizer#call
|
||||
"""
|
||||
doc = self._tokenize_affixes(string, True)
|
||||
self._apply_special_cases(doc)
|
||||
|
@ -209,7 +209,7 @@ cdef class Tokenizer:
|
|||
Defaults to 1000.
|
||||
YIELDS (Doc): A sequence of Doc objects, in order.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/tokenizer#pipe
|
||||
DOCS: https://spacy.io/api/tokenizer#pipe
|
||||
"""
|
||||
for text in texts:
|
||||
yield self(text)
|
||||
|
@ -529,7 +529,7 @@ cdef class Tokenizer:
|
|||
and `.end()` methods, denoting the placement of internal segment
|
||||
separators, e.g. hyphens.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/tokenizer#find_infix
|
||||
DOCS: https://spacy.io/api/tokenizer#find_infix
|
||||
"""
|
||||
if self.infix_finditer is None:
|
||||
return 0
|
||||
|
@ -542,7 +542,7 @@ cdef class Tokenizer:
|
|||
string (str): The string to segment.
|
||||
RETURNS (int): The length of the prefix if present, otherwise `None`.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/tokenizer#find_prefix
|
||||
DOCS: https://spacy.io/api/tokenizer#find_prefix
|
||||
"""
|
||||
if self.prefix_search is None:
|
||||
return 0
|
||||
|
@ -556,7 +556,7 @@ cdef class Tokenizer:
|
|||
string (str): The string to segment.
|
||||
RETURNS (int): The length of the suffix if present, otherwise `None`.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/tokenizer#find_suffix
|
||||
DOCS: https://spacy.io/api/tokenizer#find_suffix
|
||||
"""
|
||||
if self.suffix_search is None:
|
||||
return 0
|
||||
|
@ -596,7 +596,7 @@ cdef class Tokenizer:
|
|||
a token and its attributes. The `ORTH` fields of the attributes
|
||||
must exactly match the string when they are concatenated.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/tokenizer#add_special_case
|
||||
DOCS: https://spacy.io/api/tokenizer#add_special_case
|
||||
"""
|
||||
self._validate_special_case(string, substrings)
|
||||
substrings = list(substrings)
|
||||
|
@ -635,7 +635,7 @@ cdef class Tokenizer:
|
|||
string (str): The string to tokenize.
|
||||
RETURNS (list): A list of (pattern_string, token_string) tuples
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/tokenizer#explain
|
||||
DOCS: https://spacy.io/api/tokenizer#explain
|
||||
"""
|
||||
prefix_search = self.prefix_search
|
||||
suffix_search = self.suffix_search
|
||||
|
@ -718,7 +718,7 @@ cdef class Tokenizer:
|
|||
it doesn't exist.
|
||||
exclude (list): String names of serialization fields to exclude.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/tokenizer#to_disk
|
||||
DOCS: https://spacy.io/api/tokenizer#to_disk
|
||||
"""
|
||||
path = util.ensure_path(path)
|
||||
with path.open("wb") as file_:
|
||||
|
@ -732,7 +732,7 @@ cdef class Tokenizer:
|
|||
exclude (list): String names of serialization fields to exclude.
|
||||
RETURNS (Tokenizer): The modified `Tokenizer` object.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/tokenizer#from_disk
|
||||
DOCS: https://spacy.io/api/tokenizer#from_disk
|
||||
"""
|
||||
path = util.ensure_path(path)
|
||||
with path.open("rb") as file_:
|
||||
|
@ -746,7 +746,7 @@ cdef class Tokenizer:
|
|||
exclude (list): String names of serialization fields to exclude.
|
||||
RETURNS (bytes): The serialized form of the `Tokenizer` object.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/tokenizer#to_bytes
|
||||
DOCS: https://spacy.io/api/tokenizer#to_bytes
|
||||
"""
|
||||
serializers = {
|
||||
"vocab": lambda: self.vocab.to_bytes(),
|
||||
|
@ -766,7 +766,7 @@ cdef class Tokenizer:
|
|||
exclude (list): String names of serialization fields to exclude.
|
||||
RETURNS (Tokenizer): The `Tokenizer` object.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/tokenizer#from_bytes
|
||||
DOCS: https://spacy.io/api/tokenizer#from_bytes
|
||||
"""
|
||||
data = {}
|
||||
deserializers = {
|
||||
|
|
|
@ -24,8 +24,8 @@ from ..strings import get_string_id
|
|||
cdef class Retokenizer:
|
||||
"""Helper class for doc.retokenize() context manager.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/doc#retokenize
|
||||
USAGE: https://nightly.spacy.io/usage/linguistic-features#retokenization
|
||||
DOCS: https://spacy.io/api/doc#retokenize
|
||||
USAGE: https://spacy.io/usage/linguistic-features#retokenization
|
||||
"""
|
||||
cdef Doc doc
|
||||
cdef list merges
|
||||
|
@ -47,7 +47,7 @@ cdef class Retokenizer:
|
|||
span (Span): The span to merge.
|
||||
attrs (dict): Attributes to set on the merged token.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/doc#retokenizer.merge
|
||||
DOCS: https://spacy.io/api/doc#retokenizer.merge
|
||||
"""
|
||||
if (span.start, span.end) in self._spans_to_merge:
|
||||
return
|
||||
|
@ -73,7 +73,7 @@ cdef class Retokenizer:
|
|||
attrs (dict): Attributes to set on all split tokens. Attribute names
|
||||
mapped to list of per-token attribute values.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/doc#retokenizer.split
|
||||
DOCS: https://spacy.io/api/doc#retokenizer.split
|
||||
"""
|
||||
if ''.join(orths) != token.text:
|
||||
raise ValueError(Errors.E117.format(new=''.join(orths), old=token.text))
|
||||
|
|
|
@ -62,7 +62,7 @@ class DocBin:
|
|||
store_user_data (bool): Whether to write the `Doc.user_data` to bytes/file.
|
||||
docs (Iterable[Doc]): Docs to add.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/docbin#init
|
||||
DOCS: https://spacy.io/api/docbin#init
|
||||
"""
|
||||
attrs = sorted([intify_attr(attr) for attr in attrs])
|
||||
self.version = "0.1"
|
||||
|
@ -88,7 +88,7 @@ class DocBin:
|
|||
|
||||
doc (Doc): The Doc object to add.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/docbin#add
|
||||
DOCS: https://spacy.io/api/docbin#add
|
||||
"""
|
||||
array = doc.to_array(self.attrs)
|
||||
if len(array.shape) == 1:
|
||||
|
@ -122,7 +122,7 @@ class DocBin:
|
|||
vocab (Vocab): The shared vocab.
|
||||
YIELDS (Doc): The Doc objects.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/docbin#get_docs
|
||||
DOCS: https://spacy.io/api/docbin#get_docs
|
||||
"""
|
||||
for string in self.strings:
|
||||
vocab[string]
|
||||
|
@ -153,7 +153,7 @@ class DocBin:
|
|||
|
||||
other (DocBin): The DocBin to merge into the current bin.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/docbin#merge
|
||||
DOCS: https://spacy.io/api/docbin#merge
|
||||
"""
|
||||
if self.attrs != other.attrs:
|
||||
raise ValueError(
|
||||
|
@ -180,7 +180,7 @@ class DocBin:
|
|||
|
||||
RETURNS (bytes): The serialized DocBin.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/docbin#to_bytes
|
||||
DOCS: https://spacy.io/api/docbin#to_bytes
|
||||
"""
|
||||
for tokens in self.tokens:
|
||||
assert len(tokens.shape) == 2, tokens.shape # this should never happen
|
||||
|
@ -208,7 +208,7 @@ class DocBin:
|
|||
bytes_data (bytes): The data to load from.
|
||||
RETURNS (DocBin): The loaded DocBin.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/docbin#from_bytes
|
||||
DOCS: https://spacy.io/api/docbin#from_bytes
|
||||
"""
|
||||
try:
|
||||
msg = srsly.msgpack_loads(zlib.decompress(bytes_data))
|
||||
|
@ -240,7 +240,7 @@ class DocBin:
|
|||
|
||||
path (str / Path): The file path.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/docbin#to_disk
|
||||
DOCS: https://spacy.io/api/docbin#to_disk
|
||||
"""
|
||||
path = ensure_path(path)
|
||||
with path.open("wb") as file_:
|
||||
|
@ -252,7 +252,7 @@ class DocBin:
|
|||
path (str / Path): The file path.
|
||||
RETURNS (DocBin): The loaded DocBin.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/docbin#to_disk
|
||||
DOCS: https://spacy.io/api/docbin#from_disk
|
||||
"""
|
||||
path = ensure_path(path)
|
||||
with path.open("rb") as file_:
|
||||
|
|
|
@ -116,7 +116,7 @@ cdef class Doc:
|
|||
>>> from spacy.tokens import Doc
|
||||
>>> doc = Doc(nlp.vocab, words=["hello", "world", "!"], spaces=[True, False, False])
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/doc
|
||||
DOCS: https://spacy.io/api/doc
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
|
@ -130,8 +130,8 @@ cdef class Doc:
|
|||
method (callable): Optional method for method extension.
|
||||
force (bool): Force overwriting existing attribute.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/doc#set_extension
|
||||
USAGE: https://nightly.spacy.io/usage/processing-pipelines#custom-components-attributes
|
||||
DOCS: https://spacy.io/api/doc#set_extension
|
||||
USAGE: https://spacy.io/usage/processing-pipelines#custom-components-attributes
|
||||
"""
|
||||
if cls.has_extension(name) and not kwargs.get("force", False):
|
||||
raise ValueError(Errors.E090.format(name=name, obj="Doc"))
|
||||
|
@ -144,7 +144,7 @@ cdef class Doc:
|
|||
name (str): Name of the extension.
|
||||
RETURNS (tuple): A `(default, method, getter, setter)` tuple.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/doc#get_extension
|
||||
DOCS: https://spacy.io/api/doc#get_extension
|
||||
"""
|
||||
return Underscore.doc_extensions.get(name)
|
||||
|
||||
|
@ -155,7 +155,7 @@ cdef class Doc:
|
|||
name (str): Name of the extension.
|
||||
RETURNS (bool): Whether the extension has been registered.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/doc#has_extension
|
||||
DOCS: https://spacy.io/api/doc#has_extension
|
||||
"""
|
||||
return name in Underscore.doc_extensions
|
||||
|
||||
|
@ -167,7 +167,7 @@ cdef class Doc:
|
|||
RETURNS (tuple): A `(default, method, getter, setter)` tuple of the
|
||||
removed extension.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/doc#remove_extension
|
||||
DOCS: https://spacy.io/api/doc#remove_extension
|
||||
"""
|
||||
if not cls.has_extension(name):
|
||||
raise ValueError(Errors.E046.format(name=name))
|
||||
|
@ -219,7 +219,7 @@ cdef class Doc:
|
|||
length as words, as IOB tags to assign as token.ent_iob and
|
||||
token.ent_type. Defaults to None.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/doc#init
|
||||
DOCS: https://spacy.io/api/doc#init
|
||||
"""
|
||||
self.vocab = vocab
|
||||
size = max(20, (len(words) if words is not None else 0))
|
||||
|
@ -399,7 +399,7 @@ cdef class Doc:
|
|||
every token in the doc.
|
||||
RETURNS (bool): Whether annotation is present.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/doc#has_annotation
|
||||
DOCS: https://spacy.io/api/doc#has_annotation
|
||||
"""
|
||||
|
||||
# empty docs are always annotated
|
||||
|
@ -450,7 +450,7 @@ cdef class Doc:
|
|||
You can use negative indices and open-ended ranges, which have
|
||||
their normal Python semantics.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/doc#getitem
|
||||
DOCS: https://spacy.io/api/doc#getitem
|
||||
"""
|
||||
if isinstance(i, slice):
|
||||
start, stop = util.normalize_slice(len(self), i.start, i.stop, i.step)
|
||||
|
@ -467,7 +467,7 @@ cdef class Doc:
|
|||
than-Python speeds are required, you can instead access the annotations
|
||||
as a numpy array, or access the underlying C data directly from Cython.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/doc#iter
|
||||
DOCS: https://spacy.io/api/doc#iter
|
||||
"""
|
||||
cdef int i
|
||||
for i in range(self.length):
|
||||
|
@ -478,7 +478,7 @@ cdef class Doc:
|
|||
|
||||
RETURNS (int): The number of tokens in the document.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/doc#len
|
||||
DOCS: https://spacy.io/api/doc#len
|
||||
"""
|
||||
return self.length
|
||||
|
||||
|
@ -519,7 +519,7 @@ cdef class Doc:
|
|||
partially covered by the character span). Defaults to "strict".
|
||||
RETURNS (Span): The newly constructed object.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/doc#char_span
|
||||
DOCS: https://spacy.io/api/doc#char_span
|
||||
"""
|
||||
if not isinstance(label, int):
|
||||
label = self.vocab.strings.add(label)
|
||||
|
@ -562,7 +562,7 @@ cdef class Doc:
|
|||
`Span`, `Token` and `Lexeme` objects.
|
||||
RETURNS (float): A scalar similarity score. Higher is more similar.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/doc#similarity
|
||||
DOCS: https://spacy.io/api/doc#similarity
|
||||
"""
|
||||
if "similarity" in self.user_hooks:
|
||||
return self.user_hooks["similarity"](self, other)
|
||||
|
@ -595,7 +595,7 @@ cdef class Doc:
|
|||
|
||||
RETURNS (bool): Whether a word vector is associated with the object.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/doc#has_vector
|
||||
DOCS: https://spacy.io/api/doc#has_vector
|
||||
"""
|
||||
if "has_vector" in self.user_hooks:
|
||||
return self.user_hooks["has_vector"](self)
|
||||
|
@ -613,7 +613,7 @@ cdef class Doc:
|
|||
RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
|
||||
representing the document's semantics.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/doc#vector
|
||||
DOCS: https://spacy.io/api/doc#vector
|
||||
"""
|
||||
def __get__(self):
|
||||
if "vector" in self.user_hooks:
|
||||
|
@ -641,7 +641,7 @@ cdef class Doc:
|
|||
|
||||
RETURNS (float): The L2 norm of the vector representation.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/doc#vector_norm
|
||||
DOCS: https://spacy.io/api/doc#vector_norm
|
||||
"""
|
||||
def __get__(self):
|
||||
if "vector_norm" in self.user_hooks:
|
||||
|
@ -681,7 +681,7 @@ cdef class Doc:
|
|||
|
||||
RETURNS (tuple): Entities in the document, one `Span` per entity.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/doc#ents
|
||||
DOCS: https://spacy.io/api/doc#ents
|
||||
"""
|
||||
def __get__(self):
|
||||
cdef int i
|
||||
|
@ -827,7 +827,7 @@ cdef class Doc:
|
|||
|
||||
YIELDS (Span): Noun chunks in the document.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/doc#noun_chunks
|
||||
DOCS: https://spacy.io/api/doc#noun_chunks
|
||||
"""
|
||||
if self.noun_chunks_iterator is None:
|
||||
raise NotImplementedError(Errors.E894.format(lang=self.vocab.lang))
|
||||
|
@ -850,7 +850,7 @@ cdef class Doc:
|
|||
|
||||
YIELDS (Span): Sentences in the document.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/doc#sents
|
||||
DOCS: https://spacy.io/api/doc#sents
|
||||
"""
|
||||
if not self.has_annotation("SENT_START"):
|
||||
raise ValueError(Errors.E030)
|
||||
|
@ -959,7 +959,7 @@ cdef class Doc:
|
|||
attr_id (int): The attribute ID to key the counts.
|
||||
RETURNS (dict): A dictionary mapping attributes to integer counts.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/doc#count_by
|
||||
DOCS: https://spacy.io/api/doc#count_by
|
||||
"""
|
||||
cdef int i
|
||||
cdef attr_t attr
|
||||
|
@ -1006,7 +1006,7 @@ cdef class Doc:
|
|||
array (numpy.ndarray[ndim=2, dtype='int32']): The attribute values.
|
||||
RETURNS (Doc): Itself.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/doc#from_array
|
||||
DOCS: https://spacy.io/api/doc#from_array
|
||||
"""
|
||||
# Handle scalar/list inputs of strings/ints for py_attr_ids
|
||||
# See also #3064
|
||||
|
@ -1098,7 +1098,7 @@ cdef class Doc:
|
|||
attrs (list): Optional list of attribute ID ints or attribute name strings.
|
||||
RETURNS (Doc): A doc that contains the concatenated docs, or None if no docs were given.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/doc#from_docs
|
||||
DOCS: https://spacy.io/api/doc#from_docs
|
||||
"""
|
||||
if not docs:
|
||||
return None
|
||||
|
@ -1170,7 +1170,7 @@ cdef class Doc:
|
|||
RETURNS (np.array[ndim=2, dtype=numpy.int32]): LCA matrix with shape
|
||||
(n, n), where n = len(self).
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/doc#get_lca_matrix
|
||||
DOCS: https://spacy.io/api/doc#get_lca_matrix
|
||||
"""
|
||||
return numpy.asarray(_get_lca_matrix(self, 0, len(self)))
|
||||
|
||||
|
@ -1203,7 +1203,7 @@ cdef class Doc:
|
|||
it doesn't exist. Paths may be either strings or Path-like objects.
|
||||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/doc#to_disk
|
||||
DOCS: https://spacy.io/api/doc#to_disk
|
||||
"""
|
||||
path = util.ensure_path(path)
|
||||
with path.open("wb") as file_:
|
||||
|
@ -1218,7 +1218,7 @@ cdef class Doc:
|
|||
exclude (list): String names of serialization fields to exclude.
|
||||
RETURNS (Doc): The modified `Doc` object.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/doc#from_disk
|
||||
DOCS: https://spacy.io/api/doc#from_disk
|
||||
"""
|
||||
path = util.ensure_path(path)
|
||||
with path.open("rb") as file_:
|
||||
|
@ -1232,7 +1232,7 @@ cdef class Doc:
|
|||
RETURNS (bytes): A losslessly serialized copy of the `Doc`, including
|
||||
all annotations.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/doc#to_bytes
|
||||
DOCS: https://spacy.io/api/doc#to_bytes
|
||||
"""
|
||||
return srsly.msgpack_dumps(self.to_dict(exclude=exclude))
|
||||
|
||||
|
@ -1243,7 +1243,7 @@ cdef class Doc:
|
|||
exclude (list): String names of serialization fields to exclude.
|
||||
RETURNS (Doc): Itself.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/doc#from_bytes
|
||||
DOCS: https://spacy.io/api/doc#from_bytes
|
||||
"""
|
||||
return self.from_dict(srsly.msgpack_loads(bytes_data), exclude=exclude)
|
||||
|
||||
|
@ -1254,7 +1254,7 @@ cdef class Doc:
|
|||
RETURNS (bytes): A losslessly serialized copy of the `Doc`, including
|
||||
all annotations.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/doc#to_bytes
|
||||
DOCS: https://spacy.io/api/doc#to_bytes
|
||||
"""
|
||||
array_head = Doc._get_array_attrs()
|
||||
strings = set()
|
||||
|
@ -1302,7 +1302,7 @@ cdef class Doc:
|
|||
exclude (list): String names of serialization fields to exclude.
|
||||
RETURNS (Doc): Itself.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/doc#from_dict
|
||||
DOCS: https://spacy.io/api/doc#from_dict
|
||||
"""
|
||||
if self.length != 0:
|
||||
raise ValueError(Errors.E033.format(length=self.length))
|
||||
|
@ -1373,8 +1373,8 @@ cdef class Doc:
|
|||
retokenization are invalidated, although they may accidentally
|
||||
continue to work.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/doc#retokenize
|
||||
USAGE: https://nightly.spacy.io/usage/linguistic-features#retokenization
|
||||
DOCS: https://spacy.io/api/doc#retokenize
|
||||
USAGE: https://spacy.io/usage/linguistic-features#retokenization
|
||||
"""
|
||||
return Retokenizer(self)
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@ from .underscore import Underscore, get_ext_args
|
|||
cdef class Span:
|
||||
"""A slice from a Doc object.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/span
|
||||
DOCS: https://spacy.io/api/span
|
||||
"""
|
||||
@classmethod
|
||||
def set_extension(cls, name, **kwargs):
|
||||
|
@ -37,8 +37,8 @@ cdef class Span:
|
|||
method (callable): Optional method for method extension.
|
||||
force (bool): Force overwriting existing attribute.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/span#set_extension
|
||||
USAGE: https://nightly.spacy.io/usage/processing-pipelines#custom-components-attributes
|
||||
DOCS: https://spacy.io/api/span#set_extension
|
||||
USAGE: https://spacy.io/usage/processing-pipelines#custom-components-attributes
|
||||
"""
|
||||
if cls.has_extension(name) and not kwargs.get("force", False):
|
||||
raise ValueError(Errors.E090.format(name=name, obj="Span"))
|
||||
|
@ -51,7 +51,7 @@ cdef class Span:
|
|||
name (str): Name of the extension.
|
||||
RETURNS (tuple): A `(default, method, getter, setter)` tuple.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/span#get_extension
|
||||
DOCS: https://spacy.io/api/span#get_extension
|
||||
"""
|
||||
return Underscore.span_extensions.get(name)
|
||||
|
||||
|
@ -62,7 +62,7 @@ cdef class Span:
|
|||
name (str): Name of the extension.
|
||||
RETURNS (bool): Whether the extension has been registered.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/span#has_extension
|
||||
DOCS: https://spacy.io/api/span#has_extension
|
||||
"""
|
||||
return name in Underscore.span_extensions
|
||||
|
||||
|
@ -74,7 +74,7 @@ cdef class Span:
|
|||
RETURNS (tuple): A `(default, method, getter, setter)` tuple of the
|
||||
removed extension.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/span#remove_extension
|
||||
DOCS: https://spacy.io/api/span#remove_extension
|
||||
"""
|
||||
if not cls.has_extension(name):
|
||||
raise ValueError(Errors.E046.format(name=name))
|
||||
|
@ -92,7 +92,7 @@ cdef class Span:
|
|||
vector (ndarray[ndim=1, dtype='float32']): A meaning representation
|
||||
of the span.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/span#init
|
||||
DOCS: https://spacy.io/api/span#init
|
||||
"""
|
||||
if not (0 <= start <= end <= len(doc)):
|
||||
raise IndexError(Errors.E035.format(start=start, end=end, length=len(doc)))
|
||||
|
@ -162,7 +162,7 @@ cdef class Span:
|
|||
|
||||
RETURNS (int): The number of tokens in the span.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/span#len
|
||||
DOCS: https://spacy.io/api/span#len
|
||||
"""
|
||||
if self.c.end < self.c.start:
|
||||
return 0
|
||||
|
@ -178,7 +178,7 @@ cdef class Span:
|
|||
the span to get.
|
||||
RETURNS (Token or Span): The token at `span[i]`.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/span#getitem
|
||||
DOCS: https://spacy.io/api/span#getitem
|
||||
"""
|
||||
if isinstance(i, slice):
|
||||
start, end = normalize_slice(len(self), i.start, i.stop, i.step)
|
||||
|
@ -198,7 +198,7 @@ cdef class Span:
|
|||
|
||||
YIELDS (Token): A `Token` object.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/span#iter
|
||||
DOCS: https://spacy.io/api/span#iter
|
||||
"""
|
||||
for i in range(self.c.start, self.c.end):
|
||||
yield self.doc[i]
|
||||
|
@ -218,7 +218,7 @@ cdef class Span:
|
|||
copy_user_data (bool): Whether or not to copy the original doc's user data.
|
||||
RETURNS (Doc): The `Doc` copy of the span.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/span#as_doc
|
||||
DOCS: https://spacy.io/api/span#as_doc
|
||||
"""
|
||||
words = [t.text for t in self]
|
||||
spaces = [bool(t.whitespace_) for t in self]
|
||||
|
@ -291,7 +291,7 @@ cdef class Span:
|
|||
RETURNS (np.array[ndim=2, dtype=numpy.int32]): LCA matrix with shape
|
||||
(n, n), where n = len(self).
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/span#get_lca_matrix
|
||||
DOCS: https://spacy.io/api/span#get_lca_matrix
|
||||
"""
|
||||
return numpy.asarray(_get_lca_matrix(self.doc, self.c.start, self.c.end))
|
||||
|
||||
|
@ -303,7 +303,7 @@ cdef class Span:
|
|||
`Span`, `Token` and `Lexeme` objects.
|
||||
RETURNS (float): A scalar similarity score. Higher is more similar.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/span#similarity
|
||||
DOCS: https://spacy.io/api/span#similarity
|
||||
"""
|
||||
if "similarity" in self.doc.user_span_hooks:
|
||||
return self.doc.user_span_hooks["similarity"](self, other)
|
||||
|
@ -385,7 +385,7 @@ cdef class Span:
|
|||
|
||||
RETURNS (tuple): Entities in the span, one `Span` per entity.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/span#ents
|
||||
DOCS: https://spacy.io/api/span#ents
|
||||
"""
|
||||
cdef Span ent
|
||||
ents = []
|
||||
|
@ -404,7 +404,7 @@ cdef class Span:
|
|||
|
||||
RETURNS (bool): Whether a word vector is associated with the object.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/span#has_vector
|
||||
DOCS: https://spacy.io/api/span#has_vector
|
||||
"""
|
||||
if "has_vector" in self.doc.user_span_hooks:
|
||||
return self.doc.user_span_hooks["has_vector"](self)
|
||||
|
@ -423,7 +423,7 @@ cdef class Span:
|
|||
RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
|
||||
representing the span's semantics.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/span#vector
|
||||
DOCS: https://spacy.io/api/span#vector
|
||||
"""
|
||||
if "vector" in self.doc.user_span_hooks:
|
||||
return self.doc.user_span_hooks["vector"](self)
|
||||
|
@ -437,7 +437,7 @@ cdef class Span:
|
|||
|
||||
RETURNS (float): The L2 norm of the vector representation.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/span#vector_norm
|
||||
DOCS: https://spacy.io/api/span#vector_norm
|
||||
"""
|
||||
if "vector_norm" in self.doc.user_span_hooks:
|
||||
return self.doc.user_span_hooks["vector"](self)
|
||||
|
@ -501,7 +501,7 @@ cdef class Span:
|
|||
|
||||
YIELDS (Span): Noun chunks in the span.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/span#noun_chunks
|
||||
DOCS: https://spacy.io/api/span#noun_chunks
|
||||
"""
|
||||
for span in self.doc.noun_chunks:
|
||||
if span.start >= self.start and span.end <= self.end:
|
||||
|
@ -515,7 +515,7 @@ cdef class Span:
|
|||
|
||||
RETURNS (Token): The root token.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/span#root
|
||||
DOCS: https://spacy.io/api/span#root
|
||||
"""
|
||||
if "root" in self.doc.user_span_hooks:
|
||||
return self.doc.user_span_hooks["root"](self)
|
||||
|
@ -571,7 +571,7 @@ cdef class Span:
|
|||
|
||||
RETURNS (tuple): A tuple of Token objects.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/span#lefts
|
||||
DOCS: https://spacy.io/api/span#conjuncts
|
||||
"""
|
||||
return self.root.conjuncts
|
||||
|
||||
|
@ -582,7 +582,7 @@ cdef class Span:
|
|||
|
||||
YIELDS (Token): A left-child of a token of the span.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/span#lefts
|
||||
DOCS: https://spacy.io/api/span#lefts
|
||||
"""
|
||||
for token in reversed(self): # Reverse, so we get tokens in order
|
||||
for left in token.lefts:
|
||||
|
@ -596,7 +596,7 @@ cdef class Span:
|
|||
|
||||
YIELDS (Token): A right-child of a token of the span.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/span#rights
|
||||
DOCS: https://spacy.io/api/span#rights
|
||||
"""
|
||||
for token in self:
|
||||
for right in token.rights:
|
||||
|
@ -611,7 +611,7 @@ cdef class Span:
|
|||
RETURNS (int): The number of leftward immediate children of the
|
||||
span, in the syntactic dependency parse.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/span#n_lefts
|
||||
DOCS: https://spacy.io/api/span#n_lefts
|
||||
"""
|
||||
return len(list(self.lefts))
|
||||
|
||||
|
@ -623,7 +623,7 @@ cdef class Span:
|
|||
RETURNS (int): The number of rightward immediate children of the
|
||||
span, in the syntactic dependency parse.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/span#n_rights
|
||||
DOCS: https://spacy.io/api/span#n_rights
|
||||
"""
|
||||
return len(list(self.rights))
|
||||
|
||||
|
@ -633,7 +633,7 @@ cdef class Span:
|
|||
|
||||
YIELDS (Token): A token within the span, or a descendant from it.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/span#subtree
|
||||
DOCS: https://spacy.io/api/span#subtree
|
||||
"""
|
||||
for word in self.lefts:
|
||||
yield from word.subtree
|
||||
|
|
|
@ -27,7 +27,7 @@ cdef class SpanGroup:
|
|||
>>> doc.spans["errors"] = [doc[0:1], doc[2:4]]
|
||||
>>> assert isinstance(doc.spans["errors"], SpanGroup)
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/spangroup
|
||||
DOCS: https://spacy.io/api/spangroup
|
||||
"""
|
||||
def __init__(self, doc, *, name="", attrs={}, spans=[]):
|
||||
"""Create a SpanGroup.
|
||||
|
@ -37,7 +37,7 @@ cdef class SpanGroup:
|
|||
attrs (Dict[str, Any]): Optional JSON-serializable attributes to attach.
|
||||
spans (Iterable[Span]): The spans to add to the group.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/spangroup#init
|
||||
DOCS: https://spacy.io/api/spangroup#init
|
||||
"""
|
||||
# We need to make this a weak reference, so that the Doc object can
|
||||
# own the SpanGroup without circular references. We do want to get
|
||||
|
@ -56,7 +56,7 @@ cdef class SpanGroup:
|
|||
def doc(self):
|
||||
"""RETURNS (Doc): The reference document.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/spangroup#doc
|
||||
DOCS: https://spacy.io/api/spangroup#doc
|
||||
"""
|
||||
return self._doc_ref()
|
||||
|
||||
|
@ -64,7 +64,7 @@ cdef class SpanGroup:
|
|||
def has_overlap(self):
|
||||
"""RETURNS (bool): Whether the group contains overlapping spans.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/spangroup#has_overlap
|
||||
DOCS: https://spacy.io/api/spangroup#has_overlap
|
||||
"""
|
||||
if not len(self):
|
||||
return False
|
||||
|
@ -79,7 +79,7 @@ cdef class SpanGroup:
|
|||
def __len__(self):
|
||||
"""RETURNS (int): The number of spans in the group.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/spangroup#len
|
||||
DOCS: https://spacy.io/api/spangroup#len
|
||||
"""
|
||||
return self.c.size()
|
||||
|
||||
|
@ -89,7 +89,7 @@ cdef class SpanGroup:
|
|||
|
||||
span (Span): The span to append.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/spangroup#append
|
||||
DOCS: https://spacy.io/api/spangroup#append
|
||||
"""
|
||||
if span.doc is not self.doc:
|
||||
raise ValueError("Cannot add span to group: refers to different Doc.")
|
||||
|
@ -101,7 +101,7 @@ cdef class SpanGroup:
|
|||
|
||||
spans (Iterable[Span]): The spans to add.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/spangroup#extend
|
||||
DOCS: https://spacy.io/api/spangroup#extend
|
||||
"""
|
||||
cdef Span span
|
||||
for span in spans:
|
||||
|
@ -113,7 +113,7 @@ cdef class SpanGroup:
|
|||
i (int): The item index.
|
||||
RETURNS (Span): The span at the given index.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/spangroup#getitem
|
||||
DOCS: https://spacy.io/api/spangroup#getitem
|
||||
"""
|
||||
cdef int size = self.c.size()
|
||||
if i < -size or i >= size:
|
||||
|
@ -127,7 +127,7 @@ cdef class SpanGroup:
|
|||
|
||||
RETURNS (bytes): The serialized span group.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/spangroup#to_bytes
|
||||
DOCS: https://spacy.io/api/spangroup#to_bytes
|
||||
"""
|
||||
output = {"name": self.name, "attrs": self.attrs, "spans": []}
|
||||
for i in range(self.c.size()):
|
||||
|
@ -159,7 +159,7 @@ cdef class SpanGroup:
|
|||
bytes_data (bytes): The span group to load.
|
||||
RETURNS (SpanGroup): The deserialized span group.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/spangroup#from_bytes
|
||||
DOCS: https://spacy.io/api/spangroup#from_bytes
|
||||
"""
|
||||
msg = srsly.msgpack_loads(bytes_data)
|
||||
self.name = msg["name"]
|
||||
|
|
|
@ -27,7 +27,7 @@ cdef class Token:
|
|||
"""An individual token – i.e. a word, punctuation symbol, whitespace,
|
||||
etc.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/token
|
||||
DOCS: https://spacy.io/api/token
|
||||
"""
|
||||
@classmethod
|
||||
def set_extension(cls, name, **kwargs):
|
||||
|
@ -40,8 +40,8 @@ cdef class Token:
|
|||
method (callable): Optional method for method extension.
|
||||
force (bool): Force overwriting existing attribute.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/token#set_extension
|
||||
USAGE: https://nightly.spacy.io/usage/processing-pipelines#custom-components-attributes
|
||||
DOCS: https://spacy.io/api/token#set_extension
|
||||
USAGE: https://spacy.io/usage/processing-pipelines#custom-components-attributes
|
||||
"""
|
||||
if cls.has_extension(name) and not kwargs.get("force", False):
|
||||
raise ValueError(Errors.E090.format(name=name, obj="Token"))
|
||||
|
@ -54,7 +54,7 @@ cdef class Token:
|
|||
name (str): Name of the extension.
|
||||
RETURNS (tuple): A `(default, method, getter, setter)` tuple.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/token#get_extension
|
||||
DOCS: https://spacy.io/api/token#get_extension
|
||||
"""
|
||||
return Underscore.token_extensions.get(name)
|
||||
|
||||
|
@ -65,7 +65,7 @@ cdef class Token:
|
|||
name (str): Name of the extension.
|
||||
RETURNS (bool): Whether the extension has been registered.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/token#has_extension
|
||||
DOCS: https://spacy.io/api/token#has_extension
|
||||
"""
|
||||
return name in Underscore.token_extensions
|
||||
|
||||
|
@ -77,7 +77,7 @@ cdef class Token:
|
|||
RETURNS (tuple): A `(default, method, getter, setter)` tuple of the
|
||||
removed extension.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/token#remove_extension
|
||||
DOCS: https://spacy.io/api/token#remove_extension
|
||||
"""
|
||||
if not cls.has_extension(name):
|
||||
raise ValueError(Errors.E046.format(name=name))
|
||||
|
@ -90,7 +90,7 @@ cdef class Token:
|
|||
doc (Doc): The parent document.
|
||||
offset (int): The index of the token within the document.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/token#init
|
||||
DOCS: https://spacy.io/api/token#init
|
||||
"""
|
||||
self.vocab = vocab
|
||||
self.doc = doc
|
||||
|
@ -105,7 +105,7 @@ cdef class Token:
|
|||
|
||||
RETURNS (int): The number of unicode characters in the token.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/token#len
|
||||
DOCS: https://spacy.io/api/token#len
|
||||
"""
|
||||
return self.c.lex.length
|
||||
|
||||
|
@ -168,7 +168,7 @@ cdef class Token:
|
|||
flag_id (int): The ID of the flag attribute.
|
||||
RETURNS (bool): Whether the flag is set.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/token#check_flag
|
||||
DOCS: https://spacy.io/api/token#check_flag
|
||||
"""
|
||||
return Lexeme.c_check_flag(self.c.lex, flag_id)
|
||||
|
||||
|
@ -178,7 +178,7 @@ cdef class Token:
|
|||
i (int): The relative position of the token to get. Defaults to 1.
|
||||
RETURNS (Token): The token at position `self.doc[self.i+i]`.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/token#nbor
|
||||
DOCS: https://spacy.io/api/token#nbor
|
||||
"""
|
||||
if self.i+i < 0 or (self.i+i >= len(self.doc)):
|
||||
raise IndexError(Errors.E042.format(i=self.i, j=i, length=len(self.doc)))
|
||||
|
@ -192,7 +192,7 @@ cdef class Token:
|
|||
`Span`, `Token` and `Lexeme` objects.
|
||||
RETURNS (float): A scalar similarity score. Higher is more similar.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/token#similarity
|
||||
DOCS: https://spacy.io/api/token#similarity
|
||||
"""
|
||||
if "similarity" in self.doc.user_token_hooks:
|
||||
return self.doc.user_token_hooks["similarity"](self, other)
|
||||
|
@ -388,7 +388,7 @@ cdef class Token:
|
|||
|
||||
RETURNS (bool): Whether a word vector is associated with the object.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/token#has_vector
|
||||
DOCS: https://spacy.io/api/token#has_vector
|
||||
"""
|
||||
if "has_vector" in self.doc.user_token_hooks:
|
||||
return self.doc.user_token_hooks["has_vector"](self)
|
||||
|
@ -403,7 +403,7 @@ cdef class Token:
|
|||
RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
|
||||
representing the token's semantics.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/token#vector
|
||||
DOCS: https://spacy.io/api/token#vector
|
||||
"""
|
||||
if "vector" in self.doc.user_token_hooks:
|
||||
return self.doc.user_token_hooks["vector"](self)
|
||||
|
@ -418,7 +418,7 @@ cdef class Token:
|
|||
|
||||
RETURNS (float): The L2 norm of the vector representation.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/token#vector_norm
|
||||
DOCS: https://spacy.io/api/token#vector_norm
|
||||
"""
|
||||
if "vector_norm" in self.doc.user_token_hooks:
|
||||
return self.doc.user_token_hooks["vector_norm"](self)
|
||||
|
@ -441,7 +441,7 @@ cdef class Token:
|
|||
RETURNS (int): The number of leftward immediate children of the
|
||||
word, in the syntactic dependency parse.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/token#n_lefts
|
||||
DOCS: https://spacy.io/api/token#n_lefts
|
||||
"""
|
||||
return self.c.l_kids
|
||||
|
||||
|
@ -453,7 +453,7 @@ cdef class Token:
|
|||
RETURNS (int): The number of rightward immediate children of the
|
||||
word, in the syntactic dependency parse.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/token#n_rights
|
||||
DOCS: https://spacy.io/api/token#n_rights
|
||||
"""
|
||||
return self.c.r_kids
|
||||
|
||||
|
@ -485,7 +485,7 @@ cdef class Token:
|
|||
RETURNS (bool / None): Whether the token starts a sentence.
|
||||
None if unknown.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/token#is_sent_start
|
||||
DOCS: https://spacy.io/api/token#is_sent_start
|
||||
"""
|
||||
def __get__(self):
|
||||
if self.c.sent_start == 0:
|
||||
|
@ -514,7 +514,7 @@ cdef class Token:
|
|||
RETURNS (bool / None): Whether the token ends a sentence.
|
||||
None if unknown.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/token#is_sent_end
|
||||
DOCS: https://spacy.io/api/token#is_sent_end
|
||||
"""
|
||||
def __get__(self):
|
||||
if self.i + 1 == len(self.doc):
|
||||
|
@ -536,7 +536,7 @@ cdef class Token:
|
|||
|
||||
YIELDS (Token): A left-child of the token.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/token#lefts
|
||||
DOCS: https://spacy.io/api/token#lefts
|
||||
"""
|
||||
cdef int nr_iter = 0
|
||||
cdef const TokenC* ptr = self.c - (self.i - self.c.l_edge)
|
||||
|
@ -556,7 +556,7 @@ cdef class Token:
|
|||
|
||||
YIELDS (Token): A right-child of the token.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/token#rights
|
||||
DOCS: https://spacy.io/api/token#rights
|
||||
"""
|
||||
cdef const TokenC* ptr = self.c + (self.c.r_edge - self.i)
|
||||
tokens = []
|
||||
|
@ -578,7 +578,7 @@ cdef class Token:
|
|||
|
||||
YIELDS (Token): A child token such that `child.head==self`.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/token#children
|
||||
DOCS: https://spacy.io/api/token#children
|
||||
"""
|
||||
yield from self.lefts
|
||||
yield from self.rights
|
||||
|
@ -591,7 +591,7 @@ cdef class Token:
|
|||
YIELDS (Token): A descendent token such that
|
||||
`self.is_ancestor(descendent) or token == self`.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/token#subtree
|
||||
DOCS: https://spacy.io/api/token#subtree
|
||||
"""
|
||||
for word in self.lefts:
|
||||
yield from word.subtree
|
||||
|
@ -622,7 +622,7 @@ cdef class Token:
|
|||
YIELDS (Token): A sequence of ancestor tokens such that
|
||||
`ancestor.is_ancestor(self)`.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/token#ancestors
|
||||
DOCS: https://spacy.io/api/token#ancestors
|
||||
"""
|
||||
cdef const TokenC* head_ptr = self.c
|
||||
# Guard against infinite loop, no token can have
|
||||
|
@ -640,7 +640,7 @@ cdef class Token:
|
|||
descendant (Token): Another token.
|
||||
RETURNS (bool): Whether this token is the ancestor of the descendant.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/token#is_ancestor
|
||||
DOCS: https://spacy.io/api/token#is_ancestor
|
||||
"""
|
||||
if self.doc is not descendant.doc:
|
||||
return False
|
||||
|
@ -655,8 +655,8 @@ cdef class Token:
|
|||
return not Token.missing_head(self.c)
|
||||
|
||||
property head:
|
||||
"""The syntactic parent, or "governor", of this token.
|
||||
If token.has_head() is `False`, this method will return itself.
|
||||
"""The syntactic parent, or "governor", of this token.
|
||||
If token.has_head() is `False`, this method will return itself.
|
||||
|
||||
RETURNS (Token): The token predicted by the parser to be the head of
|
||||
the current token.
|
||||
|
@ -696,7 +696,7 @@ cdef class Token:
|
|||
|
||||
RETURNS (tuple): The coordinated tokens.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/token#conjuncts
|
||||
DOCS: https://spacy.io/api/token#conjuncts
|
||||
"""
|
||||
cdef Token word, child
|
||||
if "conjuncts" in self.doc.user_token_hooks:
|
||||
|
|
|
@ -97,7 +97,7 @@ class Corpus:
|
|||
augment (Callable[Example, Iterable[Example]]): Optional data augmentation
|
||||
function, to extrapolate additional examples from your annotations.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/corpus
|
||||
DOCS: https://spacy.io/api/corpus
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
|
@ -121,7 +121,7 @@ class Corpus:
|
|||
nlp (Language): The current nlp object.
|
||||
YIELDS (Example): The examples.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/corpus#call
|
||||
DOCS: https://spacy.io/api/corpus#call
|
||||
"""
|
||||
ref_docs = self.read_docbin(nlp.vocab, walk_corpus(self.path, FILE_TYPE))
|
||||
if self.gold_preproc:
|
||||
|
@ -206,7 +206,7 @@ class JsonlCorpus:
|
|||
limit (int): Limit corpus to a subset of examples, e.g. for debugging.
|
||||
Defaults to 0, which indicates no limit.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/corpus#jsonlcorpus
|
||||
DOCS: https://spacy.io/api/corpus#jsonlcorpus
|
||||
"""
|
||||
|
||||
file_type = "jsonl"
|
||||
|
@ -230,7 +230,7 @@ class JsonlCorpus:
|
|||
nlp (Language): The current nlp object.
|
||||
YIELDS (Example): The example objects.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/corpus#jsonlcorpus-call
|
||||
DOCS: https://spacy.io/api/corpus#jsonlcorpus-call
|
||||
"""
|
||||
for loc in walk_corpus(self.path, ".jsonl"):
|
||||
records = srsly.read_jsonl(loc)
|
||||
|
|
|
@ -44,7 +44,7 @@ cdef class Vectors:
|
|||
the table need to be assigned - so len(list(vectors.keys())) may be
|
||||
greater or smaller than vectors.shape[0].
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/vectors
|
||||
DOCS: https://spacy.io/api/vectors
|
||||
"""
|
||||
cdef public object name
|
||||
cdef public object data
|
||||
|
@ -59,7 +59,7 @@ cdef class Vectors:
|
|||
keys (iterable): A sequence of keys, aligned with the data.
|
||||
name (str): A name to identify the vectors table.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/vectors#init
|
||||
DOCS: https://spacy.io/api/vectors#init
|
||||
"""
|
||||
self.name = name
|
||||
if data is None:
|
||||
|
@ -83,7 +83,7 @@ cdef class Vectors:
|
|||
|
||||
RETURNS (tuple): A `(rows, dims)` pair.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/vectors#shape
|
||||
DOCS: https://spacy.io/api/vectors#shape
|
||||
"""
|
||||
return self.data.shape
|
||||
|
||||
|
@ -93,7 +93,7 @@ cdef class Vectors:
|
|||
|
||||
RETURNS (int): The vector size.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/vectors#size
|
||||
DOCS: https://spacy.io/api/vectors#size
|
||||
"""
|
||||
return self.data.shape[0] * self.data.shape[1]
|
||||
|
||||
|
@ -103,7 +103,7 @@ cdef class Vectors:
|
|||
|
||||
RETURNS (bool): `True` if no slots are available for new keys.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/vectors#is_full
|
||||
DOCS: https://spacy.io/api/vectors#is_full
|
||||
"""
|
||||
return self._unset.size() == 0
|
||||
|
||||
|
@ -114,7 +114,7 @@ cdef class Vectors:
|
|||
|
||||
RETURNS (int): The number of keys in the table.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/vectors#n_keys
|
||||
DOCS: https://spacy.io/api/vectors#n_keys
|
||||
"""
|
||||
return len(self.key2row)
|
||||
|
||||
|
@ -127,7 +127,7 @@ cdef class Vectors:
|
|||
key (int): The key to get the vector for.
|
||||
RETURNS (ndarray): The vector for the key.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/vectors#getitem
|
||||
DOCS: https://spacy.io/api/vectors#getitem
|
||||
"""
|
||||
i = self.key2row[key]
|
||||
if i is None:
|
||||
|
@ -141,7 +141,7 @@ cdef class Vectors:
|
|||
key (int): The key to set the vector for.
|
||||
vector (ndarray): The vector to set.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/vectors#setitem
|
||||
DOCS: https://spacy.io/api/vectors#setitem
|
||||
"""
|
||||
i = self.key2row[key]
|
||||
self.data[i] = vector
|
||||
|
@ -153,7 +153,7 @@ cdef class Vectors:
|
|||
|
||||
YIELDS (int): A key in the table.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/vectors#iter
|
||||
DOCS: https://spacy.io/api/vectors#iter
|
||||
"""
|
||||
yield from self.key2row
|
||||
|
||||
|
@ -162,7 +162,7 @@ cdef class Vectors:
|
|||
|
||||
RETURNS (int): The number of vectors in the data.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/vectors#len
|
||||
DOCS: https://spacy.io/api/vectors#len
|
||||
"""
|
||||
return self.data.shape[0]
|
||||
|
||||
|
@ -172,7 +172,7 @@ cdef class Vectors:
|
|||
key (int): The key to check.
|
||||
RETURNS (bool): Whether the key has a vector entry.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/vectors#contains
|
||||
DOCS: https://spacy.io/api/vectors#contains
|
||||
"""
|
||||
return key in self.key2row
|
||||
|
||||
|
@ -189,7 +189,7 @@ cdef class Vectors:
|
|||
inplace (bool): Reallocate the memory.
|
||||
RETURNS (list): The removed items as a list of `(key, row)` tuples.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/vectors#resize
|
||||
DOCS: https://spacy.io/api/vectors#resize
|
||||
"""
|
||||
xp = get_array_module(self.data)
|
||||
if inplace:
|
||||
|
@ -224,7 +224,7 @@ cdef class Vectors:
|
|||
|
||||
YIELDS (ndarray): A vector in the table.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/vectors#values
|
||||
DOCS: https://spacy.io/api/vectors#values
|
||||
"""
|
||||
for row, vector in enumerate(range(self.data.shape[0])):
|
||||
if not self._unset.count(row):
|
||||
|
@ -235,7 +235,7 @@ cdef class Vectors:
|
|||
|
||||
YIELDS (tuple): A key/vector pair.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/vectors#items
|
||||
DOCS: https://spacy.io/api/vectors#items
|
||||
"""
|
||||
for key, row in self.key2row.items():
|
||||
yield key, self.data[row]
|
||||
|
@ -281,7 +281,7 @@ cdef class Vectors:
|
|||
row (int / None): The row number of a vector to map the key to.
|
||||
RETURNS (int): The row the vector was added to.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/vectors#add
|
||||
DOCS: https://spacy.io/api/vectors#add
|
||||
"""
|
||||
# use int for all keys and rows in key2row for more efficient access
|
||||
# and serialization
|
||||
|
@ -368,7 +368,7 @@ cdef class Vectors:
|
|||
path (str / Path): A path to a directory, which will be created if
|
||||
it doesn't exist.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/vectors#to_disk
|
||||
DOCS: https://spacy.io/api/vectors#to_disk
|
||||
"""
|
||||
xp = get_array_module(self.data)
|
||||
if xp is numpy:
|
||||
|
@ -396,7 +396,7 @@ cdef class Vectors:
|
|||
path (str / Path): Directory path, string or Path-like object.
|
||||
RETURNS (Vectors): The modified object.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/vectors#from_disk
|
||||
DOCS: https://spacy.io/api/vectors#from_disk
|
||||
"""
|
||||
def load_key2row(path):
|
||||
if path.exists():
|
||||
|
@ -432,7 +432,7 @@ cdef class Vectors:
|
|||
exclude (list): String names of serialization fields to exclude.
|
||||
RETURNS (bytes): The serialized form of the `Vectors` object.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/vectors#to_bytes
|
||||
DOCS: https://spacy.io/api/vectors#to_bytes
|
||||
"""
|
||||
def serialize_weights():
|
||||
if hasattr(self.data, "to_bytes"):
|
||||
|
@ -453,7 +453,7 @@ cdef class Vectors:
|
|||
exclude (list): String names of serialization fields to exclude.
|
||||
RETURNS (Vectors): The `Vectors` object.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/vectors#from_bytes
|
||||
DOCS: https://spacy.io/api/vectors#from_bytes
|
||||
"""
|
||||
def deserialize_weights(b):
|
||||
if hasattr(self.data, "from_bytes"):
|
||||
|
|
|
@ -47,7 +47,7 @@ cdef class Vocab:
|
|||
instance also provides access to the `StringStore`, and owns underlying
|
||||
C-data that is shared between `Doc` objects.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/vocab
|
||||
DOCS: https://spacy.io/api/vocab
|
||||
"""
|
||||
def __init__(self, lex_attr_getters=None, strings=tuple(), lookups=None,
|
||||
oov_prob=-20., vectors_name=None, writing_system={},
|
||||
|
@ -110,7 +110,7 @@ cdef class Vocab:
|
|||
available bit will be chosen.
|
||||
RETURNS (int): The integer ID by which the flag value can be checked.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/vocab#add_flag
|
||||
DOCS: https://spacy.io/api/vocab#add_flag
|
||||
"""
|
||||
if flag_id == -1:
|
||||
for bit in range(1, 64):
|
||||
|
@ -202,7 +202,7 @@ cdef class Vocab:
|
|||
string (unicode): The ID string.
|
||||
RETURNS (bool): Whether the string has an entry in the vocabulary.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/vocab#contains
|
||||
DOCS: https://spacy.io/api/vocab#contains
|
||||
"""
|
||||
cdef hash_t int_key
|
||||
if isinstance(key, bytes):
|
||||
|
@ -219,7 +219,7 @@ cdef class Vocab:
|
|||
|
||||
YIELDS (Lexeme): An entry in the vocabulary.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/vocab#iter
|
||||
DOCS: https://spacy.io/api/vocab#iter
|
||||
"""
|
||||
cdef attr_t key
|
||||
cdef size_t addr
|
||||
|
@ -242,7 +242,7 @@ cdef class Vocab:
|
|||
>>> apple = nlp.vocab.strings["apple"]
|
||||
>>> assert nlp.vocab[apple] == nlp.vocab[u"apple"]
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/vocab#getitem
|
||||
DOCS: https://spacy.io/api/vocab#getitem
|
||||
"""
|
||||
cdef attr_t orth
|
||||
if isinstance(id_or_string, unicode):
|
||||
|
@ -310,7 +310,7 @@ cdef class Vocab:
|
|||
word was mapped to, and `score` the similarity score between the
|
||||
two words.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/vocab#prune_vectors
|
||||
DOCS: https://spacy.io/api/vocab#prune_vectors
|
||||
"""
|
||||
xp = get_array_module(self.vectors.data)
|
||||
# Make sure all vectors are in the vocab
|
||||
|
@ -353,7 +353,7 @@ cdef class Vocab:
|
|||
and shape determined by the `vocab.vectors` instance. Usually, a
|
||||
numpy ndarray of shape (300,) and dtype float32.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/vocab#get_vector
|
||||
DOCS: https://spacy.io/api/vocab#get_vector
|
||||
"""
|
||||
if isinstance(orth, str):
|
||||
orth = self.strings.add(orth)
|
||||
|
@ -400,7 +400,7 @@ cdef class Vocab:
|
|||
orth (int / unicode): The word.
|
||||
vector (numpy.ndarray[ndim=1, dtype='float32']): The vector to set.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/vocab#set_vector
|
||||
DOCS: https://spacy.io/api/vocab#set_vector
|
||||
"""
|
||||
if isinstance(orth, str):
|
||||
orth = self.strings.add(orth)
|
||||
|
@ -422,7 +422,7 @@ cdef class Vocab:
|
|||
orth (int / unicode): The word.
|
||||
RETURNS (bool): Whether the word has a vector.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/vocab#has_vector
|
||||
DOCS: https://spacy.io/api/vocab#has_vector
|
||||
"""
|
||||
if isinstance(orth, str):
|
||||
orth = self.strings.add(orth)
|
||||
|
@ -448,7 +448,7 @@ cdef class Vocab:
|
|||
it doesn't exist.
|
||||
exclude (list): String names of serialization fields to exclude.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/vocab#to_disk
|
||||
DOCS: https://spacy.io/api/vocab#to_disk
|
||||
"""
|
||||
path = util.ensure_path(path)
|
||||
if not path.exists():
|
||||
|
@ -469,7 +469,7 @@ cdef class Vocab:
|
|||
exclude (list): String names of serialization fields to exclude.
|
||||
RETURNS (Vocab): The modified `Vocab` object.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/vocab#to_disk
|
||||
DOCS: https://spacy.io/api/vocab#from_disk
|
||||
"""
|
||||
path = util.ensure_path(path)
|
||||
getters = ["strings", "vectors"]
|
||||
|
@ -494,7 +494,7 @@ cdef class Vocab:
|
|||
exclude (list): String names of serialization fields to exclude.
|
||||
RETURNS (bytes): The serialized form of the `Vocab` object.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/vocab#to_bytes
|
||||
DOCS: https://spacy.io/api/vocab#to_bytes
|
||||
"""
|
||||
def deserialize_vectors():
|
||||
if self.vectors is None:
|
||||
|
@ -516,7 +516,7 @@ cdef class Vocab:
|
|||
exclude (list): String names of serialization fields to exclude.
|
||||
RETURNS (Vocab): The `Vocab` object.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/vocab#from_bytes
|
||||
DOCS: https://spacy.io/api/vocab#from_bytes
|
||||
"""
|
||||
def serialize_vectors(b):
|
||||
if self.vectors is None:
|
||||
|
|
Loading…
Reference in New Issue
Block a user