mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 01:04:34 +03:00
Update docs links in codebase
This commit is contained in:
parent
2189046869
commit
ab1bb421ed
|
@ -25,7 +25,7 @@ COMMAND = "python -m spacy"
|
||||||
NAME = "spacy"
|
NAME = "spacy"
|
||||||
HELP = """spaCy Command-line Interface
|
HELP = """spaCy Command-line Interface
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/cli
|
DOCS: https://nightly.spacy.io/api/cli
|
||||||
"""
|
"""
|
||||||
PROJECT_HELP = f"""Command-line interface for spaCy projects and templates.
|
PROJECT_HELP = f"""Command-line interface for spaCy projects and templates.
|
||||||
You'd typically start by cloning a project template to a local directory and
|
You'd typically start by cloning a project template to a local directory and
|
||||||
|
|
|
@ -61,6 +61,8 @@ def convert_cli(
|
||||||
If no output_dir is specified and the output format is JSON, the data
|
If no output_dir is specified and the output format is JSON, the data
|
||||||
is written to stdout, so you can pipe them forward to a JSON file:
|
is written to stdout, so you can pipe them forward to a JSON file:
|
||||||
$ spacy convert some_file.conllu --file-type json > some_file.json
|
$ spacy convert some_file.conllu --file-type json > some_file.json
|
||||||
|
|
||||||
|
DOCS: https://nightly.spacy.io/api/cli#convert
|
||||||
"""
|
"""
|
||||||
if isinstance(file_type, FileTypes):
|
if isinstance(file_type, FileTypes):
|
||||||
# We get an instance of the FileTypes from the CLI so we need its string value
|
# We get an instance of the FileTypes from the CLI so we need its string value
|
||||||
|
@ -261,6 +263,6 @@ def _get_converter(msg, converter, input_path):
|
||||||
msg.warn(
|
msg.warn(
|
||||||
"Can't automatically detect NER format. "
|
"Can't automatically detect NER format. "
|
||||||
"Conversion may not succeed. "
|
"Conversion may not succeed. "
|
||||||
"See https://spacy.io/api/cli#convert"
|
"See https://nightly.spacy.io/api/cli#convert"
|
||||||
)
|
)
|
||||||
return converter
|
return converter
|
||||||
|
|
|
@ -31,6 +31,8 @@ def debug_config_cli(
|
||||||
As with the 'train' command, you can override settings from the config
|
As with the 'train' command, you can override settings from the config
|
||||||
as command line options. For instance, --training.batch_size 128 overrides
|
as command line options. For instance, --training.batch_size 128 overrides
|
||||||
the value of "batch_size" in the block "[training]".
|
the value of "batch_size" in the block "[training]".
|
||||||
|
|
||||||
|
DOCS: https://nightly.spacy.io/api/cli#debug-config
|
||||||
"""
|
"""
|
||||||
overrides = parse_config_overrides(ctx.args)
|
overrides = parse_config_overrides(ctx.args)
|
||||||
import_code(code_path)
|
import_code(code_path)
|
||||||
|
|
|
@ -47,6 +47,8 @@ def debug_data_cli(
|
||||||
Analyze, debug and validate your training and development data. Outputs
|
Analyze, debug and validate your training and development data. Outputs
|
||||||
useful stats, and can help you find problems like invalid entity annotations,
|
useful stats, and can help you find problems like invalid entity annotations,
|
||||||
cyclic dependencies, low data labels and more.
|
cyclic dependencies, low data labels and more.
|
||||||
|
|
||||||
|
DOCS: https://nightly.spacy.io/api/cli#debug-data
|
||||||
"""
|
"""
|
||||||
if ctx.command.name == "debug-data":
|
if ctx.command.name == "debug-data":
|
||||||
msg.warn(
|
msg.warn(
|
||||||
|
|
|
@ -30,6 +30,8 @@ def debug_model_cli(
|
||||||
"""
|
"""
|
||||||
Analyze a Thinc model implementation. Includes checks for internal structure
|
Analyze a Thinc model implementation. Includes checks for internal structure
|
||||||
and activations during training.
|
and activations during training.
|
||||||
|
|
||||||
|
DOCS: https://nightly.spacy.io/api/cli#debug-model
|
||||||
"""
|
"""
|
||||||
if use_gpu >= 0:
|
if use_gpu >= 0:
|
||||||
msg.info("Using GPU")
|
msg.info("Using GPU")
|
||||||
|
|
|
@ -28,7 +28,7 @@ def download_cli(
|
||||||
additional arguments provided to this command will be passed to `pip install`
|
additional arguments provided to this command will be passed to `pip install`
|
||||||
on package installation.
|
on package installation.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/cli#download
|
DOCS: https://nightly.spacy.io/api/cli#download
|
||||||
AVAILABLE PACKAGES: https://spacy.io/models
|
AVAILABLE PACKAGES: https://spacy.io/models
|
||||||
"""
|
"""
|
||||||
download(model, direct, *ctx.args)
|
download(model, direct, *ctx.args)
|
||||||
|
@ -77,7 +77,7 @@ def get_compatibility() -> dict:
|
||||||
f"Couldn't fetch compatibility table. Please find a package for your spaCy "
|
f"Couldn't fetch compatibility table. Please find a package for your spaCy "
|
||||||
f"installation (v{about.__version__}), and download it manually. "
|
f"installation (v{about.__version__}), and download it manually. "
|
||||||
f"For more details, see the documentation: "
|
f"For more details, see the documentation: "
|
||||||
f"https://spacy.io/usage/models",
|
f"https://nightly.spacy.io/usage/models",
|
||||||
exits=1,
|
exits=1,
|
||||||
)
|
)
|
||||||
comp_table = r.json()
|
comp_table = r.json()
|
||||||
|
|
|
@ -27,12 +27,15 @@ def evaluate_cli(
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation
|
Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation
|
||||||
data in the binary .spacy format. The --gold-preproc option sets up the evaluation
|
data in the binary .spacy format. The --gold-preproc option sets up the
|
||||||
examples with gold-standard sentences and tokens for the predictions. Gold
|
evaluation examples with gold-standard sentences and tokens for the
|
||||||
preprocessing helps the annotations align to the tokenization, and may
|
predictions. Gold preprocessing helps the annotations align to the
|
||||||
result in sequences of more consistent length. However, it may reduce
|
tokenization, and may result in sequences of more consistent length. However,
|
||||||
runtime accuracy due to train/test skew. To render a sample of dependency
|
it may reduce runtime accuracy due to train/test skew. To render a sample of
|
||||||
parses in an HTML file, set the output directory as the displacy_path argument.
|
dependency parses in an HTML file, set the output directory as the
|
||||||
|
displacy_path argument.
|
||||||
|
|
||||||
|
DOCS: https://nightly.spacy.io/api/cli#evaluate
|
||||||
"""
|
"""
|
||||||
evaluate(
|
evaluate(
|
||||||
model,
|
model,
|
||||||
|
|
|
@ -21,6 +21,8 @@ def info_cli(
|
||||||
Print info about spaCy installation. If a pipeline is specified as an argument,
|
Print info about spaCy installation. If a pipeline is specified as an argument,
|
||||||
print its meta information. Flag --markdown prints details in Markdown for easy
|
print its meta information. Flag --markdown prints details in Markdown for easy
|
||||||
copy-pasting to GitHub issues.
|
copy-pasting to GitHub issues.
|
||||||
|
|
||||||
|
DOCS: https://nightly.spacy.io/api/cli#info
|
||||||
"""
|
"""
|
||||||
info(model, markdown=markdown, silent=silent)
|
info(model, markdown=markdown, silent=silent)
|
||||||
|
|
||||||
|
|
|
@ -37,6 +37,8 @@ def init_config_cli(
|
||||||
specified via the CLI arguments, this command generates a config with the
|
specified via the CLI arguments, this command generates a config with the
|
||||||
optimal settings for your use case. This includes the choice of architecture,
|
optimal settings for your use case. This includes the choice of architecture,
|
||||||
pretrained weights and related hyperparameters.
|
pretrained weights and related hyperparameters.
|
||||||
|
|
||||||
|
DOCS: https://nightly.spacy.io/api/cli#init-config
|
||||||
"""
|
"""
|
||||||
if isinstance(optimize, Optimizations): # instance of enum from the CLI
|
if isinstance(optimize, Optimizations): # instance of enum from the CLI
|
||||||
optimize = optimize.value
|
optimize = optimize.value
|
||||||
|
@ -59,6 +61,8 @@ def init_fill_config_cli(
|
||||||
functions for their default values and update the base config. This command
|
functions for their default values and update the base config. This command
|
||||||
can be used with a config generated via the training quickstart widget:
|
can be used with a config generated via the training quickstart widget:
|
||||||
https://nightly.spacy.io/usage/training#quickstart
|
https://nightly.spacy.io/usage/training#quickstart
|
||||||
|
|
||||||
|
DOCS: https://nightly.spacy.io/api/cli#init-fill-config
|
||||||
"""
|
"""
|
||||||
fill_config(output_file, base_path, pretraining=pretraining, diff=diff)
|
fill_config(output_file, base_path, pretraining=pretraining, diff=diff)
|
||||||
|
|
||||||
|
|
|
@ -28,7 +28,7 @@ except ImportError:
|
||||||
DEFAULT_OOV_PROB = -20
|
DEFAULT_OOV_PROB = -20
|
||||||
|
|
||||||
|
|
||||||
@init_cli.command("vectors")
|
@init_cli.command("vocab")
|
||||||
@app.command(
|
@app.command(
|
||||||
"init-model",
|
"init-model",
|
||||||
context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
|
context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
|
||||||
|
@ -54,6 +54,8 @@ def init_model_cli(
|
||||||
Create a new blank pipeline directory with vocab and vectors from raw data.
|
Create a new blank pipeline directory with vocab and vectors from raw data.
|
||||||
If vectors are provided in Word2Vec format, they can be either a .txt or
|
If vectors are provided in Word2Vec format, they can be either a .txt or
|
||||||
zipped as a .zip or .tar.gz.
|
zipped as a .zip or .tar.gz.
|
||||||
|
|
||||||
|
DOCS: https://nightly.spacy.io/api/cli#init-vocab
|
||||||
"""
|
"""
|
||||||
if ctx.command.name == "init-model":
|
if ctx.command.name == "init-model":
|
||||||
msg.warn(
|
msg.warn(
|
||||||
|
|
|
@ -31,6 +31,8 @@ def package_cli(
|
||||||
the existing values will be used as the defaults in the command-line prompt.
|
the existing values will be used as the defaults in the command-line prompt.
|
||||||
After packaging, "python setup.py sdist" is run in the package directory,
|
After packaging, "python setup.py sdist" is run in the package directory,
|
||||||
which will create a .tar.gz archive that can be installed via "pip install".
|
which will create a .tar.gz archive that can be installed via "pip install".
|
||||||
|
|
||||||
|
DOCS: https://nightly.spacy.io/api/cli#package
|
||||||
"""
|
"""
|
||||||
package(
|
package(
|
||||||
input_dir,
|
input_dir,
|
||||||
|
|
|
@ -57,6 +57,8 @@ def pretrain_cli(
|
||||||
To load the weights back in during 'spacy train', you need to ensure
|
To load the weights back in during 'spacy train', you need to ensure
|
||||||
all settings are the same between pretraining and training. Ideally,
|
all settings are the same between pretraining and training. Ideally,
|
||||||
this is done by using the same config file for both commands.
|
this is done by using the same config file for both commands.
|
||||||
|
|
||||||
|
DOCS: https://nightly.spacy.io/api/cli#pretrain
|
||||||
"""
|
"""
|
||||||
overrides = parse_config_overrides(ctx.args)
|
overrides = parse_config_overrides(ctx.args)
|
||||||
import_code(code_path)
|
import_code(code_path)
|
||||||
|
|
|
@ -29,6 +29,8 @@ def profile_cli(
|
||||||
Input should be formatted as one JSON object per line with a key "text".
|
Input should be formatted as one JSON object per line with a key "text".
|
||||||
It can either be provided as a JSONL file, or be read from sys.stdin.
|
It can either be provided as a JSONL file, or be read from sys.stdin.
|
||||||
If no input file is specified, the IMDB dataset is loaded via Thinc.
|
If no input file is specified, the IMDB dataset is loaded via Thinc.
|
||||||
|
|
||||||
|
DOCS: https://nightly.spacy.io/api/cli#debug-profile
|
||||||
"""
|
"""
|
||||||
if ctx.parent.command.name == NAME: # called as top-level command
|
if ctx.parent.command.name == NAME: # called as top-level command
|
||||||
msg.warn(
|
msg.warn(
|
||||||
|
|
|
@ -20,6 +20,8 @@ def project_assets_cli(
|
||||||
defined in the "assets" section of the project.yml. If a checksum is
|
defined in the "assets" section of the project.yml. If a checksum is
|
||||||
provided in the project.yml, the file is only downloaded if no local file
|
provided in the project.yml, the file is only downloaded if no local file
|
||||||
with the same checksum exists.
|
with the same checksum exists.
|
||||||
|
|
||||||
|
DOCS: https://nightly.spacy.io/api/cli#project-assets
|
||||||
"""
|
"""
|
||||||
project_assets(project_dir)
|
project_assets(project_dir)
|
||||||
|
|
||||||
|
|
|
@ -22,6 +22,8 @@ def project_clone_cli(
|
||||||
only download the files from the given subdirectory. The GitHub repo
|
only download the files from the given subdirectory. The GitHub repo
|
||||||
defaults to the official spaCy template repo, but can be customized
|
defaults to the official spaCy template repo, but can be customized
|
||||||
(including using a private repo).
|
(including using a private repo).
|
||||||
|
|
||||||
|
DOCS: https://nightly.spacy.io/api/cli#project-clone
|
||||||
"""
|
"""
|
||||||
if dest is None:
|
if dest is None:
|
||||||
dest = Path.cwd() / name
|
dest = Path.cwd() / name
|
||||||
|
|
|
@ -43,6 +43,8 @@ def project_document_cli(
|
||||||
hidden markers are added so you can add custom content before or after the
|
hidden markers are added so you can add custom content before or after the
|
||||||
auto-generated section and only the auto-generated docs will be replaced
|
auto-generated section and only the auto-generated docs will be replaced
|
||||||
when you re-run the command.
|
when you re-run the command.
|
||||||
|
|
||||||
|
DOCS: https://nightly.spacy.io/api/cli#project-document
|
||||||
"""
|
"""
|
||||||
project_document(project_dir, output_file, no_emoji=no_emoji)
|
project_document(project_dir, output_file, no_emoji=no_emoji)
|
||||||
|
|
||||||
|
|
|
@ -31,7 +31,10 @@ def project_update_dvc_cli(
|
||||||
"""Auto-generate Data Version Control (DVC) config. A DVC
|
"""Auto-generate Data Version Control (DVC) config. A DVC
|
||||||
project can only define one pipeline, so you need to specify one workflow
|
project can only define one pipeline, so you need to specify one workflow
|
||||||
defined in the project.yml. If no workflow is specified, the first defined
|
defined in the project.yml. If no workflow is specified, the first defined
|
||||||
workflow is used. The DVC config will only be updated if the project.yml changed.
|
workflow is used. The DVC config will only be updated if the project.yml
|
||||||
|
changed.
|
||||||
|
|
||||||
|
DOCS: https://nightly.spacy.io/api/cli#project-dvc
|
||||||
"""
|
"""
|
||||||
project_update_dvc(project_dir, workflow, verbose=verbose, force=force)
|
project_update_dvc(project_dir, workflow, verbose=verbose, force=force)
|
||||||
|
|
||||||
|
|
|
@ -17,7 +17,9 @@ def project_pull_cli(
|
||||||
"""Retrieve available precomputed outputs from a remote storage.
|
"""Retrieve available precomputed outputs from a remote storage.
|
||||||
You can alias remotes in your project.yml by mapping them to storage paths.
|
You can alias remotes in your project.yml by mapping them to storage paths.
|
||||||
A storage can be anything that the smart-open library can upload to, e.g.
|
A storage can be anything that the smart-open library can upload to, e.g.
|
||||||
gcs, aws, ssh, local directories etc
|
AWS, Google Cloud Storage, SSH, local directories etc.
|
||||||
|
|
||||||
|
DOCS: https://nightly.spacy.io/api/cli#project-pull
|
||||||
"""
|
"""
|
||||||
for url, output_path in project_pull(project_dir, remote):
|
for url, output_path in project_pull(project_dir, remote):
|
||||||
if url is not None:
|
if url is not None:
|
||||||
|
|
|
@ -13,9 +13,12 @@ def project_push_cli(
|
||||||
project_dir: Path = Arg(Path.cwd(), help="Location of project directory. Defaults to current working directory.", exists=True, file_okay=False),
|
project_dir: Path = Arg(Path.cwd(), help="Location of project directory. Defaults to current working directory.", exists=True, file_okay=False),
|
||||||
# fmt: on
|
# fmt: on
|
||||||
):
|
):
|
||||||
"""Persist outputs to a remote storage. You can alias remotes in your project.yml
|
"""Persist outputs to a remote storage. You can alias remotes in your
|
||||||
by mapping them to storage paths. A storage can be anything that the smart-open
|
project.yml by mapping them to storage paths. A storage can be anything that
|
||||||
library can upload to, e.g. gcs, aws, ssh, local directories etc
|
the smart-open library can upload to, e.g. AWS, Google Cloud Storage, SSH,
|
||||||
|
local directories etc.
|
||||||
|
|
||||||
|
DOCS: https://nightly.spacy.io/api/cli#project-push
|
||||||
"""
|
"""
|
||||||
for output_path, url in project_push(project_dir, remote):
|
for output_path, url in project_push(project_dir, remote):
|
||||||
if url is None:
|
if url is None:
|
||||||
|
|
|
@ -24,6 +24,8 @@ def project_run_cli(
|
||||||
name is specified, all commands in the workflow are run, in order. If
|
name is specified, all commands in the workflow are run, in order. If
|
||||||
commands define dependencies and/or outputs, they will only be re-run if
|
commands define dependencies and/or outputs, they will only be re-run if
|
||||||
state has changed.
|
state has changed.
|
||||||
|
|
||||||
|
DOCS: https://nightly.spacy.io/api/cli#project-run
|
||||||
"""
|
"""
|
||||||
if show_help or not subcommand:
|
if show_help or not subcommand:
|
||||||
print_run_help(project_dir, subcommand)
|
print_run_help(project_dir, subcommand)
|
||||||
|
|
|
@ -44,6 +44,8 @@ def train_cli(
|
||||||
lets you pass in a Python file that's imported before training. It can be
|
lets you pass in a Python file that's imported before training. It can be
|
||||||
used to register custom functions and architectures that can then be
|
used to register custom functions and architectures that can then be
|
||||||
referenced in the config.
|
referenced in the config.
|
||||||
|
|
||||||
|
DOCS: https://nightly.spacy.io/api/cli#train
|
||||||
"""
|
"""
|
||||||
util.logger.setLevel(logging.DEBUG if verbose else logging.ERROR)
|
util.logger.setLevel(logging.DEBUG if verbose else logging.ERROR)
|
||||||
verify_cli_args(config_path, output_path)
|
verify_cli_args(config_path, output_path)
|
||||||
|
|
|
@ -16,6 +16,8 @@ def validate_cli():
|
||||||
Validate the currently installed pipeline packages and spaCy version. Checks
|
Validate the currently installed pipeline packages and spaCy version. Checks
|
||||||
if the installed packages are compatible and shows upgrade instructions if
|
if the installed packages are compatible and shows upgrade instructions if
|
||||||
available. Should be run after `pip install -U spacy`.
|
available. Should be run after `pip install -U spacy`.
|
||||||
|
|
||||||
|
DOCS: https://nightly.spacy.io/api/cli#validate
|
||||||
"""
|
"""
|
||||||
validate()
|
validate()
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
"""
|
"""
|
||||||
spaCy's built-in visualization suite for dependencies and named entities.
|
spaCy's built-in visualization suite for dependencies and named entities.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/top-level#displacy
|
DOCS: https://nightly.spacy.io/api/top-level#displacy
|
||||||
USAGE: https://spacy.io/usage/visualizers
|
USAGE: https://nightly.spacy.io/usage/visualizers
|
||||||
"""
|
"""
|
||||||
from typing import Union, Iterable, Optional, Dict, Any, Callable
|
from typing import Union, Iterable, Optional, Dict, Any, Callable
|
||||||
import warnings
|
import warnings
|
||||||
|
@ -37,8 +37,8 @@ def render(
|
||||||
manual (bool): Don't parse `Doc` and instead expect a dict/list of dicts.
|
manual (bool): Don't parse `Doc` and instead expect a dict/list of dicts.
|
||||||
RETURNS (str): Rendered HTML markup.
|
RETURNS (str): Rendered HTML markup.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/top-level#displacy.render
|
DOCS: https://nightly.spacy.io/api/top-level#displacy.render
|
||||||
USAGE: https://spacy.io/usage/visualizers
|
USAGE: https://nightly.spacy.io/usage/visualizers
|
||||||
"""
|
"""
|
||||||
factories = {
|
factories = {
|
||||||
"dep": (DependencyRenderer, parse_deps),
|
"dep": (DependencyRenderer, parse_deps),
|
||||||
|
@ -88,8 +88,8 @@ def serve(
|
||||||
port (int): Port to serve visualisation.
|
port (int): Port to serve visualisation.
|
||||||
host (str): Host to serve visualisation.
|
host (str): Host to serve visualisation.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/top-level#displacy.serve
|
DOCS: https://nightly.spacy.io/api/top-level#displacy.serve
|
||||||
USAGE: https://spacy.io/usage/visualizers
|
USAGE: https://nightly.spacy.io/usage/visualizers
|
||||||
"""
|
"""
|
||||||
from wsgiref import simple_server
|
from wsgiref import simple_server
|
||||||
|
|
||||||
|
|
|
@ -22,7 +22,7 @@ class Warnings:
|
||||||
"generate a dependency visualization for it. Make sure the Doc "
|
"generate a dependency visualization for it. Make sure the Doc "
|
||||||
"was processed with a model that supports dependency parsing, and "
|
"was processed with a model that supports dependency parsing, and "
|
||||||
"not just a language class like `English()`. For more info, see "
|
"not just a language class like `English()`. For more info, see "
|
||||||
"the docs:\nhttps://spacy.io/usage/models")
|
"the docs:\nhttps://nightly.spacy.io/usage/models")
|
||||||
W006 = ("No entities to visualize found in Doc object. If this is "
|
W006 = ("No entities to visualize found in Doc object. If this is "
|
||||||
"surprising to you, make sure the Doc was processed using a model "
|
"surprising to you, make sure the Doc was processed using a model "
|
||||||
"that supports named entity recognition, and check the `doc.ents` "
|
"that supports named entity recognition, and check the `doc.ents` "
|
||||||
|
@ -147,7 +147,7 @@ class Errors:
|
||||||
E010 = ("Word vectors set to length 0. This may be because you don't have "
|
E010 = ("Word vectors set to length 0. This may be because you don't have "
|
||||||
"a model installed or loaded, or because your model doesn't "
|
"a model installed or loaded, or because your model doesn't "
|
||||||
"include word vectors. For more info, see the docs:\n"
|
"include word vectors. For more info, see the docs:\n"
|
||||||
"https://spacy.io/usage/models")
|
"https://nightly.spacy.io/usage/models")
|
||||||
E011 = ("Unknown operator: '{op}'. Options: {opts}")
|
E011 = ("Unknown operator: '{op}'. Options: {opts}")
|
||||||
E012 = ("Cannot add pattern for zero tokens to matcher.\nKey: {key}")
|
E012 = ("Cannot add pattern for zero tokens to matcher.\nKey: {key}")
|
||||||
E014 = ("Unknown tag ID: {tag}")
|
E014 = ("Unknown tag ID: {tag}")
|
||||||
|
@ -181,7 +181,7 @@ class Errors:
|
||||||
"list of (unicode, bool) tuples. Got bytes instance: {value}")
|
"list of (unicode, bool) tuples. Got bytes instance: {value}")
|
||||||
E029 = ("noun_chunks requires the dependency parse, which requires a "
|
E029 = ("noun_chunks requires the dependency parse, which requires a "
|
||||||
"statistical model to be installed and loaded. For more info, see "
|
"statistical model to be installed and loaded. For more info, see "
|
||||||
"the documentation:\nhttps://spacy.io/usage/models")
|
"the documentation:\nhttps://nightly.spacy.io/usage/models")
|
||||||
E030 = ("Sentence boundaries unset. You can add the 'sentencizer' "
|
E030 = ("Sentence boundaries unset. You can add the 'sentencizer' "
|
||||||
"component to the pipeline with: "
|
"component to the pipeline with: "
|
||||||
"nlp.add_pipe('sentencizer'). "
|
"nlp.add_pipe('sentencizer'). "
|
||||||
|
@ -294,7 +294,7 @@ class Errors:
|
||||||
E102 = ("Can't merge non-disjoint spans. '{token}' is already part of "
|
E102 = ("Can't merge non-disjoint spans. '{token}' is already part of "
|
||||||
"tokens to merge. If you want to find the longest non-overlapping "
|
"tokens to merge. If you want to find the longest non-overlapping "
|
||||||
"spans, you can use the util.filter_spans helper:\n"
|
"spans, you can use the util.filter_spans helper:\n"
|
||||||
"https://spacy.io/api/top-level#util.filter_spans")
|
"https://nightly.spacy.io/api/top-level#util.filter_spans")
|
||||||
E103 = ("Trying to set conflicting doc.ents: '{span1}' and '{span2}'. A "
|
E103 = ("Trying to set conflicting doc.ents: '{span1}' and '{span2}'. A "
|
||||||
"token can only be part of one entity, so make sure the entities "
|
"token can only be part of one entity, so make sure the entities "
|
||||||
"you're setting don't overlap.")
|
"you're setting don't overlap.")
|
||||||
|
@ -364,10 +364,10 @@ class Errors:
|
||||||
E137 = ("Expected 'dict' type, but got '{type}' from '{line}'. Make sure "
|
E137 = ("Expected 'dict' type, but got '{type}' from '{line}'. Make sure "
|
||||||
"to provide a valid JSON object as input with either the `text` "
|
"to provide a valid JSON object as input with either the `text` "
|
||||||
"or `tokens` key. For more info, see the docs:\n"
|
"or `tokens` key. For more info, see the docs:\n"
|
||||||
"https://spacy.io/api/cli#pretrain-jsonl")
|
"https://nightly.spacy.io/api/cli#pretrain-jsonl")
|
||||||
E138 = ("Invalid JSONL format for raw text '{text}'. Make sure the input "
|
E138 = ("Invalid JSONL format for raw text '{text}'. Make sure the input "
|
||||||
"includes either the `text` or `tokens` key. For more info, see "
|
"includes either the `text` or `tokens` key. For more info, see "
|
||||||
"the docs:\nhttps://spacy.io/api/cli#pretrain-jsonl")
|
"the docs:\nhttps://nightly.spacy.io/api/cli#pretrain-jsonl")
|
||||||
E139 = ("Knowledge Base for component '{name}' is empty. Use the methods "
|
E139 = ("Knowledge Base for component '{name}' is empty. Use the methods "
|
||||||
"kb.add_entity and kb.add_alias to add entries.")
|
"kb.add_entity and kb.add_alias to add entries.")
|
||||||
E140 = ("The list of entities, prior probabilities and entity vectors "
|
E140 = ("The list of entities, prior probabilities and entity vectors "
|
||||||
|
|
|
@ -106,7 +106,7 @@ def conll_ner2docs(
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"The token-per-line NER file is not formatted correctly. "
|
"The token-per-line NER file is not formatted correctly. "
|
||||||
"Try checking whitespace and delimiters. See "
|
"Try checking whitespace and delimiters. See "
|
||||||
"https://spacy.io/api/cli#convert"
|
"https://nightly.spacy.io/api/cli#convert"
|
||||||
)
|
)
|
||||||
length = len(cols[0])
|
length = len(cols[0])
|
||||||
words.extend(cols[0])
|
words.extend(cols[0])
|
||||||
|
|
|
@ -44,7 +44,7 @@ def read_iob(raw_sents, vocab, n_sents):
|
||||||
sent_tags = ["-"] * len(sent_words)
|
sent_tags = ["-"] * len(sent_words)
|
||||||
else:
|
else:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"The sentence-per-line IOB/IOB2 file is not formatted correctly. Try checking whitespace and delimiters. See https://spacy.io/api/cli#convert"
|
"The sentence-per-line IOB/IOB2 file is not formatted correctly. Try checking whitespace and delimiters. See https://nightly.spacy.io/api/cli#convert"
|
||||||
)
|
)
|
||||||
words.extend(sent_words)
|
words.extend(sent_words)
|
||||||
tags.extend(sent_tags)
|
tags.extend(sent_tags)
|
||||||
|
|
|
@ -38,7 +38,7 @@ class Corpus:
|
||||||
limit (int): Limit corpus to a subset of examples, e.g. for debugging.
|
limit (int): Limit corpus to a subset of examples, e.g. for debugging.
|
||||||
Defaults to 0, which indicates no limit.
|
Defaults to 0, which indicates no limit.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/corpus
|
DOCS: https://nightly.spacy.io/api/corpus
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
@ -83,7 +83,7 @@ class Corpus:
|
||||||
nlp (Language): The current nlp object.
|
nlp (Language): The current nlp object.
|
||||||
YIELDS (Example): The examples.
|
YIELDS (Example): The examples.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/corpus#call
|
DOCS: https://nightly.spacy.io/api/corpus#call
|
||||||
"""
|
"""
|
||||||
ref_docs = self.read_docbin(nlp.vocab, self.walk_corpus(self.path))
|
ref_docs = self.read_docbin(nlp.vocab, self.walk_corpus(self.path))
|
||||||
if self.gold_preproc:
|
if self.gold_preproc:
|
||||||
|
|
|
@ -21,7 +21,7 @@ cdef class Candidate:
|
||||||
algorithm which will disambiguate the various candidates to the correct one.
|
algorithm which will disambiguate the various candidates to the correct one.
|
||||||
Each candidate (alias, entity) pair is assigned a certain prior probability.
|
Each candidate (alias, entity) pair is assigned a certain prior probability.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/kb/#candidate_init
|
DOCS: https://nightly.spacy.io/api/kb/#candidate_init
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, KnowledgeBase kb, entity_hash, entity_freq, entity_vector, alias_hash, prior_prob):
|
def __init__(self, KnowledgeBase kb, entity_hash, entity_freq, entity_vector, alias_hash, prior_prob):
|
||||||
|
@ -79,7 +79,7 @@ cdef class KnowledgeBase:
|
||||||
"""A `KnowledgeBase` instance stores unique identifiers for entities and their textual aliases,
|
"""A `KnowledgeBase` instance stores unique identifiers for entities and their textual aliases,
|
||||||
to support entity linking of named entities to real-world concepts.
|
to support entity linking of named entities to real-world concepts.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/kb
|
DOCS: https://nightly.spacy.io/api/kb
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, Vocab vocab, entity_vector_length):
|
def __init__(self, Vocab vocab, entity_vector_length):
|
||||||
|
|
|
@ -95,7 +95,7 @@ class Language:
|
||||||
object and processing pipeline.
|
object and processing pipeline.
|
||||||
lang (str): Two-letter language ID, i.e. ISO code.
|
lang (str): Two-letter language ID, i.e. ISO code.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/language
|
DOCS: https://nightly.spacy.io/api/language
|
||||||
"""
|
"""
|
||||||
|
|
||||||
Defaults = BaseDefaults
|
Defaults = BaseDefaults
|
||||||
|
@ -130,7 +130,7 @@ class Language:
|
||||||
create_tokenizer (Callable): Function that takes the nlp object and
|
create_tokenizer (Callable): Function that takes the nlp object and
|
||||||
returns a tokenizer.
|
returns a tokenizer.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/language#init
|
DOCS: https://nightly.spacy.io/api/language#init
|
||||||
"""
|
"""
|
||||||
# We're only calling this to import all factories provided via entry
|
# We're only calling this to import all factories provided via entry
|
||||||
# points. The factory decorator applied to these functions takes care
|
# points. The factory decorator applied to these functions takes care
|
||||||
|
@ -185,7 +185,7 @@ class Language:
|
||||||
|
|
||||||
RETURNS (Dict[str, Any]): The meta.
|
RETURNS (Dict[str, Any]): The meta.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/language#meta
|
DOCS: https://nightly.spacy.io/api/language#meta
|
||||||
"""
|
"""
|
||||||
spacy_version = util.get_model_version_range(about.__version__)
|
spacy_version = util.get_model_version_range(about.__version__)
|
||||||
if self.vocab.lang:
|
if self.vocab.lang:
|
||||||
|
@ -225,7 +225,7 @@ class Language:
|
||||||
|
|
||||||
RETURNS (thinc.api.Config): The config.
|
RETURNS (thinc.api.Config): The config.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/language#config
|
DOCS: https://nightly.spacy.io/api/language#config
|
||||||
"""
|
"""
|
||||||
self._config.setdefault("nlp", {})
|
self._config.setdefault("nlp", {})
|
||||||
self._config.setdefault("training", {})
|
self._config.setdefault("training", {})
|
||||||
|
@ -433,7 +433,7 @@ class Language:
|
||||||
will be combined and normalized for the whole pipeline.
|
will be combined and normalized for the whole pipeline.
|
||||||
func (Optional[Callable]): Factory function if not used as a decorator.
|
func (Optional[Callable]): Factory function if not used as a decorator.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/language#factory
|
DOCS: https://nightly.spacy.io/api/language#factory
|
||||||
"""
|
"""
|
||||||
if not isinstance(name, str):
|
if not isinstance(name, str):
|
||||||
raise ValueError(Errors.E963.format(decorator="factory"))
|
raise ValueError(Errors.E963.format(decorator="factory"))
|
||||||
|
@ -513,7 +513,7 @@ class Language:
|
||||||
Used for pipeline analysis.
|
Used for pipeline analysis.
|
||||||
func (Optional[Callable]): Factory function if not used as a decorator.
|
func (Optional[Callable]): Factory function if not used as a decorator.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/language#component
|
DOCS: https://nightly.spacy.io/api/language#component
|
||||||
"""
|
"""
|
||||||
if name is not None and not isinstance(name, str):
|
if name is not None and not isinstance(name, str):
|
||||||
raise ValueError(Errors.E963.format(decorator="component"))
|
raise ValueError(Errors.E963.format(decorator="component"))
|
||||||
|
@ -579,7 +579,7 @@ class Language:
|
||||||
name (str): Name of pipeline component to get.
|
name (str): Name of pipeline component to get.
|
||||||
RETURNS (callable): The pipeline component.
|
RETURNS (callable): The pipeline component.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/language#get_pipe
|
DOCS: https://nightly.spacy.io/api/language#get_pipe
|
||||||
"""
|
"""
|
||||||
for pipe_name, component in self._components:
|
for pipe_name, component in self._components:
|
||||||
if pipe_name == name:
|
if pipe_name == name:
|
||||||
|
@ -608,7 +608,7 @@ class Language:
|
||||||
arguments and types expected by the factory.
|
arguments and types expected by the factory.
|
||||||
RETURNS (Callable[[Doc], Doc]): The pipeline component.
|
RETURNS (Callable[[Doc], Doc]): The pipeline component.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/language#create_pipe
|
DOCS: https://nightly.spacy.io/api/language#create_pipe
|
||||||
"""
|
"""
|
||||||
name = name if name is not None else factory_name
|
name = name if name is not None else factory_name
|
||||||
if not isinstance(config, dict):
|
if not isinstance(config, dict):
|
||||||
|
@ -722,7 +722,7 @@ class Language:
|
||||||
arguments and types expected by the factory.
|
arguments and types expected by the factory.
|
||||||
RETURNS (Callable[[Doc], Doc]): The pipeline component.
|
RETURNS (Callable[[Doc], Doc]): The pipeline component.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/language#add_pipe
|
DOCS: https://nightly.spacy.io/api/language#add_pipe
|
||||||
"""
|
"""
|
||||||
if not isinstance(factory_name, str):
|
if not isinstance(factory_name, str):
|
||||||
bad_val = repr(factory_name)
|
bad_val = repr(factory_name)
|
||||||
|
@ -820,7 +820,7 @@ class Language:
|
||||||
name (str): Name of the component.
|
name (str): Name of the component.
|
||||||
RETURNS (bool): Whether a component of the name exists in the pipeline.
|
RETURNS (bool): Whether a component of the name exists in the pipeline.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/language#has_pipe
|
DOCS: https://nightly.spacy.io/api/language#has_pipe
|
||||||
"""
|
"""
|
||||||
return name in self.pipe_names
|
return name in self.pipe_names
|
||||||
|
|
||||||
|
@ -841,7 +841,7 @@ class Language:
|
||||||
validate (bool): Whether to validate the component config against the
|
validate (bool): Whether to validate the component config against the
|
||||||
arguments and types expected by the factory.
|
arguments and types expected by the factory.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/language#replace_pipe
|
DOCS: https://nightly.spacy.io/api/language#replace_pipe
|
||||||
"""
|
"""
|
||||||
if name not in self.pipe_names:
|
if name not in self.pipe_names:
|
||||||
raise ValueError(Errors.E001.format(name=name, opts=self.pipe_names))
|
raise ValueError(Errors.E001.format(name=name, opts=self.pipe_names))
|
||||||
|
@ -870,7 +870,7 @@ class Language:
|
||||||
old_name (str): Name of the component to rename.
|
old_name (str): Name of the component to rename.
|
||||||
new_name (str): New name of the component.
|
new_name (str): New name of the component.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/language#rename_pipe
|
DOCS: https://nightly.spacy.io/api/language#rename_pipe
|
||||||
"""
|
"""
|
||||||
if old_name not in self.component_names:
|
if old_name not in self.component_names:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
|
@ -891,7 +891,7 @@ class Language:
|
||||||
name (str): Name of the component to remove.
|
name (str): Name of the component to remove.
|
||||||
RETURNS (tuple): A `(name, component)` tuple of the removed component.
|
RETURNS (tuple): A `(name, component)` tuple of the removed component.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/language#remove_pipe
|
DOCS: https://nightly.spacy.io/api/language#remove_pipe
|
||||||
"""
|
"""
|
||||||
if name not in self.component_names:
|
if name not in self.component_names:
|
||||||
raise ValueError(Errors.E001.format(name=name, opts=self.component_names))
|
raise ValueError(Errors.E001.format(name=name, opts=self.component_names))
|
||||||
|
@ -944,7 +944,7 @@ class Language:
|
||||||
keyword arguments for specific components.
|
keyword arguments for specific components.
|
||||||
RETURNS (Doc): A container for accessing the annotations.
|
RETURNS (Doc): A container for accessing the annotations.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/language#call
|
DOCS: https://nightly.spacy.io/api/language#call
|
||||||
"""
|
"""
|
||||||
if len(text) > self.max_length:
|
if len(text) > self.max_length:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
|
@ -993,7 +993,7 @@ class Language:
|
||||||
disable (str or iterable): The name(s) of the pipes to disable
|
disable (str or iterable): The name(s) of the pipes to disable
|
||||||
enable (str or iterable): The name(s) of the pipes to enable - all others will be disabled
|
enable (str or iterable): The name(s) of the pipes to enable - all others will be disabled
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/language#select_pipes
|
DOCS: https://nightly.spacy.io/api/language#select_pipes
|
||||||
"""
|
"""
|
||||||
if enable is None and disable is None:
|
if enable is None and disable is None:
|
||||||
raise ValueError(Errors.E991)
|
raise ValueError(Errors.E991)
|
||||||
|
@ -1044,7 +1044,7 @@ class Language:
|
||||||
exclude (Iterable[str]): Names of components that shouldn't be updated.
|
exclude (Iterable[str]): Names of components that shouldn't be updated.
|
||||||
RETURNS (Dict[str, float]): The updated losses dictionary
|
RETURNS (Dict[str, float]): The updated losses dictionary
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/language#update
|
DOCS: https://nightly.spacy.io/api/language#update
|
||||||
"""
|
"""
|
||||||
if _ is not None:
|
if _ is not None:
|
||||||
raise ValueError(Errors.E989)
|
raise ValueError(Errors.E989)
|
||||||
|
@ -1106,7 +1106,7 @@ class Language:
|
||||||
>>> raw_batch = [Example.from_dict(nlp.make_doc(text), {}) for text in next(raw_text_batches)]
|
>>> raw_batch = [Example.from_dict(nlp.make_doc(text), {}) for text in next(raw_text_batches)]
|
||||||
>>> nlp.rehearse(raw_batch)
|
>>> nlp.rehearse(raw_batch)
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/language#rehearse
|
DOCS: https://nightly.spacy.io/api/language#rehearse
|
||||||
"""
|
"""
|
||||||
if len(examples) == 0:
|
if len(examples) == 0:
|
||||||
return
|
return
|
||||||
|
@ -1153,7 +1153,7 @@ class Language:
|
||||||
create_optimizer if it doesn't exist.
|
create_optimizer if it doesn't exist.
|
||||||
RETURNS (thinc.api.Optimizer): The optimizer.
|
RETURNS (thinc.api.Optimizer): The optimizer.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/language#begin_training
|
DOCS: https://nightly.spacy.io/api/language#begin_training
|
||||||
"""
|
"""
|
||||||
# TODO: throw warning when get_gold_tuples is provided instead of get_examples
|
# TODO: throw warning when get_gold_tuples is provided instead of get_examples
|
||||||
if get_examples is None:
|
if get_examples is None:
|
||||||
|
@ -1200,7 +1200,7 @@ class Language:
|
||||||
sgd (Optional[Optimizer]): An optimizer.
|
sgd (Optional[Optimizer]): An optimizer.
|
||||||
RETURNS (Optimizer): The optimizer.
|
RETURNS (Optimizer): The optimizer.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/language#resume_training
|
DOCS: https://nightly.spacy.io/api/language#resume_training
|
||||||
"""
|
"""
|
||||||
if device >= 0: # TODO: do we need this here?
|
if device >= 0: # TODO: do we need this here?
|
||||||
require_gpu(device)
|
require_gpu(device)
|
||||||
|
@ -1236,7 +1236,7 @@ class Language:
|
||||||
for the scorer.
|
for the scorer.
|
||||||
RETURNS (Scorer): The scorer containing the evaluation results.
|
RETURNS (Scorer): The scorer containing the evaluation results.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/language#evaluate
|
DOCS: https://nightly.spacy.io/api/language#evaluate
|
||||||
"""
|
"""
|
||||||
validate_examples(examples, "Language.evaluate")
|
validate_examples(examples, "Language.evaluate")
|
||||||
if component_cfg is None:
|
if component_cfg is None:
|
||||||
|
@ -1286,7 +1286,7 @@ class Language:
|
||||||
>>> with nlp.use_params(optimizer.averages):
|
>>> with nlp.use_params(optimizer.averages):
|
||||||
>>> nlp.to_disk("/tmp/checkpoint")
|
>>> nlp.to_disk("/tmp/checkpoint")
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/language#use_params
|
DOCS: https://nightly.spacy.io/api/language#use_params
|
||||||
"""
|
"""
|
||||||
if not params:
|
if not params:
|
||||||
yield
|
yield
|
||||||
|
@ -1333,7 +1333,7 @@ class Language:
|
||||||
n_process (int): Number of processors to process texts. If -1, set `multiprocessing.cpu_count()`.
|
n_process (int): Number of processors to process texts. If -1, set `multiprocessing.cpu_count()`.
|
||||||
YIELDS (Doc): Documents in the order of the original text.
|
YIELDS (Doc): Documents in the order of the original text.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/language#pipe
|
DOCS: https://nightly.spacy.io/api/language#pipe
|
||||||
"""
|
"""
|
||||||
if n_process == -1:
|
if n_process == -1:
|
||||||
n_process = mp.cpu_count()
|
n_process = mp.cpu_count()
|
||||||
|
@ -1469,7 +1469,7 @@ class Language:
|
||||||
the types expected by the factory.
|
the types expected by the factory.
|
||||||
RETURNS (Language): The initialized Language class.
|
RETURNS (Language): The initialized Language class.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/language#from_config
|
DOCS: https://nightly.spacy.io/api/language#from_config
|
||||||
"""
|
"""
|
||||||
if auto_fill:
|
if auto_fill:
|
||||||
config = Config(
|
config = Config(
|
||||||
|
@ -1582,7 +1582,7 @@ class Language:
|
||||||
it doesn't exist.
|
it doesn't exist.
|
||||||
exclude (list): Names of components or serialization fields to exclude.
|
exclude (list): Names of components or serialization fields to exclude.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/language#to_disk
|
DOCS: https://nightly.spacy.io/api/language#to_disk
|
||||||
"""
|
"""
|
||||||
path = util.ensure_path(path)
|
path = util.ensure_path(path)
|
||||||
serializers = {}
|
serializers = {}
|
||||||
|
@ -1611,7 +1611,7 @@ class Language:
|
||||||
exclude (list): Names of components or serialization fields to exclude.
|
exclude (list): Names of components or serialization fields to exclude.
|
||||||
RETURNS (Language): The modified `Language` object.
|
RETURNS (Language): The modified `Language` object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/language#from_disk
|
DOCS: https://nightly.spacy.io/api/language#from_disk
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def deserialize_meta(path: Path) -> None:
|
def deserialize_meta(path: Path) -> None:
|
||||||
|
@ -1659,7 +1659,7 @@ class Language:
|
||||||
exclude (list): Names of components or serialization fields to exclude.
|
exclude (list): Names of components or serialization fields to exclude.
|
||||||
RETURNS (bytes): The serialized form of the `Language` object.
|
RETURNS (bytes): The serialized form of the `Language` object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/language#to_bytes
|
DOCS: https://nightly.spacy.io/api/language#to_bytes
|
||||||
"""
|
"""
|
||||||
serializers = {}
|
serializers = {}
|
||||||
serializers["vocab"] = lambda: self.vocab.to_bytes()
|
serializers["vocab"] = lambda: self.vocab.to_bytes()
|
||||||
|
@ -1683,7 +1683,7 @@ class Language:
|
||||||
exclude (list): Names of components or serialization fields to exclude.
|
exclude (list): Names of components or serialization fields to exclude.
|
||||||
RETURNS (Language): The `Language` object.
|
RETURNS (Language): The `Language` object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/language#from_bytes
|
DOCS: https://nightly.spacy.io/api/language#from_bytes
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def deserialize_meta(b):
|
def deserialize_meta(b):
|
||||||
|
|
|
@ -30,7 +30,7 @@ cdef class Lexeme:
|
||||||
tag, dependency parse, or lemma (lemmatization depends on the
|
tag, dependency parse, or lemma (lemmatization depends on the
|
||||||
part-of-speech tag).
|
part-of-speech tag).
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/lexeme
|
DOCS: https://nightly.spacy.io/api/lexeme
|
||||||
"""
|
"""
|
||||||
def __init__(self, Vocab vocab, attr_t orth):
|
def __init__(self, Vocab vocab, attr_t orth):
|
||||||
"""Create a Lexeme object.
|
"""Create a Lexeme object.
|
||||||
|
|
|
@ -57,7 +57,7 @@ class Table(OrderedDict):
|
||||||
data (dict): The dictionary.
|
data (dict): The dictionary.
|
||||||
name (str): Optional table name for reference.
|
name (str): Optional table name for reference.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/lookups#table.from_dict
|
DOCS: https://nightly.spacy.io/api/lookups#table.from_dict
|
||||||
"""
|
"""
|
||||||
self = cls(name=name)
|
self = cls(name=name)
|
||||||
self.update(data)
|
self.update(data)
|
||||||
|
@ -69,7 +69,7 @@ class Table(OrderedDict):
|
||||||
name (str): Optional table name for reference.
|
name (str): Optional table name for reference.
|
||||||
data (dict): Initial data, used to hint Bloom Filter.
|
data (dict): Initial data, used to hint Bloom Filter.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/lookups#table.init
|
DOCS: https://nightly.spacy.io/api/lookups#table.init
|
||||||
"""
|
"""
|
||||||
OrderedDict.__init__(self)
|
OrderedDict.__init__(self)
|
||||||
self.name = name
|
self.name = name
|
||||||
|
@ -135,7 +135,7 @@ class Table(OrderedDict):
|
||||||
|
|
||||||
RETURNS (bytes): The serialized table.
|
RETURNS (bytes): The serialized table.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/lookups#table.to_bytes
|
DOCS: https://nightly.spacy.io/api/lookups#table.to_bytes
|
||||||
"""
|
"""
|
||||||
data = {
|
data = {
|
||||||
"name": self.name,
|
"name": self.name,
|
||||||
|
@ -150,7 +150,7 @@ class Table(OrderedDict):
|
||||||
bytes_data (bytes): The data to load.
|
bytes_data (bytes): The data to load.
|
||||||
RETURNS (Table): The loaded table.
|
RETURNS (Table): The loaded table.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/lookups#table.from_bytes
|
DOCS: https://nightly.spacy.io/api/lookups#table.from_bytes
|
||||||
"""
|
"""
|
||||||
loaded = srsly.msgpack_loads(bytes_data)
|
loaded = srsly.msgpack_loads(bytes_data)
|
||||||
data = loaded.get("dict", {})
|
data = loaded.get("dict", {})
|
||||||
|
@ -172,7 +172,7 @@ class Lookups:
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
"""Initialize the Lookups object.
|
"""Initialize the Lookups object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/lookups#init
|
DOCS: https://nightly.spacy.io/api/lookups#init
|
||||||
"""
|
"""
|
||||||
self._tables = {}
|
self._tables = {}
|
||||||
|
|
||||||
|
@ -201,7 +201,7 @@ class Lookups:
|
||||||
data (dict): Optional data to add to the table.
|
data (dict): Optional data to add to the table.
|
||||||
RETURNS (Table): The newly added table.
|
RETURNS (Table): The newly added table.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/lookups#add_table
|
DOCS: https://nightly.spacy.io/api/lookups#add_table
|
||||||
"""
|
"""
|
||||||
if name in self.tables:
|
if name in self.tables:
|
||||||
raise ValueError(Errors.E158.format(name=name))
|
raise ValueError(Errors.E158.format(name=name))
|
||||||
|
@ -215,7 +215,7 @@ class Lookups:
|
||||||
name (str): Name of the table to set.
|
name (str): Name of the table to set.
|
||||||
table (Table): The Table to set.
|
table (Table): The Table to set.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/lookups#set_table
|
DOCS: https://nightly.spacy.io/api/lookups#set_table
|
||||||
"""
|
"""
|
||||||
self._tables[name] = table
|
self._tables[name] = table
|
||||||
|
|
||||||
|
@ -227,7 +227,7 @@ class Lookups:
|
||||||
default (Any): Optional default value to return if table doesn't exist.
|
default (Any): Optional default value to return if table doesn't exist.
|
||||||
RETURNS (Table): The table.
|
RETURNS (Table): The table.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/lookups#get_table
|
DOCS: https://nightly.spacy.io/api/lookups#get_table
|
||||||
"""
|
"""
|
||||||
if name not in self._tables:
|
if name not in self._tables:
|
||||||
if default == UNSET:
|
if default == UNSET:
|
||||||
|
@ -241,7 +241,7 @@ class Lookups:
|
||||||
name (str): Name of the table to remove.
|
name (str): Name of the table to remove.
|
||||||
RETURNS (Table): The removed table.
|
RETURNS (Table): The removed table.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/lookups#remove_table
|
DOCS: https://nightly.spacy.io/api/lookups#remove_table
|
||||||
"""
|
"""
|
||||||
if name not in self._tables:
|
if name not in self._tables:
|
||||||
raise KeyError(Errors.E159.format(name=name, tables=self.tables))
|
raise KeyError(Errors.E159.format(name=name, tables=self.tables))
|
||||||
|
@ -253,7 +253,7 @@ class Lookups:
|
||||||
name (str): Name of the table.
|
name (str): Name of the table.
|
||||||
RETURNS (bool): Whether a table of that name exists.
|
RETURNS (bool): Whether a table of that name exists.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/lookups#has_table
|
DOCS: https://nightly.spacy.io/api/lookups#has_table
|
||||||
"""
|
"""
|
||||||
return name in self._tables
|
return name in self._tables
|
||||||
|
|
||||||
|
@ -262,7 +262,7 @@ class Lookups:
|
||||||
|
|
||||||
RETURNS (bytes): The serialized Lookups.
|
RETURNS (bytes): The serialized Lookups.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/lookups#to_bytes
|
DOCS: https://nightly.spacy.io/api/lookups#to_bytes
|
||||||
"""
|
"""
|
||||||
return srsly.msgpack_dumps(self._tables)
|
return srsly.msgpack_dumps(self._tables)
|
||||||
|
|
||||||
|
@ -272,7 +272,7 @@ class Lookups:
|
||||||
bytes_data (bytes): The data to load.
|
bytes_data (bytes): The data to load.
|
||||||
RETURNS (Lookups): The loaded Lookups.
|
RETURNS (Lookups): The loaded Lookups.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/lookups#from_bytes
|
DOCS: https://nightly.spacy.io/api/lookups#from_bytes
|
||||||
"""
|
"""
|
||||||
self._tables = {}
|
self._tables = {}
|
||||||
for key, value in srsly.msgpack_loads(bytes_data).items():
|
for key, value in srsly.msgpack_loads(bytes_data).items():
|
||||||
|
@ -287,7 +287,7 @@ class Lookups:
|
||||||
|
|
||||||
path (str / Path): The file path.
|
path (str / Path): The file path.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/lookups#to_disk
|
DOCS: https://nightly.spacy.io/api/lookups#to_disk
|
||||||
"""
|
"""
|
||||||
if len(self._tables):
|
if len(self._tables):
|
||||||
path = ensure_path(path)
|
path = ensure_path(path)
|
||||||
|
@ -306,7 +306,7 @@ class Lookups:
|
||||||
path (str / Path): The directory path.
|
path (str / Path): The directory path.
|
||||||
RETURNS (Lookups): The loaded lookups.
|
RETURNS (Lookups): The loaded lookups.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/lookups#from_disk
|
DOCS: https://nightly.spacy.io/api/lookups#from_disk
|
||||||
"""
|
"""
|
||||||
path = ensure_path(path)
|
path = ensure_path(path)
|
||||||
filepath = path / filename
|
filepath = path / filename
|
||||||
|
|
|
@ -31,8 +31,8 @@ DEF PADDING = 5
|
||||||
cdef class Matcher:
|
cdef class Matcher:
|
||||||
"""Match sequences of tokens, based on pattern rules.
|
"""Match sequences of tokens, based on pattern rules.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/matcher
|
DOCS: https://nightly.spacy.io/api/matcher
|
||||||
USAGE: https://spacy.io/usage/rule-based-matching
|
USAGE: https://nightly.spacy.io/usage/rule-based-matching
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, vocab, validate=True):
|
def __init__(self, vocab, validate=True):
|
||||||
|
|
|
@ -19,8 +19,8 @@ cdef class PhraseMatcher:
|
||||||
sequences based on lists of token descriptions, the `PhraseMatcher` accepts
|
sequences based on lists of token descriptions, the `PhraseMatcher` accepts
|
||||||
match patterns in the form of `Doc` objects.
|
match patterns in the form of `Doc` objects.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/phrasematcher
|
DOCS: https://nightly.spacy.io/api/phrasematcher
|
||||||
USAGE: https://spacy.io/usage/rule-based-matching#phrasematcher
|
USAGE: https://nightly.spacy.io/usage/rule-based-matching#phrasematcher
|
||||||
|
|
||||||
Adapted from FlashText: https://github.com/vi3k6i5/flashtext
|
Adapted from FlashText: https://github.com/vi3k6i5/flashtext
|
||||||
MIT License (see `LICENSE`)
|
MIT License (see `LICENSE`)
|
||||||
|
@ -34,7 +34,7 @@ cdef class PhraseMatcher:
|
||||||
attr (int / str): Token attribute to match on.
|
attr (int / str): Token attribute to match on.
|
||||||
validate (bool): Perform additional validation when patterns are added.
|
validate (bool): Perform additional validation when patterns are added.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/phrasematcher#init
|
DOCS: https://nightly.spacy.io/api/phrasematcher#init
|
||||||
"""
|
"""
|
||||||
self.vocab = vocab
|
self.vocab = vocab
|
||||||
self._callbacks = {}
|
self._callbacks = {}
|
||||||
|
@ -61,7 +61,7 @@ cdef class PhraseMatcher:
|
||||||
|
|
||||||
RETURNS (int): The number of rules.
|
RETURNS (int): The number of rules.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/phrasematcher#len
|
DOCS: https://nightly.spacy.io/api/phrasematcher#len
|
||||||
"""
|
"""
|
||||||
return len(self._callbacks)
|
return len(self._callbacks)
|
||||||
|
|
||||||
|
@ -71,7 +71,7 @@ cdef class PhraseMatcher:
|
||||||
key (str): The match ID.
|
key (str): The match ID.
|
||||||
RETURNS (bool): Whether the matcher contains rules for this match ID.
|
RETURNS (bool): Whether the matcher contains rules for this match ID.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/phrasematcher#contains
|
DOCS: https://nightly.spacy.io/api/phrasematcher#contains
|
||||||
"""
|
"""
|
||||||
return key in self._callbacks
|
return key in self._callbacks
|
||||||
|
|
||||||
|
@ -85,7 +85,7 @@ cdef class PhraseMatcher:
|
||||||
|
|
||||||
key (str): The match ID.
|
key (str): The match ID.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/phrasematcher#remove
|
DOCS: https://nightly.spacy.io/api/phrasematcher#remove
|
||||||
"""
|
"""
|
||||||
if key not in self._docs:
|
if key not in self._docs:
|
||||||
raise KeyError(key)
|
raise KeyError(key)
|
||||||
|
@ -164,7 +164,7 @@ cdef class PhraseMatcher:
|
||||||
as variable arguments. Will be ignored if a list of patterns is
|
as variable arguments. Will be ignored if a list of patterns is
|
||||||
provided as the second argument.
|
provided as the second argument.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/phrasematcher#add
|
DOCS: https://nightly.spacy.io/api/phrasematcher#add
|
||||||
"""
|
"""
|
||||||
if docs is None or hasattr(docs, "__call__"): # old API
|
if docs is None or hasattr(docs, "__call__"): # old API
|
||||||
on_match = docs
|
on_match = docs
|
||||||
|
@ -228,7 +228,7 @@ cdef class PhraseMatcher:
|
||||||
`doc[start:end]`. The `match_id` is an integer. If as_spans is set
|
`doc[start:end]`. The `match_id` is an integer. If as_spans is set
|
||||||
to True, a list of Span objects is returned.
|
to True, a list of Span objects is returned.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/phrasematcher#call
|
DOCS: https://nightly.spacy.io/api/phrasematcher#call
|
||||||
"""
|
"""
|
||||||
matches = []
|
matches = []
|
||||||
if doc is None or len(doc) == 0:
|
if doc is None or len(doc) == 0:
|
||||||
|
|
|
@ -38,7 +38,7 @@ class AttributeRuler(Pipe):
|
||||||
"""Set token-level attributes for tokens matched by Matcher patterns.
|
"""Set token-level attributes for tokens matched by Matcher patterns.
|
||||||
Additionally supports importing patterns from tag maps and morph rules.
|
Additionally supports importing patterns from tag maps and morph rules.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/attributeruler
|
DOCS: https://nightly.spacy.io/api/attributeruler
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
@ -59,7 +59,7 @@ class AttributeRuler(Pipe):
|
||||||
|
|
||||||
RETURNS (AttributeRuler): The AttributeRuler component.
|
RETURNS (AttributeRuler): The AttributeRuler component.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/attributeruler#init
|
DOCS: https://nightly.spacy.io/api/attributeruler#init
|
||||||
"""
|
"""
|
||||||
self.name = name
|
self.name = name
|
||||||
self.vocab = vocab
|
self.vocab = vocab
|
||||||
|
@ -77,7 +77,7 @@ class AttributeRuler(Pipe):
|
||||||
doc (Doc): The document to process.
|
doc (Doc): The document to process.
|
||||||
RETURNS (Doc): The processed Doc.
|
RETURNS (Doc): The processed Doc.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/attributeruler#call
|
DOCS: https://nightly.spacy.io/api/attributeruler#call
|
||||||
"""
|
"""
|
||||||
matches = sorted(self.matcher(doc))
|
matches = sorted(self.matcher(doc))
|
||||||
|
|
||||||
|
@ -121,7 +121,7 @@ class AttributeRuler(Pipe):
|
||||||
tag_map (dict): The tag map that maps fine-grained tags to
|
tag_map (dict): The tag map that maps fine-grained tags to
|
||||||
coarse-grained tags and morphological features.
|
coarse-grained tags and morphological features.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/attributeruler#load_from_morph_rules
|
DOCS: https://nightly.spacy.io/api/attributeruler#load_from_morph_rules
|
||||||
"""
|
"""
|
||||||
for tag, attrs in tag_map.items():
|
for tag, attrs in tag_map.items():
|
||||||
pattern = [{"TAG": tag}]
|
pattern = [{"TAG": tag}]
|
||||||
|
@ -139,7 +139,7 @@ class AttributeRuler(Pipe):
|
||||||
fine-grained tags to coarse-grained tags, lemmas and morphological
|
fine-grained tags to coarse-grained tags, lemmas and morphological
|
||||||
features.
|
features.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/attributeruler#load_from_morph_rules
|
DOCS: https://nightly.spacy.io/api/attributeruler#load_from_morph_rules
|
||||||
"""
|
"""
|
||||||
for tag in morph_rules:
|
for tag in morph_rules:
|
||||||
for word in morph_rules[tag]:
|
for word in morph_rules[tag]:
|
||||||
|
@ -163,7 +163,7 @@ class AttributeRuler(Pipe):
|
||||||
index (int): The index of the token in the matched span to modify. May
|
index (int): The index of the token in the matched span to modify. May
|
||||||
be negative to index from the end of the span. Defaults to 0.
|
be negative to index from the end of the span. Defaults to 0.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/attributeruler#add
|
DOCS: https://nightly.spacy.io/api/attributeruler#add
|
||||||
"""
|
"""
|
||||||
self.matcher.add(len(self.attrs), patterns)
|
self.matcher.add(len(self.attrs), patterns)
|
||||||
self._attrs_unnormed.append(attrs)
|
self._attrs_unnormed.append(attrs)
|
||||||
|
@ -178,7 +178,7 @@ class AttributeRuler(Pipe):
|
||||||
as the arguments to AttributeRuler.add (patterns/attrs/index) to
|
as the arguments to AttributeRuler.add (patterns/attrs/index) to
|
||||||
add as patterns.
|
add as patterns.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/attributeruler#add_patterns
|
DOCS: https://nightly.spacy.io/api/attributeruler#add_patterns
|
||||||
"""
|
"""
|
||||||
for p in pattern_dicts:
|
for p in pattern_dicts:
|
||||||
self.add(**p)
|
self.add(**p)
|
||||||
|
@ -203,7 +203,7 @@ class AttributeRuler(Pipe):
|
||||||
Scorer.score_token_attr for the attributes "tag", "pos", "morph"
|
Scorer.score_token_attr for the attributes "tag", "pos", "morph"
|
||||||
and "lemma" for the target token attributes.
|
and "lemma" for the target token attributes.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tagger#score
|
DOCS: https://nightly.spacy.io/api/tagger#score
|
||||||
"""
|
"""
|
||||||
validate_examples(examples, "AttributeRuler.score")
|
validate_examples(examples, "AttributeRuler.score")
|
||||||
results = {}
|
results = {}
|
||||||
|
@ -227,7 +227,7 @@ class AttributeRuler(Pipe):
|
||||||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||||
RETURNS (bytes): The serialized object.
|
RETURNS (bytes): The serialized object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/attributeruler#to_bytes
|
DOCS: https://nightly.spacy.io/api/attributeruler#to_bytes
|
||||||
"""
|
"""
|
||||||
serialize = {}
|
serialize = {}
|
||||||
serialize["vocab"] = self.vocab.to_bytes
|
serialize["vocab"] = self.vocab.to_bytes
|
||||||
|
@ -243,7 +243,7 @@ class AttributeRuler(Pipe):
|
||||||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||||
returns (AttributeRuler): The loaded object.
|
returns (AttributeRuler): The loaded object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/attributeruler#from_bytes
|
DOCS: https://nightly.spacy.io/api/attributeruler#from_bytes
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def load_patterns(b):
|
def load_patterns(b):
|
||||||
|
@ -264,7 +264,7 @@ class AttributeRuler(Pipe):
|
||||||
|
|
||||||
path (Union[Path, str]): A path to a directory.
|
path (Union[Path, str]): A path to a directory.
|
||||||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||||
DOCS: https://spacy.io/api/attributeruler#to_disk
|
DOCS: https://nightly.spacy.io/api/attributeruler#to_disk
|
||||||
"""
|
"""
|
||||||
serialize = {
|
serialize = {
|
||||||
"vocab": lambda p: self.vocab.to_disk(p),
|
"vocab": lambda p: self.vocab.to_disk(p),
|
||||||
|
@ -279,7 +279,7 @@ class AttributeRuler(Pipe):
|
||||||
|
|
||||||
path (Union[Path, str]): A path to a directory.
|
path (Union[Path, str]): A path to a directory.
|
||||||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||||
DOCS: https://spacy.io/api/attributeruler#from_disk
|
DOCS: https://nightly.spacy.io/api/attributeruler#from_disk
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def load_patterns(p):
|
def load_patterns(p):
|
||||||
|
|
|
@ -105,7 +105,7 @@ def make_parser(
|
||||||
cdef class DependencyParser(Parser):
|
cdef class DependencyParser(Parser):
|
||||||
"""Pipeline component for dependency parsing.
|
"""Pipeline component for dependency parsing.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/dependencyparser
|
DOCS: https://nightly.spacy.io/api/dependencyparser
|
||||||
"""
|
"""
|
||||||
TransitionSystem = ArcEager
|
TransitionSystem = ArcEager
|
||||||
|
|
||||||
|
@ -146,7 +146,7 @@ cdef class DependencyParser(Parser):
|
||||||
RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans
|
RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans
|
||||||
and Scorer.score_deps.
|
and Scorer.score_deps.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/dependencyparser#score
|
DOCS: https://nightly.spacy.io/api/dependencyparser#score
|
||||||
"""
|
"""
|
||||||
validate_examples(examples, "DependencyParser.score")
|
validate_examples(examples, "DependencyParser.score")
|
||||||
def dep_getter(token, attr):
|
def dep_getter(token, attr):
|
||||||
|
|
|
@ -83,7 +83,7 @@ def make_entity_linker(
|
||||||
class EntityLinker(Pipe):
|
class EntityLinker(Pipe):
|
||||||
"""Pipeline component for named entity linking.
|
"""Pipeline component for named entity linking.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/entitylinker
|
DOCS: https://nightly.spacy.io/api/entitylinker
|
||||||
"""
|
"""
|
||||||
|
|
||||||
NIL = "NIL" # string used to refer to a non-existing link
|
NIL = "NIL" # string used to refer to a non-existing link
|
||||||
|
@ -111,7 +111,7 @@ class EntityLinker(Pipe):
|
||||||
incl_prior (bool): Whether or not to include prior probabilities from the KB in the model.
|
incl_prior (bool): Whether or not to include prior probabilities from the KB in the model.
|
||||||
incl_context (bool): Whether or not to include the local context in the model.
|
incl_context (bool): Whether or not to include the local context in the model.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/entitylinker#init
|
DOCS: https://nightly.spacy.io/api/entitylinker#init
|
||||||
"""
|
"""
|
||||||
self.vocab = vocab
|
self.vocab = vocab
|
||||||
self.model = model
|
self.model = model
|
||||||
|
@ -151,7 +151,7 @@ class EntityLinker(Pipe):
|
||||||
create_optimizer if it doesn't exist.
|
create_optimizer if it doesn't exist.
|
||||||
RETURNS (thinc.api.Optimizer): The optimizer.
|
RETURNS (thinc.api.Optimizer): The optimizer.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/entitylinker#begin_training
|
DOCS: https://nightly.spacy.io/api/entitylinker#begin_training
|
||||||
"""
|
"""
|
||||||
self.require_kb()
|
self.require_kb()
|
||||||
nO = self.kb.entity_vector_length
|
nO = self.kb.entity_vector_length
|
||||||
|
@ -182,7 +182,7 @@ class EntityLinker(Pipe):
|
||||||
Updated using the component name as the key.
|
Updated using the component name as the key.
|
||||||
RETURNS (Dict[str, float]): The updated losses dictionary.
|
RETURNS (Dict[str, float]): The updated losses dictionary.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/entitylinker#update
|
DOCS: https://nightly.spacy.io/api/entitylinker#update
|
||||||
"""
|
"""
|
||||||
self.require_kb()
|
self.require_kb()
|
||||||
if losses is None:
|
if losses is None:
|
||||||
|
@ -264,7 +264,7 @@ class EntityLinker(Pipe):
|
||||||
doc (Doc): The document to process.
|
doc (Doc): The document to process.
|
||||||
RETURNS (Doc): The processed Doc.
|
RETURNS (Doc): The processed Doc.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/entitylinker#call
|
DOCS: https://nightly.spacy.io/api/entitylinker#call
|
||||||
"""
|
"""
|
||||||
kb_ids = self.predict([doc])
|
kb_ids = self.predict([doc])
|
||||||
self.set_annotations([doc], kb_ids)
|
self.set_annotations([doc], kb_ids)
|
||||||
|
@ -279,7 +279,7 @@ class EntityLinker(Pipe):
|
||||||
batch_size (int): The number of documents to buffer.
|
batch_size (int): The number of documents to buffer.
|
||||||
YIELDS (Doc): Processed documents in order.
|
YIELDS (Doc): Processed documents in order.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/entitylinker#pipe
|
DOCS: https://nightly.spacy.io/api/entitylinker#pipe
|
||||||
"""
|
"""
|
||||||
for docs in util.minibatch(stream, size=batch_size):
|
for docs in util.minibatch(stream, size=batch_size):
|
||||||
kb_ids = self.predict(docs)
|
kb_ids = self.predict(docs)
|
||||||
|
@ -294,7 +294,7 @@ class EntityLinker(Pipe):
|
||||||
docs (Iterable[Doc]): The documents to predict.
|
docs (Iterable[Doc]): The documents to predict.
|
||||||
RETURNS (List[int]): The model's predictions for each document.
|
RETURNS (List[int]): The model's predictions for each document.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/entitylinker#predict
|
DOCS: https://nightly.spacy.io/api/entitylinker#predict
|
||||||
"""
|
"""
|
||||||
self.require_kb()
|
self.require_kb()
|
||||||
entity_count = 0
|
entity_count = 0
|
||||||
|
@ -391,7 +391,7 @@ class EntityLinker(Pipe):
|
||||||
docs (Iterable[Doc]): The documents to modify.
|
docs (Iterable[Doc]): The documents to modify.
|
||||||
kb_ids (List[str]): The IDs to set, produced by EntityLinker.predict.
|
kb_ids (List[str]): The IDs to set, produced by EntityLinker.predict.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/entitylinker#set_annotations
|
DOCS: https://nightly.spacy.io/api/entitylinker#set_annotations
|
||||||
"""
|
"""
|
||||||
count_ents = len([ent for doc in docs for ent in doc.ents])
|
count_ents = len([ent for doc in docs for ent in doc.ents])
|
||||||
if count_ents != len(kb_ids):
|
if count_ents != len(kb_ids):
|
||||||
|
@ -412,7 +412,7 @@ class EntityLinker(Pipe):
|
||||||
path (str / Path): Path to a directory.
|
path (str / Path): Path to a directory.
|
||||||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/entitylinker#to_disk
|
DOCS: https://nightly.spacy.io/api/entitylinker#to_disk
|
||||||
"""
|
"""
|
||||||
serialize = {}
|
serialize = {}
|
||||||
serialize["cfg"] = lambda p: srsly.write_json(p, self.cfg)
|
serialize["cfg"] = lambda p: srsly.write_json(p, self.cfg)
|
||||||
|
@ -430,7 +430,7 @@ class EntityLinker(Pipe):
|
||||||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||||
RETURNS (EntityLinker): The modified EntityLinker object.
|
RETURNS (EntityLinker): The modified EntityLinker object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/entitylinker#from_disk
|
DOCS: https://nightly.spacy.io/api/entitylinker#from_disk
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def load_model(p):
|
def load_model(p):
|
||||||
|
|
|
@ -53,8 +53,8 @@ class EntityRuler:
|
||||||
purely rule-based entity recognition system. After initialization, the
|
purely rule-based entity recognition system. After initialization, the
|
||||||
component is typically added to the pipeline using `nlp.add_pipe`.
|
component is typically added to the pipeline using `nlp.add_pipe`.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/entityruler
|
DOCS: https://nightly.spacy.io/api/entityruler
|
||||||
USAGE: https://spacy.io/usage/rule-based-matching#entityruler
|
USAGE: https://nightly.spacy.io/usage/rule-based-matching#entityruler
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
@ -88,7 +88,7 @@ class EntityRuler:
|
||||||
added by the model, overwrite them by matches if necessary.
|
added by the model, overwrite them by matches if necessary.
|
||||||
ent_id_sep (str): Separator used internally for entity IDs.
|
ent_id_sep (str): Separator used internally for entity IDs.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/entityruler#init
|
DOCS: https://nightly.spacy.io/api/entityruler#init
|
||||||
"""
|
"""
|
||||||
self.nlp = nlp
|
self.nlp = nlp
|
||||||
self.name = name
|
self.name = name
|
||||||
|
@ -127,7 +127,7 @@ class EntityRuler:
|
||||||
doc (Doc): The Doc object in the pipeline.
|
doc (Doc): The Doc object in the pipeline.
|
||||||
RETURNS (Doc): The Doc with added entities, if available.
|
RETURNS (Doc): The Doc with added entities, if available.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/entityruler#call
|
DOCS: https://nightly.spacy.io/api/entityruler#call
|
||||||
"""
|
"""
|
||||||
matches = list(self.matcher(doc)) + list(self.phrase_matcher(doc))
|
matches = list(self.matcher(doc)) + list(self.phrase_matcher(doc))
|
||||||
matches = set(
|
matches = set(
|
||||||
|
@ -165,7 +165,7 @@ class EntityRuler:
|
||||||
|
|
||||||
RETURNS (set): The string labels.
|
RETURNS (set): The string labels.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/entityruler#labels
|
DOCS: https://nightly.spacy.io/api/entityruler#labels
|
||||||
"""
|
"""
|
||||||
keys = set(self.token_patterns.keys())
|
keys = set(self.token_patterns.keys())
|
||||||
keys.update(self.phrase_patterns.keys())
|
keys.update(self.phrase_patterns.keys())
|
||||||
|
@ -185,7 +185,7 @@ class EntityRuler:
|
||||||
|
|
||||||
RETURNS (set): The string entity ids.
|
RETURNS (set): The string entity ids.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/entityruler#ent_ids
|
DOCS: https://nightly.spacy.io/api/entityruler#ent_ids
|
||||||
"""
|
"""
|
||||||
keys = set(self.token_patterns.keys())
|
keys = set(self.token_patterns.keys())
|
||||||
keys.update(self.phrase_patterns.keys())
|
keys.update(self.phrase_patterns.keys())
|
||||||
|
@ -203,7 +203,7 @@ class EntityRuler:
|
||||||
|
|
||||||
RETURNS (list): The original patterns, one dictionary per pattern.
|
RETURNS (list): The original patterns, one dictionary per pattern.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/entityruler#patterns
|
DOCS: https://nightly.spacy.io/api/entityruler#patterns
|
||||||
"""
|
"""
|
||||||
all_patterns = []
|
all_patterns = []
|
||||||
for label, patterns in self.token_patterns.items():
|
for label, patterns in self.token_patterns.items():
|
||||||
|
@ -230,7 +230,7 @@ class EntityRuler:
|
||||||
|
|
||||||
patterns (list): The patterns to add.
|
patterns (list): The patterns to add.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/entityruler#add_patterns
|
DOCS: https://nightly.spacy.io/api/entityruler#add_patterns
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# disable the nlp components after this one in case they hadn't been initialized / deserialised yet
|
# disable the nlp components after this one in case they hadn't been initialized / deserialised yet
|
||||||
|
@ -324,7 +324,7 @@ class EntityRuler:
|
||||||
patterns_bytes (bytes): The bytestring to load.
|
patterns_bytes (bytes): The bytestring to load.
|
||||||
RETURNS (EntityRuler): The loaded entity ruler.
|
RETURNS (EntityRuler): The loaded entity ruler.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/entityruler#from_bytes
|
DOCS: https://nightly.spacy.io/api/entityruler#from_bytes
|
||||||
"""
|
"""
|
||||||
cfg = srsly.msgpack_loads(patterns_bytes)
|
cfg = srsly.msgpack_loads(patterns_bytes)
|
||||||
self.clear()
|
self.clear()
|
||||||
|
@ -346,7 +346,7 @@ class EntityRuler:
|
||||||
|
|
||||||
RETURNS (bytes): The serialized patterns.
|
RETURNS (bytes): The serialized patterns.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/entityruler#to_bytes
|
DOCS: https://nightly.spacy.io/api/entityruler#to_bytes
|
||||||
"""
|
"""
|
||||||
serial = {
|
serial = {
|
||||||
"overwrite": self.overwrite,
|
"overwrite": self.overwrite,
|
||||||
|
@ -365,7 +365,7 @@ class EntityRuler:
|
||||||
path (str / Path): The JSONL file to load.
|
path (str / Path): The JSONL file to load.
|
||||||
RETURNS (EntityRuler): The loaded entity ruler.
|
RETURNS (EntityRuler): The loaded entity ruler.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/entityruler#from_disk
|
DOCS: https://nightly.spacy.io/api/entityruler#from_disk
|
||||||
"""
|
"""
|
||||||
path = ensure_path(path)
|
path = ensure_path(path)
|
||||||
self.clear()
|
self.clear()
|
||||||
|
@ -401,7 +401,7 @@ class EntityRuler:
|
||||||
|
|
||||||
path (str / Path): The JSONL file to save.
|
path (str / Path): The JSONL file to save.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/entityruler#to_disk
|
DOCS: https://nightly.spacy.io/api/entityruler#to_disk
|
||||||
"""
|
"""
|
||||||
path = ensure_path(path)
|
path = ensure_path(path)
|
||||||
cfg = {
|
cfg = {
|
||||||
|
|
|
@ -15,7 +15,7 @@ def merge_noun_chunks(doc: Doc) -> Doc:
|
||||||
doc (Doc): The Doc object.
|
doc (Doc): The Doc object.
|
||||||
RETURNS (Doc): The Doc object with merged noun chunks.
|
RETURNS (Doc): The Doc object with merged noun chunks.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/pipeline-functions#merge_noun_chunks
|
DOCS: https://nightly.spacy.io/api/pipeline-functions#merge_noun_chunks
|
||||||
"""
|
"""
|
||||||
if not doc.is_parsed:
|
if not doc.is_parsed:
|
||||||
return doc
|
return doc
|
||||||
|
@ -37,7 +37,7 @@ def merge_entities(doc: Doc):
|
||||||
doc (Doc): The Doc object.
|
doc (Doc): The Doc object.
|
||||||
RETURNS (Doc): The Doc object with merged entities.
|
RETURNS (Doc): The Doc object with merged entities.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/pipeline-functions#merge_entities
|
DOCS: https://nightly.spacy.io/api/pipeline-functions#merge_entities
|
||||||
"""
|
"""
|
||||||
with doc.retokenize() as retokenizer:
|
with doc.retokenize() as retokenizer:
|
||||||
for ent in doc.ents:
|
for ent in doc.ents:
|
||||||
|
@ -54,7 +54,7 @@ def merge_subtokens(doc: Doc, label: str = "subtok") -> Doc:
|
||||||
label (str): The subtoken dependency label.
|
label (str): The subtoken dependency label.
|
||||||
RETURNS (Doc): The Doc object with merged subtokens.
|
RETURNS (Doc): The Doc object with merged subtokens.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/pipeline-functions#merge_subtokens
|
DOCS: https://nightly.spacy.io/api/pipeline-functions#merge_subtokens
|
||||||
"""
|
"""
|
||||||
# TODO: make stateful component with "label" config
|
# TODO: make stateful component with "label" config
|
||||||
merger = Matcher(doc.vocab)
|
merger = Matcher(doc.vocab)
|
||||||
|
|
|
@ -43,7 +43,7 @@ class Lemmatizer(Pipe):
|
||||||
The Lemmatizer supports simple part-of-speech-sensitive suffix rules and
|
The Lemmatizer supports simple part-of-speech-sensitive suffix rules and
|
||||||
lookup tables.
|
lookup tables.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/lemmatizer
|
DOCS: https://nightly.spacy.io/api/lemmatizer
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -54,7 +54,7 @@ class Lemmatizer(Pipe):
|
||||||
mode (str): The lemmatizer mode.
|
mode (str): The lemmatizer mode.
|
||||||
RETURNS (dict): The lookups configuration settings for this mode.
|
RETURNS (dict): The lookups configuration settings for this mode.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/lemmatizer#get_lookups_config
|
DOCS: https://nightly.spacy.io/api/lemmatizer#get_lookups_config
|
||||||
"""
|
"""
|
||||||
if mode == "lookup":
|
if mode == "lookup":
|
||||||
return {
|
return {
|
||||||
|
@ -80,7 +80,7 @@ class Lemmatizer(Pipe):
|
||||||
lookups should be loaded.
|
lookups should be loaded.
|
||||||
RETURNS (Lookups): The Lookups object.
|
RETURNS (Lookups): The Lookups object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/lemmatizer#get_lookups_config
|
DOCS: https://nightly.spacy.io/api/lemmatizer#get_lookups_config
|
||||||
"""
|
"""
|
||||||
config = cls.get_lookups_config(mode)
|
config = cls.get_lookups_config(mode)
|
||||||
required_tables = config.get("required_tables", [])
|
required_tables = config.get("required_tables", [])
|
||||||
|
@ -123,7 +123,7 @@ class Lemmatizer(Pipe):
|
||||||
overwrite (bool): Whether to overwrite existing lemmas. Defaults to
|
overwrite (bool): Whether to overwrite existing lemmas. Defaults to
|
||||||
`False`.
|
`False`.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/lemmatizer#init
|
DOCS: https://nightly.spacy.io/api/lemmatizer#init
|
||||||
"""
|
"""
|
||||||
self.vocab = vocab
|
self.vocab = vocab
|
||||||
self.model = model
|
self.model = model
|
||||||
|
@ -152,7 +152,7 @@ class Lemmatizer(Pipe):
|
||||||
doc (Doc): The Doc to process.
|
doc (Doc): The Doc to process.
|
||||||
RETURNS (Doc): The processed Doc.
|
RETURNS (Doc): The processed Doc.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/lemmatizer#call
|
DOCS: https://nightly.spacy.io/api/lemmatizer#call
|
||||||
"""
|
"""
|
||||||
for token in doc:
|
for token in doc:
|
||||||
if self.overwrite or token.lemma == 0:
|
if self.overwrite or token.lemma == 0:
|
||||||
|
@ -168,7 +168,7 @@ class Lemmatizer(Pipe):
|
||||||
batch_size (int): The number of documents to buffer.
|
batch_size (int): The number of documents to buffer.
|
||||||
YIELDS (Doc): Processed documents in order.
|
YIELDS (Doc): Processed documents in order.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/lemmatizer#pipe
|
DOCS: https://nightly.spacy.io/api/lemmatizer#pipe
|
||||||
"""
|
"""
|
||||||
for doc in stream:
|
for doc in stream:
|
||||||
doc = self(doc)
|
doc = self(doc)
|
||||||
|
@ -180,7 +180,7 @@ class Lemmatizer(Pipe):
|
||||||
token (Token): The token to lemmatize.
|
token (Token): The token to lemmatize.
|
||||||
RETURNS (list): The available lemmas for the string.
|
RETURNS (list): The available lemmas for the string.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/lemmatizer#lookup_lemmatize
|
DOCS: https://nightly.spacy.io/api/lemmatizer#lookup_lemmatize
|
||||||
"""
|
"""
|
||||||
lookup_table = self.lookups.get_table("lemma_lookup", {})
|
lookup_table = self.lookups.get_table("lemma_lookup", {})
|
||||||
result = lookup_table.get(token.text, token.text)
|
result = lookup_table.get(token.text, token.text)
|
||||||
|
@ -194,7 +194,7 @@ class Lemmatizer(Pipe):
|
||||||
token (Token): The token to lemmatize.
|
token (Token): The token to lemmatize.
|
||||||
RETURNS (list): The available lemmas for the string.
|
RETURNS (list): The available lemmas for the string.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/lemmatizer#rule_lemmatize
|
DOCS: https://nightly.spacy.io/api/lemmatizer#rule_lemmatize
|
||||||
"""
|
"""
|
||||||
cache_key = (token.orth, token.pos, token.morph)
|
cache_key = (token.orth, token.pos, token.morph)
|
||||||
if cache_key in self.cache:
|
if cache_key in self.cache:
|
||||||
|
@ -260,7 +260,7 @@ class Lemmatizer(Pipe):
|
||||||
token (Token): The token.
|
token (Token): The token.
|
||||||
RETURNS (bool): Whether the token is a base form.
|
RETURNS (bool): Whether the token is a base form.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/lemmatizer#is_base_form
|
DOCS: https://nightly.spacy.io/api/lemmatizer#is_base_form
|
||||||
"""
|
"""
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@ -270,7 +270,7 @@ class Lemmatizer(Pipe):
|
||||||
examples (Iterable[Example]): The examples to score.
|
examples (Iterable[Example]): The examples to score.
|
||||||
RETURNS (Dict[str, Any]): The scores.
|
RETURNS (Dict[str, Any]): The scores.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/lemmatizer#score
|
DOCS: https://nightly.spacy.io/api/lemmatizer#score
|
||||||
"""
|
"""
|
||||||
validate_examples(examples, "Lemmatizer.score")
|
validate_examples(examples, "Lemmatizer.score")
|
||||||
return Scorer.score_token_attr(examples, "lemma", **kwargs)
|
return Scorer.score_token_attr(examples, "lemma", **kwargs)
|
||||||
|
@ -282,7 +282,7 @@ class Lemmatizer(Pipe):
|
||||||
it doesn't exist.
|
it doesn't exist.
|
||||||
exclude (list): String names of serialization fields to exclude.
|
exclude (list): String names of serialization fields to exclude.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vocab#to_disk
|
DOCS: https://nightly.spacy.io/api/vocab#to_disk
|
||||||
"""
|
"""
|
||||||
serialize = {}
|
serialize = {}
|
||||||
serialize["vocab"] = lambda p: self.vocab.to_disk(p)
|
serialize["vocab"] = lambda p: self.vocab.to_disk(p)
|
||||||
|
@ -297,7 +297,7 @@ class Lemmatizer(Pipe):
|
||||||
exclude (list): String names of serialization fields to exclude.
|
exclude (list): String names of serialization fields to exclude.
|
||||||
RETURNS (Vocab): The modified `Vocab` object.
|
RETURNS (Vocab): The modified `Vocab` object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vocab#to_disk
|
DOCS: https://nightly.spacy.io/api/vocab#to_disk
|
||||||
"""
|
"""
|
||||||
deserialize = {}
|
deserialize = {}
|
||||||
deserialize["vocab"] = lambda p: self.vocab.from_disk(p)
|
deserialize["vocab"] = lambda p: self.vocab.from_disk(p)
|
||||||
|
@ -310,7 +310,7 @@ class Lemmatizer(Pipe):
|
||||||
exclude (list): String names of serialization fields to exclude.
|
exclude (list): String names of serialization fields to exclude.
|
||||||
RETURNS (bytes): The serialized form of the `Vocab` object.
|
RETURNS (bytes): The serialized form of the `Vocab` object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vocab#to_bytes
|
DOCS: https://nightly.spacy.io/api/vocab#to_bytes
|
||||||
"""
|
"""
|
||||||
serialize = {}
|
serialize = {}
|
||||||
serialize["vocab"] = self.vocab.to_bytes
|
serialize["vocab"] = self.vocab.to_bytes
|
||||||
|
@ -324,7 +324,7 @@ class Lemmatizer(Pipe):
|
||||||
exclude (list): String names of serialization fields to exclude.
|
exclude (list): String names of serialization fields to exclude.
|
||||||
RETURNS (Vocab): The `Vocab` object.
|
RETURNS (Vocab): The `Vocab` object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vocab#from_bytes
|
DOCS: https://nightly.spacy.io/api/vocab#from_bytes
|
||||||
"""
|
"""
|
||||||
deserialize = {}
|
deserialize = {}
|
||||||
deserialize["vocab"] = lambda b: self.vocab.from_bytes(b)
|
deserialize["vocab"] = lambda b: self.vocab.from_bytes(b)
|
||||||
|
|
|
@ -79,7 +79,7 @@ class Morphologizer(Tagger):
|
||||||
labels_morph (dict): Mapping of morph + POS tags to morph labels.
|
labels_morph (dict): Mapping of morph + POS tags to morph labels.
|
||||||
labels_pos (dict): Mapping of morph + POS tags to POS tags.
|
labels_pos (dict): Mapping of morph + POS tags to POS tags.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/morphologizer#init
|
DOCS: https://nightly.spacy.io/api/morphologizer#init
|
||||||
"""
|
"""
|
||||||
self.vocab = vocab
|
self.vocab = vocab
|
||||||
self.model = model
|
self.model = model
|
||||||
|
@ -106,7 +106,7 @@ class Morphologizer(Tagger):
|
||||||
label (str): The label to add.
|
label (str): The label to add.
|
||||||
RETURNS (int): 0 if label is already present, otherwise 1.
|
RETURNS (int): 0 if label is already present, otherwise 1.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/morphologizer#add_label
|
DOCS: https://nightly.spacy.io/api/morphologizer#add_label
|
||||||
"""
|
"""
|
||||||
if not isinstance(label, str):
|
if not isinstance(label, str):
|
||||||
raise ValueError(Errors.E187)
|
raise ValueError(Errors.E187)
|
||||||
|
@ -139,7 +139,7 @@ class Morphologizer(Tagger):
|
||||||
create_optimizer if it doesn't exist.
|
create_optimizer if it doesn't exist.
|
||||||
RETURNS (thinc.api.Optimizer): The optimizer.
|
RETURNS (thinc.api.Optimizer): The optimizer.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/morphologizer#begin_training
|
DOCS: https://nightly.spacy.io/api/morphologizer#begin_training
|
||||||
"""
|
"""
|
||||||
if not hasattr(get_examples, "__call__"):
|
if not hasattr(get_examples, "__call__"):
|
||||||
err = Errors.E930.format(name="Morphologizer", obj=type(get_examples))
|
err = Errors.E930.format(name="Morphologizer", obj=type(get_examples))
|
||||||
|
@ -169,7 +169,7 @@ class Morphologizer(Tagger):
|
||||||
docs (Iterable[Doc]): The documents to modify.
|
docs (Iterable[Doc]): The documents to modify.
|
||||||
batch_tag_ids: The IDs to set, produced by Morphologizer.predict.
|
batch_tag_ids: The IDs to set, produced by Morphologizer.predict.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/morphologizer#set_annotations
|
DOCS: https://nightly.spacy.io/api/morphologizer#set_annotations
|
||||||
"""
|
"""
|
||||||
if isinstance(docs, Doc):
|
if isinstance(docs, Doc):
|
||||||
docs = [docs]
|
docs = [docs]
|
||||||
|
@ -194,7 +194,7 @@ class Morphologizer(Tagger):
|
||||||
scores: Scores representing the model's predictions.
|
scores: Scores representing the model's predictions.
|
||||||
RETURNS (Tuple[float, float]): The loss and the gradient.
|
RETURNS (Tuple[float, float]): The loss and the gradient.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/morphologizer#get_loss
|
DOCS: https://nightly.spacy.io/api/morphologizer#get_loss
|
||||||
"""
|
"""
|
||||||
validate_examples(examples, "Morphologizer.get_loss")
|
validate_examples(examples, "Morphologizer.get_loss")
|
||||||
loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False)
|
loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False)
|
||||||
|
@ -231,7 +231,7 @@ class Morphologizer(Tagger):
|
||||||
Scorer.score_token_attr for the attributes "pos" and "morph" and
|
Scorer.score_token_attr for the attributes "pos" and "morph" and
|
||||||
Scorer.score_token_attr_per_feat for the attribute "morph".
|
Scorer.score_token_attr_per_feat for the attribute "morph".
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/morphologizer#score
|
DOCS: https://nightly.spacy.io/api/morphologizer#score
|
||||||
"""
|
"""
|
||||||
validate_examples(examples, "Morphologizer.score")
|
validate_examples(examples, "Morphologizer.score")
|
||||||
results = {}
|
results = {}
|
||||||
|
@ -247,7 +247,7 @@ class Morphologizer(Tagger):
|
||||||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||||
RETURNS (bytes): The serialized object.
|
RETURNS (bytes): The serialized object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/morphologizer#to_bytes
|
DOCS: https://nightly.spacy.io/api/morphologizer#to_bytes
|
||||||
"""
|
"""
|
||||||
serialize = {}
|
serialize = {}
|
||||||
serialize["model"] = self.model.to_bytes
|
serialize["model"] = self.model.to_bytes
|
||||||
|
@ -262,7 +262,7 @@ class Morphologizer(Tagger):
|
||||||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||||
RETURNS (Morphologizer): The loaded Morphologizer.
|
RETURNS (Morphologizer): The loaded Morphologizer.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/morphologizer#from_bytes
|
DOCS: https://nightly.spacy.io/api/morphologizer#from_bytes
|
||||||
"""
|
"""
|
||||||
def load_model(b):
|
def load_model(b):
|
||||||
try:
|
try:
|
||||||
|
@ -284,7 +284,7 @@ class Morphologizer(Tagger):
|
||||||
path (str / Path): Path to a directory.
|
path (str / Path): Path to a directory.
|
||||||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/morphologizer#to_disk
|
DOCS: https://nightly.spacy.io/api/morphologizer#to_disk
|
||||||
"""
|
"""
|
||||||
serialize = {
|
serialize = {
|
||||||
"vocab": lambda p: self.vocab.to_disk(p),
|
"vocab": lambda p: self.vocab.to_disk(p),
|
||||||
|
@ -300,7 +300,7 @@ class Morphologizer(Tagger):
|
||||||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||||
RETURNS (Morphologizer): The modified Morphologizer object.
|
RETURNS (Morphologizer): The modified Morphologizer object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/morphologizer#from_disk
|
DOCS: https://nightly.spacy.io/api/morphologizer#from_disk
|
||||||
"""
|
"""
|
||||||
def load_model(p):
|
def load_model(p):
|
||||||
with p.open("rb") as file_:
|
with p.open("rb") as file_:
|
||||||
|
|
|
@ -88,7 +88,7 @@ def make_ner(
|
||||||
cdef class EntityRecognizer(Parser):
|
cdef class EntityRecognizer(Parser):
|
||||||
"""Pipeline component for named entity recognition.
|
"""Pipeline component for named entity recognition.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/entityrecognizer
|
DOCS: https://nightly.spacy.io/api/entityrecognizer
|
||||||
"""
|
"""
|
||||||
TransitionSystem = BiluoPushDown
|
TransitionSystem = BiluoPushDown
|
||||||
|
|
||||||
|
@ -119,7 +119,7 @@ cdef class EntityRecognizer(Parser):
|
||||||
examples (Iterable[Example]): The examples to score.
|
examples (Iterable[Example]): The examples to score.
|
||||||
RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans.
|
RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/entityrecognizer#score
|
DOCS: https://nightly.spacy.io/api/entityrecognizer#score
|
||||||
"""
|
"""
|
||||||
validate_examples(examples, "EntityRecognizer.score")
|
validate_examples(examples, "EntityRecognizer.score")
|
||||||
return Scorer.score_spans(examples, "ents", **kwargs)
|
return Scorer.score_spans(examples, "ents", **kwargs)
|
||||||
|
|
|
@ -15,7 +15,7 @@ cdef class Pipe:
|
||||||
from it and it defines the interface that components should follow to
|
from it and it defines the interface that components should follow to
|
||||||
function as trainable components in a spaCy pipeline.
|
function as trainable components in a spaCy pipeline.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/pipe
|
DOCS: https://nightly.spacy.io/api/pipe
|
||||||
"""
|
"""
|
||||||
def __init__(self, vocab, model, name, **cfg):
|
def __init__(self, vocab, model, name, **cfg):
|
||||||
"""Initialize a pipeline component.
|
"""Initialize a pipeline component.
|
||||||
|
@ -25,7 +25,7 @@ cdef class Pipe:
|
||||||
name (str): The component instance name.
|
name (str): The component instance name.
|
||||||
**cfg: Additional settings and config parameters.
|
**cfg: Additional settings and config parameters.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/pipe#init
|
DOCS: https://nightly.spacy.io/api/pipe#init
|
||||||
"""
|
"""
|
||||||
self.vocab = vocab
|
self.vocab = vocab
|
||||||
self.model = model
|
self.model = model
|
||||||
|
@ -40,7 +40,7 @@ cdef class Pipe:
|
||||||
docs (Doc): The Doc to process.
|
docs (Doc): The Doc to process.
|
||||||
RETURNS (Doc): The processed Doc.
|
RETURNS (Doc): The processed Doc.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/pipe#call
|
DOCS: https://nightly.spacy.io/api/pipe#call
|
||||||
"""
|
"""
|
||||||
scores = self.predict([doc])
|
scores = self.predict([doc])
|
||||||
self.set_annotations([doc], scores)
|
self.set_annotations([doc], scores)
|
||||||
|
@ -55,7 +55,7 @@ cdef class Pipe:
|
||||||
batch_size (int): The number of documents to buffer.
|
batch_size (int): The number of documents to buffer.
|
||||||
YIELDS (Doc): Processed documents in order.
|
YIELDS (Doc): Processed documents in order.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/pipe#pipe
|
DOCS: https://nightly.spacy.io/api/pipe#pipe
|
||||||
"""
|
"""
|
||||||
for docs in util.minibatch(stream, size=batch_size):
|
for docs in util.minibatch(stream, size=batch_size):
|
||||||
scores = self.predict(docs)
|
scores = self.predict(docs)
|
||||||
|
@ -69,7 +69,7 @@ cdef class Pipe:
|
||||||
docs (Iterable[Doc]): The documents to predict.
|
docs (Iterable[Doc]): The documents to predict.
|
||||||
RETURNS: Vector representations for each token in the documents.
|
RETURNS: Vector representations for each token in the documents.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/pipe#predict
|
DOCS: https://nightly.spacy.io/api/pipe#predict
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError(Errors.E931.format(method="predict", name=self.name))
|
raise NotImplementedError(Errors.E931.format(method="predict", name=self.name))
|
||||||
|
|
||||||
|
@ -79,7 +79,7 @@ cdef class Pipe:
|
||||||
docs (Iterable[Doc]): The documents to modify.
|
docs (Iterable[Doc]): The documents to modify.
|
||||||
scores: The scores to assign.
|
scores: The scores to assign.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/pipe#set_annotations
|
DOCS: https://nightly.spacy.io/api/pipe#set_annotations
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError(Errors.E931.format(method="set_annotations", name=self.name))
|
raise NotImplementedError(Errors.E931.format(method="set_annotations", name=self.name))
|
||||||
|
|
||||||
|
@ -96,7 +96,7 @@ cdef class Pipe:
|
||||||
Updated using the component name as the key.
|
Updated using the component name as the key.
|
||||||
RETURNS (Dict[str, float]): The updated losses dictionary.
|
RETURNS (Dict[str, float]): The updated losses dictionary.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/pipe#update
|
DOCS: https://nightly.spacy.io/api/pipe#update
|
||||||
"""
|
"""
|
||||||
if losses is None:
|
if losses is None:
|
||||||
losses = {}
|
losses = {}
|
||||||
|
@ -132,7 +132,7 @@ cdef class Pipe:
|
||||||
Updated using the component name as the key.
|
Updated using the component name as the key.
|
||||||
RETURNS (Dict[str, float]): The updated losses dictionary.
|
RETURNS (Dict[str, float]): The updated losses dictionary.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/pipe#rehearse
|
DOCS: https://nightly.spacy.io/api/pipe#rehearse
|
||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@ -144,7 +144,7 @@ cdef class Pipe:
|
||||||
scores: Scores representing the model's predictions.
|
scores: Scores representing the model's predictions.
|
||||||
RETURNS (Tuple[float, float]): The loss and the gradient.
|
RETURNS (Tuple[float, float]): The loss and the gradient.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/pipe#get_loss
|
DOCS: https://nightly.spacy.io/api/pipe#get_loss
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError(Errors.E931.format(method="get_loss", name=self.name))
|
raise NotImplementedError(Errors.E931.format(method="get_loss", name=self.name))
|
||||||
|
|
||||||
|
@ -156,7 +156,7 @@ cdef class Pipe:
|
||||||
label (str): The label to add.
|
label (str): The label to add.
|
||||||
RETURNS (int): 0 if label is already present, otherwise 1.
|
RETURNS (int): 0 if label is already present, otherwise 1.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/pipe#add_label
|
DOCS: https://nightly.spacy.io/api/pipe#add_label
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError(Errors.E931.format(method="add_label", name=self.name))
|
raise NotImplementedError(Errors.E931.format(method="add_label", name=self.name))
|
||||||
|
|
||||||
|
@ -165,7 +165,7 @@ cdef class Pipe:
|
||||||
|
|
||||||
RETURNS (thinc.api.Optimizer): The optimizer.
|
RETURNS (thinc.api.Optimizer): The optimizer.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/pipe#create_optimizer
|
DOCS: https://nightly.spacy.io/api/pipe#create_optimizer
|
||||||
"""
|
"""
|
||||||
return util.create_default_optimizer()
|
return util.create_default_optimizer()
|
||||||
|
|
||||||
|
@ -181,7 +181,7 @@ cdef class Pipe:
|
||||||
create_optimizer if it doesn't exist.
|
create_optimizer if it doesn't exist.
|
||||||
RETURNS (thinc.api.Optimizer): The optimizer.
|
RETURNS (thinc.api.Optimizer): The optimizer.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/pipe#begin_training
|
DOCS: https://nightly.spacy.io/api/pipe#begin_training
|
||||||
"""
|
"""
|
||||||
self.model.initialize()
|
self.model.initialize()
|
||||||
if sgd is None:
|
if sgd is None:
|
||||||
|
@ -200,7 +200,7 @@ cdef class Pipe:
|
||||||
|
|
||||||
params (dict): The parameter values to use in the model.
|
params (dict): The parameter values to use in the model.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/pipe#use_params
|
DOCS: https://nightly.spacy.io/api/pipe#use_params
|
||||||
"""
|
"""
|
||||||
with self.model.use_params(params):
|
with self.model.use_params(params):
|
||||||
yield
|
yield
|
||||||
|
@ -211,7 +211,7 @@ cdef class Pipe:
|
||||||
examples (Iterable[Example]): The examples to score.
|
examples (Iterable[Example]): The examples to score.
|
||||||
RETURNS (Dict[str, Any]): The scores.
|
RETURNS (Dict[str, Any]): The scores.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/pipe#score
|
DOCS: https://nightly.spacy.io/api/pipe#score
|
||||||
"""
|
"""
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
@ -221,7 +221,7 @@ cdef class Pipe:
|
||||||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||||
RETURNS (bytes): The serialized object.
|
RETURNS (bytes): The serialized object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/pipe#to_bytes
|
DOCS: https://nightly.spacy.io/api/pipe#to_bytes
|
||||||
"""
|
"""
|
||||||
serialize = {}
|
serialize = {}
|
||||||
serialize["cfg"] = lambda: srsly.json_dumps(self.cfg)
|
serialize["cfg"] = lambda: srsly.json_dumps(self.cfg)
|
||||||
|
@ -236,7 +236,7 @@ cdef class Pipe:
|
||||||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||||
RETURNS (Pipe): The loaded object.
|
RETURNS (Pipe): The loaded object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/pipe#from_bytes
|
DOCS: https://nightly.spacy.io/api/pipe#from_bytes
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def load_model(b):
|
def load_model(b):
|
||||||
|
@ -259,7 +259,7 @@ cdef class Pipe:
|
||||||
path (str / Path): Path to a directory.
|
path (str / Path): Path to a directory.
|
||||||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/pipe#to_disk
|
DOCS: https://nightly.spacy.io/api/pipe#to_disk
|
||||||
"""
|
"""
|
||||||
serialize = {}
|
serialize = {}
|
||||||
serialize["cfg"] = lambda p: srsly.write_json(p, self.cfg)
|
serialize["cfg"] = lambda p: srsly.write_json(p, self.cfg)
|
||||||
|
@ -274,7 +274,7 @@ cdef class Pipe:
|
||||||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||||
RETURNS (Pipe): The loaded object.
|
RETURNS (Pipe): The loaded object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/pipe#from_disk
|
DOCS: https://nightly.spacy.io/api/pipe#from_disk
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def load_model(p):
|
def load_model(p):
|
||||||
|
|
|
@ -29,7 +29,7 @@ def make_sentencizer(
|
||||||
class Sentencizer(Pipe):
|
class Sentencizer(Pipe):
|
||||||
"""Segment the Doc into sentences using a rule-based strategy.
|
"""Segment the Doc into sentences using a rule-based strategy.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/sentencizer
|
DOCS: https://nightly.spacy.io/api/sentencizer
|
||||||
"""
|
"""
|
||||||
|
|
||||||
default_punct_chars = ['!', '.', '?', '։', '؟', '۔', '܀', '܁', '܂', '߹',
|
default_punct_chars = ['!', '.', '?', '։', '؟', '۔', '܀', '܁', '܂', '߹',
|
||||||
|
@ -51,7 +51,7 @@ class Sentencizer(Pipe):
|
||||||
serialized with the nlp object.
|
serialized with the nlp object.
|
||||||
RETURNS (Sentencizer): The sentencizer component.
|
RETURNS (Sentencizer): The sentencizer component.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/sentencizer#init
|
DOCS: https://nightly.spacy.io/api/sentencizer#init
|
||||||
"""
|
"""
|
||||||
self.name = name
|
self.name = name
|
||||||
if punct_chars:
|
if punct_chars:
|
||||||
|
@ -68,7 +68,7 @@ class Sentencizer(Pipe):
|
||||||
doc (Doc): The document to process.
|
doc (Doc): The document to process.
|
||||||
RETURNS (Doc): The processed Doc.
|
RETURNS (Doc): The processed Doc.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/sentencizer#call
|
DOCS: https://nightly.spacy.io/api/sentencizer#call
|
||||||
"""
|
"""
|
||||||
start = 0
|
start = 0
|
||||||
seen_period = False
|
seen_period = False
|
||||||
|
@ -94,7 +94,7 @@ class Sentencizer(Pipe):
|
||||||
batch_size (int): The number of documents to buffer.
|
batch_size (int): The number of documents to buffer.
|
||||||
YIELDS (Doc): Processed documents in order.
|
YIELDS (Doc): Processed documents in order.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/sentencizer#pipe
|
DOCS: https://nightly.spacy.io/api/sentencizer#pipe
|
||||||
"""
|
"""
|
||||||
for docs in util.minibatch(stream, size=batch_size):
|
for docs in util.minibatch(stream, size=batch_size):
|
||||||
predictions = self.predict(docs)
|
predictions = self.predict(docs)
|
||||||
|
@ -157,7 +157,7 @@ class Sentencizer(Pipe):
|
||||||
examples (Iterable[Example]): The examples to score.
|
examples (Iterable[Example]): The examples to score.
|
||||||
RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans.
|
RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/sentencizer#score
|
DOCS: https://nightly.spacy.io/api/sentencizer#score
|
||||||
"""
|
"""
|
||||||
validate_examples(examples, "Sentencizer.score")
|
validate_examples(examples, "Sentencizer.score")
|
||||||
results = Scorer.score_spans(examples, "sents", **kwargs)
|
results = Scorer.score_spans(examples, "sents", **kwargs)
|
||||||
|
@ -169,7 +169,7 @@ class Sentencizer(Pipe):
|
||||||
|
|
||||||
RETURNS (bytes): The serialized object.
|
RETURNS (bytes): The serialized object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/sentencizer#to_bytes
|
DOCS: https://nightly.spacy.io/api/sentencizer#to_bytes
|
||||||
"""
|
"""
|
||||||
return srsly.msgpack_dumps({"punct_chars": list(self.punct_chars)})
|
return srsly.msgpack_dumps({"punct_chars": list(self.punct_chars)})
|
||||||
|
|
||||||
|
@ -179,7 +179,7 @@ class Sentencizer(Pipe):
|
||||||
bytes_data (bytes): The data to load.
|
bytes_data (bytes): The data to load.
|
||||||
RETURNS (Sentencizer): The loaded object.
|
RETURNS (Sentencizer): The loaded object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/sentencizer#from_bytes
|
DOCS: https://nightly.spacy.io/api/sentencizer#from_bytes
|
||||||
"""
|
"""
|
||||||
cfg = srsly.msgpack_loads(bytes_data)
|
cfg = srsly.msgpack_loads(bytes_data)
|
||||||
self.punct_chars = set(cfg.get("punct_chars", self.default_punct_chars))
|
self.punct_chars = set(cfg.get("punct_chars", self.default_punct_chars))
|
||||||
|
@ -188,7 +188,7 @@ class Sentencizer(Pipe):
|
||||||
def to_disk(self, path, *, exclude=tuple()):
|
def to_disk(self, path, *, exclude=tuple()):
|
||||||
"""Serialize the sentencizer to disk.
|
"""Serialize the sentencizer to disk.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/sentencizer#to_disk
|
DOCS: https://nightly.spacy.io/api/sentencizer#to_disk
|
||||||
"""
|
"""
|
||||||
path = util.ensure_path(path)
|
path = util.ensure_path(path)
|
||||||
path = path.with_suffix(".json")
|
path = path.with_suffix(".json")
|
||||||
|
@ -198,7 +198,7 @@ class Sentencizer(Pipe):
|
||||||
def from_disk(self, path, *, exclude=tuple()):
|
def from_disk(self, path, *, exclude=tuple()):
|
||||||
"""Load the sentencizer from disk.
|
"""Load the sentencizer from disk.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/sentencizer#from_disk
|
DOCS: https://nightly.spacy.io/api/sentencizer#from_disk
|
||||||
"""
|
"""
|
||||||
path = util.ensure_path(path)
|
path = util.ensure_path(path)
|
||||||
path = path.with_suffix(".json")
|
path = path.with_suffix(".json")
|
||||||
|
|
|
@ -44,7 +44,7 @@ def make_senter(nlp: Language, name: str, model: Model):
|
||||||
class SentenceRecognizer(Tagger):
|
class SentenceRecognizer(Tagger):
|
||||||
"""Pipeline component for sentence segmentation.
|
"""Pipeline component for sentence segmentation.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/sentencerecognizer
|
DOCS: https://nightly.spacy.io/api/sentencerecognizer
|
||||||
"""
|
"""
|
||||||
def __init__(self, vocab, model, name="senter"):
|
def __init__(self, vocab, model, name="senter"):
|
||||||
"""Initialize a sentence recognizer.
|
"""Initialize a sentence recognizer.
|
||||||
|
@ -54,7 +54,7 @@ class SentenceRecognizer(Tagger):
|
||||||
name (str): The component instance name, used to add entries to the
|
name (str): The component instance name, used to add entries to the
|
||||||
losses during training.
|
losses during training.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/sentencerecognizer#init
|
DOCS: https://nightly.spacy.io/api/sentencerecognizer#init
|
||||||
"""
|
"""
|
||||||
self.vocab = vocab
|
self.vocab = vocab
|
||||||
self.model = model
|
self.model = model
|
||||||
|
@ -76,7 +76,7 @@ class SentenceRecognizer(Tagger):
|
||||||
docs (Iterable[Doc]): The documents to modify.
|
docs (Iterable[Doc]): The documents to modify.
|
||||||
batch_tag_ids: The IDs to set, produced by SentenceRecognizer.predict.
|
batch_tag_ids: The IDs to set, produced by SentenceRecognizer.predict.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/sentencerecognizer#set_annotations
|
DOCS: https://nightly.spacy.io/api/sentencerecognizer#set_annotations
|
||||||
"""
|
"""
|
||||||
if isinstance(docs, Doc):
|
if isinstance(docs, Doc):
|
||||||
docs = [docs]
|
docs = [docs]
|
||||||
|
@ -101,7 +101,7 @@ class SentenceRecognizer(Tagger):
|
||||||
scores: Scores representing the model's predictions.
|
scores: Scores representing the model's predictions.
|
||||||
RETURNS (Tuple[float, float]): The loss and the gradient.
|
RETURNS (Tuple[float, float]): The loss and the gradient.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/sentencerecognizer#get_loss
|
DOCS: https://nightly.spacy.io/api/sentencerecognizer#get_loss
|
||||||
"""
|
"""
|
||||||
validate_examples(examples, "SentenceRecognizer.get_loss")
|
validate_examples(examples, "SentenceRecognizer.get_loss")
|
||||||
labels = self.labels
|
labels = self.labels
|
||||||
|
@ -135,7 +135,7 @@ class SentenceRecognizer(Tagger):
|
||||||
create_optimizer if it doesn't exist.
|
create_optimizer if it doesn't exist.
|
||||||
RETURNS (thinc.api.Optimizer): The optimizer.
|
RETURNS (thinc.api.Optimizer): The optimizer.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/sentencerecognizer#begin_training
|
DOCS: https://nightly.spacy.io/api/sentencerecognizer#begin_training
|
||||||
"""
|
"""
|
||||||
self.set_output(len(self.labels))
|
self.set_output(len(self.labels))
|
||||||
self.model.initialize()
|
self.model.initialize()
|
||||||
|
@ -151,7 +151,7 @@ class SentenceRecognizer(Tagger):
|
||||||
|
|
||||||
examples (Iterable[Example]): The examples to score.
|
examples (Iterable[Example]): The examples to score.
|
||||||
RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans.
|
RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans.
|
||||||
DOCS: https://spacy.io/api/sentencerecognizer#score
|
DOCS: https://nightly.spacy.io/api/sentencerecognizer#score
|
||||||
"""
|
"""
|
||||||
validate_examples(examples, "SentenceRecognizer.score")
|
validate_examples(examples, "SentenceRecognizer.score")
|
||||||
results = Scorer.score_spans(examples, "sents", **kwargs)
|
results = Scorer.score_spans(examples, "sents", **kwargs)
|
||||||
|
@ -164,7 +164,7 @@ class SentenceRecognizer(Tagger):
|
||||||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||||
RETURNS (bytes): The serialized object.
|
RETURNS (bytes): The serialized object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/sentencerecognizer#to_bytes
|
DOCS: https://nightly.spacy.io/api/sentencerecognizer#to_bytes
|
||||||
"""
|
"""
|
||||||
serialize = {}
|
serialize = {}
|
||||||
serialize["model"] = self.model.to_bytes
|
serialize["model"] = self.model.to_bytes
|
||||||
|
@ -179,7 +179,7 @@ class SentenceRecognizer(Tagger):
|
||||||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||||
RETURNS (Tagger): The loaded SentenceRecognizer.
|
RETURNS (Tagger): The loaded SentenceRecognizer.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/sentencerecognizer#from_bytes
|
DOCS: https://nightly.spacy.io/api/sentencerecognizer#from_bytes
|
||||||
"""
|
"""
|
||||||
def load_model(b):
|
def load_model(b):
|
||||||
try:
|
try:
|
||||||
|
@ -201,7 +201,7 @@ class SentenceRecognizer(Tagger):
|
||||||
path (str / Path): Path to a directory.
|
path (str / Path): Path to a directory.
|
||||||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/sentencerecognizer#to_disk
|
DOCS: https://nightly.spacy.io/api/sentencerecognizer#to_disk
|
||||||
"""
|
"""
|
||||||
serialize = {
|
serialize = {
|
||||||
"vocab": lambda p: self.vocab.to_disk(p),
|
"vocab": lambda p: self.vocab.to_disk(p),
|
||||||
|
@ -217,7 +217,7 @@ class SentenceRecognizer(Tagger):
|
||||||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||||
RETURNS (Tagger): The modified SentenceRecognizer object.
|
RETURNS (Tagger): The modified SentenceRecognizer object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/sentencerecognizer#from_disk
|
DOCS: https://nightly.spacy.io/api/sentencerecognizer#from_disk
|
||||||
"""
|
"""
|
||||||
def load_model(p):
|
def load_model(p):
|
||||||
with p.open("rb") as file_:
|
with p.open("rb") as file_:
|
||||||
|
|
|
@ -78,7 +78,7 @@ class SimpleNER(Pipe):
|
||||||
def add_label(self, label: str) -> None:
|
def add_label(self, label: str) -> None:
|
||||||
"""Add a new label to the pipe.
|
"""Add a new label to the pipe.
|
||||||
label (str): The label to add.
|
label (str): The label to add.
|
||||||
DOCS: https://spacy.io/api/simplener#add_label
|
DOCS: https://nightly.spacy.io/api/simplener#add_label
|
||||||
"""
|
"""
|
||||||
if not isinstance(label, str):
|
if not isinstance(label, str):
|
||||||
raise ValueError(Errors.E187)
|
raise ValueError(Errors.E187)
|
||||||
|
|
|
@ -58,7 +58,7 @@ def make_tagger(nlp: Language, name: str, model: Model):
|
||||||
class Tagger(Pipe):
|
class Tagger(Pipe):
|
||||||
"""Pipeline component for part-of-speech tagging.
|
"""Pipeline component for part-of-speech tagging.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tagger
|
DOCS: https://nightly.spacy.io/api/tagger
|
||||||
"""
|
"""
|
||||||
def __init__(self, vocab, model, name="tagger", *, labels=None):
|
def __init__(self, vocab, model, name="tagger", *, labels=None):
|
||||||
"""Initialize a part-of-speech tagger.
|
"""Initialize a part-of-speech tagger.
|
||||||
|
@ -69,7 +69,7 @@ class Tagger(Pipe):
|
||||||
losses during training.
|
losses during training.
|
||||||
labels (List): The set of labels. Defaults to None.
|
labels (List): The set of labels. Defaults to None.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tagger#init
|
DOCS: https://nightly.spacy.io/api/tagger#init
|
||||||
"""
|
"""
|
||||||
self.vocab = vocab
|
self.vocab = vocab
|
||||||
self.model = model
|
self.model = model
|
||||||
|
@ -86,7 +86,7 @@ class Tagger(Pipe):
|
||||||
|
|
||||||
RETURNS (Tuple[str]): The labels.
|
RETURNS (Tuple[str]): The labels.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tagger#labels
|
DOCS: https://nightly.spacy.io/api/tagger#labels
|
||||||
"""
|
"""
|
||||||
return tuple(self.cfg["labels"])
|
return tuple(self.cfg["labels"])
|
||||||
|
|
||||||
|
@ -96,7 +96,7 @@ class Tagger(Pipe):
|
||||||
doc (Doc): The document to process.
|
doc (Doc): The document to process.
|
||||||
RETURNS (Doc): The processed Doc.
|
RETURNS (Doc): The processed Doc.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tagger#call
|
DOCS: https://nightly.spacy.io/api/tagger#call
|
||||||
"""
|
"""
|
||||||
tags = self.predict([doc])
|
tags = self.predict([doc])
|
||||||
self.set_annotations([doc], tags)
|
self.set_annotations([doc], tags)
|
||||||
|
@ -111,7 +111,7 @@ class Tagger(Pipe):
|
||||||
batch_size (int): The number of documents to buffer.
|
batch_size (int): The number of documents to buffer.
|
||||||
YIELDS (Doc): Processed documents in order.
|
YIELDS (Doc): Processed documents in order.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tagger#pipe
|
DOCS: https://nightly.spacy.io/api/tagger#pipe
|
||||||
"""
|
"""
|
||||||
for docs in util.minibatch(stream, size=batch_size):
|
for docs in util.minibatch(stream, size=batch_size):
|
||||||
tag_ids = self.predict(docs)
|
tag_ids = self.predict(docs)
|
||||||
|
@ -124,7 +124,7 @@ class Tagger(Pipe):
|
||||||
docs (Iterable[Doc]): The documents to predict.
|
docs (Iterable[Doc]): The documents to predict.
|
||||||
RETURNS: The model's prediction for each document.
|
RETURNS: The model's prediction for each document.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tagger#predict
|
DOCS: https://nightly.spacy.io/api/tagger#predict
|
||||||
"""
|
"""
|
||||||
if not any(len(doc) for doc in docs):
|
if not any(len(doc) for doc in docs):
|
||||||
# Handle cases where there are no tokens in any docs.
|
# Handle cases where there are no tokens in any docs.
|
||||||
|
@ -153,7 +153,7 @@ class Tagger(Pipe):
|
||||||
docs (Iterable[Doc]): The documents to modify.
|
docs (Iterable[Doc]): The documents to modify.
|
||||||
batch_tag_ids: The IDs to set, produced by Tagger.predict.
|
batch_tag_ids: The IDs to set, produced by Tagger.predict.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tagger#set_annotations
|
DOCS: https://nightly.spacy.io/api/tagger#set_annotations
|
||||||
"""
|
"""
|
||||||
if isinstance(docs, Doc):
|
if isinstance(docs, Doc):
|
||||||
docs = [docs]
|
docs = [docs]
|
||||||
|
@ -182,7 +182,7 @@ class Tagger(Pipe):
|
||||||
Updated using the component name as the key.
|
Updated using the component name as the key.
|
||||||
RETURNS (Dict[str, float]): The updated losses dictionary.
|
RETURNS (Dict[str, float]): The updated losses dictionary.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tagger#update
|
DOCS: https://nightly.spacy.io/api/tagger#update
|
||||||
"""
|
"""
|
||||||
if losses is None:
|
if losses is None:
|
||||||
losses = {}
|
losses = {}
|
||||||
|
@ -220,7 +220,7 @@ class Tagger(Pipe):
|
||||||
Updated using the component name as the key.
|
Updated using the component name as the key.
|
||||||
RETURNS (Dict[str, float]): The updated losses dictionary.
|
RETURNS (Dict[str, float]): The updated losses dictionary.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tagger#rehearse
|
DOCS: https://nightly.spacy.io/api/tagger#rehearse
|
||||||
"""
|
"""
|
||||||
validate_examples(examples, "Tagger.rehearse")
|
validate_examples(examples, "Tagger.rehearse")
|
||||||
docs = [eg.predicted for eg in examples]
|
docs = [eg.predicted for eg in examples]
|
||||||
|
@ -247,7 +247,7 @@ class Tagger(Pipe):
|
||||||
scores: Scores representing the model's predictions.
|
scores: Scores representing the model's predictions.
|
||||||
RETURNS (Tuple[float, float]): The loss and the gradient.
|
RETURNS (Tuple[float, float]): The loss and the gradient.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tagger#get_loss
|
DOCS: https://nightly.spacy.io/api/tagger#get_loss
|
||||||
"""
|
"""
|
||||||
validate_examples(examples, "Tagger.get_loss")
|
validate_examples(examples, "Tagger.get_loss")
|
||||||
loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False)
|
loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False)
|
||||||
|
@ -269,7 +269,7 @@ class Tagger(Pipe):
|
||||||
create_optimizer if it doesn't exist.
|
create_optimizer if it doesn't exist.
|
||||||
RETURNS (thinc.api.Optimizer): The optimizer.
|
RETURNS (thinc.api.Optimizer): The optimizer.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tagger#begin_training
|
DOCS: https://nightly.spacy.io/api/tagger#begin_training
|
||||||
"""
|
"""
|
||||||
if not hasattr(get_examples, "__call__"):
|
if not hasattr(get_examples, "__call__"):
|
||||||
err = Errors.E930.format(name="Tagger", obj=type(get_examples))
|
err = Errors.E930.format(name="Tagger", obj=type(get_examples))
|
||||||
|
@ -307,7 +307,7 @@ class Tagger(Pipe):
|
||||||
label (str): The label to add.
|
label (str): The label to add.
|
||||||
RETURNS (int): 0 if label is already present, otherwise 1.
|
RETURNS (int): 0 if label is already present, otherwise 1.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tagger#add_label
|
DOCS: https://nightly.spacy.io/api/tagger#add_label
|
||||||
"""
|
"""
|
||||||
if not isinstance(label, str):
|
if not isinstance(label, str):
|
||||||
raise ValueError(Errors.E187)
|
raise ValueError(Errors.E187)
|
||||||
|
@ -324,7 +324,7 @@ class Tagger(Pipe):
|
||||||
RETURNS (Dict[str, Any]): The scores, produced by
|
RETURNS (Dict[str, Any]): The scores, produced by
|
||||||
Scorer.score_token_attr for the attributes "tag".
|
Scorer.score_token_attr for the attributes "tag".
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tagger#score
|
DOCS: https://nightly.spacy.io/api/tagger#score
|
||||||
"""
|
"""
|
||||||
validate_examples(examples, "Tagger.score")
|
validate_examples(examples, "Tagger.score")
|
||||||
return Scorer.score_token_attr(examples, "tag", **kwargs)
|
return Scorer.score_token_attr(examples, "tag", **kwargs)
|
||||||
|
@ -335,7 +335,7 @@ class Tagger(Pipe):
|
||||||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||||
RETURNS (bytes): The serialized object.
|
RETURNS (bytes): The serialized object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tagger#to_bytes
|
DOCS: https://nightly.spacy.io/api/tagger#to_bytes
|
||||||
"""
|
"""
|
||||||
serialize = {}
|
serialize = {}
|
||||||
serialize["model"] = self.model.to_bytes
|
serialize["model"] = self.model.to_bytes
|
||||||
|
@ -350,7 +350,7 @@ class Tagger(Pipe):
|
||||||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||||
RETURNS (Tagger): The loaded Tagger.
|
RETURNS (Tagger): The loaded Tagger.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tagger#from_bytes
|
DOCS: https://nightly.spacy.io/api/tagger#from_bytes
|
||||||
"""
|
"""
|
||||||
def load_model(b):
|
def load_model(b):
|
||||||
try:
|
try:
|
||||||
|
@ -372,7 +372,7 @@ class Tagger(Pipe):
|
||||||
path (str / Path): Path to a directory.
|
path (str / Path): Path to a directory.
|
||||||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tagger#to_disk
|
DOCS: https://nightly.spacy.io/api/tagger#to_disk
|
||||||
"""
|
"""
|
||||||
serialize = {
|
serialize = {
|
||||||
"vocab": lambda p: self.vocab.to_disk(p),
|
"vocab": lambda p: self.vocab.to_disk(p),
|
||||||
|
@ -388,7 +388,7 @@ class Tagger(Pipe):
|
||||||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||||
RETURNS (Tagger): The modified Tagger object.
|
RETURNS (Tagger): The modified Tagger object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tagger#from_disk
|
DOCS: https://nightly.spacy.io/api/tagger#from_disk
|
||||||
"""
|
"""
|
||||||
def load_model(p):
|
def load_model(p):
|
||||||
with p.open("rb") as file_:
|
with p.open("rb") as file_:
|
||||||
|
|
|
@ -92,7 +92,7 @@ def make_textcat(
|
||||||
class TextCategorizer(Pipe):
|
class TextCategorizer(Pipe):
|
||||||
"""Pipeline component for text classification.
|
"""Pipeline component for text classification.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/textcategorizer
|
DOCS: https://nightly.spacy.io/api/textcategorizer
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
@ -111,7 +111,7 @@ class TextCategorizer(Pipe):
|
||||||
losses during training.
|
losses during training.
|
||||||
labels (Iterable[str]): The labels to use.
|
labels (Iterable[str]): The labels to use.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/textcategorizer#init
|
DOCS: https://nightly.spacy.io/api/textcategorizer#init
|
||||||
"""
|
"""
|
||||||
self.vocab = vocab
|
self.vocab = vocab
|
||||||
self.model = model
|
self.model = model
|
||||||
|
@ -124,7 +124,7 @@ class TextCategorizer(Pipe):
|
||||||
def labels(self) -> Tuple[str]:
|
def labels(self) -> Tuple[str]:
|
||||||
"""RETURNS (Tuple[str]): The labels currently added to the component.
|
"""RETURNS (Tuple[str]): The labels currently added to the component.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/textcategorizer#labels
|
DOCS: https://nightly.spacy.io/api/textcategorizer#labels
|
||||||
"""
|
"""
|
||||||
return tuple(self.cfg.setdefault("labels", []))
|
return tuple(self.cfg.setdefault("labels", []))
|
||||||
|
|
||||||
|
@ -146,7 +146,7 @@ class TextCategorizer(Pipe):
|
||||||
batch_size (int): The number of documents to buffer.
|
batch_size (int): The number of documents to buffer.
|
||||||
YIELDS (Doc): Processed documents in order.
|
YIELDS (Doc): Processed documents in order.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/textcategorizer#pipe
|
DOCS: https://nightly.spacy.io/api/textcategorizer#pipe
|
||||||
"""
|
"""
|
||||||
for docs in util.minibatch(stream, size=batch_size):
|
for docs in util.minibatch(stream, size=batch_size):
|
||||||
scores = self.predict(docs)
|
scores = self.predict(docs)
|
||||||
|
@ -159,7 +159,7 @@ class TextCategorizer(Pipe):
|
||||||
docs (Iterable[Doc]): The documents to predict.
|
docs (Iterable[Doc]): The documents to predict.
|
||||||
RETURNS: The model's prediction for each document.
|
RETURNS: The model's prediction for each document.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/textcategorizer#predict
|
DOCS: https://nightly.spacy.io/api/textcategorizer#predict
|
||||||
"""
|
"""
|
||||||
tensors = [doc.tensor for doc in docs]
|
tensors = [doc.tensor for doc in docs]
|
||||||
if not any(len(doc) for doc in docs):
|
if not any(len(doc) for doc in docs):
|
||||||
|
@ -177,7 +177,7 @@ class TextCategorizer(Pipe):
|
||||||
docs (Iterable[Doc]): The documents to modify.
|
docs (Iterable[Doc]): The documents to modify.
|
||||||
scores: The scores to set, produced by TextCategorizer.predict.
|
scores: The scores to set, produced by TextCategorizer.predict.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/textcategorizer#set_annotations
|
DOCS: https://nightly.spacy.io/api/textcategorizer#set_annotations
|
||||||
"""
|
"""
|
||||||
for i, doc in enumerate(docs):
|
for i, doc in enumerate(docs):
|
||||||
for j, label in enumerate(self.labels):
|
for j, label in enumerate(self.labels):
|
||||||
|
@ -204,7 +204,7 @@ class TextCategorizer(Pipe):
|
||||||
Updated using the component name as the key.
|
Updated using the component name as the key.
|
||||||
RETURNS (Dict[str, float]): The updated losses dictionary.
|
RETURNS (Dict[str, float]): The updated losses dictionary.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/textcategorizer#update
|
DOCS: https://nightly.spacy.io/api/textcategorizer#update
|
||||||
"""
|
"""
|
||||||
if losses is None:
|
if losses is None:
|
||||||
losses = {}
|
losses = {}
|
||||||
|
@ -245,7 +245,7 @@ class TextCategorizer(Pipe):
|
||||||
Updated using the component name as the key.
|
Updated using the component name as the key.
|
||||||
RETURNS (Dict[str, float]): The updated losses dictionary.
|
RETURNS (Dict[str, float]): The updated losses dictionary.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/textcategorizer#rehearse
|
DOCS: https://nightly.spacy.io/api/textcategorizer#rehearse
|
||||||
"""
|
"""
|
||||||
if losses is not None:
|
if losses is not None:
|
||||||
losses.setdefault(self.name, 0.0)
|
losses.setdefault(self.name, 0.0)
|
||||||
|
@ -289,7 +289,7 @@ class TextCategorizer(Pipe):
|
||||||
scores: Scores representing the model's predictions.
|
scores: Scores representing the model's predictions.
|
||||||
RETURNS (Tuple[float, float]): The loss and the gradient.
|
RETURNS (Tuple[float, float]): The loss and the gradient.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/textcategorizer#get_loss
|
DOCS: https://nightly.spacy.io/api/textcategorizer#get_loss
|
||||||
"""
|
"""
|
||||||
validate_examples(examples, "TextCategorizer.get_loss")
|
validate_examples(examples, "TextCategorizer.get_loss")
|
||||||
truths, not_missing = self._examples_to_truth(examples)
|
truths, not_missing = self._examples_to_truth(examples)
|
||||||
|
@ -305,7 +305,7 @@ class TextCategorizer(Pipe):
|
||||||
label (str): The label to add.
|
label (str): The label to add.
|
||||||
RETURNS (int): 0 if label is already present, otherwise 1.
|
RETURNS (int): 0 if label is already present, otherwise 1.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/textcategorizer#add_label
|
DOCS: https://nightly.spacy.io/api/textcategorizer#add_label
|
||||||
"""
|
"""
|
||||||
if not isinstance(label, str):
|
if not isinstance(label, str):
|
||||||
raise ValueError(Errors.E187)
|
raise ValueError(Errors.E187)
|
||||||
|
@ -343,7 +343,7 @@ class TextCategorizer(Pipe):
|
||||||
create_optimizer if it doesn't exist.
|
create_optimizer if it doesn't exist.
|
||||||
RETURNS (thinc.api.Optimizer): The optimizer.
|
RETURNS (thinc.api.Optimizer): The optimizer.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/textcategorizer#begin_training
|
DOCS: https://nightly.spacy.io/api/textcategorizer#begin_training
|
||||||
"""
|
"""
|
||||||
if not hasattr(get_examples, "__call__"):
|
if not hasattr(get_examples, "__call__"):
|
||||||
err = Errors.E930.format(name="TextCategorizer", obj=type(get_examples))
|
err = Errors.E930.format(name="TextCategorizer", obj=type(get_examples))
|
||||||
|
@ -378,7 +378,7 @@ class TextCategorizer(Pipe):
|
||||||
positive_label (str): Optional positive label.
|
positive_label (str): Optional positive label.
|
||||||
RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_cats.
|
RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_cats.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/textcategorizer#score
|
DOCS: https://nightly.spacy.io/api/textcategorizer#score
|
||||||
"""
|
"""
|
||||||
validate_examples(examples, "TextCategorizer.score")
|
validate_examples(examples, "TextCategorizer.score")
|
||||||
return Scorer.score_cats(
|
return Scorer.score_cats(
|
||||||
|
|
|
@ -56,7 +56,7 @@ class Tok2Vec(Pipe):
|
||||||
a list of Doc objects as input, and output a list of 2d float arrays.
|
a list of Doc objects as input, and output a list of 2d float arrays.
|
||||||
name (str): The component instance name.
|
name (str): The component instance name.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tok2vec#init
|
DOCS: https://nightly.spacy.io/api/tok2vec#init
|
||||||
"""
|
"""
|
||||||
self.vocab = vocab
|
self.vocab = vocab
|
||||||
self.model = model
|
self.model = model
|
||||||
|
@ -91,7 +91,7 @@ class Tok2Vec(Pipe):
|
||||||
doc (Doc): The Doc to process.
|
doc (Doc): The Doc to process.
|
||||||
RETURNS (Doc): The processed Doc.
|
RETURNS (Doc): The processed Doc.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tok2vec#call
|
DOCS: https://nightly.spacy.io/api/tok2vec#call
|
||||||
"""
|
"""
|
||||||
tokvecses = self.predict([doc])
|
tokvecses = self.predict([doc])
|
||||||
self.set_annotations([doc], tokvecses)
|
self.set_annotations([doc], tokvecses)
|
||||||
|
@ -106,7 +106,7 @@ class Tok2Vec(Pipe):
|
||||||
batch_size (int): The number of documents to buffer.
|
batch_size (int): The number of documents to buffer.
|
||||||
YIELDS (Doc): Processed documents in order.
|
YIELDS (Doc): Processed documents in order.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tok2vec#pipe
|
DOCS: https://nightly.spacy.io/api/tok2vec#pipe
|
||||||
"""
|
"""
|
||||||
for docs in minibatch(stream, batch_size):
|
for docs in minibatch(stream, batch_size):
|
||||||
docs = list(docs)
|
docs = list(docs)
|
||||||
|
@ -121,7 +121,7 @@ class Tok2Vec(Pipe):
|
||||||
docs (Iterable[Doc]): The documents to predict.
|
docs (Iterable[Doc]): The documents to predict.
|
||||||
RETURNS: Vector representations for each token in the documents.
|
RETURNS: Vector representations for each token in the documents.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tok2vec#predict
|
DOCS: https://nightly.spacy.io/api/tok2vec#predict
|
||||||
"""
|
"""
|
||||||
tokvecs = self.model.predict(docs)
|
tokvecs = self.model.predict(docs)
|
||||||
batch_id = Tok2VecListener.get_batch_id(docs)
|
batch_id = Tok2VecListener.get_batch_id(docs)
|
||||||
|
@ -135,7 +135,7 @@ class Tok2Vec(Pipe):
|
||||||
docs (Iterable[Doc]): The documents to modify.
|
docs (Iterable[Doc]): The documents to modify.
|
||||||
tokvecses: The tensors to set, produced by Tok2Vec.predict.
|
tokvecses: The tensors to set, produced by Tok2Vec.predict.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tok2vec#set_annotations
|
DOCS: https://nightly.spacy.io/api/tok2vec#set_annotations
|
||||||
"""
|
"""
|
||||||
for doc, tokvecs in zip(docs, tokvecses):
|
for doc, tokvecs in zip(docs, tokvecses):
|
||||||
assert tokvecs.shape[0] == len(doc)
|
assert tokvecs.shape[0] == len(doc)
|
||||||
|
@ -162,7 +162,7 @@ class Tok2Vec(Pipe):
|
||||||
Updated using the component name as the key.
|
Updated using the component name as the key.
|
||||||
RETURNS (Dict[str, float]): The updated losses dictionary.
|
RETURNS (Dict[str, float]): The updated losses dictionary.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tok2vec#update
|
DOCS: https://nightly.spacy.io/api/tok2vec#update
|
||||||
"""
|
"""
|
||||||
if losses is None:
|
if losses is None:
|
||||||
losses = {}
|
losses = {}
|
||||||
|
@ -220,7 +220,7 @@ class Tok2Vec(Pipe):
|
||||||
create_optimizer if it doesn't exist.
|
create_optimizer if it doesn't exist.
|
||||||
RETURNS (thinc.api.Optimizer): The optimizer.
|
RETURNS (thinc.api.Optimizer): The optimizer.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tok2vec#begin_training
|
DOCS: https://nightly.spacy.io/api/tok2vec#begin_training
|
||||||
"""
|
"""
|
||||||
docs = [Doc(self.vocab, words=["hello"])]
|
docs = [Doc(self.vocab, words=["hello"])]
|
||||||
self.model.initialize(X=docs)
|
self.model.initialize(X=docs)
|
||||||
|
|
|
@ -85,7 +85,7 @@ class Scorer:
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Initialize the Scorer.
|
"""Initialize the Scorer.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/scorer#init
|
DOCS: https://nightly.spacy.io/api/scorer#init
|
||||||
"""
|
"""
|
||||||
self.nlp = nlp
|
self.nlp = nlp
|
||||||
self.cfg = cfg
|
self.cfg = cfg
|
||||||
|
@ -101,7 +101,7 @@ class Scorer:
|
||||||
examples (Iterable[Example]): The predicted annotations + correct annotations.
|
examples (Iterable[Example]): The predicted annotations + correct annotations.
|
||||||
RETURNS (Dict): A dictionary of scores.
|
RETURNS (Dict): A dictionary of scores.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/scorer#score
|
DOCS: https://nightly.spacy.io/api/scorer#score
|
||||||
"""
|
"""
|
||||||
scores = {}
|
scores = {}
|
||||||
if hasattr(self.nlp.tokenizer, "score"):
|
if hasattr(self.nlp.tokenizer, "score"):
|
||||||
|
@ -121,7 +121,7 @@ class Scorer:
|
||||||
RETURNS (Dict[str, float]): A dictionary containing the scores
|
RETURNS (Dict[str, float]): A dictionary containing the scores
|
||||||
token_acc/p/r/f.
|
token_acc/p/r/f.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/scorer#score_tokenization
|
DOCS: https://nightly.spacy.io/api/scorer#score_tokenization
|
||||||
"""
|
"""
|
||||||
acc_score = PRFScore()
|
acc_score = PRFScore()
|
||||||
prf_score = PRFScore()
|
prf_score = PRFScore()
|
||||||
|
@ -169,7 +169,7 @@ class Scorer:
|
||||||
RETURNS (Dict[str, float]): A dictionary containing the accuracy score
|
RETURNS (Dict[str, float]): A dictionary containing the accuracy score
|
||||||
under the key attr_acc.
|
under the key attr_acc.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/scorer#score_token_attr
|
DOCS: https://nightly.spacy.io/api/scorer#score_token_attr
|
||||||
"""
|
"""
|
||||||
tag_score = PRFScore()
|
tag_score = PRFScore()
|
||||||
for example in examples:
|
for example in examples:
|
||||||
|
@ -263,7 +263,7 @@ class Scorer:
|
||||||
RETURNS (Dict[str, Any]): A dictionary containing the PRF scores under
|
RETURNS (Dict[str, Any]): A dictionary containing the PRF scores under
|
||||||
the keys attr_p/r/f and the per-type PRF scores under attr_per_type.
|
the keys attr_p/r/f and the per-type PRF scores under attr_per_type.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/scorer#score_spans
|
DOCS: https://nightly.spacy.io/api/scorer#score_spans
|
||||||
"""
|
"""
|
||||||
score = PRFScore()
|
score = PRFScore()
|
||||||
score_per_type = dict()
|
score_per_type = dict()
|
||||||
|
@ -350,7 +350,7 @@ class Scorer:
|
||||||
attr_f_per_type,
|
attr_f_per_type,
|
||||||
attr_auc_per_type
|
attr_auc_per_type
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/scorer#score_cats
|
DOCS: https://nightly.spacy.io/api/scorer#score_cats
|
||||||
"""
|
"""
|
||||||
if threshold is None:
|
if threshold is None:
|
||||||
threshold = 0.5 if multi_label else 0.0
|
threshold = 0.5 if multi_label else 0.0
|
||||||
|
@ -467,7 +467,7 @@ class Scorer:
|
||||||
RETURNS (Dict[str, Any]): A dictionary containing the scores:
|
RETURNS (Dict[str, Any]): A dictionary containing the scores:
|
||||||
attr_uas, attr_las, and attr_las_per_type.
|
attr_uas, attr_las, and attr_las_per_type.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/scorer#score_deps
|
DOCS: https://nightly.spacy.io/api/scorer#score_deps
|
||||||
"""
|
"""
|
||||||
unlabelled = PRFScore()
|
unlabelled = PRFScore()
|
||||||
labelled = PRFScore()
|
labelled = PRFScore()
|
||||||
|
|
|
@ -91,7 +91,7 @@ cdef Utf8Str* _allocate(Pool mem, const unsigned char* chars, uint32_t length) e
|
||||||
cdef class StringStore:
|
cdef class StringStore:
|
||||||
"""Look up strings by 64-bit hashes.
|
"""Look up strings by 64-bit hashes.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/stringstore
|
DOCS: https://nightly.spacy.io/api/stringstore
|
||||||
"""
|
"""
|
||||||
def __init__(self, strings=None, freeze=False):
|
def __init__(self, strings=None, freeze=False):
|
||||||
"""Create the StringStore.
|
"""Create the StringStore.
|
||||||
|
|
|
@ -31,7 +31,7 @@ cdef class Tokenizer:
|
||||||
"""Segment text, and create Doc objects with the discovered segment
|
"""Segment text, and create Doc objects with the discovered segment
|
||||||
boundaries.
|
boundaries.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tokenizer
|
DOCS: https://nightly.spacy.io/api/tokenizer
|
||||||
"""
|
"""
|
||||||
def __init__(self, Vocab vocab, rules=None, prefix_search=None,
|
def __init__(self, Vocab vocab, rules=None, prefix_search=None,
|
||||||
suffix_search=None, infix_finditer=None, token_match=None,
|
suffix_search=None, infix_finditer=None, token_match=None,
|
||||||
|
@ -54,7 +54,7 @@ cdef class Tokenizer:
|
||||||
EXAMPLE:
|
EXAMPLE:
|
||||||
>>> tokenizer = Tokenizer(nlp.vocab)
|
>>> tokenizer = Tokenizer(nlp.vocab)
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tokenizer#init
|
DOCS: https://nightly.spacy.io/api/tokenizer#init
|
||||||
"""
|
"""
|
||||||
self.mem = Pool()
|
self.mem = Pool()
|
||||||
self._cache = PreshMap()
|
self._cache = PreshMap()
|
||||||
|
@ -147,7 +147,7 @@ cdef class Tokenizer:
|
||||||
string (str): The string to tokenize.
|
string (str): The string to tokenize.
|
||||||
RETURNS (Doc): A container for linguistic annotations.
|
RETURNS (Doc): A container for linguistic annotations.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tokenizer#call
|
DOCS: https://nightly.spacy.io/api/tokenizer#call
|
||||||
"""
|
"""
|
||||||
doc = self._tokenize_affixes(string, True)
|
doc = self._tokenize_affixes(string, True)
|
||||||
self._apply_special_cases(doc)
|
self._apply_special_cases(doc)
|
||||||
|
@ -209,7 +209,7 @@ cdef class Tokenizer:
|
||||||
Defaults to 1000.
|
Defaults to 1000.
|
||||||
YIELDS (Doc): A sequence of Doc objects, in order.
|
YIELDS (Doc): A sequence of Doc objects, in order.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tokenizer#pipe
|
DOCS: https://nightly.spacy.io/api/tokenizer#pipe
|
||||||
"""
|
"""
|
||||||
for text in texts:
|
for text in texts:
|
||||||
yield self(text)
|
yield self(text)
|
||||||
|
@ -529,7 +529,7 @@ cdef class Tokenizer:
|
||||||
and `.end()` methods, denoting the placement of internal segment
|
and `.end()` methods, denoting the placement of internal segment
|
||||||
separators, e.g. hyphens.
|
separators, e.g. hyphens.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tokenizer#find_infix
|
DOCS: https://nightly.spacy.io/api/tokenizer#find_infix
|
||||||
"""
|
"""
|
||||||
if self.infix_finditer is None:
|
if self.infix_finditer is None:
|
||||||
return 0
|
return 0
|
||||||
|
@ -542,7 +542,7 @@ cdef class Tokenizer:
|
||||||
string (str): The string to segment.
|
string (str): The string to segment.
|
||||||
RETURNS (int): The length of the prefix if present, otherwise `None`.
|
RETURNS (int): The length of the prefix if present, otherwise `None`.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tokenizer#find_prefix
|
DOCS: https://nightly.spacy.io/api/tokenizer#find_prefix
|
||||||
"""
|
"""
|
||||||
if self.prefix_search is None:
|
if self.prefix_search is None:
|
||||||
return 0
|
return 0
|
||||||
|
@ -556,7 +556,7 @@ cdef class Tokenizer:
|
||||||
string (str): The string to segment.
|
string (str): The string to segment.
|
||||||
RETURNS (int): The length of the suffix if present, otherwise `None`.
|
RETURNS (int): The length of the suffix if present, otherwise `None`.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tokenizer#find_suffix
|
DOCS: https://nightly.spacy.io/api/tokenizer#find_suffix
|
||||||
"""
|
"""
|
||||||
if self.suffix_search is None:
|
if self.suffix_search is None:
|
||||||
return 0
|
return 0
|
||||||
|
@ -596,7 +596,7 @@ cdef class Tokenizer:
|
||||||
a token and its attributes. The `ORTH` fields of the attributes
|
a token and its attributes. The `ORTH` fields of the attributes
|
||||||
must exactly match the string when they are concatenated.
|
must exactly match the string when they are concatenated.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tokenizer#add_special_case
|
DOCS: https://nightly.spacy.io/api/tokenizer#add_special_case
|
||||||
"""
|
"""
|
||||||
self._validate_special_case(string, substrings)
|
self._validate_special_case(string, substrings)
|
||||||
substrings = list(substrings)
|
substrings = list(substrings)
|
||||||
|
@ -635,7 +635,7 @@ cdef class Tokenizer:
|
||||||
string (str): The string to tokenize.
|
string (str): The string to tokenize.
|
||||||
RETURNS (list): A list of (pattern_string, token_string) tuples
|
RETURNS (list): A list of (pattern_string, token_string) tuples
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tokenizer#explain
|
DOCS: https://nightly.spacy.io/api/tokenizer#explain
|
||||||
"""
|
"""
|
||||||
prefix_search = self.prefix_search
|
prefix_search = self.prefix_search
|
||||||
suffix_search = self.suffix_search
|
suffix_search = self.suffix_search
|
||||||
|
@ -716,7 +716,7 @@ cdef class Tokenizer:
|
||||||
it doesn't exist.
|
it doesn't exist.
|
||||||
exclude (list): String names of serialization fields to exclude.
|
exclude (list): String names of serialization fields to exclude.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tokenizer#to_disk
|
DOCS: https://nightly.spacy.io/api/tokenizer#to_disk
|
||||||
"""
|
"""
|
||||||
path = util.ensure_path(path)
|
path = util.ensure_path(path)
|
||||||
with path.open("wb") as file_:
|
with path.open("wb") as file_:
|
||||||
|
@ -730,7 +730,7 @@ cdef class Tokenizer:
|
||||||
exclude (list): String names of serialization fields to exclude.
|
exclude (list): String names of serialization fields to exclude.
|
||||||
RETURNS (Tokenizer): The modified `Tokenizer` object.
|
RETURNS (Tokenizer): The modified `Tokenizer` object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tokenizer#from_disk
|
DOCS: https://nightly.spacy.io/api/tokenizer#from_disk
|
||||||
"""
|
"""
|
||||||
path = util.ensure_path(path)
|
path = util.ensure_path(path)
|
||||||
with path.open("rb") as file_:
|
with path.open("rb") as file_:
|
||||||
|
@ -744,7 +744,7 @@ cdef class Tokenizer:
|
||||||
exclude (list): String names of serialization fields to exclude.
|
exclude (list): String names of serialization fields to exclude.
|
||||||
RETURNS (bytes): The serialized form of the `Tokenizer` object.
|
RETURNS (bytes): The serialized form of the `Tokenizer` object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tokenizer#to_bytes
|
DOCS: https://nightly.spacy.io/api/tokenizer#to_bytes
|
||||||
"""
|
"""
|
||||||
serializers = {
|
serializers = {
|
||||||
"vocab": lambda: self.vocab.to_bytes(),
|
"vocab": lambda: self.vocab.to_bytes(),
|
||||||
|
@ -764,7 +764,7 @@ cdef class Tokenizer:
|
||||||
exclude (list): String names of serialization fields to exclude.
|
exclude (list): String names of serialization fields to exclude.
|
||||||
RETURNS (Tokenizer): The `Tokenizer` object.
|
RETURNS (Tokenizer): The `Tokenizer` object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tokenizer#from_bytes
|
DOCS: https://nightly.spacy.io/api/tokenizer#from_bytes
|
||||||
"""
|
"""
|
||||||
data = {}
|
data = {}
|
||||||
deserializers = {
|
deserializers = {
|
||||||
|
|
|
@ -24,8 +24,8 @@ from ..strings import get_string_id
|
||||||
cdef class Retokenizer:
|
cdef class Retokenizer:
|
||||||
"""Helper class for doc.retokenize() context manager.
|
"""Helper class for doc.retokenize() context manager.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#retokenize
|
DOCS: https://nightly.spacy.io/api/doc#retokenize
|
||||||
USAGE: https://spacy.io/usage/linguistic-features#retokenization
|
USAGE: https://nightly.spacy.io/usage/linguistic-features#retokenization
|
||||||
"""
|
"""
|
||||||
cdef Doc doc
|
cdef Doc doc
|
||||||
cdef list merges
|
cdef list merges
|
||||||
|
@ -47,7 +47,7 @@ cdef class Retokenizer:
|
||||||
span (Span): The span to merge.
|
span (Span): The span to merge.
|
||||||
attrs (dict): Attributes to set on the merged token.
|
attrs (dict): Attributes to set on the merged token.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#retokenizer.merge
|
DOCS: https://nightly.spacy.io/api/doc#retokenizer.merge
|
||||||
"""
|
"""
|
||||||
if (span.start, span.end) in self._spans_to_merge:
|
if (span.start, span.end) in self._spans_to_merge:
|
||||||
return
|
return
|
||||||
|
@ -73,7 +73,7 @@ cdef class Retokenizer:
|
||||||
attrs (dict): Attributes to set on all split tokens. Attribute names
|
attrs (dict): Attributes to set on all split tokens. Attribute names
|
||||||
mapped to list of per-token attribute values.
|
mapped to list of per-token attribute values.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#retokenizer.split
|
DOCS: https://nightly.spacy.io/api/doc#retokenizer.split
|
||||||
"""
|
"""
|
||||||
if ''.join(orths) != token.text:
|
if ''.join(orths) != token.text:
|
||||||
raise ValueError(Errors.E117.format(new=''.join(orths), old=token.text))
|
raise ValueError(Errors.E117.format(new=''.join(orths), old=token.text))
|
||||||
|
|
|
@ -61,7 +61,7 @@ class DocBin:
|
||||||
store_user_data (bool): Whether to include the `Doc.user_data`.
|
store_user_data (bool): Whether to include the `Doc.user_data`.
|
||||||
docs (Iterable[Doc]): Docs to add.
|
docs (Iterable[Doc]): Docs to add.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/docbin#init
|
DOCS: https://nightly.spacy.io/api/docbin#init
|
||||||
"""
|
"""
|
||||||
attrs = sorted([intify_attr(attr) for attr in attrs])
|
attrs = sorted([intify_attr(attr) for attr in attrs])
|
||||||
self.version = "0.1"
|
self.version = "0.1"
|
||||||
|
@ -86,7 +86,7 @@ class DocBin:
|
||||||
|
|
||||||
doc (Doc): The Doc object to add.
|
doc (Doc): The Doc object to add.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/docbin#add
|
DOCS: https://nightly.spacy.io/api/docbin#add
|
||||||
"""
|
"""
|
||||||
array = doc.to_array(self.attrs)
|
array = doc.to_array(self.attrs)
|
||||||
if len(array.shape) == 1:
|
if len(array.shape) == 1:
|
||||||
|
@ -115,7 +115,7 @@ class DocBin:
|
||||||
vocab (Vocab): The shared vocab.
|
vocab (Vocab): The shared vocab.
|
||||||
YIELDS (Doc): The Doc objects.
|
YIELDS (Doc): The Doc objects.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/docbin#get_docs
|
DOCS: https://nightly.spacy.io/api/docbin#get_docs
|
||||||
"""
|
"""
|
||||||
for string in self.strings:
|
for string in self.strings:
|
||||||
vocab[string]
|
vocab[string]
|
||||||
|
@ -141,7 +141,7 @@ class DocBin:
|
||||||
|
|
||||||
other (DocBin): The DocBin to merge into the current bin.
|
other (DocBin): The DocBin to merge into the current bin.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/docbin#merge
|
DOCS: https://nightly.spacy.io/api/docbin#merge
|
||||||
"""
|
"""
|
||||||
if self.attrs != other.attrs:
|
if self.attrs != other.attrs:
|
||||||
raise ValueError(Errors.E166.format(current=self.attrs, other=other.attrs))
|
raise ValueError(Errors.E166.format(current=self.attrs, other=other.attrs))
|
||||||
|
@ -158,7 +158,7 @@ class DocBin:
|
||||||
|
|
||||||
RETURNS (bytes): The serialized DocBin.
|
RETURNS (bytes): The serialized DocBin.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/docbin#to_bytes
|
DOCS: https://nightly.spacy.io/api/docbin#to_bytes
|
||||||
"""
|
"""
|
||||||
for tokens in self.tokens:
|
for tokens in self.tokens:
|
||||||
assert len(tokens.shape) == 2, tokens.shape # this should never happen
|
assert len(tokens.shape) == 2, tokens.shape # this should never happen
|
||||||
|
@ -185,7 +185,7 @@ class DocBin:
|
||||||
bytes_data (bytes): The data to load from.
|
bytes_data (bytes): The data to load from.
|
||||||
RETURNS (DocBin): The loaded DocBin.
|
RETURNS (DocBin): The loaded DocBin.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/docbin#from_bytes
|
DOCS: https://nightly.spacy.io/api/docbin#from_bytes
|
||||||
"""
|
"""
|
||||||
msg = srsly.msgpack_loads(zlib.decompress(bytes_data))
|
msg = srsly.msgpack_loads(zlib.decompress(bytes_data))
|
||||||
self.attrs = msg["attrs"]
|
self.attrs = msg["attrs"]
|
||||||
|
@ -211,7 +211,7 @@ class DocBin:
|
||||||
|
|
||||||
path (str / Path): The file path.
|
path (str / Path): The file path.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/docbin#to_disk
|
DOCS: https://nightly.spacy.io/api/docbin#to_disk
|
||||||
"""
|
"""
|
||||||
path = ensure_path(path)
|
path = ensure_path(path)
|
||||||
with path.open("wb") as file_:
|
with path.open("wb") as file_:
|
||||||
|
@ -223,7 +223,7 @@ class DocBin:
|
||||||
path (str / Path): The file path.
|
path (str / Path): The file path.
|
||||||
RETURNS (DocBin): The loaded DocBin.
|
RETURNS (DocBin): The loaded DocBin.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/docbin#to_disk
|
DOCS: https://nightly.spacy.io/api/docbin#from_disk
|
||||||
"""
|
"""
|
||||||
path = ensure_path(path)
|
path = ensure_path(path)
|
||||||
with path.open("rb") as file_:
|
with path.open("rb") as file_:
|
||||||
|
|
|
@ -104,7 +104,7 @@ cdef class Doc:
|
||||||
>>> from spacy.tokens import Doc
|
>>> from spacy.tokens import Doc
|
||||||
>>> doc = Doc(nlp.vocab, words=["hello", "world", "!"], spaces=[True, False, False])
|
>>> doc = Doc(nlp.vocab, words=["hello", "world", "!"], spaces=[True, False, False])
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc
|
DOCS: https://nightly.spacy.io/api/doc
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -118,8 +118,8 @@ cdef class Doc:
|
||||||
method (callable): Optional method for method extension.
|
method (callable): Optional method for method extension.
|
||||||
force (bool): Force overwriting existing attribute.
|
force (bool): Force overwriting existing attribute.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#set_extension
|
DOCS: https://nightly.spacy.io/api/doc#set_extension
|
||||||
USAGE: https://spacy.io/usage/processing-pipelines#custom-components-attributes
|
USAGE: https://nightly.spacy.io/usage/processing-pipelines#custom-components-attributes
|
||||||
"""
|
"""
|
||||||
if cls.has_extension(name) and not kwargs.get("force", False):
|
if cls.has_extension(name) and not kwargs.get("force", False):
|
||||||
raise ValueError(Errors.E090.format(name=name, obj="Doc"))
|
raise ValueError(Errors.E090.format(name=name, obj="Doc"))
|
||||||
|
@ -132,7 +132,7 @@ cdef class Doc:
|
||||||
name (str): Name of the extension.
|
name (str): Name of the extension.
|
||||||
RETURNS (tuple): A `(default, method, getter, setter)` tuple.
|
RETURNS (tuple): A `(default, method, getter, setter)` tuple.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#get_extension
|
DOCS: https://nightly.spacy.io/api/doc#get_extension
|
||||||
"""
|
"""
|
||||||
return Underscore.doc_extensions.get(name)
|
return Underscore.doc_extensions.get(name)
|
||||||
|
|
||||||
|
@ -143,7 +143,7 @@ cdef class Doc:
|
||||||
name (str): Name of the extension.
|
name (str): Name of the extension.
|
||||||
RETURNS (bool): Whether the extension has been registered.
|
RETURNS (bool): Whether the extension has been registered.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#has_extension
|
DOCS: https://nightly.spacy.io/api/doc#has_extension
|
||||||
"""
|
"""
|
||||||
return name in Underscore.doc_extensions
|
return name in Underscore.doc_extensions
|
||||||
|
|
||||||
|
@ -155,7 +155,7 @@ cdef class Doc:
|
||||||
RETURNS (tuple): A `(default, method, getter, setter)` tuple of the
|
RETURNS (tuple): A `(default, method, getter, setter)` tuple of the
|
||||||
removed extension.
|
removed extension.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#remove_extension
|
DOCS: https://nightly.spacy.io/api/doc#remove_extension
|
||||||
"""
|
"""
|
||||||
if not cls.has_extension(name):
|
if not cls.has_extension(name):
|
||||||
raise ValueError(Errors.E046.format(name=name))
|
raise ValueError(Errors.E046.format(name=name))
|
||||||
|
@ -173,7 +173,7 @@ cdef class Doc:
|
||||||
it is not. If `None`, defaults to `[True]*len(words)`
|
it is not. If `None`, defaults to `[True]*len(words)`
|
||||||
user_data (dict or None): Optional extra data to attach to the Doc.
|
user_data (dict or None): Optional extra data to attach to the Doc.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#init
|
DOCS: https://nightly.spacy.io/api/doc#init
|
||||||
"""
|
"""
|
||||||
self.vocab = vocab
|
self.vocab = vocab
|
||||||
size = max(20, (len(words) if words is not None else 0))
|
size = max(20, (len(words) if words is not None else 0))
|
||||||
|
@ -288,7 +288,7 @@ cdef class Doc:
|
||||||
You can use negative indices and open-ended ranges, which have
|
You can use negative indices and open-ended ranges, which have
|
||||||
their normal Python semantics.
|
their normal Python semantics.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#getitem
|
DOCS: https://nightly.spacy.io/api/doc#getitem
|
||||||
"""
|
"""
|
||||||
if isinstance(i, slice):
|
if isinstance(i, slice):
|
||||||
start, stop = normalize_slice(len(self), i.start, i.stop, i.step)
|
start, stop = normalize_slice(len(self), i.start, i.stop, i.step)
|
||||||
|
@ -305,7 +305,7 @@ cdef class Doc:
|
||||||
than-Python speeds are required, you can instead access the annotations
|
than-Python speeds are required, you can instead access the annotations
|
||||||
as a numpy array, or access the underlying C data directly from Cython.
|
as a numpy array, or access the underlying C data directly from Cython.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#iter
|
DOCS: https://nightly.spacy.io/api/doc#iter
|
||||||
"""
|
"""
|
||||||
cdef int i
|
cdef int i
|
||||||
for i in range(self.length):
|
for i in range(self.length):
|
||||||
|
@ -316,7 +316,7 @@ cdef class Doc:
|
||||||
|
|
||||||
RETURNS (int): The number of tokens in the document.
|
RETURNS (int): The number of tokens in the document.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#len
|
DOCS: https://nightly.spacy.io/api/doc#len
|
||||||
"""
|
"""
|
||||||
return self.length
|
return self.length
|
||||||
|
|
||||||
|
@ -349,7 +349,7 @@ cdef class Doc:
|
||||||
the span.
|
the span.
|
||||||
RETURNS (Span): The newly constructed object.
|
RETURNS (Span): The newly constructed object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#char_span
|
DOCS: https://nightly.spacy.io/api/doc#char_span
|
||||||
"""
|
"""
|
||||||
if not isinstance(label, int):
|
if not isinstance(label, int):
|
||||||
label = self.vocab.strings.add(label)
|
label = self.vocab.strings.add(label)
|
||||||
|
@ -374,7 +374,7 @@ cdef class Doc:
|
||||||
`Span`, `Token` and `Lexeme` objects.
|
`Span`, `Token` and `Lexeme` objects.
|
||||||
RETURNS (float): A scalar similarity score. Higher is more similar.
|
RETURNS (float): A scalar similarity score. Higher is more similar.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#similarity
|
DOCS: https://nightly.spacy.io/api/doc#similarity
|
||||||
"""
|
"""
|
||||||
if "similarity" in self.user_hooks:
|
if "similarity" in self.user_hooks:
|
||||||
return self.user_hooks["similarity"](self, other)
|
return self.user_hooks["similarity"](self, other)
|
||||||
|
@ -407,7 +407,7 @@ cdef class Doc:
|
||||||
|
|
||||||
RETURNS (bool): Whether a word vector is associated with the object.
|
RETURNS (bool): Whether a word vector is associated with the object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#has_vector
|
DOCS: https://nightly.spacy.io/api/doc#has_vector
|
||||||
"""
|
"""
|
||||||
if "has_vector" in self.user_hooks:
|
if "has_vector" in self.user_hooks:
|
||||||
return self.user_hooks["has_vector"](self)
|
return self.user_hooks["has_vector"](self)
|
||||||
|
@ -425,7 +425,7 @@ cdef class Doc:
|
||||||
RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
|
RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
|
||||||
representing the document's semantics.
|
representing the document's semantics.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#vector
|
DOCS: https://nightly.spacy.io/api/doc#vector
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
if "vector" in self.user_hooks:
|
if "vector" in self.user_hooks:
|
||||||
|
@ -453,7 +453,7 @@ cdef class Doc:
|
||||||
|
|
||||||
RETURNS (float): The L2 norm of the vector representation.
|
RETURNS (float): The L2 norm of the vector representation.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#vector_norm
|
DOCS: https://nightly.spacy.io/api/doc#vector_norm
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
if "vector_norm" in self.user_hooks:
|
if "vector_norm" in self.user_hooks:
|
||||||
|
@ -493,7 +493,7 @@ cdef class Doc:
|
||||||
|
|
||||||
RETURNS (tuple): Entities in the document, one `Span` per entity.
|
RETURNS (tuple): Entities in the document, one `Span` per entity.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#ents
|
DOCS: https://nightly.spacy.io/api/doc#ents
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
cdef int i
|
cdef int i
|
||||||
|
@ -584,7 +584,7 @@ cdef class Doc:
|
||||||
|
|
||||||
YIELDS (Span): Noun chunks in the document.
|
YIELDS (Span): Noun chunks in the document.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#noun_chunks
|
DOCS: https://nightly.spacy.io/api/doc#noun_chunks
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Accumulate the result before beginning to iterate over it. This
|
# Accumulate the result before beginning to iterate over it. This
|
||||||
|
@ -609,7 +609,7 @@ cdef class Doc:
|
||||||
|
|
||||||
YIELDS (Span): Sentences in the document.
|
YIELDS (Span): Sentences in the document.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#sents
|
DOCS: https://nightly.spacy.io/api/doc#sents
|
||||||
"""
|
"""
|
||||||
if not self.is_sentenced:
|
if not self.is_sentenced:
|
||||||
raise ValueError(Errors.E030)
|
raise ValueError(Errors.E030)
|
||||||
|
@ -722,7 +722,7 @@ cdef class Doc:
|
||||||
attr_id (int): The attribute ID to key the counts.
|
attr_id (int): The attribute ID to key the counts.
|
||||||
RETURNS (dict): A dictionary mapping attributes to integer counts.
|
RETURNS (dict): A dictionary mapping attributes to integer counts.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#count_by
|
DOCS: https://nightly.spacy.io/api/doc#count_by
|
||||||
"""
|
"""
|
||||||
cdef int i
|
cdef int i
|
||||||
cdef attr_t attr
|
cdef attr_t attr
|
||||||
|
@ -777,7 +777,7 @@ cdef class Doc:
|
||||||
array (numpy.ndarray[ndim=2, dtype='int32']): The attribute values.
|
array (numpy.ndarray[ndim=2, dtype='int32']): The attribute values.
|
||||||
RETURNS (Doc): Itself.
|
RETURNS (Doc): Itself.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#from_array
|
DOCS: https://nightly.spacy.io/api/doc#from_array
|
||||||
"""
|
"""
|
||||||
# Handle scalar/list inputs of strings/ints for py_attr_ids
|
# Handle scalar/list inputs of strings/ints for py_attr_ids
|
||||||
# See also #3064
|
# See also #3064
|
||||||
|
@ -872,7 +872,7 @@ cdef class Doc:
|
||||||
attrs (list): Optional list of attribute ID ints or attribute name strings.
|
attrs (list): Optional list of attribute ID ints or attribute name strings.
|
||||||
RETURNS (Doc): A doc that contains the concatenated docs, or None if no docs were given.
|
RETURNS (Doc): A doc that contains the concatenated docs, or None if no docs were given.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#from_docs
|
DOCS: https://nightly.spacy.io/api/doc#from_docs
|
||||||
"""
|
"""
|
||||||
if not docs:
|
if not docs:
|
||||||
return None
|
return None
|
||||||
|
@ -953,7 +953,7 @@ cdef class Doc:
|
||||||
RETURNS (np.array[ndim=2, dtype=numpy.int32]): LCA matrix with shape
|
RETURNS (np.array[ndim=2, dtype=numpy.int32]): LCA matrix with shape
|
||||||
(n, n), where n = len(self).
|
(n, n), where n = len(self).
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#get_lca_matrix
|
DOCS: https://nightly.spacy.io/api/doc#get_lca_matrix
|
||||||
"""
|
"""
|
||||||
return numpy.asarray(_get_lca_matrix(self, 0, len(self)))
|
return numpy.asarray(_get_lca_matrix(self, 0, len(self)))
|
||||||
|
|
||||||
|
@ -987,7 +987,7 @@ cdef class Doc:
|
||||||
it doesn't exist. Paths may be either strings or Path-like objects.
|
it doesn't exist. Paths may be either strings or Path-like objects.
|
||||||
exclude (Iterable[str]): String names of serialization fields to exclude.
|
exclude (Iterable[str]): String names of serialization fields to exclude.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#to_disk
|
DOCS: https://nightly.spacy.io/api/doc#to_disk
|
||||||
"""
|
"""
|
||||||
path = util.ensure_path(path)
|
path = util.ensure_path(path)
|
||||||
with path.open("wb") as file_:
|
with path.open("wb") as file_:
|
||||||
|
@ -1002,7 +1002,7 @@ cdef class Doc:
|
||||||
exclude (list): String names of serialization fields to exclude.
|
exclude (list): String names of serialization fields to exclude.
|
||||||
RETURNS (Doc): The modified `Doc` object.
|
RETURNS (Doc): The modified `Doc` object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#from_disk
|
DOCS: https://nightly.spacy.io/api/doc#from_disk
|
||||||
"""
|
"""
|
||||||
path = util.ensure_path(path)
|
path = util.ensure_path(path)
|
||||||
with path.open("rb") as file_:
|
with path.open("rb") as file_:
|
||||||
|
@ -1016,7 +1016,7 @@ cdef class Doc:
|
||||||
RETURNS (bytes): A losslessly serialized copy of the `Doc`, including
|
RETURNS (bytes): A losslessly serialized copy of the `Doc`, including
|
||||||
all annotations.
|
all annotations.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#to_bytes
|
DOCS: https://nightly.spacy.io/api/doc#to_bytes
|
||||||
"""
|
"""
|
||||||
return srsly.msgpack_dumps(self.to_dict(exclude=exclude))
|
return srsly.msgpack_dumps(self.to_dict(exclude=exclude))
|
||||||
|
|
||||||
|
@ -1027,7 +1027,7 @@ cdef class Doc:
|
||||||
exclude (list): String names of serialization fields to exclude.
|
exclude (list): String names of serialization fields to exclude.
|
||||||
RETURNS (Doc): Itself.
|
RETURNS (Doc): Itself.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#from_bytes
|
DOCS: https://nightly.spacy.io/api/doc#from_bytes
|
||||||
"""
|
"""
|
||||||
return self.from_dict(srsly.msgpack_loads(bytes_data), exclude=exclude)
|
return self.from_dict(srsly.msgpack_loads(bytes_data), exclude=exclude)
|
||||||
|
|
||||||
|
@ -1038,7 +1038,7 @@ cdef class Doc:
|
||||||
RETURNS (bytes): A losslessly serialized copy of the `Doc`, including
|
RETURNS (bytes): A losslessly serialized copy of the `Doc`, including
|
||||||
all annotations.
|
all annotations.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#to_bytes
|
DOCS: https://nightly.spacy.io/api/doc#to_bytes
|
||||||
"""
|
"""
|
||||||
array_head = [LENGTH, SPACY, LEMMA, ENT_IOB, ENT_TYPE, ENT_ID, NORM, ENT_KB_ID]
|
array_head = [LENGTH, SPACY, LEMMA, ENT_IOB, ENT_TYPE, ENT_ID, NORM, ENT_KB_ID]
|
||||||
if self.is_tagged:
|
if self.is_tagged:
|
||||||
|
@ -1086,7 +1086,7 @@ cdef class Doc:
|
||||||
exclude (list): String names of serialization fields to exclude.
|
exclude (list): String names of serialization fields to exclude.
|
||||||
RETURNS (Doc): Itself.
|
RETURNS (Doc): Itself.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#from_dict
|
DOCS: https://nightly.spacy.io/api/doc#from_dict
|
||||||
"""
|
"""
|
||||||
if self.length != 0:
|
if self.length != 0:
|
||||||
raise ValueError(Errors.E033.format(length=self.length))
|
raise ValueError(Errors.E033.format(length=self.length))
|
||||||
|
@ -1166,8 +1166,8 @@ cdef class Doc:
|
||||||
retokenization are invalidated, although they may accidentally
|
retokenization are invalidated, although they may accidentally
|
||||||
continue to work.
|
continue to work.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#retokenize
|
DOCS: https://nightly.spacy.io/api/doc#retokenize
|
||||||
USAGE: https://spacy.io/usage/linguistic-features#retokenization
|
USAGE: https://nightly.spacy.io/usage/linguistic-features#retokenization
|
||||||
"""
|
"""
|
||||||
return Retokenizer(self)
|
return Retokenizer(self)
|
||||||
|
|
||||||
|
@ -1202,7 +1202,7 @@ cdef class Doc:
|
||||||
be added to an "_" key in the data, e.g. "_": {"foo": "bar"}.
|
be added to an "_" key in the data, e.g. "_": {"foo": "bar"}.
|
||||||
RETURNS (dict): The data in spaCy's JSON format.
|
RETURNS (dict): The data in spaCy's JSON format.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#to_json
|
DOCS: https://nightly.spacy.io/api/doc#to_json
|
||||||
"""
|
"""
|
||||||
data = {"text": self.text}
|
data = {"text": self.text}
|
||||||
if self.is_nered:
|
if self.is_nered:
|
||||||
|
|
|
@ -27,7 +27,7 @@ from .underscore import Underscore, get_ext_args
|
||||||
cdef class Span:
|
cdef class Span:
|
||||||
"""A slice from a Doc object.
|
"""A slice from a Doc object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/span
|
DOCS: https://nightly.spacy.io/api/span
|
||||||
"""
|
"""
|
||||||
@classmethod
|
@classmethod
|
||||||
def set_extension(cls, name, **kwargs):
|
def set_extension(cls, name, **kwargs):
|
||||||
|
@ -40,8 +40,8 @@ cdef class Span:
|
||||||
method (callable): Optional method for method extension.
|
method (callable): Optional method for method extension.
|
||||||
force (bool): Force overwriting existing attribute.
|
force (bool): Force overwriting existing attribute.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/span#set_extension
|
DOCS: https://nightly.spacy.io/api/span#set_extension
|
||||||
USAGE: https://spacy.io/usage/processing-pipelines#custom-components-attributes
|
USAGE: https://nightly.spacy.io/usage/processing-pipelines#custom-components-attributes
|
||||||
"""
|
"""
|
||||||
if cls.has_extension(name) and not kwargs.get("force", False):
|
if cls.has_extension(name) and not kwargs.get("force", False):
|
||||||
raise ValueError(Errors.E090.format(name=name, obj="Span"))
|
raise ValueError(Errors.E090.format(name=name, obj="Span"))
|
||||||
|
@ -54,7 +54,7 @@ cdef class Span:
|
||||||
name (str): Name of the extension.
|
name (str): Name of the extension.
|
||||||
RETURNS (tuple): A `(default, method, getter, setter)` tuple.
|
RETURNS (tuple): A `(default, method, getter, setter)` tuple.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/span#get_extension
|
DOCS: https://nightly.spacy.io/api/span#get_extension
|
||||||
"""
|
"""
|
||||||
return Underscore.span_extensions.get(name)
|
return Underscore.span_extensions.get(name)
|
||||||
|
|
||||||
|
@ -65,7 +65,7 @@ cdef class Span:
|
||||||
name (str): Name of the extension.
|
name (str): Name of the extension.
|
||||||
RETURNS (bool): Whether the extension has been registered.
|
RETURNS (bool): Whether the extension has been registered.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/span#has_extension
|
DOCS: https://nightly.spacy.io/api/span#has_extension
|
||||||
"""
|
"""
|
||||||
return name in Underscore.span_extensions
|
return name in Underscore.span_extensions
|
||||||
|
|
||||||
|
@ -77,7 +77,7 @@ cdef class Span:
|
||||||
RETURNS (tuple): A `(default, method, getter, setter)` tuple of the
|
RETURNS (tuple): A `(default, method, getter, setter)` tuple of the
|
||||||
removed extension.
|
removed extension.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/span#remove_extension
|
DOCS: https://nightly.spacy.io/api/span#remove_extension
|
||||||
"""
|
"""
|
||||||
if not cls.has_extension(name):
|
if not cls.has_extension(name):
|
||||||
raise ValueError(Errors.E046.format(name=name))
|
raise ValueError(Errors.E046.format(name=name))
|
||||||
|
@ -95,7 +95,7 @@ cdef class Span:
|
||||||
vector (ndarray[ndim=1, dtype='float32']): A meaning representation
|
vector (ndarray[ndim=1, dtype='float32']): A meaning representation
|
||||||
of the span.
|
of the span.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/span#init
|
DOCS: https://nightly.spacy.io/api/span#init
|
||||||
"""
|
"""
|
||||||
if not (0 <= start <= end <= len(doc)):
|
if not (0 <= start <= end <= len(doc)):
|
||||||
raise IndexError(Errors.E035.format(start=start, end=end, length=len(doc)))
|
raise IndexError(Errors.E035.format(start=start, end=end, length=len(doc)))
|
||||||
|
@ -151,7 +151,7 @@ cdef class Span:
|
||||||
|
|
||||||
RETURNS (int): The number of tokens in the span.
|
RETURNS (int): The number of tokens in the span.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/span#len
|
DOCS: https://nightly.spacy.io/api/span#len
|
||||||
"""
|
"""
|
||||||
self._recalculate_indices()
|
self._recalculate_indices()
|
||||||
if self.end < self.start:
|
if self.end < self.start:
|
||||||
|
@ -168,7 +168,7 @@ cdef class Span:
|
||||||
the span to get.
|
the span to get.
|
||||||
RETURNS (Token or Span): The token at `span[i]`.
|
RETURNS (Token or Span): The token at `span[i]`.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/span#getitem
|
DOCS: https://nightly.spacy.io/api/span#getitem
|
||||||
"""
|
"""
|
||||||
self._recalculate_indices()
|
self._recalculate_indices()
|
||||||
if isinstance(i, slice):
|
if isinstance(i, slice):
|
||||||
|
@ -189,7 +189,7 @@ cdef class Span:
|
||||||
|
|
||||||
YIELDS (Token): A `Token` object.
|
YIELDS (Token): A `Token` object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/span#iter
|
DOCS: https://nightly.spacy.io/api/span#iter
|
||||||
"""
|
"""
|
||||||
self._recalculate_indices()
|
self._recalculate_indices()
|
||||||
for i in range(self.start, self.end):
|
for i in range(self.start, self.end):
|
||||||
|
@ -210,7 +210,7 @@ cdef class Span:
|
||||||
copy_user_data (bool): Whether or not to copy the original doc's user data.
|
copy_user_data (bool): Whether or not to copy the original doc's user data.
|
||||||
RETURNS (Doc): The `Doc` copy of the span.
|
RETURNS (Doc): The `Doc` copy of the span.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/span#as_doc
|
DOCS: https://nightly.spacy.io/api/span#as_doc
|
||||||
"""
|
"""
|
||||||
# TODO: make copy_user_data a keyword-only argument (Python 3 only)
|
# TODO: make copy_user_data a keyword-only argument (Python 3 only)
|
||||||
words = [t.text for t in self]
|
words = [t.text for t in self]
|
||||||
|
@ -292,7 +292,7 @@ cdef class Span:
|
||||||
RETURNS (np.array[ndim=2, dtype=numpy.int32]): LCA matrix with shape
|
RETURNS (np.array[ndim=2, dtype=numpy.int32]): LCA matrix with shape
|
||||||
(n, n), where n = len(self).
|
(n, n), where n = len(self).
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/span#get_lca_matrix
|
DOCS: https://nightly.spacy.io/api/span#get_lca_matrix
|
||||||
"""
|
"""
|
||||||
return numpy.asarray(_get_lca_matrix(self.doc, self.start, self.end))
|
return numpy.asarray(_get_lca_matrix(self.doc, self.start, self.end))
|
||||||
|
|
||||||
|
@ -304,7 +304,7 @@ cdef class Span:
|
||||||
`Span`, `Token` and `Lexeme` objects.
|
`Span`, `Token` and `Lexeme` objects.
|
||||||
RETURNS (float): A scalar similarity score. Higher is more similar.
|
RETURNS (float): A scalar similarity score. Higher is more similar.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/span#similarity
|
DOCS: https://nightly.spacy.io/api/span#similarity
|
||||||
"""
|
"""
|
||||||
if "similarity" in self.doc.user_span_hooks:
|
if "similarity" in self.doc.user_span_hooks:
|
||||||
return self.doc.user_span_hooks["similarity"](self, other)
|
return self.doc.user_span_hooks["similarity"](self, other)
|
||||||
|
@ -400,7 +400,7 @@ cdef class Span:
|
||||||
|
|
||||||
RETURNS (tuple): Entities in the span, one `Span` per entity.
|
RETURNS (tuple): Entities in the span, one `Span` per entity.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/span#ents
|
DOCS: https://nightly.spacy.io/api/span#ents
|
||||||
"""
|
"""
|
||||||
ents = []
|
ents = []
|
||||||
for ent in self.doc.ents:
|
for ent in self.doc.ents:
|
||||||
|
@ -415,7 +415,7 @@ cdef class Span:
|
||||||
|
|
||||||
RETURNS (bool): Whether a word vector is associated with the object.
|
RETURNS (bool): Whether a word vector is associated with the object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/span#has_vector
|
DOCS: https://nightly.spacy.io/api/span#has_vector
|
||||||
"""
|
"""
|
||||||
if "has_vector" in self.doc.user_span_hooks:
|
if "has_vector" in self.doc.user_span_hooks:
|
||||||
return self.doc.user_span_hooks["has_vector"](self)
|
return self.doc.user_span_hooks["has_vector"](self)
|
||||||
|
@ -434,7 +434,7 @@ cdef class Span:
|
||||||
RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
|
RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
|
||||||
representing the span's semantics.
|
representing the span's semantics.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/span#vector
|
DOCS: https://nightly.spacy.io/api/span#vector
|
||||||
"""
|
"""
|
||||||
if "vector" in self.doc.user_span_hooks:
|
if "vector" in self.doc.user_span_hooks:
|
||||||
return self.doc.user_span_hooks["vector"](self)
|
return self.doc.user_span_hooks["vector"](self)
|
||||||
|
@ -448,7 +448,7 @@ cdef class Span:
|
||||||
|
|
||||||
RETURNS (float): The L2 norm of the vector representation.
|
RETURNS (float): The L2 norm of the vector representation.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/span#vector_norm
|
DOCS: https://nightly.spacy.io/api/span#vector_norm
|
||||||
"""
|
"""
|
||||||
if "vector_norm" in self.doc.user_span_hooks:
|
if "vector_norm" in self.doc.user_span_hooks:
|
||||||
return self.doc.user_span_hooks["vector"](self)
|
return self.doc.user_span_hooks["vector"](self)
|
||||||
|
@ -508,7 +508,7 @@ cdef class Span:
|
||||||
|
|
||||||
YIELDS (Span): Base noun-phrase `Span` objects.
|
YIELDS (Span): Base noun-phrase `Span` objects.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/span#noun_chunks
|
DOCS: https://nightly.spacy.io/api/span#noun_chunks
|
||||||
"""
|
"""
|
||||||
if not self.doc.is_parsed:
|
if not self.doc.is_parsed:
|
||||||
raise ValueError(Errors.E029)
|
raise ValueError(Errors.E029)
|
||||||
|
@ -533,7 +533,7 @@ cdef class Span:
|
||||||
|
|
||||||
RETURNS (Token): The root token.
|
RETURNS (Token): The root token.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/span#root
|
DOCS: https://nightly.spacy.io/api/span#root
|
||||||
"""
|
"""
|
||||||
self._recalculate_indices()
|
self._recalculate_indices()
|
||||||
if "root" in self.doc.user_span_hooks:
|
if "root" in self.doc.user_span_hooks:
|
||||||
|
@ -590,7 +590,7 @@ cdef class Span:
|
||||||
|
|
||||||
RETURNS (tuple): A tuple of Token objects.
|
RETURNS (tuple): A tuple of Token objects.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/span#lefts
|
DOCS: https://nightly.spacy.io/api/span#lefts
|
||||||
"""
|
"""
|
||||||
return self.root.conjuncts
|
return self.root.conjuncts
|
||||||
|
|
||||||
|
@ -601,7 +601,7 @@ cdef class Span:
|
||||||
|
|
||||||
YIELDS (Token):A left-child of a token of the span.
|
YIELDS (Token):A left-child of a token of the span.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/span#lefts
|
DOCS: https://nightly.spacy.io/api/span#lefts
|
||||||
"""
|
"""
|
||||||
for token in reversed(self): # Reverse, so we get tokens in order
|
for token in reversed(self): # Reverse, so we get tokens in order
|
||||||
for left in token.lefts:
|
for left in token.lefts:
|
||||||
|
@ -615,7 +615,7 @@ cdef class Span:
|
||||||
|
|
||||||
YIELDS (Token): A right-child of a token of the span.
|
YIELDS (Token): A right-child of a token of the span.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/span#rights
|
DOCS: https://nightly.spacy.io/api/span#rights
|
||||||
"""
|
"""
|
||||||
for token in self:
|
for token in self:
|
||||||
for right in token.rights:
|
for right in token.rights:
|
||||||
|
@ -630,7 +630,7 @@ cdef class Span:
|
||||||
RETURNS (int): The number of leftward immediate children of the
|
RETURNS (int): The number of leftward immediate children of the
|
||||||
span, in the syntactic dependency parse.
|
span, in the syntactic dependency parse.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/span#n_lefts
|
DOCS: https://nightly.spacy.io/api/span#n_lefts
|
||||||
"""
|
"""
|
||||||
return len(list(self.lefts))
|
return len(list(self.lefts))
|
||||||
|
|
||||||
|
@ -642,7 +642,7 @@ cdef class Span:
|
||||||
RETURNS (int): The number of rightward immediate children of the
|
RETURNS (int): The number of rightward immediate children of the
|
||||||
span, in the syntactic dependency parse.
|
span, in the syntactic dependency parse.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/span#n_rights
|
DOCS: https://nightly.spacy.io/api/span#n_rights
|
||||||
"""
|
"""
|
||||||
return len(list(self.rights))
|
return len(list(self.rights))
|
||||||
|
|
||||||
|
@ -652,7 +652,7 @@ cdef class Span:
|
||||||
|
|
||||||
YIELDS (Token): A token within the span, or a descendant from it.
|
YIELDS (Token): A token within the span, or a descendant from it.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/span#subtree
|
DOCS: https://nightly.spacy.io/api/span#subtree
|
||||||
"""
|
"""
|
||||||
for word in self.lefts:
|
for word in self.lefts:
|
||||||
yield from word.subtree
|
yield from word.subtree
|
||||||
|
|
|
@ -30,7 +30,7 @@ cdef class Token:
|
||||||
"""An individual token – i.e. a word, punctuation symbol, whitespace,
|
"""An individual token – i.e. a word, punctuation symbol, whitespace,
|
||||||
etc.
|
etc.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/token
|
DOCS: https://nightly.spacy.io/api/token
|
||||||
"""
|
"""
|
||||||
@classmethod
|
@classmethod
|
||||||
def set_extension(cls, name, **kwargs):
|
def set_extension(cls, name, **kwargs):
|
||||||
|
@ -43,8 +43,8 @@ cdef class Token:
|
||||||
method (callable): Optional method for method extension.
|
method (callable): Optional method for method extension.
|
||||||
force (bool): Force overwriting existing attribute.
|
force (bool): Force overwriting existing attribute.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/token#set_extension
|
DOCS: https://nightly.spacy.io/api/token#set_extension
|
||||||
USAGE: https://spacy.io/usage/processing-pipelines#custom-components-attributes
|
USAGE: https://nightly.spacy.io/usage/processing-pipelines#custom-components-attributes
|
||||||
"""
|
"""
|
||||||
if cls.has_extension(name) and not kwargs.get("force", False):
|
if cls.has_extension(name) and not kwargs.get("force", False):
|
||||||
raise ValueError(Errors.E090.format(name=name, obj="Token"))
|
raise ValueError(Errors.E090.format(name=name, obj="Token"))
|
||||||
|
@ -57,7 +57,7 @@ cdef class Token:
|
||||||
name (str): Name of the extension.
|
name (str): Name of the extension.
|
||||||
RETURNS (tuple): A `(default, method, getter, setter)` tuple.
|
RETURNS (tuple): A `(default, method, getter, setter)` tuple.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/token#get_extension
|
DOCS: https://nightly.spacy.io/api/token#get_extension
|
||||||
"""
|
"""
|
||||||
return Underscore.token_extensions.get(name)
|
return Underscore.token_extensions.get(name)
|
||||||
|
|
||||||
|
@ -68,7 +68,7 @@ cdef class Token:
|
||||||
name (str): Name of the extension.
|
name (str): Name of the extension.
|
||||||
RETURNS (bool): Whether the extension has been registered.
|
RETURNS (bool): Whether the extension has been registered.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/token#has_extension
|
DOCS: https://nightly.spacy.io/api/token#has_extension
|
||||||
"""
|
"""
|
||||||
return name in Underscore.token_extensions
|
return name in Underscore.token_extensions
|
||||||
|
|
||||||
|
@ -80,7 +80,7 @@ cdef class Token:
|
||||||
RETURNS (tuple): A `(default, method, getter, setter)` tuple of the
|
RETURNS (tuple): A `(default, method, getter, setter)` tuple of the
|
||||||
removed extension.
|
removed extension.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/token#remove_extension
|
DOCS: https://nightly.spacy.io/api/token#remove_extension
|
||||||
"""
|
"""
|
||||||
if not cls.has_extension(name):
|
if not cls.has_extension(name):
|
||||||
raise ValueError(Errors.E046.format(name=name))
|
raise ValueError(Errors.E046.format(name=name))
|
||||||
|
@ -93,7 +93,7 @@ cdef class Token:
|
||||||
doc (Doc): The parent document.
|
doc (Doc): The parent document.
|
||||||
offset (int): The index of the token within the document.
|
offset (int): The index of the token within the document.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/token#init
|
DOCS: https://nightly.spacy.io/api/token#init
|
||||||
"""
|
"""
|
||||||
self.vocab = vocab
|
self.vocab = vocab
|
||||||
self.doc = doc
|
self.doc = doc
|
||||||
|
@ -108,7 +108,7 @@ cdef class Token:
|
||||||
|
|
||||||
RETURNS (int): The number of unicode characters in the token.
|
RETURNS (int): The number of unicode characters in the token.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/token#len
|
DOCS: https://nightly.spacy.io/api/token#len
|
||||||
"""
|
"""
|
||||||
return self.c.lex.length
|
return self.c.lex.length
|
||||||
|
|
||||||
|
@ -171,7 +171,7 @@ cdef class Token:
|
||||||
flag_id (int): The ID of the flag attribute.
|
flag_id (int): The ID of the flag attribute.
|
||||||
RETURNS (bool): Whether the flag is set.
|
RETURNS (bool): Whether the flag is set.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/token#check_flag
|
DOCS: https://nightly.spacy.io/api/token#check_flag
|
||||||
"""
|
"""
|
||||||
return Lexeme.c_check_flag(self.c.lex, flag_id)
|
return Lexeme.c_check_flag(self.c.lex, flag_id)
|
||||||
|
|
||||||
|
@ -181,7 +181,7 @@ cdef class Token:
|
||||||
i (int): The relative position of the token to get. Defaults to 1.
|
i (int): The relative position of the token to get. Defaults to 1.
|
||||||
RETURNS (Token): The token at position `self.doc[self.i+i]`.
|
RETURNS (Token): The token at position `self.doc[self.i+i]`.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/token#nbor
|
DOCS: https://nightly.spacy.io/api/token#nbor
|
||||||
"""
|
"""
|
||||||
if self.i+i < 0 or (self.i+i >= len(self.doc)):
|
if self.i+i < 0 or (self.i+i >= len(self.doc)):
|
||||||
raise IndexError(Errors.E042.format(i=self.i, j=i, length=len(self.doc)))
|
raise IndexError(Errors.E042.format(i=self.i, j=i, length=len(self.doc)))
|
||||||
|
@ -195,7 +195,7 @@ cdef class Token:
|
||||||
`Span`, `Token` and `Lexeme` objects.
|
`Span`, `Token` and `Lexeme` objects.
|
||||||
RETURNS (float): A scalar similarity score. Higher is more similar.
|
RETURNS (float): A scalar similarity score. Higher is more similar.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/token#similarity
|
DOCS: https://nightly.spacy.io/api/token#similarity
|
||||||
"""
|
"""
|
||||||
if "similarity" in self.doc.user_token_hooks:
|
if "similarity" in self.doc.user_token_hooks:
|
||||||
return self.doc.user_token_hooks["similarity"](self, other)
|
return self.doc.user_token_hooks["similarity"](self, other)
|
||||||
|
@ -373,7 +373,7 @@ cdef class Token:
|
||||||
|
|
||||||
RETURNS (bool): Whether a word vector is associated with the object.
|
RETURNS (bool): Whether a word vector is associated with the object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/token#has_vector
|
DOCS: https://nightly.spacy.io/api/token#has_vector
|
||||||
"""
|
"""
|
||||||
if "has_vector" in self.doc.user_token_hooks:
|
if "has_vector" in self.doc.user_token_hooks:
|
||||||
return self.doc.user_token_hooks["has_vector"](self)
|
return self.doc.user_token_hooks["has_vector"](self)
|
||||||
|
@ -388,7 +388,7 @@ cdef class Token:
|
||||||
RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
|
RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
|
||||||
representing the token's semantics.
|
representing the token's semantics.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/token#vector
|
DOCS: https://nightly.spacy.io/api/token#vector
|
||||||
"""
|
"""
|
||||||
if "vector" in self.doc.user_token_hooks:
|
if "vector" in self.doc.user_token_hooks:
|
||||||
return self.doc.user_token_hooks["vector"](self)
|
return self.doc.user_token_hooks["vector"](self)
|
||||||
|
@ -403,7 +403,7 @@ cdef class Token:
|
||||||
|
|
||||||
RETURNS (float): The L2 norm of the vector representation.
|
RETURNS (float): The L2 norm of the vector representation.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/token#vector_norm
|
DOCS: https://nightly.spacy.io/api/token#vector_norm
|
||||||
"""
|
"""
|
||||||
if "vector_norm" in self.doc.user_token_hooks:
|
if "vector_norm" in self.doc.user_token_hooks:
|
||||||
return self.doc.user_token_hooks["vector_norm"](self)
|
return self.doc.user_token_hooks["vector_norm"](self)
|
||||||
|
@ -426,7 +426,7 @@ cdef class Token:
|
||||||
RETURNS (int): The number of leftward immediate children of the
|
RETURNS (int): The number of leftward immediate children of the
|
||||||
word, in the syntactic dependency parse.
|
word, in the syntactic dependency parse.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/token#n_lefts
|
DOCS: https://nightly.spacy.io/api/token#n_lefts
|
||||||
"""
|
"""
|
||||||
return self.c.l_kids
|
return self.c.l_kids
|
||||||
|
|
||||||
|
@ -438,7 +438,7 @@ cdef class Token:
|
||||||
RETURNS (int): The number of rightward immediate children of the
|
RETURNS (int): The number of rightward immediate children of the
|
||||||
word, in the syntactic dependency parse.
|
word, in the syntactic dependency parse.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/token#n_rights
|
DOCS: https://nightly.spacy.io/api/token#n_rights
|
||||||
"""
|
"""
|
||||||
return self.c.r_kids
|
return self.c.r_kids
|
||||||
|
|
||||||
|
@ -470,7 +470,7 @@ cdef class Token:
|
||||||
RETURNS (bool / None): Whether the token starts a sentence.
|
RETURNS (bool / None): Whether the token starts a sentence.
|
||||||
None if unknown.
|
None if unknown.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/token#is_sent_start
|
DOCS: https://nightly.spacy.io/api/token#is_sent_start
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
if self.c.sent_start == 0:
|
if self.c.sent_start == 0:
|
||||||
|
@ -499,7 +499,7 @@ cdef class Token:
|
||||||
RETURNS (bool / None): Whether the token ends a sentence.
|
RETURNS (bool / None): Whether the token ends a sentence.
|
||||||
None if unknown.
|
None if unknown.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/token#is_sent_end
|
DOCS: https://nightly.spacy.io/api/token#is_sent_end
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
if self.i + 1 == len(self.doc):
|
if self.i + 1 == len(self.doc):
|
||||||
|
@ -521,7 +521,7 @@ cdef class Token:
|
||||||
|
|
||||||
YIELDS (Token): A left-child of the token.
|
YIELDS (Token): A left-child of the token.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/token#lefts
|
DOCS: https://nightly.spacy.io/api/token#lefts
|
||||||
"""
|
"""
|
||||||
cdef int nr_iter = 0
|
cdef int nr_iter = 0
|
||||||
cdef const TokenC* ptr = self.c - (self.i - self.c.l_edge)
|
cdef const TokenC* ptr = self.c - (self.i - self.c.l_edge)
|
||||||
|
@ -541,7 +541,7 @@ cdef class Token:
|
||||||
|
|
||||||
YIELDS (Token): A right-child of the token.
|
YIELDS (Token): A right-child of the token.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/token#rights
|
DOCS: https://nightly.spacy.io/api/token#rights
|
||||||
"""
|
"""
|
||||||
cdef const TokenC* ptr = self.c + (self.c.r_edge - self.i)
|
cdef const TokenC* ptr = self.c + (self.c.r_edge - self.i)
|
||||||
tokens = []
|
tokens = []
|
||||||
|
@ -563,7 +563,7 @@ cdef class Token:
|
||||||
|
|
||||||
YIELDS (Token): A child token such that `child.head==self`.
|
YIELDS (Token): A child token such that `child.head==self`.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/token#children
|
DOCS: https://nightly.spacy.io/api/token#children
|
||||||
"""
|
"""
|
||||||
yield from self.lefts
|
yield from self.lefts
|
||||||
yield from self.rights
|
yield from self.rights
|
||||||
|
@ -576,7 +576,7 @@ cdef class Token:
|
||||||
YIELDS (Token): A descendent token such that
|
YIELDS (Token): A descendent token such that
|
||||||
`self.is_ancestor(descendent) or token == self`.
|
`self.is_ancestor(descendent) or token == self`.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/token#subtree
|
DOCS: https://nightly.spacy.io/api/token#subtree
|
||||||
"""
|
"""
|
||||||
for word in self.lefts:
|
for word in self.lefts:
|
||||||
yield from word.subtree
|
yield from word.subtree
|
||||||
|
@ -607,7 +607,7 @@ cdef class Token:
|
||||||
YIELDS (Token): A sequence of ancestor tokens such that
|
YIELDS (Token): A sequence of ancestor tokens such that
|
||||||
`ancestor.is_ancestor(self)`.
|
`ancestor.is_ancestor(self)`.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/token#ancestors
|
DOCS: https://nightly.spacy.io/api/token#ancestors
|
||||||
"""
|
"""
|
||||||
cdef const TokenC* head_ptr = self.c
|
cdef const TokenC* head_ptr = self.c
|
||||||
# Guard against infinite loop, no token can have
|
# Guard against infinite loop, no token can have
|
||||||
|
@ -625,7 +625,7 @@ cdef class Token:
|
||||||
descendant (Token): Another token.
|
descendant (Token): Another token.
|
||||||
RETURNS (bool): Whether this token is the ancestor of the descendant.
|
RETURNS (bool): Whether this token is the ancestor of the descendant.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/token#is_ancestor
|
DOCS: https://nightly.spacy.io/api/token#is_ancestor
|
||||||
"""
|
"""
|
||||||
if self.doc is not descendant.doc:
|
if self.doc is not descendant.doc:
|
||||||
return False
|
return False
|
||||||
|
@ -729,7 +729,7 @@ cdef class Token:
|
||||||
|
|
||||||
RETURNS (tuple): The coordinated tokens.
|
RETURNS (tuple): The coordinated tokens.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/token#conjuncts
|
DOCS: https://nightly.spacy.io/api/token#conjuncts
|
||||||
"""
|
"""
|
||||||
cdef Token word, child
|
cdef Token word, child
|
||||||
if "conjuncts" in self.doc.user_token_hooks:
|
if "conjuncts" in self.doc.user_token_hooks:
|
||||||
|
|
|
@ -44,7 +44,7 @@ cdef class Vectors:
|
||||||
the table need to be assigned - so len(list(vectors.keys())) may be
|
the table need to be assigned - so len(list(vectors.keys())) may be
|
||||||
greater or smaller than vectors.shape[0].
|
greater or smaller than vectors.shape[0].
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vectors
|
DOCS: https://nightly.spacy.io/api/vectors
|
||||||
"""
|
"""
|
||||||
cdef public object name
|
cdef public object name
|
||||||
cdef public object data
|
cdef public object data
|
||||||
|
@ -59,7 +59,7 @@ cdef class Vectors:
|
||||||
keys (iterable): A sequence of keys, aligned with the data.
|
keys (iterable): A sequence of keys, aligned with the data.
|
||||||
name (str): A name to identify the vectors table.
|
name (str): A name to identify the vectors table.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vectors#init
|
DOCS: https://nightly.spacy.io/api/vectors#init
|
||||||
"""
|
"""
|
||||||
self.name = name
|
self.name = name
|
||||||
if data is None:
|
if data is None:
|
||||||
|
@ -83,7 +83,7 @@ cdef class Vectors:
|
||||||
|
|
||||||
RETURNS (tuple): A `(rows, dims)` pair.
|
RETURNS (tuple): A `(rows, dims)` pair.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vectors#shape
|
DOCS: https://nightly.spacy.io/api/vectors#shape
|
||||||
"""
|
"""
|
||||||
return self.data.shape
|
return self.data.shape
|
||||||
|
|
||||||
|
@ -93,7 +93,7 @@ cdef class Vectors:
|
||||||
|
|
||||||
RETURNS (int): The vector size.
|
RETURNS (int): The vector size.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vectors#size
|
DOCS: https://nightly.spacy.io/api/vectors#size
|
||||||
"""
|
"""
|
||||||
return self.data.shape[0] * self.data.shape[1]
|
return self.data.shape[0] * self.data.shape[1]
|
||||||
|
|
||||||
|
@ -103,7 +103,7 @@ cdef class Vectors:
|
||||||
|
|
||||||
RETURNS (bool): `True` if no slots are available for new keys.
|
RETURNS (bool): `True` if no slots are available for new keys.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vectors#is_full
|
DOCS: https://nightly.spacy.io/api/vectors#is_full
|
||||||
"""
|
"""
|
||||||
return self._unset.size() == 0
|
return self._unset.size() == 0
|
||||||
|
|
||||||
|
@ -114,7 +114,7 @@ cdef class Vectors:
|
||||||
|
|
||||||
RETURNS (int): The number of keys in the table.
|
RETURNS (int): The number of keys in the table.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vectors#n_keys
|
DOCS: https://nightly.spacy.io/api/vectors#n_keys
|
||||||
"""
|
"""
|
||||||
return len(self.key2row)
|
return len(self.key2row)
|
||||||
|
|
||||||
|
@ -127,7 +127,7 @@ cdef class Vectors:
|
||||||
key (int): The key to get the vector for.
|
key (int): The key to get the vector for.
|
||||||
RETURNS (ndarray): The vector for the key.
|
RETURNS (ndarray): The vector for the key.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vectors#getitem
|
DOCS: https://nightly.spacy.io/api/vectors#getitem
|
||||||
"""
|
"""
|
||||||
i = self.key2row[key]
|
i = self.key2row[key]
|
||||||
if i is None:
|
if i is None:
|
||||||
|
@ -141,7 +141,7 @@ cdef class Vectors:
|
||||||
key (int): The key to set the vector for.
|
key (int): The key to set the vector for.
|
||||||
vector (ndarray): The vector to set.
|
vector (ndarray): The vector to set.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vectors#setitem
|
DOCS: https://nightly.spacy.io/api/vectors#setitem
|
||||||
"""
|
"""
|
||||||
i = self.key2row[key]
|
i = self.key2row[key]
|
||||||
self.data[i] = vector
|
self.data[i] = vector
|
||||||
|
@ -153,7 +153,7 @@ cdef class Vectors:
|
||||||
|
|
||||||
YIELDS (int): A key in the table.
|
YIELDS (int): A key in the table.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vectors#iter
|
DOCS: https://nightly.spacy.io/api/vectors#iter
|
||||||
"""
|
"""
|
||||||
yield from self.key2row
|
yield from self.key2row
|
||||||
|
|
||||||
|
@ -162,7 +162,7 @@ cdef class Vectors:
|
||||||
|
|
||||||
RETURNS (int): The number of vectors in the data.
|
RETURNS (int): The number of vectors in the data.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vectors#len
|
DOCS: https://nightly.spacy.io/api/vectors#len
|
||||||
"""
|
"""
|
||||||
return self.data.shape[0]
|
return self.data.shape[0]
|
||||||
|
|
||||||
|
@ -172,7 +172,7 @@ cdef class Vectors:
|
||||||
key (int): The key to check.
|
key (int): The key to check.
|
||||||
RETURNS (bool): Whether the key has a vector entry.
|
RETURNS (bool): Whether the key has a vector entry.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vectors#contains
|
DOCS: https://nightly.spacy.io/api/vectors#contains
|
||||||
"""
|
"""
|
||||||
return key in self.key2row
|
return key in self.key2row
|
||||||
|
|
||||||
|
@ -189,7 +189,7 @@ cdef class Vectors:
|
||||||
inplace (bool): Reallocate the memory.
|
inplace (bool): Reallocate the memory.
|
||||||
RETURNS (list): The removed items as a list of `(key, row)` tuples.
|
RETURNS (list): The removed items as a list of `(key, row)` tuples.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vectors#resize
|
DOCS: https://nightly.spacy.io/api/vectors#resize
|
||||||
"""
|
"""
|
||||||
xp = get_array_module(self.data)
|
xp = get_array_module(self.data)
|
||||||
if inplace:
|
if inplace:
|
||||||
|
@ -224,7 +224,7 @@ cdef class Vectors:
|
||||||
|
|
||||||
YIELDS (ndarray): A vector in the table.
|
YIELDS (ndarray): A vector in the table.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vectors#values
|
DOCS: https://nightly.spacy.io/api/vectors#values
|
||||||
"""
|
"""
|
||||||
for row, vector in enumerate(range(self.data.shape[0])):
|
for row, vector in enumerate(range(self.data.shape[0])):
|
||||||
if not self._unset.count(row):
|
if not self._unset.count(row):
|
||||||
|
@ -235,7 +235,7 @@ cdef class Vectors:
|
||||||
|
|
||||||
YIELDS (tuple): A key/vector pair.
|
YIELDS (tuple): A key/vector pair.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vectors#items
|
DOCS: https://nightly.spacy.io/api/vectors#items
|
||||||
"""
|
"""
|
||||||
for key, row in self.key2row.items():
|
for key, row in self.key2row.items():
|
||||||
yield key, self.data[row]
|
yield key, self.data[row]
|
||||||
|
@ -281,7 +281,7 @@ cdef class Vectors:
|
||||||
row (int / None): The row number of a vector to map the key to.
|
row (int / None): The row number of a vector to map the key to.
|
||||||
RETURNS (int): The row the vector was added to.
|
RETURNS (int): The row the vector was added to.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vectors#add
|
DOCS: https://nightly.spacy.io/api/vectors#add
|
||||||
"""
|
"""
|
||||||
# use int for all keys and rows in key2row for more efficient access
|
# use int for all keys and rows in key2row for more efficient access
|
||||||
# and serialization
|
# and serialization
|
||||||
|
@ -368,7 +368,7 @@ cdef class Vectors:
|
||||||
path (str / Path): A path to a directory, which will be created if
|
path (str / Path): A path to a directory, which will be created if
|
||||||
it doesn't exist.
|
it doesn't exist.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vectors#to_disk
|
DOCS: https://nightly.spacy.io/api/vectors#to_disk
|
||||||
"""
|
"""
|
||||||
xp = get_array_module(self.data)
|
xp = get_array_module(self.data)
|
||||||
if xp is numpy:
|
if xp is numpy:
|
||||||
|
@ -396,7 +396,7 @@ cdef class Vectors:
|
||||||
path (str / Path): Directory path, string or Path-like object.
|
path (str / Path): Directory path, string or Path-like object.
|
||||||
RETURNS (Vectors): The modified object.
|
RETURNS (Vectors): The modified object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vectors#from_disk
|
DOCS: https://nightly.spacy.io/api/vectors#from_disk
|
||||||
"""
|
"""
|
||||||
def load_key2row(path):
|
def load_key2row(path):
|
||||||
if path.exists():
|
if path.exists():
|
||||||
|
@ -432,7 +432,7 @@ cdef class Vectors:
|
||||||
exclude (list): String names of serialization fields to exclude.
|
exclude (list): String names of serialization fields to exclude.
|
||||||
RETURNS (bytes): The serialized form of the `Vectors` object.
|
RETURNS (bytes): The serialized form of the `Vectors` object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vectors#to_bytes
|
DOCS: https://nightly.spacy.io/api/vectors#to_bytes
|
||||||
"""
|
"""
|
||||||
def serialize_weights():
|
def serialize_weights():
|
||||||
if hasattr(self.data, "to_bytes"):
|
if hasattr(self.data, "to_bytes"):
|
||||||
|
@ -453,7 +453,7 @@ cdef class Vectors:
|
||||||
exclude (list): String names of serialization fields to exclude.
|
exclude (list): String names of serialization fields to exclude.
|
||||||
RETURNS (Vectors): The `Vectors` object.
|
RETURNS (Vectors): The `Vectors` object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vectors#from_bytes
|
DOCS: https://nightly.spacy.io/api/vectors#from_bytes
|
||||||
"""
|
"""
|
||||||
def deserialize_weights(b):
|
def deserialize_weights(b):
|
||||||
if hasattr(self.data, "from_bytes"):
|
if hasattr(self.data, "from_bytes"):
|
||||||
|
|
|
@ -54,7 +54,7 @@ cdef class Vocab:
|
||||||
instance also provides access to the `StringStore`, and owns underlying
|
instance also provides access to the `StringStore`, and owns underlying
|
||||||
C-data that is shared between `Doc` objects.
|
C-data that is shared between `Doc` objects.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vocab
|
DOCS: https://nightly.spacy.io/api/vocab
|
||||||
"""
|
"""
|
||||||
def __init__(self, lex_attr_getters=None, strings=tuple(), lookups=None,
|
def __init__(self, lex_attr_getters=None, strings=tuple(), lookups=None,
|
||||||
oov_prob=-20., vectors_name=None, writing_system={},
|
oov_prob=-20., vectors_name=None, writing_system={},
|
||||||
|
@ -117,7 +117,7 @@ cdef class Vocab:
|
||||||
available bit will be chosen.
|
available bit will be chosen.
|
||||||
RETURNS (int): The integer ID by which the flag value can be checked.
|
RETURNS (int): The integer ID by which the flag value can be checked.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vocab#add_flag
|
DOCS: https://nightly.spacy.io/api/vocab#add_flag
|
||||||
"""
|
"""
|
||||||
if flag_id == -1:
|
if flag_id == -1:
|
||||||
for bit in range(1, 64):
|
for bit in range(1, 64):
|
||||||
|
@ -201,7 +201,7 @@ cdef class Vocab:
|
||||||
string (unicode): The ID string.
|
string (unicode): The ID string.
|
||||||
RETURNS (bool): Whether the string has an entry in the vocabulary.
|
RETURNS (bool): Whether the string has an entry in the vocabulary.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vocab#contains
|
DOCS: https://nightly.spacy.io/api/vocab#contains
|
||||||
"""
|
"""
|
||||||
cdef hash_t int_key
|
cdef hash_t int_key
|
||||||
if isinstance(key, bytes):
|
if isinstance(key, bytes):
|
||||||
|
@ -218,7 +218,7 @@ cdef class Vocab:
|
||||||
|
|
||||||
YIELDS (Lexeme): An entry in the vocabulary.
|
YIELDS (Lexeme): An entry in the vocabulary.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vocab#iter
|
DOCS: https://nightly.spacy.io/api/vocab#iter
|
||||||
"""
|
"""
|
||||||
cdef attr_t key
|
cdef attr_t key
|
||||||
cdef size_t addr
|
cdef size_t addr
|
||||||
|
@ -241,7 +241,7 @@ cdef class Vocab:
|
||||||
>>> apple = nlp.vocab.strings["apple"]
|
>>> apple = nlp.vocab.strings["apple"]
|
||||||
>>> assert nlp.vocab[apple] == nlp.vocab[u"apple"]
|
>>> assert nlp.vocab[apple] == nlp.vocab[u"apple"]
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vocab#getitem
|
DOCS: https://nightly.spacy.io/api/vocab#getitem
|
||||||
"""
|
"""
|
||||||
cdef attr_t orth
|
cdef attr_t orth
|
||||||
if isinstance(id_or_string, unicode):
|
if isinstance(id_or_string, unicode):
|
||||||
|
@ -309,7 +309,7 @@ cdef class Vocab:
|
||||||
word was mapped to, and `score` the similarity score between the
|
word was mapped to, and `score` the similarity score between the
|
||||||
two words.
|
two words.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vocab#prune_vectors
|
DOCS: https://nightly.spacy.io/api/vocab#prune_vectors
|
||||||
"""
|
"""
|
||||||
xp = get_array_module(self.vectors.data)
|
xp = get_array_module(self.vectors.data)
|
||||||
# Make prob negative so it sorts by rank ascending
|
# Make prob negative so it sorts by rank ascending
|
||||||
|
@ -349,7 +349,7 @@ cdef class Vocab:
|
||||||
and shape determined by the `vocab.vectors` instance. Usually, a
|
and shape determined by the `vocab.vectors` instance. Usually, a
|
||||||
numpy ndarray of shape (300,) and dtype float32.
|
numpy ndarray of shape (300,) and dtype float32.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vocab#get_vector
|
DOCS: https://nightly.spacy.io/api/vocab#get_vector
|
||||||
"""
|
"""
|
||||||
if isinstance(orth, str):
|
if isinstance(orth, str):
|
||||||
orth = self.strings.add(orth)
|
orth = self.strings.add(orth)
|
||||||
|
@ -396,7 +396,7 @@ cdef class Vocab:
|
||||||
orth (int / unicode): The word.
|
orth (int / unicode): The word.
|
||||||
vector (numpy.ndarray[ndim=1, dtype='float32']): The vector to set.
|
vector (numpy.ndarray[ndim=1, dtype='float32']): The vector to set.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vocab#set_vector
|
DOCS: https://nightly.spacy.io/api/vocab#set_vector
|
||||||
"""
|
"""
|
||||||
if isinstance(orth, str):
|
if isinstance(orth, str):
|
||||||
orth = self.strings.add(orth)
|
orth = self.strings.add(orth)
|
||||||
|
@ -418,7 +418,7 @@ cdef class Vocab:
|
||||||
orth (int / unicode): The word.
|
orth (int / unicode): The word.
|
||||||
RETURNS (bool): Whether the word has a vector.
|
RETURNS (bool): Whether the word has a vector.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vocab#has_vector
|
DOCS: https://nightly.spacy.io/api/vocab#has_vector
|
||||||
"""
|
"""
|
||||||
if isinstance(orth, str):
|
if isinstance(orth, str):
|
||||||
orth = self.strings.add(orth)
|
orth = self.strings.add(orth)
|
||||||
|
@ -431,7 +431,7 @@ cdef class Vocab:
|
||||||
it doesn't exist.
|
it doesn't exist.
|
||||||
exclude (list): String names of serialization fields to exclude.
|
exclude (list): String names of serialization fields to exclude.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vocab#to_disk
|
DOCS: https://nightly.spacy.io/api/vocab#to_disk
|
||||||
"""
|
"""
|
||||||
path = util.ensure_path(path)
|
path = util.ensure_path(path)
|
||||||
if not path.exists():
|
if not path.exists():
|
||||||
|
@ -452,7 +452,7 @@ cdef class Vocab:
|
||||||
exclude (list): String names of serialization fields to exclude.
|
exclude (list): String names of serialization fields to exclude.
|
||||||
RETURNS (Vocab): The modified `Vocab` object.
|
RETURNS (Vocab): The modified `Vocab` object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vocab#from_disk
|
DOCS: https://nightly.spacy.io/api/vocab#from_disk
|
||||||
"""
|
"""
|
||||||
path = util.ensure_path(path)
|
path = util.ensure_path(path)
|
||||||
getters = ["strings", "vectors"]
|
getters = ["strings", "vectors"]
|
||||||
|
@ -477,7 +477,7 @@ cdef class Vocab:
|
||||||
exclude (list): String names of serialization fields to exclude.
|
exclude (list): String names of serialization fields to exclude.
|
||||||
RETURNS (bytes): The serialized form of the `Vocab` object.
|
RETURNS (bytes): The serialized form of the `Vocab` object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vocab#to_bytes
|
DOCS: https://nightly.spacy.io/api/vocab#to_bytes
|
||||||
"""
|
"""
|
||||||
def deserialize_vectors():
|
def deserialize_vectors():
|
||||||
if self.vectors is None:
|
if self.vectors is None:
|
||||||
|
@ -499,7 +499,7 @@ cdef class Vocab:
|
||||||
exclude (list): String names of serialization fields to exclude.
|
exclude (list): String names of serialization fields to exclude.
|
||||||
RETURNS (Vocab): The `Vocab` object.
|
RETURNS (Vocab): The `Vocab` object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vocab#from_bytes
|
DOCS: https://nightly.spacy.io/api/vocab#from_bytes
|
||||||
"""
|
"""
|
||||||
def serialize_vectors(b):
|
def serialize_vectors(b):
|
||||||
if self.vectors is None:
|
if self.vectors is None:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user