From ab1bb421edcedfcbad884fa410f891883882d88a Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Fri, 4 Sep 2020 12:58:50 +0200 Subject: [PATCH] Update docs links in codebase --- spacy/cli/_util.py | 2 +- spacy/cli/convert.py | 4 +- spacy/cli/debug_config.py | 2 + spacy/cli/debug_data.py | 2 + spacy/cli/debug_model.py | 2 + spacy/cli/download.py | 4 +- spacy/cli/evaluate.py | 15 +++--- spacy/cli/info.py | 2 + spacy/cli/init_config.py | 4 ++ spacy/cli/init_model.py | 4 +- spacy/cli/package.py | 2 + spacy/cli/pretrain.py | 2 + spacy/cli/profile.py | 2 + spacy/cli/project/assets.py | 2 + spacy/cli/project/clone.py | 2 + spacy/cli/project/document.py | 2 + spacy/cli/project/dvc.py | 5 +- spacy/cli/project/pull.py | 4 +- spacy/cli/project/push.py | 9 ++-- spacy/cli/project/run.py | 2 + spacy/cli/train.py | 2 + spacy/cli/validate.py | 2 + spacy/displacy/__init__.py | 12 ++--- spacy/errors.py | 12 ++--- spacy/gold/converters/conll_ner2docs.py | 2 +- spacy/gold/converters/iob2docs.py | 2 +- spacy/gold/corpus.py | 4 +- spacy/kb.pyx | 4 +- spacy/language.py | 54 ++++++++++----------- spacy/lexeme.pyx | 2 +- spacy/lookups.py | 28 +++++------ spacy/matcher/matcher.pyx | 4 +- spacy/matcher/phrasematcher.pyx | 16 +++---- spacy/pipeline/attributeruler.py | 24 +++++----- spacy/pipeline/dep_parser.pyx | 4 +- spacy/pipeline/entity_linker.py | 20 ++++---- spacy/pipeline/entityruler.py | 24 +++++----- spacy/pipeline/functions.py | 6 +-- spacy/pipeline/lemmatizer.py | 28 +++++------ spacy/pipeline/morphologizer.pyx | 20 ++++---- spacy/pipeline/ner.pyx | 4 +- spacy/pipeline/pipe.pyx | 36 +++++++------- spacy/pipeline/sentencizer.pyx | 18 +++---- spacy/pipeline/senter.pyx | 20 ++++---- spacy/pipeline/simple_ner.py | 2 +- spacy/pipeline/tagger.pyx | 34 +++++++------- spacy/pipeline/textcat.py | 24 +++++----- spacy/pipeline/tok2vec.py | 14 +++--- spacy/scorer.py | 14 +++--- spacy/strings.pyx | 2 +- spacy/tokenizer.pyx | 26 +++++------ spacy/tokens/_retokenize.pyx | 8 ++-- spacy/tokens/_serialize.py | 16 +++---- spacy/tokens/doc.pyx | 62 ++++++++++++------------- spacy/tokens/span.pyx | 50 ++++++++++---------- spacy/tokens/token.pyx | 50 ++++++++++---------- spacy/vectors.pyx | 38 +++++++-------- spacy/vocab.pyx | 26 +++++------ 58 files changed, 416 insertions(+), 371 deletions(-) diff --git a/spacy/cli/_util.py b/spacy/cli/_util.py index 6a24a4ba4..0ecb5ad8f 100644 --- a/spacy/cli/_util.py +++ b/spacy/cli/_util.py @@ -25,7 +25,7 @@ COMMAND = "python -m spacy" NAME = "spacy" HELP = """spaCy Command-line Interface -DOCS: https://spacy.io/api/cli +DOCS: https://nightly.spacy.io/api/cli """ PROJECT_HELP = f"""Command-line interface for spaCy projects and templates. You'd typically start by cloning a project template to a local directory and diff --git a/spacy/cli/convert.py b/spacy/cli/convert.py index 84040a712..ade5a3ad4 100644 --- a/spacy/cli/convert.py +++ b/spacy/cli/convert.py @@ -61,6 +61,8 @@ def convert_cli( If no output_dir is specified and the output format is JSON, the data is written to stdout, so you can pipe them forward to a JSON file: $ spacy convert some_file.conllu --file-type json > some_file.json + + DOCS: https://nightly.spacy.io/api/cli#convert """ if isinstance(file_type, FileTypes): # We get an instance of the FileTypes from the CLI so we need its string value @@ -261,6 +263,6 @@ def _get_converter(msg, converter, input_path): msg.warn( "Can't automatically detect NER format. " "Conversion may not succeed. 
" - "See https://spacy.io/api/cli#convert" + "See https://nightly.spacy.io/api/cli#convert" ) return converter diff --git a/spacy/cli/debug_config.py b/spacy/cli/debug_config.py index 2944cd364..7930d0674 100644 --- a/spacy/cli/debug_config.py +++ b/spacy/cli/debug_config.py @@ -31,6 +31,8 @@ def debug_config_cli( Similar as with the 'train' command, you can override settings from the config as command line options. For instance, --training.batch_size 128 overrides the value of "batch_size" in the block "[training]". + + DOCS: https://nightly.spacy.io/api/cli#debug-config """ overrides = parse_config_overrides(ctx.args) import_code(code_path) diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py index a4269796f..75a81e6f5 100644 --- a/spacy/cli/debug_data.py +++ b/spacy/cli/debug_data.py @@ -47,6 +47,8 @@ def debug_data_cli( Analyze, debug and validate your training and development data. Outputs useful stats, and can help you find problems like invalid entity annotations, cyclic dependencies, low data labels and more. + + DOCS: https://nightly.spacy.io/api/cli#debug-data """ if ctx.command.name == "debug-data": msg.warn( diff --git a/spacy/cli/debug_model.py b/spacy/cli/debug_model.py index ed8d54655..5bd4e008f 100644 --- a/spacy/cli/debug_model.py +++ b/spacy/cli/debug_model.py @@ -30,6 +30,8 @@ def debug_model_cli( """ Analyze a Thinc model implementation. Includes checks for internal structure and activations during training. + + DOCS: https://nightly.spacy.io/api/cli#debug-model """ if use_gpu >= 0: msg.info("Using GPU") diff --git a/spacy/cli/download.py b/spacy/cli/download.py index 3d5e0a765..036aeab17 100644 --- a/spacy/cli/download.py +++ b/spacy/cli/download.py @@ -28,7 +28,7 @@ def download_cli( additional arguments provided to this command will be passed to `pip install` on package installation. - DOCS: https://spacy.io/api/cli#download + DOCS: https://nightly.spacy.io/api/cli#download AVAILABLE PACKAGES: https://spacy.io/models """ download(model, direct, *ctx.args) @@ -77,7 +77,7 @@ def get_compatibility() -> dict: f"Couldn't fetch compatibility table. Please find a package for your spaCy " f"installation (v{about.__version__}), and download it manually. " f"For more details, see the documentation: " - f"https://spacy.io/usage/models", + f"https://nightly.spacy.io/usage/models", exits=1, ) comp_table = r.json() diff --git a/spacy/cli/evaluate.py b/spacy/cli/evaluate.py index 3898c89a1..c5cbab09a 100644 --- a/spacy/cli/evaluate.py +++ b/spacy/cli/evaluate.py @@ -27,12 +27,15 @@ def evaluate_cli( ): """ Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation - data in the binary .spacy format. The --gold-preproc option sets up the evaluation - examples with gold-standard sentences and tokens for the predictions. Gold - preprocessing helps the annotations align to the tokenization, and may - result in sequences of more consistent length. However, it may reduce - runtime accuracy due to train/test skew. To render a sample of dependency - parses in a HTML file, set as output directory as the displacy_path argument. + data in the binary .spacy format. The --gold-preproc option sets up the + evaluation examples with gold-standard sentences and tokens for the + predictions. Gold preprocessing helps the annotations align to the + tokenization, and may result in sequences of more consistent length. However, + it may reduce runtime accuracy due to train/test skew. 
To render a sample of + dependency parses in a HTML file, set as output directory as the + displacy_path argument. + + DOCS: https://nightly.spacy.io/api/cli#evaluate """ evaluate( model, diff --git a/spacy/cli/info.py b/spacy/cli/info.py index 98cd042a8..2b87163c2 100644 --- a/spacy/cli/info.py +++ b/spacy/cli/info.py @@ -21,6 +21,8 @@ def info_cli( Print info about spaCy installation. If a pipeline is speficied as an argument, print its meta information. Flag --markdown prints details in Markdown for easy copy-pasting to GitHub issues. + + DOCS: https://nightly.spacy.io/api/cli#info """ info(model, markdown=markdown, silent=silent) diff --git a/spacy/cli/init_config.py b/spacy/cli/init_config.py index b75718a2e..584ca7f64 100644 --- a/spacy/cli/init_config.py +++ b/spacy/cli/init_config.py @@ -37,6 +37,8 @@ def init_config_cli( specified via the CLI arguments, this command generates a config with the optimal settings for you use case. This includes the choice of architecture, pretrained weights and related hyperparameters. + + DOCS: https://nightly.spacy.io/api/cli#init-config """ if isinstance(optimize, Optimizations): # instance of enum from the CLI optimize = optimize.value @@ -59,6 +61,8 @@ def init_fill_config_cli( functions for their default values and update the base config. This command can be used with a config generated via the training quickstart widget: https://nightly.spacy.io/usage/training#quickstart + + DOCS: https://nightly.spacy.io/api/cli#init-fill-config """ fill_config(output_file, base_path, pretraining=pretraining, diff=diff) diff --git a/spacy/cli/init_model.py b/spacy/cli/init_model.py index 4fd3025fd..5f06fd895 100644 --- a/spacy/cli/init_model.py +++ b/spacy/cli/init_model.py @@ -28,7 +28,7 @@ except ImportError: DEFAULT_OOV_PROB = -20 -@init_cli.command("vectors") +@init_cli.command("vocab") @app.command( "init-model", context_settings={"allow_extra_args": True, "ignore_unknown_options": True}, @@ -54,6 +54,8 @@ def init_model_cli( Create a new blank pipeline directory with vocab and vectors from raw data. If vectors are provided in Word2Vec format, they can be either a .txt or zipped as a .zip or .tar.gz. + + DOCS: https://nightly.spacy.io/api/cli#init-vocab """ if ctx.command.name == "init-model": msg.warn( diff --git a/spacy/cli/package.py b/spacy/cli/package.py index f464c97e8..c457b3e17 100644 --- a/spacy/cli/package.py +++ b/spacy/cli/package.py @@ -31,6 +31,8 @@ def package_cli( the existing values will be used as the defaults in the command-line prompt. After packaging, "python setup.py sdist" is run in the package directory, which will create a .tar.gz archive that can be installed via "pip install". + + DOCS: https://nightly.spacy.io/api/cli#package """ package( input_dir, diff --git a/spacy/cli/pretrain.py b/spacy/cli/pretrain.py index fe6bfa92e..828e5f08e 100644 --- a/spacy/cli/pretrain.py +++ b/spacy/cli/pretrain.py @@ -57,6 +57,8 @@ def pretrain_cli( To load the weights back in during 'spacy train', you need to ensure all settings are the same between pretraining and training. Ideally, this is done by using the same config file for both commands. + + DOCS: https://nightly.spacy.io/api/cli#pretrain """ overrides = parse_config_overrides(ctx.args) import_code(code_path) diff --git a/spacy/cli/profile.py b/spacy/cli/profile.py index 1b995f4bc..43226730d 100644 --- a/spacy/cli/profile.py +++ b/spacy/cli/profile.py @@ -29,6 +29,8 @@ def profile_cli( Input should be formatted as one JSON object per line with a key "text". 
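For example, one line of input could look like this (the text is a placeholder):
{"text": "This is a sentence."}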
It can either be provided as a JSONL file, or be read from sys.sytdin. If no input file is specified, the IMDB dataset is loaded via Thinc. + + DOCS: https://nightly.spacy.io/api/cli#debug-profile """ if ctx.parent.command.name == NAME: # called as top-level command msg.warn( diff --git a/spacy/cli/project/assets.py b/spacy/cli/project/assets.py index e33a82acc..2b623675d 100644 --- a/spacy/cli/project/assets.py +++ b/spacy/cli/project/assets.py @@ -20,6 +20,8 @@ def project_assets_cli( defined in the "assets" section of the project.yml. If a checksum is provided in the project.yml, the file is only downloaded if no local file with the same checksum exists. + + DOCS: https://nightly.spacy.io/api/cli#project-assets """ project_assets(project_dir) diff --git a/spacy/cli/project/clone.py b/spacy/cli/project/clone.py index 751c389bc..a419feb0f 100644 --- a/spacy/cli/project/clone.py +++ b/spacy/cli/project/clone.py @@ -22,6 +22,8 @@ def project_clone_cli( only download the files from the given subdirectory. The GitHub repo defaults to the official spaCy template repo, but can be customized (including using a private repo). + + DOCS: https://nightly.spacy.io/api/cli#project-clone """ if dest is None: dest = Path.cwd() / name diff --git a/spacy/cli/project/document.py b/spacy/cli/project/document.py index ab345ecd8..d0265029a 100644 --- a/spacy/cli/project/document.py +++ b/spacy/cli/project/document.py @@ -43,6 +43,8 @@ def project_document_cli( hidden markers are added so you can add custom content before or after the auto-generated section and only the auto-generated docs will be replaced when you re-run the command. + + DOCS: https://nightly.spacy.io/api/cli#project-document """ project_document(project_dir, output_file, no_emoji=no_emoji) diff --git a/spacy/cli/project/dvc.py b/spacy/cli/project/dvc.py index de0480bad..541253234 100644 --- a/spacy/cli/project/dvc.py +++ b/spacy/cli/project/dvc.py @@ -31,7 +31,10 @@ def project_update_dvc_cli( """Auto-generate Data Version Control (DVC) config. A DVC project can only define one pipeline, so you need to specify one workflow defined in the project.yml. If no workflow is specified, the first defined - workflow is used. The DVC config will only be updated if the project.yml changed. + workflow is used. The DVC config will only be updated if the project.yml + changed. + + DOCS: https://nightly.spacy.io/api/cli#project-dvc """ project_update_dvc(project_dir, workflow, verbose=verbose, force=force) diff --git a/spacy/cli/project/pull.py b/spacy/cli/project/pull.py index 7b597141f..655e2f459 100644 --- a/spacy/cli/project/pull.py +++ b/spacy/cli/project/pull.py @@ -17,7 +17,9 @@ def project_pull_cli( """Retrieve available precomputed outputs from a remote storage. You can alias remotes in your project.yml by mapping them to storage paths. A storage can be anything that the smart-open library can upload to, e.g. - gcs, aws, ssh, local directories etc + AWS, Google Cloud Storage, SSH, local directories etc. + + DOCS: https://nightly.spacy.io/api/cli#project-pull """ for url, output_path in project_pull(project_dir, remote): if url is not None: diff --git a/spacy/cli/project/push.py b/spacy/cli/project/push.py index e09ee6e1a..fcee2231a 100644 --- a/spacy/cli/project/push.py +++ b/spacy/cli/project/push.py @@ -13,9 +13,12 @@ def project_push_cli( project_dir: Path = Arg(Path.cwd(), help="Location of project directory. Defaults to current working directory.", exists=True, file_okay=False), # fmt: on ): - """Persist outputs to a remote storage. 
You can alias remotes in your project.yml - by mapping them to storage paths. A storage can be anything that the smart-open - library can upload to, e.g. gcs, aws, ssh, local directories etc + """Persist outputs to a remote storage. You can alias remotes in your + project.yml by mapping them to storage paths. A storage can be anything that + the smart-open library can upload to, e.g. AWS, Google Cloud Storage, SSH, + local directories etc. + + DOCS: https://nightly.spacy.io/api/cli#project-push """ for output_path, url in project_push(project_dir, remote): if url is None: diff --git a/spacy/cli/project/run.py b/spacy/cli/project/run.py index bacd7f04b..eb7b8cc5b 100644 --- a/spacy/cli/project/run.py +++ b/spacy/cli/project/run.py @@ -24,6 +24,8 @@ def project_run_cli( name is specified, all commands in the workflow are run, in order. If commands define dependencies and/or outputs, they will only be re-run if state has changed. + + DOCS: https://nightly.spacy.io/api/cli#project-run """ if show_help or not subcommand: print_run_help(project_dir, subcommand) diff --git a/spacy/cli/train.py b/spacy/cli/train.py index 5377f7f8f..6be47fa39 100644 --- a/spacy/cli/train.py +++ b/spacy/cli/train.py @@ -44,6 +44,8 @@ def train_cli( lets you pass in a Python file that's imported before training. It can be used to register custom functions and architectures that can then be referenced in the config. + + DOCS: https://nightly.spacy.io/api/cli#train """ util.logger.setLevel(logging.DEBUG if verbose else logging.ERROR) verify_cli_args(config_path, output_path) diff --git a/spacy/cli/validate.py b/spacy/cli/validate.py index a1e05fdcd..9a75ed6f3 100644 --- a/spacy/cli/validate.py +++ b/spacy/cli/validate.py @@ -16,6 +16,8 @@ def validate_cli(): Validate the currently installed pipeline packages and spaCy version. Checks if the installed packages are compatible and shows upgrade instructions if available. Should be run after `pip install -U spacy`. + + DOCS: https://nightly.spacy.io/api/cli#validate """ validate() diff --git a/spacy/displacy/__init__.py b/spacy/displacy/__init__.py index 2df2bd61c..0e80c3b5f 100644 --- a/spacy/displacy/__init__.py +++ b/spacy/displacy/__init__.py @@ -1,8 +1,8 @@ """ spaCy's built in visualization suite for dependencies and named entities. -DOCS: https://spacy.io/api/top-level#displacy -USAGE: https://spacy.io/usage/visualizers +DOCS: https://nightly.spacy.io/api/top-level#displacy +USAGE: https://nightly.spacy.io/usage/visualizers """ from typing import Union, Iterable, Optional, Dict, Any, Callable import warnings @@ -37,8 +37,8 @@ def render( manual (bool): Don't parse `Doc` and instead expect a dict/list of dicts. RETURNS (str): Rendered HTML markup. - DOCS: https://spacy.io/api/top-level#displacy.render - USAGE: https://spacy.io/usage/visualizers + DOCS: https://nightly.spacy.io/api/top-level#displacy.render + USAGE: https://nightly.spacy.io/usage/visualizers """ factories = { "dep": (DependencyRenderer, parse_deps), @@ -88,8 +88,8 @@ def serve( port (int): Port to serve visualisation. host (str): Host to serve visualisation. 
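A minimal sketch, assuming `nlp` is an already loaded pipeline:
>>> doc = nlp("This is a sentence.")
>>> displacy.serve(doc, style="dep")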
- DOCS: https://spacy.io/api/top-level#displacy.serve - USAGE: https://spacy.io/usage/visualizers + DOCS: https://nightly.spacy.io/api/top-level#displacy.serve + USAGE: https://nightly.spacy.io/usage/visualizers """ from wsgiref import simple_server diff --git a/spacy/errors.py b/spacy/errors.py index 165714d9e..f3058d2b4 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -22,7 +22,7 @@ class Warnings: "generate a dependency visualization for it. Make sure the Doc " "was processed with a model that supports dependency parsing, and " "not just a language class like `English()`. For more info, see " - "the docs:\nhttps://spacy.io/usage/models") + "the docs:\nhttps://nightly.spacy.io/usage/models") W006 = ("No entities to visualize found in Doc object. If this is " "surprising to you, make sure the Doc was processed using a model " "that supports named entity recognition, and check the `doc.ents` " @@ -147,7 +147,7 @@ class Errors: E010 = ("Word vectors set to length 0. This may be because you don't have " "a model installed or loaded, or because your model doesn't " "include word vectors. For more info, see the docs:\n" - "https://spacy.io/usage/models") + "https://nightly.spacy.io/usage/models") E011 = ("Unknown operator: '{op}'. Options: {opts}") E012 = ("Cannot add pattern for zero tokens to matcher.\nKey: {key}") E014 = ("Unknown tag ID: {tag}") @@ -181,7 +181,7 @@ class Errors: "list of (unicode, bool) tuples. Got bytes instance: {value}") E029 = ("noun_chunks requires the dependency parse, which requires a " "statistical model to be installed and loaded. For more info, see " - "the documentation:\nhttps://spacy.io/usage/models") + "the documentation:\nhttps://nightly.spacy.io/usage/models") E030 = ("Sentence boundaries unset. You can add the 'sentencizer' " "component to the pipeline with: " "nlp.add_pipe('sentencizer'). " @@ -294,7 +294,7 @@ class Errors: E102 = ("Can't merge non-disjoint spans. '{token}' is already part of " "tokens to merge. If you want to find the longest non-overlapping " "spans, you can use the util.filter_spans helper:\n" - "https://spacy.io/api/top-level#util.filter_spans") + "https://nightly.spacy.io/api/top-level#util.filter_spans") E103 = ("Trying to set conflicting doc.ents: '{span1}' and '{span2}'. A " "token can only be part of one entity, so make sure the entities " "you're setting don't overlap.") @@ -364,10 +364,10 @@ class Errors: E137 = ("Expected 'dict' type, but got '{type}' from '{line}'. Make sure " "to provide a valid JSON object as input with either the `text` " "or `tokens` key. For more info, see the docs:\n" - "https://spacy.io/api/cli#pretrain-jsonl") + "https://nightly.spacy.io/api/cli#pretrain-jsonl") E138 = ("Invalid JSONL format for raw text '{text}'. Make sure the input " "includes either the `text` or `tokens` key. For more info, see " - "the docs:\nhttps://spacy.io/api/cli#pretrain-jsonl") + "the docs:\nhttps://nightly.spacy.io/api/cli#pretrain-jsonl") E139 = ("Knowledge Base for component '{name}' is empty. Use the methods " "kb.add_entity and kb.add_alias to add entries.") E140 = ("The list of entities, prior probabilities and entity vectors " diff --git a/spacy/gold/converters/conll_ner2docs.py b/spacy/gold/converters/conll_ner2docs.py index 0b348142a..c04a77f07 100644 --- a/spacy/gold/converters/conll_ner2docs.py +++ b/spacy/gold/converters/conll_ner2docs.py @@ -106,7 +106,7 @@ def conll_ner2docs( raise ValueError( "The token-per-line NER file is not formatted correctly. " "Try checking whitespace and delimiters. 
See " - "https://spacy.io/api/cli#convert" + "https://nightly.spacy.io/api/cli#convert" ) length = len(cols[0]) words.extend(cols[0]) diff --git a/spacy/gold/converters/iob2docs.py b/spacy/gold/converters/iob2docs.py index c7e243397..eebf1266b 100644 --- a/spacy/gold/converters/iob2docs.py +++ b/spacy/gold/converters/iob2docs.py @@ -44,7 +44,7 @@ def read_iob(raw_sents, vocab, n_sents): sent_tags = ["-"] * len(sent_words) else: raise ValueError( - "The sentence-per-line IOB/IOB2 file is not formatted correctly. Try checking whitespace and delimiters. See https://spacy.io/api/cli#convert" + "The sentence-per-line IOB/IOB2 file is not formatted correctly. Try checking whitespace and delimiters. See https://nightly.spacy.io/api/cli#convert" ) words.extend(sent_words) tags.extend(sent_tags) diff --git a/spacy/gold/corpus.py b/spacy/gold/corpus.py index 1046da1e6..545f01eaa 100644 --- a/spacy/gold/corpus.py +++ b/spacy/gold/corpus.py @@ -38,7 +38,7 @@ class Corpus: limit (int): Limit corpus to a subset of examples, e.g. for debugging. Defaults to 0, which indicates no limit. - DOCS: https://spacy.io/api/corpus + DOCS: https://nightly.spacy.io/api/corpus """ def __init__( @@ -83,7 +83,7 @@ class Corpus: nlp (Language): The current nlp object. YIELDS (Example): The examples. - DOCS: https://spacy.io/api/corpus#call + DOCS: https://nightly.spacy.io/api/corpus#call """ ref_docs = self.read_docbin(nlp.vocab, self.walk_corpus(self.path)) if self.gold_preproc: diff --git a/spacy/kb.pyx b/spacy/kb.pyx index 3b8017a0c..b24ed3a20 100644 --- a/spacy/kb.pyx +++ b/spacy/kb.pyx @@ -21,7 +21,7 @@ cdef class Candidate: algorithm which will disambiguate the various candidates to the correct one. Each candidate (alias, entity) pair is assigned to a certain prior probability. - DOCS: https://spacy.io/api/kb/#candidate_init + DOCS: https://nightly.spacy.io/api/kb/#candidate_init """ def __init__(self, KnowledgeBase kb, entity_hash, entity_freq, entity_vector, alias_hash, prior_prob): @@ -79,7 +79,7 @@ cdef class KnowledgeBase: """A `KnowledgeBase` instance stores unique identifiers for entities and their textual aliases, to support entity linking of named entities to real-world concepts. - DOCS: https://spacy.io/api/kb + DOCS: https://nightly.spacy.io/api/kb """ def __init__(self, Vocab vocab, entity_vector_length): diff --git a/spacy/language.py b/spacy/language.py index 1a40bec61..17ca020ca 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -95,7 +95,7 @@ class Language: object and processing pipeline. lang (str): Two-letter language ID, i.e. ISO code. - DOCS: https://spacy.io/api/language + DOCS: https://nightly.spacy.io/api/language """ Defaults = BaseDefaults @@ -130,7 +130,7 @@ class Language: create_tokenizer (Callable): Function that takes the nlp object and returns a tokenizer. - DOCS: https://spacy.io/api/language#init + DOCS: https://nightly.spacy.io/api/language#init """ # We're only calling this to import all factories provided via entry # points. The factory decorator applied to these functions takes care @@ -185,7 +185,7 @@ class Language: RETURNS (Dict[str, Any]): The meta. - DOCS: https://spacy.io/api/language#meta + DOCS: https://nightly.spacy.io/api/language#meta """ spacy_version = util.get_model_version_range(about.__version__) if self.vocab.lang: @@ -225,7 +225,7 @@ class Language: RETURNS (thinc.api.Config): The config. 
- DOCS: https://spacy.io/api/language#config + DOCS: https://nightly.spacy.io/api/language#config """ self._config.setdefault("nlp", {}) self._config.setdefault("training", {}) @@ -433,7 +433,7 @@ class Language: will be combined and normalized for the whole pipeline. func (Optional[Callable]): Factory function if not used as a decorator. - DOCS: https://spacy.io/api/language#factory + DOCS: https://nightly.spacy.io/api/language#factory """ if not isinstance(name, str): raise ValueError(Errors.E963.format(decorator="factory")) @@ -513,7 +513,7 @@ class Language: Used for pipeline analysis. func (Optional[Callable]): Factory function if not used as a decorator. - DOCS: https://spacy.io/api/language#component + DOCS: https://nightly.spacy.io/api/language#component """ if name is not None and not isinstance(name, str): raise ValueError(Errors.E963.format(decorator="component")) @@ -579,7 +579,7 @@ class Language: name (str): Name of pipeline component to get. RETURNS (callable): The pipeline component. - DOCS: https://spacy.io/api/language#get_pipe + DOCS: https://nightly.spacy.io/api/language#get_pipe """ for pipe_name, component in self._components: if pipe_name == name: @@ -608,7 +608,7 @@ class Language: arguments and types expected by the factory. RETURNS (Callable[[Doc], Doc]): The pipeline component. - DOCS: https://spacy.io/api/language#create_pipe + DOCS: https://nightly.spacy.io/api/language#create_pipe """ name = name if name is not None else factory_name if not isinstance(config, dict): @@ -722,7 +722,7 @@ class Language: arguments and types expected by the factory. RETURNS (Callable[[Doc], Doc]): The pipeline component. - DOCS: https://spacy.io/api/language#add_pipe + DOCS: https://nightly.spacy.io/api/language#add_pipe """ if not isinstance(factory_name, str): bad_val = repr(factory_name) @@ -820,7 +820,7 @@ class Language: name (str): Name of the component. RETURNS (bool): Whether a component of the name exists in the pipeline. - DOCS: https://spacy.io/api/language#has_pipe + DOCS: https://nightly.spacy.io/api/language#has_pipe """ return name in self.pipe_names @@ -841,7 +841,7 @@ class Language: validate (bool): Whether to validate the component config against the arguments and types expected by the factory. - DOCS: https://spacy.io/api/language#replace_pipe + DOCS: https://nightly.spacy.io/api/language#replace_pipe """ if name not in self.pipe_names: raise ValueError(Errors.E001.format(name=name, opts=self.pipe_names)) @@ -870,7 +870,7 @@ class Language: old_name (str): Name of the component to rename. new_name (str): New name of the component. - DOCS: https://spacy.io/api/language#rename_pipe + DOCS: https://nightly.spacy.io/api/language#rename_pipe """ if old_name not in self.component_names: raise ValueError( @@ -891,7 +891,7 @@ class Language: name (str): Name of the component to remove. RETURNS (tuple): A `(name, component)` tuple of the removed component. - DOCS: https://spacy.io/api/language#remove_pipe + DOCS: https://nightly.spacy.io/api/language#remove_pipe """ if name not in self.component_names: raise ValueError(Errors.E001.format(name=name, opts=self.component_names)) @@ -944,7 +944,7 @@ class Language: keyword arguments for specific components. RETURNS (Doc): A container for accessing the annotations. 
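For example, assuming `nlp` is a loaded pipeline:
>>> doc = nlp("An example sentence.")
>>> [token.text for token in doc]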
- DOCS: https://spacy.io/api/language#call + DOCS: https://nightly.spacy.io/api/language#call """ if len(text) > self.max_length: raise ValueError( @@ -993,7 +993,7 @@ class Language: disable (str or iterable): The name(s) of the pipes to disable enable (str or iterable): The name(s) of the pipes to enable - all others will be disabled - DOCS: https://spacy.io/api/language#select_pipes + DOCS: https://nightly.spacy.io/api/language#select_pipes """ if enable is None and disable is None: raise ValueError(Errors.E991) @@ -1044,7 +1044,7 @@ class Language: exclude (Iterable[str]): Names of components that shouldn't be updated. RETURNS (Dict[str, float]): The updated losses dictionary - DOCS: https://spacy.io/api/language#update + DOCS: https://nightly.spacy.io/api/language#update """ if _ is not None: raise ValueError(Errors.E989) @@ -1106,7 +1106,7 @@ class Language: >>> raw_batch = [Example.from_dict(nlp.make_doc(text), {}) for text in next(raw_text_batches)] >>> nlp.rehearse(raw_batch) - DOCS: https://spacy.io/api/language#rehearse + DOCS: https://nightly.spacy.io/api/language#rehearse """ if len(examples) == 0: return @@ -1153,7 +1153,7 @@ class Language: create_optimizer if it doesn't exist. RETURNS (thinc.api.Optimizer): The optimizer. - DOCS: https://spacy.io/api/language#begin_training + DOCS: https://nightly.spacy.io/api/language#begin_training """ # TODO: throw warning when get_gold_tuples is provided instead of get_examples if get_examples is None: @@ -1200,7 +1200,7 @@ class Language: sgd (Optional[Optimizer]): An optimizer. RETURNS (Optimizer): The optimizer. - DOCS: https://spacy.io/api/language#resume_training + DOCS: https://nightly.spacy.io/api/language#resume_training """ if device >= 0: # TODO: do we need this here? require_gpu(device) @@ -1236,7 +1236,7 @@ class Language: for the scorer. RETURNS (Scorer): The scorer containing the evaluation results. - DOCS: https://spacy.io/api/language#evaluate + DOCS: https://nightly.spacy.io/api/language#evaluate """ validate_examples(examples, "Language.evaluate") if component_cfg is None: @@ -1286,7 +1286,7 @@ class Language: >>> with nlp.use_params(optimizer.averages): >>> nlp.to_disk("/tmp/checkpoint") - DOCS: https://spacy.io/api/language#use_params + DOCS: https://nightly.spacy.io/api/language#use_params """ if not params: yield @@ -1333,7 +1333,7 @@ class Language: n_process (int): Number of processors to process texts. If -1, set `multiprocessing.cpu_count()`. YIELDS (Doc): Documents in the order of the original text. - DOCS: https://spacy.io/api/language#pipe + DOCS: https://nightly.spacy.io/api/language#pipe """ if n_process == -1: n_process = mp.cpu_count() @@ -1469,7 +1469,7 @@ class Language: the types expected by the factory. RETURNS (Language): The initialized Language class. - DOCS: https://spacy.io/api/language#from_config + DOCS: https://nightly.spacy.io/api/language#from_config """ if auto_fill: config = Config( @@ -1582,7 +1582,7 @@ class Language: it doesn't exist. exclude (list): Names of components or serialization fields to exclude. - DOCS: https://spacy.io/api/language#to_disk + DOCS: https://nightly.spacy.io/api/language#to_disk """ path = util.ensure_path(path) serializers = {} @@ -1611,7 +1611,7 @@ class Language: exclude (list): Names of components or serialization fields to exclude. RETURNS (Language): The modified `Language` object. 
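For example, a sketch with a placeholder path:
>>> from spacy.lang.en import English
>>> nlp = English().from_disk("/path/to/pipeline")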
- DOCS: https://spacy.io/api/language#from_disk + DOCS: https://nightly.spacy.io/api/language#from_disk """ def deserialize_meta(path: Path) -> None: @@ -1659,7 +1659,7 @@ class Language: exclude (list): Names of components or serialization fields to exclude. RETURNS (bytes): The serialized form of the `Language` object. - DOCS: https://spacy.io/api/language#to_bytes + DOCS: https://nightly.spacy.io/api/language#to_bytes """ serializers = {} serializers["vocab"] = lambda: self.vocab.to_bytes() @@ -1683,7 +1683,7 @@ class Language: exclude (list): Names of components or serialization fields to exclude. RETURNS (Language): The `Language` object. - DOCS: https://spacy.io/api/language#from_bytes + DOCS: https://nightly.spacy.io/api/language#from_bytes """ def deserialize_meta(b): diff --git a/spacy/lexeme.pyx b/spacy/lexeme.pyx index 25461b4b7..17ce574ce 100644 --- a/spacy/lexeme.pyx +++ b/spacy/lexeme.pyx @@ -30,7 +30,7 @@ cdef class Lexeme: tag, dependency parse, or lemma (lemmatization depends on the part-of-speech tag). - DOCS: https://spacy.io/api/lexeme + DOCS: https://nightly.spacy.io/api/lexeme """ def __init__(self, Vocab vocab, attr_t orth): """Create a Lexeme object. diff --git a/spacy/lookups.py b/spacy/lookups.py index d79a5b950..fb5e3d748 100644 --- a/spacy/lookups.py +++ b/spacy/lookups.py @@ -57,7 +57,7 @@ class Table(OrderedDict): data (dict): The dictionary. name (str): Optional table name for reference. - DOCS: https://spacy.io/api/lookups#table.from_dict + DOCS: https://nightly.spacy.io/api/lookups#table.from_dict """ self = cls(name=name) self.update(data) @@ -69,7 +69,7 @@ class Table(OrderedDict): name (str): Optional table name for reference. data (dict): Initial data, used to hint Bloom Filter. - DOCS: https://spacy.io/api/lookups#table.init + DOCS: https://nightly.spacy.io/api/lookups#table.init """ OrderedDict.__init__(self) self.name = name @@ -135,7 +135,7 @@ class Table(OrderedDict): RETURNS (bytes): The serialized table. - DOCS: https://spacy.io/api/lookups#table.to_bytes + DOCS: https://nightly.spacy.io/api/lookups#table.to_bytes """ data = { "name": self.name, @@ -150,7 +150,7 @@ class Table(OrderedDict): bytes_data (bytes): The data to load. RETURNS (Table): The loaded table. - DOCS: https://spacy.io/api/lookups#table.from_bytes + DOCS: https://nightly.spacy.io/api/lookups#table.from_bytes """ loaded = srsly.msgpack_loads(bytes_data) data = loaded.get("dict", {}) @@ -172,7 +172,7 @@ class Lookups: def __init__(self) -> None: """Initialize the Lookups object. - DOCS: https://spacy.io/api/lookups#init + DOCS: https://nightly.spacy.io/api/lookups#init """ self._tables = {} @@ -201,7 +201,7 @@ class Lookups: data (dict): Optional data to add to the table. RETURNS (Table): The newly added table. - DOCS: https://spacy.io/api/lookups#add_table + DOCS: https://nightly.spacy.io/api/lookups#add_table """ if name in self.tables: raise ValueError(Errors.E158.format(name=name)) @@ -215,7 +215,7 @@ class Lookups: name (str): Name of the table to set. table (Table): The Table to set. - DOCS: https://spacy.io/api/lookups#set_table + DOCS: https://nightly.spacy.io/api/lookups#set_table """ self._tables[name] = table @@ -227,7 +227,7 @@ class Lookups: default (Any): Optional default value to return if table doesn't exist. RETURNS (Table): The table. - DOCS: https://spacy.io/api/lookups#get_table + DOCS: https://nightly.spacy.io/api/lookups#get_table """ if name not in self._tables: if default == UNSET: @@ -241,7 +241,7 @@ class Lookups: name (str): Name of the table to remove. 
RETURNS (Table): The removed table. - DOCS: https://spacy.io/api/lookups#remove_table + DOCS: https://nightly.spacy.io/api/lookups#remove_table """ if name not in self._tables: raise KeyError(Errors.E159.format(name=name, tables=self.tables)) @@ -253,7 +253,7 @@ class Lookups: name (str): Name of the table. RETURNS (bool): Whether a table of that name exists. - DOCS: https://spacy.io/api/lookups#has_table + DOCS: https://nightly.spacy.io/api/lookups#has_table """ return name in self._tables @@ -262,7 +262,7 @@ class Lookups: RETURNS (bytes): The serialized Lookups. - DOCS: https://spacy.io/api/lookups#to_bytes + DOCS: https://nightly.spacy.io/api/lookups#to_bytes """ return srsly.msgpack_dumps(self._tables) @@ -272,7 +272,7 @@ class Lookups: bytes_data (bytes): The data to load. RETURNS (Lookups): The loaded Lookups. - DOCS: https://spacy.io/api/lookups#from_bytes + DOCS: https://nightly.spacy.io/api/lookups#from_bytes """ self._tables = {} for key, value in srsly.msgpack_loads(bytes_data).items(): @@ -287,7 +287,7 @@ class Lookups: path (str / Path): The file path. - DOCS: https://spacy.io/api/lookups#to_disk + DOCS: https://nightly.spacy.io/api/lookups#to_disk """ if len(self._tables): path = ensure_path(path) @@ -306,7 +306,7 @@ class Lookups: path (str / Path): The directory path. RETURNS (Lookups): The loaded lookups. - DOCS: https://spacy.io/api/lookups#from_disk + DOCS: https://nightly.spacy.io/api/lookups#from_disk """ path = ensure_path(path) filepath = path / filename diff --git a/spacy/matcher/matcher.pyx b/spacy/matcher/matcher.pyx index d3a8fa539..a170c7a6b 100644 --- a/spacy/matcher/matcher.pyx +++ b/spacy/matcher/matcher.pyx @@ -31,8 +31,8 @@ DEF PADDING = 5 cdef class Matcher: """Match sequences of tokens, based on pattern rules. - DOCS: https://spacy.io/api/matcher - USAGE: https://spacy.io/usage/rule-based-matching + DOCS: https://nightly.spacy.io/api/matcher + USAGE: https://nightly.spacy.io/usage/rule-based-matching """ def __init__(self, vocab, validate=True): diff --git a/spacy/matcher/phrasematcher.pyx b/spacy/matcher/phrasematcher.pyx index ba0f515b5..fae513367 100644 --- a/spacy/matcher/phrasematcher.pyx +++ b/spacy/matcher/phrasematcher.pyx @@ -19,8 +19,8 @@ cdef class PhraseMatcher: sequences based on lists of token descriptions, the `PhraseMatcher` accepts match patterns in the form of `Doc` objects. - DOCS: https://spacy.io/api/phrasematcher - USAGE: https://spacy.io/usage/rule-based-matching#phrasematcher + DOCS: https://nightly.spacy.io/api/phrasematcher + USAGE: https://nightly.spacy.io/usage/rule-based-matching#phrasematcher Adapted from FlashText: https://github.com/vi3k6i5/flashtext MIT License (see `LICENSE`) @@ -34,7 +34,7 @@ cdef class PhraseMatcher: attr (int / str): Token attribute to match on. validate (bool): Perform additional validation when patterns are added. - DOCS: https://spacy.io/api/phrasematcher#init + DOCS: https://nightly.spacy.io/api/phrasematcher#init """ self.vocab = vocab self._callbacks = {} @@ -61,7 +61,7 @@ cdef class PhraseMatcher: RETURNS (int): The number of rules. - DOCS: https://spacy.io/api/phrasematcher#len + DOCS: https://nightly.spacy.io/api/phrasematcher#len """ return len(self._callbacks) @@ -71,7 +71,7 @@ cdef class PhraseMatcher: key (str): The match ID. RETURNS (bool): Whether the matcher contains rules for this match ID. 
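For example, a sketch assuming `nlp` is a loaded pipeline:
>>> matcher = PhraseMatcher(nlp.vocab)
>>> matcher.add("OBAMA", [nlp("Barack Obama")])
>>> "OBAMA" in matcher
True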
- DOCS: https://spacy.io/api/phrasematcher#contains + DOCS: https://nightly.spacy.io/api/phrasematcher#contains """ return key in self._callbacks @@ -85,7 +85,7 @@ cdef class PhraseMatcher: key (str): The match ID. - DOCS: https://spacy.io/api/phrasematcher#remove + DOCS: https://nightly.spacy.io/api/phrasematcher#remove """ if key not in self._docs: raise KeyError(key) @@ -164,7 +164,7 @@ cdef class PhraseMatcher: as variable arguments. Will be ignored if a list of patterns is provided as the second argument. - DOCS: https://spacy.io/api/phrasematcher#add + DOCS: https://nightly.spacy.io/api/phrasematcher#add """ if docs is None or hasattr(docs, "__call__"): # old API on_match = docs @@ -228,7 +228,7 @@ cdef class PhraseMatcher: `doc[start:end]`. The `match_id` is an integer. If as_spans is set to True, a list of Span objects is returned. - DOCS: https://spacy.io/api/phrasematcher#call + DOCS: https://nightly.spacy.io/api/phrasematcher#call """ matches = [] if doc is None or len(doc) == 0: diff --git a/spacy/pipeline/attributeruler.py b/spacy/pipeline/attributeruler.py index 85a425e29..406112681 100644 --- a/spacy/pipeline/attributeruler.py +++ b/spacy/pipeline/attributeruler.py @@ -38,7 +38,7 @@ class AttributeRuler(Pipe): """Set token-level attributes for tokens matched by Matcher patterns. Additionally supports importing patterns from tag maps and morph rules. - DOCS: https://spacy.io/api/attributeruler + DOCS: https://nightly.spacy.io/api/attributeruler """ def __init__( @@ -59,7 +59,7 @@ class AttributeRuler(Pipe): RETURNS (AttributeRuler): The AttributeRuler component. - DOCS: https://spacy.io/api/attributeruler#init + DOCS: https://nightly.spacy.io/api/attributeruler#init """ self.name = name self.vocab = vocab @@ -77,7 +77,7 @@ class AttributeRuler(Pipe): doc (Doc): The document to process. RETURNS (Doc): The processed Doc. - DOCS: https://spacy.io/api/attributeruler#call + DOCS: https://nightly.spacy.io/api/attributeruler#call """ matches = sorted(self.matcher(doc)) @@ -121,7 +121,7 @@ class AttributeRuler(Pipe): tag_map (dict): The tag map that maps fine-grained tags to coarse-grained tags and morphological features. - DOCS: https://spacy.io/api/attributeruler#load_from_morph_rules + DOCS: https://nightly.spacy.io/api/attributeruler#load_from_morph_rules """ for tag, attrs in tag_map.items(): pattern = [{"TAG": tag}] @@ -139,7 +139,7 @@ class AttributeRuler(Pipe): fine-grained tags to coarse-grained tags, lemmas and morphological features. - DOCS: https://spacy.io/api/attributeruler#load_from_morph_rules + DOCS: https://nightly.spacy.io/api/attributeruler#load_from_morph_rules """ for tag in morph_rules: for word in morph_rules[tag]: @@ -163,7 +163,7 @@ class AttributeRuler(Pipe): index (int): The index of the token in the matched span to modify. May be negative to index from the end of the span. Defaults to 0. - DOCS: https://spacy.io/api/attributeruler#add + DOCS: https://nightly.spacy.io/api/attributeruler#add """ self.matcher.add(len(self.attrs), patterns) self._attrs_unnormed.append(attrs) @@ -178,7 +178,7 @@ class AttributeRuler(Pipe): as the arguments to AttributeRuler.add (patterns/attrs/index) to add as patterns. - DOCS: https://spacy.io/api/attributeruler#add_patterns + DOCS: https://nightly.spacy.io/api/attributeruler#add_patterns """ for p in pattern_dicts: self.add(**p) @@ -203,7 +203,7 @@ class AttributeRuler(Pipe): Scorer.score_token_attr for the attributes "tag", "pos", "morph" and "lemma" for the target token attributes. 
- DOCS: https://spacy.io/api/tagger#score + DOCS: https://nightly.spacy.io/api/tagger#score """ validate_examples(examples, "AttributeRuler.score") results = {} @@ -227,7 +227,7 @@ class AttributeRuler(Pipe): exclude (Iterable[str]): String names of serialization fields to exclude. RETURNS (bytes): The serialized object. - DOCS: https://spacy.io/api/attributeruler#to_bytes + DOCS: https://nightly.spacy.io/api/attributeruler#to_bytes """ serialize = {} serialize["vocab"] = self.vocab.to_bytes @@ -243,7 +243,7 @@ class AttributeRuler(Pipe): exclude (Iterable[str]): String names of serialization fields to exclude. returns (AttributeRuler): The loaded object. - DOCS: https://spacy.io/api/attributeruler#from_bytes + DOCS: https://nightly.spacy.io/api/attributeruler#from_bytes """ def load_patterns(b): @@ -264,7 +264,7 @@ class AttributeRuler(Pipe): path (Union[Path, str]): A path to a directory. exclude (Iterable[str]): String names of serialization fields to exclude. - DOCS: https://spacy.io/api/attributeruler#to_disk + DOCS: https://nightly.spacy.io/api/attributeruler#to_disk """ serialize = { "vocab": lambda p: self.vocab.to_disk(p), @@ -279,7 +279,7 @@ class AttributeRuler(Pipe): path (Union[Path, str]): A path to a directory. exclude (Iterable[str]): String names of serialization fields to exclude. - DOCS: https://spacy.io/api/attributeruler#from_disk + DOCS: https://nightly.spacy.io/api/attributeruler#from_disk """ def load_patterns(p): diff --git a/spacy/pipeline/dep_parser.pyx b/spacy/pipeline/dep_parser.pyx index 76f58df58..e001920a6 100644 --- a/spacy/pipeline/dep_parser.pyx +++ b/spacy/pipeline/dep_parser.pyx @@ -105,7 +105,7 @@ def make_parser( cdef class DependencyParser(Parser): """Pipeline component for dependency parsing. - DOCS: https://spacy.io/api/dependencyparser + DOCS: https://nightly.spacy.io/api/dependencyparser """ TransitionSystem = ArcEager @@ -146,7 +146,7 @@ cdef class DependencyParser(Parser): RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans and Scorer.score_deps. - DOCS: https://spacy.io/api/dependencyparser#score + DOCS: https://nightly.spacy.io/api/dependencyparser#score """ validate_examples(examples, "DependencyParser.score") def dep_getter(token, attr): diff --git a/spacy/pipeline/entity_linker.py b/spacy/pipeline/entity_linker.py index 78cf274ab..d4f1e6b56 100644 --- a/spacy/pipeline/entity_linker.py +++ b/spacy/pipeline/entity_linker.py @@ -83,7 +83,7 @@ def make_entity_linker( class EntityLinker(Pipe): """Pipeline component for named entity linking. - DOCS: https://spacy.io/api/entitylinker + DOCS: https://nightly.spacy.io/api/entitylinker """ NIL = "NIL" # string used to refer to a non-existing link @@ -111,7 +111,7 @@ class EntityLinker(Pipe): incl_prior (bool): Whether or not to include prior probabilities from the KB in the model. incl_context (bool): Whether or not to include the local context in the model. - DOCS: https://spacy.io/api/entitylinker#init + DOCS: https://nightly.spacy.io/api/entitylinker#init """ self.vocab = vocab self.model = model @@ -151,7 +151,7 @@ class EntityLinker(Pipe): create_optimizer if it doesn't exist. RETURNS (thinc.api.Optimizer): The optimizer. - DOCS: https://spacy.io/api/entitylinker#begin_training + DOCS: https://nightly.spacy.io/api/entitylinker#begin_training """ self.require_kb() nO = self.kb.entity_vector_length @@ -182,7 +182,7 @@ class EntityLinker(Pipe): Updated using the component name as the key. RETURNS (Dict[str, float]): The updated losses dictionary. 
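A rough sketch, assuming `examples` and `optimizer` were created elsewhere:
>>> losses = entity_linker.update(examples, sgd=optimizer)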
- DOCS: https://spacy.io/api/entitylinker#update + DOCS: https://nightly.spacy.io/api/entitylinker#update """ self.require_kb() if losses is None: @@ -264,7 +264,7 @@ class EntityLinker(Pipe): doc (Doc): The document to process. RETURNS (Doc): The processed Doc. - DOCS: https://spacy.io/api/entitylinker#call + DOCS: https://nightly.spacy.io/api/entitylinker#call """ kb_ids = self.predict([doc]) self.set_annotations([doc], kb_ids) @@ -279,7 +279,7 @@ class EntityLinker(Pipe): batch_size (int): The number of documents to buffer. YIELDS (Doc): Processed documents in order. - DOCS: https://spacy.io/api/entitylinker#pipe + DOCS: https://nightly.spacy.io/api/entitylinker#pipe """ for docs in util.minibatch(stream, size=batch_size): kb_ids = self.predict(docs) @@ -294,7 +294,7 @@ class EntityLinker(Pipe): docs (Iterable[Doc]): The documents to predict. RETURNS (List[int]): The models prediction for each document. - DOCS: https://spacy.io/api/entitylinker#predict + DOCS: https://nightly.spacy.io/api/entitylinker#predict """ self.require_kb() entity_count = 0 @@ -391,7 +391,7 @@ class EntityLinker(Pipe): docs (Iterable[Doc]): The documents to modify. kb_ids (List[str]): The IDs to set, produced by EntityLinker.predict. - DOCS: https://spacy.io/api/entitylinker#set_annotations + DOCS: https://nightly.spacy.io/api/entitylinker#set_annotations """ count_ents = len([ent for doc in docs for ent in doc.ents]) if count_ents != len(kb_ids): @@ -412,7 +412,7 @@ class EntityLinker(Pipe): path (str / Path): Path to a directory. exclude (Iterable[str]): String names of serialization fields to exclude. - DOCS: https://spacy.io/api/entitylinker#to_disk + DOCS: https://nightly.spacy.io/api/entitylinker#to_disk """ serialize = {} serialize["cfg"] = lambda p: srsly.write_json(p, self.cfg) @@ -430,7 +430,7 @@ class EntityLinker(Pipe): exclude (Iterable[str]): String names of serialization fields to exclude. RETURNS (EntityLinker): The modified EntityLinker object. - DOCS: https://spacy.io/api/entitylinker#from_disk + DOCS: https://nightly.spacy.io/api/entitylinker#from_disk """ def load_model(p): diff --git a/spacy/pipeline/entityruler.py b/spacy/pipeline/entityruler.py index 5137dfec2..9a87c8589 100644 --- a/spacy/pipeline/entityruler.py +++ b/spacy/pipeline/entityruler.py @@ -53,8 +53,8 @@ class EntityRuler: purely rule-based entity recognition system. After initialization, the component is typically added to the pipeline using `nlp.add_pipe`. - DOCS: https://spacy.io/api/entityruler - USAGE: https://spacy.io/usage/rule-based-matching#entityruler + DOCS: https://nightly.spacy.io/api/entityruler + USAGE: https://nightly.spacy.io/usage/rule-based-matching#entityruler """ def __init__( @@ -88,7 +88,7 @@ class EntityRuler: added by the model, overwrite them by matches if necessary. ent_id_sep (str): Separator used internally for entity IDs. - DOCS: https://spacy.io/api/entityruler#init + DOCS: https://nightly.spacy.io/api/entityruler#init """ self.nlp = nlp self.name = name @@ -127,7 +127,7 @@ class EntityRuler: doc (Doc): The Doc object in the pipeline. RETURNS (Doc): The Doc with added entities, if available. - DOCS: https://spacy.io/api/entityruler#call + DOCS: https://nightly.spacy.io/api/entityruler#call """ matches = list(self.matcher(doc)) + list(self.phrase_matcher(doc)) matches = set( @@ -165,7 +165,7 @@ class EntityRuler: RETURNS (set): The string labels. 
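For example, a sketch assuming `nlp` is an existing pipeline:
>>> ruler = nlp.add_pipe("entity_ruler")
>>> ruler.add_patterns([{"label": "ORG", "pattern": "Apple"}])
>>> ruler.labels
('ORG',)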
- DOCS: https://spacy.io/api/entityruler#labels + DOCS: https://nightly.spacy.io/api/entityruler#labels """ keys = set(self.token_patterns.keys()) keys.update(self.phrase_patterns.keys()) @@ -185,7 +185,7 @@ class EntityRuler: RETURNS (set): The string entity ids. - DOCS: https://spacy.io/api/entityruler#ent_ids + DOCS: https://nightly.spacy.io/api/entityruler#ent_ids """ keys = set(self.token_patterns.keys()) keys.update(self.phrase_patterns.keys()) @@ -203,7 +203,7 @@ class EntityRuler: RETURNS (list): The original patterns, one dictionary per pattern. - DOCS: https://spacy.io/api/entityruler#patterns + DOCS: https://nightly.spacy.io/api/entityruler#patterns """ all_patterns = [] for label, patterns in self.token_patterns.items(): @@ -230,7 +230,7 @@ class EntityRuler: patterns (list): The patterns to add. - DOCS: https://spacy.io/api/entityruler#add_patterns + DOCS: https://nightly.spacy.io/api/entityruler#add_patterns """ # disable the nlp components after this one in case they hadn't been initialized / deserialised yet @@ -324,7 +324,7 @@ class EntityRuler: patterns_bytes (bytes): The bytestring to load. RETURNS (EntityRuler): The loaded entity ruler. - DOCS: https://spacy.io/api/entityruler#from_bytes + DOCS: https://nightly.spacy.io/api/entityruler#from_bytes """ cfg = srsly.msgpack_loads(patterns_bytes) self.clear() @@ -346,7 +346,7 @@ class EntityRuler: RETURNS (bytes): The serialized patterns. - DOCS: https://spacy.io/api/entityruler#to_bytes + DOCS: https://nightly.spacy.io/api/entityruler#to_bytes """ serial = { "overwrite": self.overwrite, @@ -365,7 +365,7 @@ class EntityRuler: path (str / Path): The JSONL file to load. RETURNS (EntityRuler): The loaded entity ruler. - DOCS: https://spacy.io/api/entityruler#from_disk + DOCS: https://nightly.spacy.io/api/entityruler#from_disk """ path = ensure_path(path) self.clear() @@ -401,7 +401,7 @@ class EntityRuler: path (str / Path): The JSONL file to save. - DOCS: https://spacy.io/api/entityruler#to_disk + DOCS: https://nightly.spacy.io/api/entityruler#to_disk """ path = ensure_path(path) cfg = { diff --git a/spacy/pipeline/functions.py b/spacy/pipeline/functions.py index 501884873..7e68ea369 100644 --- a/spacy/pipeline/functions.py +++ b/spacy/pipeline/functions.py @@ -15,7 +15,7 @@ def merge_noun_chunks(doc: Doc) -> Doc: doc (Doc): The Doc object. RETURNS (Doc): The Doc object with merged noun chunks. - DOCS: https://spacy.io/api/pipeline-functions#merge_noun_chunks + DOCS: https://nightly.spacy.io/api/pipeline-functions#merge_noun_chunks """ if not doc.is_parsed: return doc @@ -37,7 +37,7 @@ def merge_entities(doc: Doc): doc (Doc): The Doc object. RETURNS (Doc): The Doc object with merged entities. - DOCS: https://spacy.io/api/pipeline-functions#merge_entities + DOCS: https://nightly.spacy.io/api/pipeline-functions#merge_entities """ with doc.retokenize() as retokenizer: for ent in doc.ents: @@ -54,7 +54,7 @@ def merge_subtokens(doc: Doc, label: str = "subtok") -> Doc: label (str): The subtoken dependency label. RETURNS (Doc): The Doc object with merged subtokens. 
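For example, a sketch assuming the pipeline's parser predicts "subtok" labels:
>>> doc = merge_subtokens(doc, label="subtok")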
- DOCS: https://spacy.io/api/pipeline-functions#merge_subtokens + DOCS: https://nightly.spacy.io/api/pipeline-functions#merge_subtokens """ # TODO: make stateful component with "label" config merger = Matcher(doc.vocab) diff --git a/spacy/pipeline/lemmatizer.py b/spacy/pipeline/lemmatizer.py index 6cea65fec..3f3e387b7 100644 --- a/spacy/pipeline/lemmatizer.py +++ b/spacy/pipeline/lemmatizer.py @@ -43,7 +43,7 @@ class Lemmatizer(Pipe): The Lemmatizer supports simple part-of-speech-sensitive suffix rules and lookup tables. - DOCS: https://spacy.io/api/lemmatizer + DOCS: https://nightly.spacy.io/api/lemmatizer """ @classmethod @@ -54,7 +54,7 @@ class Lemmatizer(Pipe): mode (str): The lemmatizer mode. RETURNS (dict): The lookups configuration settings for this mode. - DOCS: https://spacy.io/api/lemmatizer#get_lookups_config + DOCS: https://nightly.spacy.io/api/lemmatizer#get_lookups_config """ if mode == "lookup": return { @@ -80,7 +80,7 @@ class Lemmatizer(Pipe): lookups should be loaded. RETURNS (Lookups): The Lookups object. - DOCS: https://spacy.io/api/lemmatizer#get_lookups_config + DOCS: https://nightly.spacy.io/api/lemmatizer#get_lookups_config """ config = cls.get_lookups_config(mode) required_tables = config.get("required_tables", []) @@ -123,7 +123,7 @@ class Lemmatizer(Pipe): overwrite (bool): Whether to overwrite existing lemmas. Defaults to `False`. - DOCS: https://spacy.io/api/lemmatizer#init + DOCS: https://nightly.spacy.io/api/lemmatizer#init """ self.vocab = vocab self.model = model @@ -152,7 +152,7 @@ class Lemmatizer(Pipe): doc (Doc): The Doc to process. RETURNS (Doc): The processed Doc. - DOCS: https://spacy.io/api/lemmatizer#call + DOCS: https://nightly.spacy.io/api/lemmatizer#call """ for token in doc: if self.overwrite or token.lemma == 0: @@ -168,7 +168,7 @@ class Lemmatizer(Pipe): batch_size (int): The number of documents to buffer. YIELDS (Doc): Processed documents in order. - DOCS: https://spacy.io/api/lemmatizer#pipe + DOCS: https://nightly.spacy.io/api/lemmatizer#pipe """ for doc in stream: doc = self(doc) @@ -180,7 +180,7 @@ class Lemmatizer(Pipe): token (Token): The token to lemmatize. RETURNS (list): The available lemmas for the string. - DOCS: https://spacy.io/api/lemmatizer#lookup_lemmatize + DOCS: https://nightly.spacy.io/api/lemmatizer#lookup_lemmatize """ lookup_table = self.lookups.get_table("lemma_lookup", {}) result = lookup_table.get(token.text, token.text) @@ -194,7 +194,7 @@ class Lemmatizer(Pipe): token (Token): The token to lemmatize. RETURNS (list): The available lemmas for the string. - DOCS: https://spacy.io/api/lemmatizer#rule_lemmatize + DOCS: https://nightly.spacy.io/api/lemmatizer#rule_lemmatize """ cache_key = (token.orth, token.pos, token.morph) if cache_key in self.cache: @@ -260,7 +260,7 @@ class Lemmatizer(Pipe): token (Token): The token. RETURNS (bool): Whether the token is a base form. - DOCS: https://spacy.io/api/lemmatizer#is_base_form + DOCS: https://nightly.spacy.io/api/lemmatizer#is_base_form """ return False @@ -270,7 +270,7 @@ class Lemmatizer(Pipe): examples (Iterable[Example]): The examples to score. RETURNS (Dict[str, Any]): The scores. - DOCS: https://spacy.io/api/lemmatizer#score + DOCS: https://nightly.spacy.io/api/lemmatizer#score """ validate_examples(examples, "Lemmatizer.score") return Scorer.score_token_attr(examples, "lemma", **kwargs) @@ -282,7 +282,7 @@ class Lemmatizer(Pipe): it doesn't exist. exclude (list): String names of serialization fields to exclude. 
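For example, with a placeholder path:
>>> lemmatizer.to_disk("/path/to/lemmatizer")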
- DOCS: https://spacy.io/api/vocab#to_disk + DOCS: https://nightly.spacy.io/api/vocab#to_disk """ serialize = {} serialize["vocab"] = lambda p: self.vocab.to_disk(p) @@ -297,7 +297,7 @@ class Lemmatizer(Pipe): exclude (list): String names of serialization fields to exclude. RETURNS (Vocab): The modified `Vocab` object. - DOCS: https://spacy.io/api/vocab#to_disk + DOCS: https://nightly.spacy.io/api/vocab#to_disk """ deserialize = {} deserialize["vocab"] = lambda p: self.vocab.from_disk(p) @@ -310,7 +310,7 @@ class Lemmatizer(Pipe): exclude (list): String names of serialization fields to exclude. RETURNS (bytes): The serialized form of the `Vocab` object. - DOCS: https://spacy.io/api/vocab#to_bytes + DOCS: https://nightly.spacy.io/api/vocab#to_bytes """ serialize = {} serialize["vocab"] = self.vocab.to_bytes @@ -324,7 +324,7 @@ class Lemmatizer(Pipe): exclude (list): String names of serialization fields to exclude. RETURNS (Vocab): The `Vocab` object. - DOCS: https://spacy.io/api/vocab#from_bytes + DOCS: https://nightly.spacy.io/api/vocab#from_bytes """ deserialize = {} deserialize["vocab"] = lambda b: self.vocab.from_bytes(b) diff --git a/spacy/pipeline/morphologizer.pyx b/spacy/pipeline/morphologizer.pyx index 329a05f90..bcb555b90 100644 --- a/spacy/pipeline/morphologizer.pyx +++ b/spacy/pipeline/morphologizer.pyx @@ -79,7 +79,7 @@ class Morphologizer(Tagger): labels_morph (dict): Mapping of morph + POS tags to morph labels. labels_pos (dict): Mapping of morph + POS tags to POS tags. - DOCS: https://spacy.io/api/morphologizer#init + DOCS: https://nightly.spacy.io/api/morphologizer#init """ self.vocab = vocab self.model = model @@ -106,7 +106,7 @@ class Morphologizer(Tagger): label (str): The label to add. RETURNS (int): 0 if label is already present, otherwise 1. - DOCS: https://spacy.io/api/morphologizer#add_label + DOCS: https://nightly.spacy.io/api/morphologizer#add_label """ if not isinstance(label, str): raise ValueError(Errors.E187) @@ -139,7 +139,7 @@ class Morphologizer(Tagger): create_optimizer if it doesn't exist. RETURNS (thinc.api.Optimizer): The optimizer. - DOCS: https://spacy.io/api/morphologizer#begin_training + DOCS: https://nightly.spacy.io/api/morphologizer#begin_training """ if not hasattr(get_examples, "__call__"): err = Errors.E930.format(name="Morphologizer", obj=type(get_examples)) @@ -169,7 +169,7 @@ class Morphologizer(Tagger): docs (Iterable[Doc]): The documents to modify. batch_tag_ids: The IDs to set, produced by Morphologizer.predict. - DOCS: https://spacy.io/api/morphologizer#set_annotations + DOCS: https://nightly.spacy.io/api/morphologizer#set_annotations """ if isinstance(docs, Doc): docs = [docs] @@ -194,7 +194,7 @@ class Morphologizer(Tagger): scores: Scores representing the model's predictions. RETUTNRS (Tuple[float, float]): The loss and the gradient. - DOCS: https://spacy.io/api/morphologizer#get_loss + DOCS: https://nightly.spacy.io/api/morphologizer#get_loss """ validate_examples(examples, "Morphologizer.get_loss") loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False) @@ -231,7 +231,7 @@ class Morphologizer(Tagger): Scorer.score_token_attr for the attributes "pos" and "morph" and Scorer.score_token_attr_per_feat for the attribute "morph". 
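For example, assuming `examples` is a list of Example objects:
>>> scores = morphologizer.score(examples)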
- DOCS: https://spacy.io/api/morphologizer#score + DOCS: https://nightly.spacy.io/api/morphologizer#score """ validate_examples(examples, "Morphologizer.score") results = {} @@ -247,7 +247,7 @@ class Morphologizer(Tagger): exclude (Iterable[str]): String names of serialization fields to exclude. RETURNS (bytes): The serialized object. - DOCS: https://spacy.io/api/morphologizer#to_bytes + DOCS: https://nightly.spacy.io/api/morphologizer#to_bytes """ serialize = {} serialize["model"] = self.model.to_bytes @@ -262,7 +262,7 @@ class Morphologizer(Tagger): exclude (Iterable[str]): String names of serialization fields to exclude. RETURNS (Morphologizer): The loaded Morphologizer. - DOCS: https://spacy.io/api/morphologizer#from_bytes + DOCS: https://nightly.spacy.io/api/morphologizer#from_bytes """ def load_model(b): try: @@ -284,7 +284,7 @@ class Morphologizer(Tagger): path (str / Path): Path to a directory. exclude (Iterable[str]): String names of serialization fields to exclude. - DOCS: https://spacy.io/api/morphologizer#to_disk + DOCS: https://nightly.spacy.io/api/morphologizer#to_disk """ serialize = { "vocab": lambda p: self.vocab.to_disk(p), @@ -300,7 +300,7 @@ class Morphologizer(Tagger): exclude (Iterable[str]): String names of serialization fields to exclude. RETURNS (Morphologizer): The modified Morphologizer object. - DOCS: https://spacy.io/api/morphologizer#from_disk + DOCS: https://nightly.spacy.io/api/morphologizer#from_disk """ def load_model(p): with p.open("rb") as file_: diff --git a/spacy/pipeline/ner.pyx b/spacy/pipeline/ner.pyx index 631b5ae72..d9f33ccb4 100644 --- a/spacy/pipeline/ner.pyx +++ b/spacy/pipeline/ner.pyx @@ -88,7 +88,7 @@ def make_ner( cdef class EntityRecognizer(Parser): """Pipeline component for named entity recognition. - DOCS: https://spacy.io/api/entityrecognizer + DOCS: https://nightly.spacy.io/api/entityrecognizer """ TransitionSystem = BiluoPushDown @@ -119,7 +119,7 @@ cdef class EntityRecognizer(Parser): examples (Iterable[Example]): The examples to score. RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans. - DOCS: https://spacy.io/api/entityrecognizer#score + DOCS: https://nightly.spacy.io/api/entityrecognizer#score """ validate_examples(examples, "EntityRecognizer.score") return Scorer.score_spans(examples, "ents", **kwargs) diff --git a/spacy/pipeline/pipe.pyx b/spacy/pipeline/pipe.pyx index a3f379a97..2518ebad3 100644 --- a/spacy/pipeline/pipe.pyx +++ b/spacy/pipeline/pipe.pyx @@ -15,7 +15,7 @@ cdef class Pipe: from it and it defines the interface that components should follow to function as trainable components in a spaCy pipeline. - DOCS: https://spacy.io/api/pipe + DOCS: https://nightly.spacy.io/api/pipe """ def __init__(self, vocab, model, name, **cfg): """Initialize a pipeline component. @@ -25,7 +25,7 @@ cdef class Pipe: name (str): The component instance name. **cfg: Additonal settings and config parameters. - DOCS: https://spacy.io/api/pipe#init + DOCS: https://nightly.spacy.io/api/pipe#init """ self.vocab = vocab self.model = model @@ -40,7 +40,7 @@ cdef class Pipe: docs (Doc): The Doc to process. RETURNS (Doc): The processed Doc. - DOCS: https://spacy.io/api/pipe#call + DOCS: https://nightly.spacy.io/api/pipe#call """ scores = self.predict([doc]) self.set_annotations([doc], scores) @@ -55,7 +55,7 @@ cdef class Pipe: batch_size (int): The number of documents to buffer. YIELDS (Doc): Processed documents in order. 
- DOCS: https://spacy.io/api/pipe#pipe + DOCS: https://nightly.spacy.io/api/pipe#pipe """ for docs in util.minibatch(stream, size=batch_size): scores = self.predict(docs) self.set_annotations(docs, scores) yield from docs @@ -69,7 +69,7 @@ cdef class Pipe: docs (Iterable[Doc]): The documents to predict. RETURNS: Vector representations for each token in the documents. - DOCS: https://spacy.io/api/pipe#predict + DOCS: https://nightly.spacy.io/api/pipe#predict """ raise NotImplementedError(Errors.E931.format(method="predict", name=self.name)) @@ -79,7 +79,7 @@ cdef class Pipe: docs (Iterable[Doc]): The documents to modify. scores: The scores to assign. - DOCS: https://spacy.io/api/pipe#set_annotations + DOCS: https://nightly.spacy.io/api/pipe#set_annotations """ raise NotImplementedError(Errors.E931.format(method="set_annotations", name=self.name)) @@ -96,7 +96,7 @@ cdef class Pipe: Updated using the component name as the key. RETURNS (Dict[str, float]): The updated losses dictionary. - DOCS: https://spacy.io/api/pipe#update + DOCS: https://nightly.spacy.io/api/pipe#update """ if losses is None: losses = {} @@ -132,7 +132,7 @@ cdef class Pipe: Updated using the component name as the key. RETURNS (Dict[str, float]): The updated losses dictionary. - DOCS: https://spacy.io/api/pipe#rehearse + DOCS: https://nightly.spacy.io/api/pipe#rehearse """ pass @@ -144,7 +144,7 @@ cdef class Pipe: scores: Scores representing the model's predictions. RETURNS (Tuple[float, float]): The loss and the gradient. - DOCS: https://spacy.io/api/pipe#get_loss + DOCS: https://nightly.spacy.io/api/pipe#get_loss """ raise NotImplementedError(Errors.E931.format(method="get_loss", name=self.name)) @@ -156,7 +156,7 @@ cdef class Pipe: label (str): The label to add. RETURNS (int): 0 if label is already present, otherwise 1. - DOCS: https://spacy.io/api/pipe#add_label + DOCS: https://nightly.spacy.io/api/pipe#add_label """ raise NotImplementedError(Errors.E931.format(method="add_label", name=self.name)) @@ -165,7 +165,7 @@ cdef class Pipe: RETURNS (thinc.api.Optimizer): The optimizer. - DOCS: https://spacy.io/api/pipe#create_optimizer + DOCS: https://nightly.spacy.io/api/pipe#create_optimizer """ return util.create_default_optimizer() @@ -181,7 +181,7 @@ cdef class Pipe: create_optimizer if it doesn't exist. RETURNS (thinc.api.Optimizer): The optimizer. - DOCS: https://spacy.io/api/pipe#begin_training + DOCS: https://nightly.spacy.io/api/pipe#begin_training """ self.model.initialize() if sgd is None: @@ -200,7 +200,7 @@ cdef class Pipe: params (dict): The parameter values to use in the model. - DOCS: https://spacy.io/api/pipe#use_params + DOCS: https://nightly.spacy.io/api/pipe#use_params """ with self.model.use_params(params): yield @@ -211,7 +211,7 @@ cdef class Pipe: examples (Iterable[Example]): The examples to score. RETURNS (Dict[str, Any]): The scores. - DOCS: https://spacy.io/api/pipe#score + DOCS: https://nightly.spacy.io/api/pipe#score """ return {} @@ -221,7 +221,7 @@ cdef class Pipe: exclude (Iterable[str]): String names of serialization fields to exclude. RETURNS (bytes): The serialized object. - DOCS: https://spacy.io/api/pipe#to_bytes + DOCS: https://nightly.spacy.io/api/pipe#to_bytes """ serialize = {} serialize["cfg"] = lambda: srsly.json_dumps(self.cfg) @@ -236,7 +236,7 @@ cdef class Pipe: exclude (Iterable[str]): String names of serialization fields to exclude. RETURNS (Pipe): The loaded object.
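Because every trainable component follows the Pipe contract spelled out above (predict computes scores without touching the docs, set_annotations writes them back), a custom component reduces to two overrides. A minimal, untrained sketch only, not a registered or drop-in component:

    from spacy.pipeline import Pipe

    class NoopComponent(Pipe):
        # predict() must not modify the docs; it only returns scores
        def predict(self, docs):
            return [None for _ in docs]

        # set_annotations() applies previously computed scores to the docs
        def set_annotations(self, docs, scores):
            pass  # a real component would write token or doc attributes here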
- DOCS: https://spacy.io/api/pipe#from_bytes + DOCS: https://nightly.spacy.io/api/pipe#from_bytes """ def load_model(b): @@ -259,7 +259,7 @@ cdef class Pipe: path (str / Path): Path to a directory. exclude (Iterable[str]): String names of serialization fields to exclude. - DOCS: https://spacy.io/api/pipe#to_disk + DOCS: https://nightly.spacy.io/api/pipe#to_disk """ serialize = {} serialize["cfg"] = lambda p: srsly.write_json(p, self.cfg) @@ -274,7 +274,7 @@ cdef class Pipe: exclude (Iterable[str]): String names of serialization fields to exclude. RETURNS (Pipe): The loaded object. - DOCS: https://spacy.io/api/pipe#from_disk + DOCS: https://nightly.spacy.io/api/pipe#from_disk """ def load_model(p): diff --git a/spacy/pipeline/sentencizer.pyx b/spacy/pipeline/sentencizer.pyx index 46d599497..aaf08d594 100644 --- a/spacy/pipeline/sentencizer.pyx +++ b/spacy/pipeline/sentencizer.pyx @@ -29,7 +29,7 @@ def make_sentencizer( class Sentencizer(Pipe): """Segment the Doc into sentences using a rule-based strategy. - DOCS: https://spacy.io/api/sentencizer + DOCS: https://nightly.spacy.io/api/sentencizer """ default_punct_chars = ['!', '.', '?', '։', '؟', '۔', '܀', '܁', '܂', '߹', @@ -51,7 +51,7 @@ class Sentencizer(Pipe): serialized with the nlp object. RETURNS (Sentencizer): The sentencizer component. - DOCS: https://spacy.io/api/sentencizer#init + DOCS: https://nightly.spacy.io/api/sentencizer#init """ self.name = name if punct_chars: @@ -68,7 +68,7 @@ class Sentencizer(Pipe): doc (Doc): The document to process. RETURNS (Doc): The processed Doc. - DOCS: https://spacy.io/api/sentencizer#call + DOCS: https://nightly.spacy.io/api/sentencizer#call """ start = 0 seen_period = False @@ -94,7 +94,7 @@ class Sentencizer(Pipe): batch_size (int): The number of documents to buffer. YIELDS (Doc): Processed documents in order. - DOCS: https://spacy.io/api/sentencizer#pipe + DOCS: https://nightly.spacy.io/api/sentencizer#pipe """ for docs in util.minibatch(stream, size=batch_size): predictions = self.predict(docs) @@ -157,7 +157,7 @@ class Sentencizer(Pipe): examples (Iterable[Example]): The examples to score. RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans. - DOCS: https://spacy.io/api/sentencizer#score + DOCS: https://nightly.spacy.io/api/sentencizer#score """ validate_examples(examples, "Sentencizer.score") results = Scorer.score_spans(examples, "sents", **kwargs) @@ -169,7 +169,7 @@ class Sentencizer(Pipe): RETURNS (bytes): The serialized object. - DOCS: https://spacy.io/api/sentencizer#to_bytes + DOCS: https://nightly.spacy.io/api/sentencizer#to_bytes """ return srsly.msgpack_dumps({"punct_chars": list(self.punct_chars)}) @@ -179,7 +179,7 @@ class Sentencizer(Pipe): bytes_data (bytes): The data to load. returns (Sentencizer): The loaded object. - DOCS: https://spacy.io/api/sentencizer#from_bytes + DOCS: https://nightly.spacy.io/api/sentencizer#from_bytes """ cfg = srsly.msgpack_loads(bytes_data) self.punct_chars = set(cfg.get("punct_chars", self.default_punct_chars)) @@ -188,7 +188,7 @@ class Sentencizer(Pipe): def to_disk(self, path, *, exclude=tuple()): """Serialize the sentencizer to disk. - DOCS: https://spacy.io/api/sentencizer#to_disk + DOCS: https://nightly.spacy.io/api/sentencizer#to_disk """ path = util.ensure_path(path) path = path.with_suffix(".json") @@ -198,7 +198,7 @@ class Sentencizer(Pipe): def from_disk(self, path, *, exclude=tuple()): """Load the sentencizer from disk. 
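The Sentencizer is purely rule-based, so it works on a blank pipeline with no training step; the punctuation list above can be overridden via the factory config. For example:

    import spacy

    nlp = spacy.blank("en")
    # pass config={"punct_chars": [...]} to override the default punctuation
    nlp.add_pipe("sentencizer")
    doc = nlp("This is a sentence. This is another one.")
    print([sent.text for sent in doc.sents])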
- DOCS: https://spacy.io/api/sentencizer#from_disk + DOCS: https://nightly.spacy.io/api/sentencizer#from_disk """ path = util.ensure_path(path) path = path.with_suffix(".json") diff --git a/spacy/pipeline/senter.pyx b/spacy/pipeline/senter.pyx index e82225d27..b78be44f8 100644 --- a/spacy/pipeline/senter.pyx +++ b/spacy/pipeline/senter.pyx @@ -44,7 +44,7 @@ def make_senter(nlp: Language, name: str, model: Model): class SentenceRecognizer(Tagger): """Pipeline component for sentence segmentation. - DOCS: https://spacy.io/api/sentencerecognizer + DOCS: https://nightly.spacy.io/api/sentencerecognizer """ def __init__(self, vocab, model, name="senter"): """Initialize a sentence recognizer. @@ -54,7 +54,7 @@ class SentenceRecognizer(Tagger): name (str): The component instance name, used to add entries to the losses during training. - DOCS: https://spacy.io/api/sentencerecognizer#init + DOCS: https://nightly.spacy.io/api/sentencerecognizer#init """ self.vocab = vocab self.model = model @@ -76,7 +76,7 @@ class SentenceRecognizer(Tagger): docs (Iterable[Doc]): The documents to modify. batch_tag_ids: The IDs to set, produced by SentenceRecognizer.predict. - DOCS: https://spacy.io/api/sentencerecognizer#set_annotations + DOCS: https://nightly.spacy.io/api/sentencerecognizer#set_annotations """ if isinstance(docs, Doc): docs = [docs] @@ -101,7 +101,7 @@ class SentenceRecognizer(Tagger): scores: Scores representing the model's predictions. RETUTNRS (Tuple[float, float]): The loss and the gradient. - DOCS: https://spacy.io/api/sentencerecognizer#get_loss + DOCS: https://nightly.spacy.io/api/sentencerecognizer#get_loss """ validate_examples(examples, "SentenceRecognizer.get_loss") labels = self.labels @@ -135,7 +135,7 @@ class SentenceRecognizer(Tagger): create_optimizer if it doesn't exist. RETURNS (thinc.api.Optimizer): The optimizer. - DOCS: https://spacy.io/api/sentencerecognizer#begin_training + DOCS: https://nightly.spacy.io/api/sentencerecognizer#begin_training """ self.set_output(len(self.labels)) self.model.initialize() @@ -151,7 +151,7 @@ class SentenceRecognizer(Tagger): examples (Iterable[Example]): The examples to score. RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans. - DOCS: https://spacy.io/api/sentencerecognizer#score + DOCS: https://nightly.spacy.io/api/sentencerecognizer#score """ validate_examples(examples, "SentenceRecognizer.score") results = Scorer.score_spans(examples, "sents", **kwargs) @@ -164,7 +164,7 @@ class SentenceRecognizer(Tagger): exclude (Iterable[str]): String names of serialization fields to exclude. RETURNS (bytes): The serialized object. - DOCS: https://spacy.io/api/sentencerecognizer#to_bytes + DOCS: https://nightly.spacy.io/api/sentencerecognizer#to_bytes """ serialize = {} serialize["model"] = self.model.to_bytes @@ -179,7 +179,7 @@ class SentenceRecognizer(Tagger): exclude (Iterable[str]): String names of serialization fields to exclude. RETURNS (Tagger): The loaded SentenceRecognizer. - DOCS: https://spacy.io/api/sentencerecognizer#from_bytes + DOCS: https://nightly.spacy.io/api/sentencerecognizer#from_bytes """ def load_model(b): try: @@ -201,7 +201,7 @@ class SentenceRecognizer(Tagger): path (str / Path): Path to a directory. exclude (Iterable[str]): String names of serialization fields to exclude. 
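The SentenceRecognizer is the trainable counterpart of the sentencizer: it produces the same doc.sents, but the boundaries come from a tagging model rather than punctuation rules, so it needs training before it predicts anything useful. A wiring-up sketch only:

    import spacy

    nlp = spacy.blank("en")
    # predicts sentence starts once trained, e.g. via the train CLI
    senter = nlp.add_pipe("senter")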
- DOCS: https://spacy.io/api/sentencerecognizer#to_disk + DOCS: https://nightly.spacy.io/api/sentencerecognizer#to_disk """ serialize = { "vocab": lambda p: self.vocab.to_disk(p), @@ -217,7 +217,7 @@ class SentenceRecognizer(Tagger): exclude (Iterable[str]): String names of serialization fields to exclude. RETURNS (Tagger): The modified SentenceRecognizer object. - DOCS: https://spacy.io/api/sentencerecognizer#from_disk + DOCS: https://nightly.spacy.io/api/sentencerecognizer#from_disk """ def load_model(p): with p.open("rb") as file_: diff --git a/spacy/pipeline/simple_ner.py b/spacy/pipeline/simple_ner.py index 5f3addbd7..c55edb067 100644 --- a/spacy/pipeline/simple_ner.py +++ b/spacy/pipeline/simple_ner.py @@ -78,7 +78,7 @@ class SimpleNER(Pipe): def add_label(self, label: str) -> None: """Add a new label to the pipe. label (str): The label to add. - DOCS: https://spacy.io/api/simplener#add_label + DOCS: https://nightly.spacy.io/api/simplener#add_label """ if not isinstance(label, str): raise ValueError(Errors.E187) diff --git a/spacy/pipeline/tagger.pyx b/spacy/pipeline/tagger.pyx index f831caefe..2b760c878 100644 --- a/spacy/pipeline/tagger.pyx +++ b/spacy/pipeline/tagger.pyx @@ -58,7 +58,7 @@ def make_tagger(nlp: Language, name: str, model: Model): class Tagger(Pipe): """Pipeline component for part-of-speech tagging. - DOCS: https://spacy.io/api/tagger + DOCS: https://nightly.spacy.io/api/tagger """ def __init__(self, vocab, model, name="tagger", *, labels=None): """Initialize a part-of-speech tagger. @@ -69,7 +69,7 @@ class Tagger(Pipe): losses during training. labels (List): The set of labels. Defaults to None. - DOCS: https://spacy.io/api/tagger#init + DOCS: https://nightly.spacy.io/api/tagger#init """ self.vocab = vocab self.model = model @@ -86,7 +86,7 @@ class Tagger(Pipe): RETURNS (Tuple[str]): The labels. - DOCS: https://spacy.io/api/tagger#labels + DOCS: https://nightly.spacy.io/api/tagger#labels """ return tuple(self.cfg["labels"]) @@ -96,7 +96,7 @@ class Tagger(Pipe): doc (Doc): The document to process. RETURNS (Doc): The processed Doc. - DOCS: https://spacy.io/api/tagger#call + DOCS: https://nightly.spacy.io/api/tagger#call """ tags = self.predict([doc]) self.set_annotations([doc], tags) @@ -111,7 +111,7 @@ class Tagger(Pipe): batch_size (int): The number of documents to buffer. YIELDS (Doc): Processed documents in order. - DOCS: https://spacy.io/api/tagger#pipe + DOCS: https://nightly.spacy.io/api/tagger#pipe """ for docs in util.minibatch(stream, size=batch_size): tag_ids = self.predict(docs) @@ -124,7 +124,7 @@ class Tagger(Pipe): docs (Iterable[Doc]): The documents to predict. RETURNS: The models prediction for each document. - DOCS: https://spacy.io/api/tagger#predict + DOCS: https://nightly.spacy.io/api/tagger#predict """ if not any(len(doc) for doc in docs): # Handle cases where there are no tokens in any docs. @@ -153,7 +153,7 @@ class Tagger(Pipe): docs (Iterable[Doc]): The documents to modify. batch_tag_ids: The IDs to set, produced by Tagger.predict. - DOCS: https://spacy.io/api/tagger#set_annotations + DOCS: https://nightly.spacy.io/api/tagger#set_annotations """ if isinstance(docs, Doc): docs = [docs] @@ -182,7 +182,7 @@ class Tagger(Pipe): Updated using the component name as the key. RETURNS (Dict[str, float]): The updated losses dictionary. - DOCS: https://spacy.io/api/tagger#update + DOCS: https://nightly.spacy.io/api/tagger#update """ if losses is None: losses = {} @@ -220,7 +220,7 @@ class Tagger(Pipe): Updated using the component name as the key. 
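As the add_label and labels entries above suggest, a fresh Tagger starts with an empty label set that is grown before training. A small sketch:

    import spacy

    nlp = spacy.blank("en")
    tagger = nlp.add_pipe("tagger")
    tagger.add_label("NOUN")   # returns 1 for a new label, 0 if already present
    tagger.add_label("VERB")
    print(tagger.labels)       # ('NOUN', 'VERB'), stored in cfg["labels"]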
RETURNS (Dict[str, float]): The updated losses dictionary. - DOCS: https://spacy.io/api/tagger#rehearse + DOCS: https://nightly.spacy.io/api/tagger#rehearse """ validate_examples(examples, "Tagger.rehearse") docs = [eg.predicted for eg in examples] @@ -247,7 +247,7 @@ class Tagger(Pipe): scores: Scores representing the model's predictions. RETUTNRS (Tuple[float, float]): The loss and the gradient. - DOCS: https://spacy.io/api/tagger#get_loss + DOCS: https://nightly.spacy.io/api/tagger#get_loss """ validate_examples(examples, "Tagger.get_loss") loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False) @@ -269,7 +269,7 @@ class Tagger(Pipe): create_optimizer if it doesn't exist. RETURNS (thinc.api.Optimizer): The optimizer. - DOCS: https://spacy.io/api/tagger#begin_training + DOCS: https://nightly.spacy.io/api/tagger#begin_training """ if not hasattr(get_examples, "__call__"): err = Errors.E930.format(name="Tagger", obj=type(get_examples)) @@ -307,7 +307,7 @@ class Tagger(Pipe): label (str): The label to add. RETURNS (int): 0 if label is already present, otherwise 1. - DOCS: https://spacy.io/api/tagger#add_label + DOCS: https://nightly.spacy.io/api/tagger#add_label """ if not isinstance(label, str): raise ValueError(Errors.E187) @@ -324,7 +324,7 @@ class Tagger(Pipe): RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_token_attr for the attributes "tag". - DOCS: https://spacy.io/api/tagger#score + DOCS: https://nightly.spacy.io/api/tagger#score """ validate_examples(examples, "Tagger.score") return Scorer.score_token_attr(examples, "tag", **kwargs) @@ -335,7 +335,7 @@ class Tagger(Pipe): exclude (Iterable[str]): String names of serialization fields to exclude. RETURNS (bytes): The serialized object. - DOCS: https://spacy.io/api/tagger#to_bytes + DOCS: https://nightly.spacy.io/api/tagger#to_bytes """ serialize = {} serialize["model"] = self.model.to_bytes @@ -350,7 +350,7 @@ class Tagger(Pipe): exclude (Iterable[str]): String names of serialization fields to exclude. RETURNS (Tagger): The loaded Tagger. - DOCS: https://spacy.io/api/tagger#from_bytes + DOCS: https://nightly.spacy.io/api/tagger#from_bytes """ def load_model(b): try: @@ -372,7 +372,7 @@ class Tagger(Pipe): path (str / Path): Path to a directory. exclude (Iterable[str]): String names of serialization fields to exclude. - DOCS: https://spacy.io/api/tagger#to_disk + DOCS: https://nightly.spacy.io/api/tagger#to_disk """ serialize = { "vocab": lambda p: self.vocab.to_disk(p), @@ -388,7 +388,7 @@ class Tagger(Pipe): exclude (Iterable[str]): String names of serialization fields to exclude. RETURNS (Tagger): The modified Tagger object. - DOCS: https://spacy.io/api/tagger#from_disk + DOCS: https://nightly.spacy.io/api/tagger#from_disk """ def load_model(p): with p.open("rb") as file_: diff --git a/spacy/pipeline/textcat.py b/spacy/pipeline/textcat.py index ce4f286e5..d6efb4348 100644 --- a/spacy/pipeline/textcat.py +++ b/spacy/pipeline/textcat.py @@ -92,7 +92,7 @@ def make_textcat( class TextCategorizer(Pipe): """Pipeline component for text classification. - DOCS: https://spacy.io/api/textcategorizer + DOCS: https://nightly.spacy.io/api/textcategorizer """ def __init__( @@ -111,7 +111,7 @@ class TextCategorizer(Pipe): losses during training. labels (Iterable[str]): The labels to use. 
- DOCS: https://spacy.io/api/textcategorizer#init + DOCS: https://nightly.spacy.io/api/textcategorizer#init """ self.vocab = vocab self.model = model @@ -124,7 +124,7 @@ class TextCategorizer(Pipe): def labels(self) -> Tuple[str]: """RETURNS (Tuple[str]): The labels currently added to the component. - DOCS: https://spacy.io/api/textcategorizer#labels + DOCS: https://nightly.spacy.io/api/textcategorizer#labels """ return tuple(self.cfg.setdefault("labels", [])) @@ -146,7 +146,7 @@ class TextCategorizer(Pipe): batch_size (int): The number of documents to buffer. YIELDS (Doc): Processed documents in order. - DOCS: https://spacy.io/api/textcategorizer#pipe + DOCS: https://nightly.spacy.io/api/textcategorizer#pipe """ for docs in util.minibatch(stream, size=batch_size): scores = self.predict(docs) self.set_annotations(docs, scores) yield from docs @@ -159,7 +159,7 @@ class TextCategorizer(Pipe): docs (Iterable[Doc]): The documents to predict. RETURNS: The model's prediction for each document. - DOCS: https://spacy.io/api/textcategorizer#predict + DOCS: https://nightly.spacy.io/api/textcategorizer#predict """ tensors = [doc.tensor for doc in docs] if not any(len(doc) for doc in docs): @@ -177,7 +177,7 @@ class TextCategorizer(Pipe): docs (Iterable[Doc]): The documents to modify. scores: The scores to set, produced by TextCategorizer.predict. - DOCS: https://spacy.io/api/textcategorizer#set_annotations + DOCS: https://nightly.spacy.io/api/textcategorizer#set_annotations """ for i, doc in enumerate(docs): for j, label in enumerate(self.labels): @@ -204,7 +204,7 @@ class TextCategorizer(Pipe): Updated using the component name as the key. RETURNS (Dict[str, float]): The updated losses dictionary. - DOCS: https://spacy.io/api/textcategorizer#update + DOCS: https://nightly.spacy.io/api/textcategorizer#update """ if losses is None: losses = {} @@ -245,7 +245,7 @@ class TextCategorizer(Pipe): Updated using the component name as the key. RETURNS (Dict[str, float]): The updated losses dictionary. - DOCS: https://spacy.io/api/textcategorizer#rehearse + DOCS: https://nightly.spacy.io/api/textcategorizer#rehearse """ if losses is not None: losses.setdefault(self.name, 0.0) @@ -289,7 +289,7 @@ class TextCategorizer(Pipe): scores: Scores representing the model's predictions. RETURNS (Tuple[float, float]): The loss and the gradient. - DOCS: https://spacy.io/api/textcategorizer#get_loss + DOCS: https://nightly.spacy.io/api/textcategorizer#get_loss """ validate_examples(examples, "TextCategorizer.get_loss") truths, not_missing = self._examples_to_truth(examples) @@ -305,7 +305,7 @@ class TextCategorizer(Pipe): label (str): The label to add. RETURNS (int): 0 if label is already present, otherwise 1. - DOCS: https://spacy.io/api/textcategorizer#add_label + DOCS: https://nightly.spacy.io/api/textcategorizer#add_label """ if not isinstance(label, str): raise ValueError(Errors.E187) @@ -343,7 +343,7 @@ class TextCategorizer(Pipe): create_optimizer if it doesn't exist. RETURNS (thinc.api.Optimizer): The optimizer. - DOCS: https://spacy.io/api/textcategorizer#begin_training + DOCS: https://nightly.spacy.io/api/textcategorizer#begin_training """ if not hasattr(get_examples, "__call__"): err = Errors.E930.format(name="TextCategorizer", obj=type(get_examples)) @@ -378,7 +378,7 @@ class TextCategorizer(Pipe): positive_label (str): Optional positive label. RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_cats.
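The TextCategorizer follows the same pattern: labels live in cfg and are registered through add_label before training, and set_annotations (shown above) writes one score per label into doc.cats. A brief sketch:

    import spacy

    nlp = spacy.blank("en")
    textcat = nlp.add_pipe("textcat")
    textcat.add_label("POSITIVE")
    textcat.add_label("NEGATIVE")
    print(textcat.labels)  # once trained, per-label scores appear in doc.cats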
- DOCS: https://spacy.io/api/textcategorizer#score + DOCS: https://nightly.spacy.io/api/textcategorizer#score """ validate_examples(examples, "TextCategorizer.score") return Scorer.score_cats( diff --git a/spacy/pipeline/tok2vec.py b/spacy/pipeline/tok2vec.py index 7e61ccc02..5657d687d 100644 --- a/spacy/pipeline/tok2vec.py +++ b/spacy/pipeline/tok2vec.py @@ -56,7 +56,7 @@ class Tok2Vec(Pipe): a list of Doc objects as input, and output a list of 2d float arrays. name (str): The component instance name. - DOCS: https://spacy.io/api/tok2vec#init + DOCS: https://nightly.spacy.io/api/tok2vec#init """ self.vocab = vocab self.model = model @@ -91,7 +91,7 @@ class Tok2Vec(Pipe): docs (Doc): The Doc to process. RETURNS (Doc): The processed Doc. - DOCS: https://spacy.io/api/tok2vec#call + DOCS: https://nightly.spacy.io/api/tok2vec#call """ tokvecses = self.predict([doc]) self.set_annotations([doc], tokvecses) @@ -106,7 +106,7 @@ class Tok2Vec(Pipe): batch_size (int): The number of documents to buffer. YIELDS (Doc): Processed documents in order. - DOCS: https://spacy.io/api/tok2vec#pipe + DOCS: https://nightly.spacy.io/api/tok2vec#pipe """ for docs in minibatch(stream, batch_size): docs = list(docs) @@ -121,7 +121,7 @@ class Tok2Vec(Pipe): docs (Iterable[Doc]): The documents to predict. RETURNS: Vector representations for each token in the documents. - DOCS: https://spacy.io/api/tok2vec#predict + DOCS: https://nightly.spacy.io/api/tok2vec#predict """ tokvecs = self.model.predict(docs) batch_id = Tok2VecListener.get_batch_id(docs) @@ -135,7 +135,7 @@ class Tok2Vec(Pipe): docs (Iterable[Doc]): The documents to modify. tokvecses: The tensors to set, produced by Tok2Vec.predict. - DOCS: https://spacy.io/api/tok2vec#set_annotations + DOCS: https://nightly.spacy.io/api/tok2vec#set_annotations """ for doc, tokvecs in zip(docs, tokvecses): assert tokvecs.shape[0] == len(doc) @@ -162,7 +162,7 @@ class Tok2Vec(Pipe): Updated using the component name as the key. RETURNS (Dict[str, float]): The updated losses dictionary. - DOCS: https://spacy.io/api/tok2vec#update + DOCS: https://nightly.spacy.io/api/tok2vec#update """ if losses is None: losses = {} @@ -220,7 +220,7 @@ class Tok2Vec(Pipe): create_optimizer if it doesn't exist. RETURNS (thinc.api.Optimizer): The optimizer. - DOCS: https://spacy.io/api/tok2vec#begin_training + DOCS: https://nightly.spacy.io/api/tok2vec#begin_training """ docs = [Doc(self.vocab, words=["hello"])] self.model.initialize(X=docs) diff --git a/spacy/scorer.py b/spacy/scorer.py index 9bbc64cac..9b1831a91 100644 --- a/spacy/scorer.py +++ b/spacy/scorer.py @@ -85,7 +85,7 @@ class Scorer: ) -> None: """Initialize the Scorer. - DOCS: https://spacy.io/api/scorer#init + DOCS: https://nightly.spacy.io/api/scorer#init """ self.nlp = nlp self.cfg = cfg @@ -101,7 +101,7 @@ class Scorer: examples (Iterable[Example]): The predicted annotations + correct annotations. RETURNS (Dict): A dictionary of scores. - DOCS: https://spacy.io/api/scorer#score + DOCS: https://nightly.spacy.io/api/scorer#score """ scores = {} if hasattr(self.nlp.tokenizer, "score"): @@ -121,7 +121,7 @@ class Scorer: RETURNS (Dict[str, float]): A dictionary containing the scores token_acc/p/r/f. - DOCS: https://spacy.io/api/scorer#score_tokenization + DOCS: https://nightly.spacy.io/api/scorer#score_tokenization """ acc_score = PRFScore() prf_score = PRFScore() @@ -169,7 +169,7 @@ class Scorer: RETURNS (Dict[str, float]): A dictionary containing the accuracy score under the key attr_acc. 
- DOCS: https://spacy.io/api/scorer#score_token_attr + DOCS: https://nightly.spacy.io/api/scorer#score_token_attr """ tag_score = PRFScore() for example in examples: @@ -263,7 +263,7 @@ class Scorer: RETURNS (Dict[str, Any]): A dictionary containing the PRF scores under the keys attr_p/r/f and the per-type PRF scores under attr_per_type. - DOCS: https://spacy.io/api/scorer#score_spans + DOCS: https://nightly.spacy.io/api/scorer#score_spans """ score = PRFScore() score_per_type = dict() @@ -350,7 +350,7 @@ class Scorer: attr_f_per_type, attr_auc_per_type - DOCS: https://spacy.io/api/scorer#score_cats + DOCS: https://nightly.spacy.io/api/scorer#score_cats """ if threshold is None: threshold = 0.5 if multi_label else 0.0 @@ -467,7 +467,7 @@ class Scorer: RETURNS (Dict[str, Any]): A dictionary containing the scores: attr_uas, attr_las, and attr_las_per_type. - DOCS: https://spacy.io/api/scorer#score_deps + DOCS: https://nightly.spacy.io/api/scorer#score_deps """ unlabelled = PRFScore() labelled = PRFScore() diff --git a/spacy/strings.pyx b/spacy/strings.pyx index 6a1d68221..cd442729c 100644 --- a/spacy/strings.pyx +++ b/spacy/strings.pyx @@ -91,7 +91,7 @@ cdef Utf8Str* _allocate(Pool mem, const unsigned char* chars, uint32_t length) e cdef class StringStore: """Look up strings by 64-bit hashes. - DOCS: https://spacy.io/api/stringstore + DOCS: https://nightly.spacy.io/api/stringstore """ def __init__(self, strings=None, freeze=False): """Create the StringStore. diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx index 759de90d3..5e7222d40 100644 --- a/spacy/tokenizer.pyx +++ b/spacy/tokenizer.pyx @@ -31,7 +31,7 @@ cdef class Tokenizer: """Segment text, and create Doc objects with the discovered segment boundaries. - DOCS: https://spacy.io/api/tokenizer + DOCS: https://nightly.spacy.io/api/tokenizer """ def __init__(self, Vocab vocab, rules=None, prefix_search=None, suffix_search=None, infix_finditer=None, token_match=None, @@ -54,7 +54,7 @@ cdef class Tokenizer: EXAMPLE: >>> tokenizer = Tokenizer(nlp.vocab) - DOCS: https://spacy.io/api/tokenizer#init + DOCS: https://nightly.spacy.io/api/tokenizer#init """ self.mem = Pool() self._cache = PreshMap() @@ -147,7 +147,7 @@ cdef class Tokenizer: string (str): The string to tokenize. RETURNS (Doc): A container for linguistic annotations. - DOCS: https://spacy.io/api/tokenizer#call + DOCS: https://nightly.spacy.io/api/tokenizer#call """ doc = self._tokenize_affixes(string, True) self._apply_special_cases(doc) @@ -209,7 +209,7 @@ cdef class Tokenizer: Defaults to 1000. YIELDS (Doc): A sequence of Doc objects, in order. - DOCS: https://spacy.io/api/tokenizer#pipe + DOCS: https://nightly.spacy.io/api/tokenizer#pipe """ for text in texts: yield self(text) @@ -529,7 +529,7 @@ cdef class Tokenizer: and `.end()` methods, denoting the placement of internal segment separators, e.g. hyphens. - DOCS: https://spacy.io/api/tokenizer#find_infix + DOCS: https://nightly.spacy.io/api/tokenizer#find_infix """ if self.infix_finditer is None: return 0 @@ -542,7 +542,7 @@ cdef class Tokenizer: string (str): The string to segment. RETURNS (int): The length of the prefix if present, otherwise `None`. - DOCS: https://spacy.io/api/tokenizer#find_prefix + DOCS: https://nightly.spacy.io/api/tokenizer#find_prefix """ if self.prefix_search is None: return 0 @@ -556,7 +556,7 @@ cdef class Tokenizer: string (str): The string to segment. Returns (int): The length of the suffix if present, otherwise `None`. 
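The tokenizer hooks whose links change above can be tried directly on a blank pipeline: add_special_case requires that the ORTH values concatenate back to the original string, and explain reports which rule produced each token:

    import spacy
    from spacy.attrs import ORTH

    nlp = spacy.blank("en")
    nlp.tokenizer.add_special_case("gimme", [{ORTH: "gim"}, {ORTH: "me"}])
    print([t.text for t in nlp("gimme that")])  # ['gim', 'me', 'that']
    print(nlp.tokenizer.explain("gimme that"))  # (pattern, substring) pairs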
- DOCS: https://spacy.io/api/tokenizer#find_suffix + DOCS: https://nightly.spacy.io/api/tokenizer#find_suffix """ if self.suffix_search is None: return 0 @@ -596,7 +596,7 @@ cdef class Tokenizer: a token and its attributes. The `ORTH` fields of the attributes must exactly match the string when they are concatenated. - DOCS: https://spacy.io/api/tokenizer#add_special_case + DOCS: https://nightly.spacy.io/api/tokenizer#add_special_case """ self._validate_special_case(string, substrings) substrings = list(substrings) @@ -635,7 +635,7 @@ cdef class Tokenizer: string (str): The string to tokenize. RETURNS (list): A list of (pattern_string, token_string) tuples - DOCS: https://spacy.io/api/tokenizer#explain + DOCS: https://nightly.spacy.io/api/tokenizer#explain """ prefix_search = self.prefix_search suffix_search = self.suffix_search @@ -716,7 +716,7 @@ cdef class Tokenizer: it doesn't exist. exclude (list): String names of serialization fields to exclude. - DOCS: https://spacy.io/api/tokenizer#to_disk + DOCS: https://nightly.spacy.io/api/tokenizer#to_disk """ path = util.ensure_path(path) with path.open("wb") as file_: @@ -730,7 +730,7 @@ cdef class Tokenizer: exclude (list): String names of serialization fields to exclude. RETURNS (Tokenizer): The modified `Tokenizer` object. - DOCS: https://spacy.io/api/tokenizer#from_disk + DOCS: https://nightly.spacy.io/api/tokenizer#from_disk """ path = util.ensure_path(path) with path.open("rb") as file_: @@ -744,7 +744,7 @@ cdef class Tokenizer: exclude (list): String names of serialization fields to exclude. RETURNS (bytes): The serialized form of the `Tokenizer` object. - DOCS: https://spacy.io/api/tokenizer#to_bytes + DOCS: https://nightly.spacy.io/api/tokenizer#to_bytes """ serializers = { "vocab": lambda: self.vocab.to_bytes(), @@ -764,7 +764,7 @@ cdef class Tokenizer: exclude (list): String names of serialization fields to exclude. RETURNS (Tokenizer): The `Tokenizer` object. - DOCS: https://spacy.io/api/tokenizer#from_bytes + DOCS: https://nightly.spacy.io/api/tokenizer#from_bytes """ data = {} deserializers = { diff --git a/spacy/tokens/_retokenize.pyx b/spacy/tokens/_retokenize.pyx index 8d57b791f..c5fac2299 100644 --- a/spacy/tokens/_retokenize.pyx +++ b/spacy/tokens/_retokenize.pyx @@ -24,8 +24,8 @@ from ..strings import get_string_id cdef class Retokenizer: """Helper class for doc.retokenize() context manager. - DOCS: https://spacy.io/api/doc#retokenize - USAGE: https://spacy.io/usage/linguistic-features#retokenization + DOCS: https://nightly.spacy.io/api/doc#retokenize + USAGE: https://nightly.spacy.io/usage/linguistic-features#retokenization """ cdef Doc doc cdef list merges @@ -47,7 +47,7 @@ cdef class Retokenizer: span (Span): The span to merge. attrs (dict): Attributes to set on the merged token. - DOCS: https://spacy.io/api/doc#retokenizer.merge + DOCS: https://nightly.spacy.io/api/doc#retokenizer.merge """ if (span.start, span.end) in self._spans_to_merge: return @@ -73,7 +73,7 @@ cdef class Retokenizer: attrs (dict): Attributes to set on all split tokens. Attribute names mapped to list of per-token attribute values. 
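The retokenize() context manager batches up merges and splits and applies them when the block exits, which is why spans are recorded first and resolved later. A typical merge:

    import spacy

    nlp = spacy.blank("en")
    doc = nlp("I live in New York")
    with doc.retokenize() as retokenizer:
        # merge the two tokens "New York" into one, setting its lemma
        retokenizer.merge(doc[3:5], attrs={"LEMMA": "new york"})
    print([t.text for t in doc])  # ['I', 'live', 'in', 'New York']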
- DOCS: https://spacy.io/api/doc#retokenizer.split + DOCS: https://nightly.spacy.io/api/doc#retokenizer.split """ if ''.join(orths) != token.text: raise ValueError(Errors.E117.format(new=''.join(orths), old=token.text)) diff --git a/spacy/tokens/_serialize.py b/spacy/tokens/_serialize.py index a257c7919..cd8c81939 100644 --- a/spacy/tokens/_serialize.py +++ b/spacy/tokens/_serialize.py @@ -61,7 +61,7 @@ class DocBin: store_user_data (bool): Whether to include the `Doc.user_data`. docs (Iterable[Doc]): Docs to add. - DOCS: https://spacy.io/api/docbin#init + DOCS: https://nightly.spacy.io/api/docbin#init """ attrs = sorted([intify_attr(attr) for attr in attrs]) self.version = "0.1" @@ -86,7 +86,7 @@ class DocBin: doc (Doc): The Doc object to add. - DOCS: https://spacy.io/api/docbin#add + DOCS: https://nightly.spacy.io/api/docbin#add """ array = doc.to_array(self.attrs) if len(array.shape) == 1: @@ -115,7 +115,7 @@ class DocBin: vocab (Vocab): The shared vocab. YIELDS (Doc): The Doc objects. - DOCS: https://spacy.io/api/docbin#get_docs + DOCS: https://nightly.spacy.io/api/docbin#get_docs """ for string in self.strings: vocab[string] @@ -141,7 +141,7 @@ class DocBin: other (DocBin): The DocBin to merge into the current bin. - DOCS: https://spacy.io/api/docbin#merge + DOCS: https://nightly.spacy.io/api/docbin#merge """ if self.attrs != other.attrs: raise ValueError(Errors.E166.format(current=self.attrs, other=other.attrs)) @@ -158,7 +158,7 @@ class DocBin: RETURNS (bytes): The serialized DocBin. - DOCS: https://spacy.io/api/docbin#to_bytes + DOCS: https://nightly.spacy.io/api/docbin#to_bytes """ for tokens in self.tokens: assert len(tokens.shape) == 2, tokens.shape # this should never happen @@ -185,7 +185,7 @@ class DocBin: bytes_data (bytes): The data to load from. RETURNS (DocBin): The loaded DocBin. - DOCS: https://spacy.io/api/docbin#from_bytes + DOCS: https://nightly.spacy.io/api/docbin#from_bytes """ msg = srsly.msgpack_loads(zlib.decompress(bytes_data)) self.attrs = msg["attrs"] @@ -211,7 +211,7 @@ class DocBin: path (str / Path): The file path. - DOCS: https://spacy.io/api/docbin#to_disk + DOCS: https://nightly.spacy.io/api/docbin#to_disk """ path = ensure_path(path) with path.open("wb") as file_: @@ -223,7 +223,7 @@ class DocBin: path (str / Path): The file path. RETURNS (DocBin): The loaded DocBin. - DOCS: https://spacy.io/api/docbin#to_disk + DOCS: https://nightly.spacy.io/api/docbin#to_disk """ path = ensure_path(path) with path.open("rb") as file_: diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 3c7b4f8b3..29bdf85ab 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -104,7 +104,7 @@ cdef class Doc: >>> from spacy.tokens import Doc >>> doc = Doc(nlp.vocab, words=["hello", "world", "!"], spaces=[True, False, False]) - DOCS: https://spacy.io/api/doc + DOCS: https://nightly.spacy.io/api/doc """ @classmethod @@ -118,8 +118,8 @@ cdef class Doc: method (callable): Optional method for method extension. force (bool): Force overwriting existing attribute. - DOCS: https://spacy.io/api/doc#set_extension - USAGE: https://spacy.io/usage/processing-pipelines#custom-components-attributes + DOCS: https://nightly.spacy.io/api/doc#set_extension + USAGE: https://nightly.spacy.io/usage/processing-pipelines#custom-components-attributes """ if cls.has_extension(name) and not kwargs.get("force", False): raise ValueError(Errors.E090.format(name=name, obj="Doc")) @@ -132,7 +132,7 @@ cdef class Doc: name (str): Name of the extension. 
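DocBin, whose serialization entry points change above, is the intended container for shipping many Doc objects efficiently; only the selected attributes are stored, and the docs are rebuilt against a shared vocab. A round-trip sketch:

    import spacy
    from spacy.tokens import DocBin

    nlp = spacy.blank("en")
    doc_bin = DocBin()            # pass attrs=[...] to control what is stored
    doc_bin.add(nlp("Hello world"))
    data = doc_bin.to_bytes()
    docs = list(DocBin().from_bytes(data).get_docs(nlp.vocab))
    print(docs[0].text)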
RETURNS (tuple): A `(default, method, getter, setter)` tuple. - DOCS: https://spacy.io/api/doc#get_extension + DOCS: https://nightly.spacy.io/api/doc#get_extension """ return Underscore.doc_extensions.get(name) @@ -143,7 +143,7 @@ cdef class Doc: name (str): Name of the extension. RETURNS (bool): Whether the extension has been registered. - DOCS: https://spacy.io/api/doc#has_extension + DOCS: https://nightly.spacy.io/api/doc#has_extension """ return name in Underscore.doc_extensions @@ -155,7 +155,7 @@ cdef class Doc: RETURNS (tuple): A `(default, method, getter, setter)` tuple of the removed extension. - DOCS: https://spacy.io/api/doc#remove_extension + DOCS: https://nightly.spacy.io/api/doc#remove_extension """ if not cls.has_extension(name): raise ValueError(Errors.E046.format(name=name)) @@ -173,7 +173,7 @@ cdef class Doc: it is not. If `None`, defaults to `[True]*len(words)` user_data (dict or None): Optional extra data to attach to the Doc. - DOCS: https://spacy.io/api/doc#init + DOCS: https://nightly.spacy.io/api/doc#init """ self.vocab = vocab size = max(20, (len(words) if words is not None else 0)) @@ -288,7 +288,7 @@ cdef class Doc: You can use negative indices and open-ended ranges, which have their normal Python semantics. - DOCS: https://spacy.io/api/doc#getitem + DOCS: https://nightly.spacy.io/api/doc#getitem """ if isinstance(i, slice): start, stop = normalize_slice(len(self), i.start, i.stop, i.step) @@ -305,7 +305,7 @@ cdef class Doc: than-Python speeds are required, you can instead access the annotations as a numpy array, or access the underlying C data directly from Cython. - DOCS: https://spacy.io/api/doc#iter + DOCS: https://nightly.spacy.io/api/doc#iter """ cdef int i for i in range(self.length): @@ -316,7 +316,7 @@ cdef class Doc: RETURNS (int): The number of tokens in the document. - DOCS: https://spacy.io/api/doc#len + DOCS: https://nightly.spacy.io/api/doc#len """ return self.length @@ -349,7 +349,7 @@ cdef class Doc: the span. RETURNS (Span): The newly constructed object. - DOCS: https://spacy.io/api/doc#char_span + DOCS: https://nightly.spacy.io/api/doc#char_span """ if not isinstance(label, int): label = self.vocab.strings.add(label) @@ -374,7 +374,7 @@ cdef class Doc: `Span`, `Token` and `Lexeme` objects. RETURNS (float): A scalar similarity score. Higher is more similar. - DOCS: https://spacy.io/api/doc#similarity + DOCS: https://nightly.spacy.io/api/doc#similarity """ if "similarity" in self.user_hooks: return self.user_hooks["similarity"](self, other) @@ -407,7 +407,7 @@ cdef class Doc: RETURNS (bool): Whether a word vector is associated with the object. - DOCS: https://spacy.io/api/doc#has_vector + DOCS: https://nightly.spacy.io/api/doc#has_vector """ if "has_vector" in self.user_hooks: return self.user_hooks["has_vector"](self) @@ -425,7 +425,7 @@ cdef class Doc: RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array representing the document's semantics. - DOCS: https://spacy.io/api/doc#vector + DOCS: https://nightly.spacy.io/api/doc#vector """ def __get__(self): if "vector" in self.user_hooks: @@ -453,7 +453,7 @@ cdef class Doc: RETURNS (float): The L2 norm of the vector representation. - DOCS: https://spacy.io/api/doc#vector_norm + DOCS: https://nightly.spacy.io/api/doc#vector_norm """ def __get__(self): if "vector_norm" in self.user_hooks: @@ -493,7 +493,7 @@ cdef class Doc: RETURNS (tuple): Entities in the document, one `Span` per entity. 
- DOCS: https://spacy.io/api/doc#ents + DOCS: https://nightly.spacy.io/api/doc#ents """ def __get__(self): cdef int i @@ -584,7 +584,7 @@ cdef class Doc: YIELDS (Span): Noun chunks in the document. - DOCS: https://spacy.io/api/doc#noun_chunks + DOCS: https://nightly.spacy.io/api/doc#noun_chunks """ # Accumulate the result before beginning to iterate over it. This @@ -609,7 +609,7 @@ cdef class Doc: YIELDS (Span): Sentences in the document. - DOCS: https://spacy.io/api/doc#sents + DOCS: https://nightly.spacy.io/api/doc#sents """ if not self.is_sentenced: raise ValueError(Errors.E030) @@ -722,7 +722,7 @@ cdef class Doc: attr_id (int): The attribute ID to key the counts. RETURNS (dict): A dictionary mapping attributes to integer counts. - DOCS: https://spacy.io/api/doc#count_by + DOCS: https://nightly.spacy.io/api/doc#count_by """ cdef int i cdef attr_t attr @@ -777,7 +777,7 @@ cdef class Doc: array (numpy.ndarray[ndim=2, dtype='int32']): The attribute values. RETURNS (Doc): Itself. - DOCS: https://spacy.io/api/doc#from_array + DOCS: https://nightly.spacy.io/api/doc#from_array """ # Handle scalar/list inputs of strings/ints for py_attr_ids # See also #3064 @@ -872,7 +872,7 @@ cdef class Doc: attrs (list): Optional list of attribute ID ints or attribute name strings. RETURNS (Doc): A doc that contains the concatenated docs, or None if no docs were given. - DOCS: https://spacy.io/api/doc#from_docs + DOCS: https://nightly.spacy.io/api/doc#from_docs """ if not docs: return None @@ -953,7 +953,7 @@ cdef class Doc: RETURNS (np.array[ndim=2, dtype=numpy.int32]): LCA matrix with shape (n, n), where n = len(self). - DOCS: https://spacy.io/api/doc#get_lca_matrix + DOCS: https://nightly.spacy.io/api/doc#get_lca_matrix """ return numpy.asarray(_get_lca_matrix(self, 0, len(self))) @@ -987,7 +987,7 @@ cdef class Doc: it doesn't exist. Paths may be either strings or Path-like objects. exclude (Iterable[str]): String names of serialization fields to exclude. - DOCS: https://spacy.io/api/doc#to_disk + DOCS: https://nightly.spacy.io/api/doc#to_disk """ path = util.ensure_path(path) with path.open("wb") as file_: @@ -1002,7 +1002,7 @@ cdef class Doc: exclude (list): String names of serialization fields to exclude. RETURNS (Doc): The modified `Doc` object. - DOCS: https://spacy.io/api/doc#from_disk + DOCS: https://nightly.spacy.io/api/doc#from_disk """ path = util.ensure_path(path) with path.open("rb") as file_: @@ -1016,7 +1016,7 @@ cdef class Doc: RETURNS (bytes): A losslessly serialized copy of the `Doc`, including all annotations. - DOCS: https://spacy.io/api/doc#to_bytes + DOCS: https://nightly.spacy.io/api/doc#to_bytes """ return srsly.msgpack_dumps(self.to_dict(exclude=exclude)) @@ -1027,7 +1027,7 @@ cdef class Doc: exclude (list): String names of serialization fields to exclude. RETURNS (Doc): Itself. - DOCS: https://spacy.io/api/doc#from_bytes + DOCS: https://nightly.spacy.io/api/doc#from_bytes """ return self.from_dict(srsly.msgpack_loads(bytes_data), exclude=exclude) @@ -1038,7 +1038,7 @@ cdef class Doc: RETURNS (bytes): A losslessly serialized copy of the `Doc`, including all annotations. - DOCS: https://spacy.io/api/doc#to_bytes + DOCS: https://nightly.spacy.io/api/doc#to_bytes """ array_head = [LENGTH, SPACY, LEMMA, ENT_IOB, ENT_TYPE, ENT_ID, NORM, ENT_KB_ID] if self.is_tagged: @@ -1086,7 +1086,7 @@ cdef class Doc: exclude (list): String names of serialization fields to exclude. RETURNS (Doc): Itself. 
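Doc.from_docs, listed above, concatenates documents that share a vocab into a single Doc, which is handy after processing a text in chunks. For instance:

    import spacy
    from spacy.tokens import Doc

    nlp = spacy.blank("en")
    doc1 = nlp("Hello world.")
    doc2 = nlp("Goodbye.")
    merged = Doc.from_docs([doc1, doc2])  # ensures whitespace between docs by default
    print(merged.text)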
- DOCS: https://spacy.io/api/doc#from_dict + DOCS: https://nightly.spacy.io/api/doc#from_dict """ if self.length != 0: raise ValueError(Errors.E033.format(length=self.length)) @@ -1166,8 +1166,8 @@ cdef class Doc: retokenization are invalidated, although they may accidentally continue to work. - DOCS: https://spacy.io/api/doc#retokenize - USAGE: https://spacy.io/usage/linguistic-features#retokenization + DOCS: https://nightly.spacy.io/api/doc#retokenize + USAGE: https://nightly.spacy.io/usage/linguistic-features#retokenization """ return Retokenizer(self) @@ -1202,7 +1202,7 @@ cdef class Doc: be added to an "_" key in the data, e.g. "_": {"foo": "bar"}. RETURNS (dict): The data in spaCy's JSON format. - DOCS: https://spacy.io/api/doc#to_json + DOCS: https://nightly.spacy.io/api/doc#to_json """ data = {"text": self.text} if self.is_nered: diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx index 15e6518d6..f06f3307d 100644 --- a/spacy/tokens/span.pyx +++ b/spacy/tokens/span.pyx @@ -27,7 +27,7 @@ from .underscore import Underscore, get_ext_args cdef class Span: """A slice from a Doc object. - DOCS: https://spacy.io/api/span + DOCS: https://nightly.spacy.io/api/span """ @classmethod def set_extension(cls, name, **kwargs): @@ -40,8 +40,8 @@ cdef class Span: method (callable): Optional method for method extension. force (bool): Force overwriting existing attribute. - DOCS: https://spacy.io/api/span#set_extension - USAGE: https://spacy.io/usage/processing-pipelines#custom-components-attributes + DOCS: https://nightly.spacy.io/api/span#set_extension + USAGE: https://nightly.spacy.io/usage/processing-pipelines#custom-components-attributes """ if cls.has_extension(name) and not kwargs.get("force", False): raise ValueError(Errors.E090.format(name=name, obj="Span")) @@ -54,7 +54,7 @@ cdef class Span: name (str): Name of the extension. RETURNS (tuple): A `(default, method, getter, setter)` tuple. - DOCS: https://spacy.io/api/span#get_extension + DOCS: https://nightly.spacy.io/api/span#get_extension """ return Underscore.span_extensions.get(name) @@ -65,7 +65,7 @@ cdef class Span: name (str): Name of the extension. RETURNS (bool): Whether the extension has been registered. - DOCS: https://spacy.io/api/span#has_extension + DOCS: https://nightly.spacy.io/api/span#has_extension """ return name in Underscore.span_extensions @@ -77,7 +77,7 @@ cdef class Span: RETURNS (tuple): A `(default, method, getter, setter)` tuple of the removed extension. - DOCS: https://spacy.io/api/span#remove_extension + DOCS: https://nightly.spacy.io/api/span#remove_extension """ if not cls.has_extension(name): raise ValueError(Errors.E046.format(name=name)) @@ -95,7 +95,7 @@ cdef class Span: vector (ndarray[ndim=1, dtype='float32']): A meaning representation of the span. - DOCS: https://spacy.io/api/span#init + DOCS: https://nightly.spacy.io/api/span#init """ if not (0 <= start <= end <= len(doc)): raise IndexError(Errors.E035.format(start=start, end=end, length=len(doc))) @@ -151,7 +151,7 @@ cdef class Span: RETURNS (int): The number of tokens in the span. - DOCS: https://spacy.io/api/span#len + DOCS: https://nightly.spacy.io/api/span#len """ self._recalculate_indices() if self.end < self.start: @@ -168,7 +168,7 @@ cdef class Span: the span to get. RETURNS (Token or Span): The token at `span[i]`. - DOCS: https://spacy.io/api/span#getitem + DOCS: https://nightly.spacy.io/api/span#getitem """ self._recalculate_indices() if isinstance(i, slice): @@ -189,7 +189,7 @@ cdef class Span: YIELDS (Token): A `Token` object. 
- DOCS: https://spacy.io/api/span#iter + DOCS: https://nightly.spacy.io/api/span#iter """ self._recalculate_indices() for i in range(self.start, self.end): @@ -210,7 +210,7 @@ cdef class Span: copy_user_data (bool): Whether or not to copy the original doc's user data. RETURNS (Doc): The `Doc` copy of the span. - DOCS: https://spacy.io/api/span#as_doc + DOCS: https://nightly.spacy.io/api/span#as_doc """ # TODO: make copy_user_data a keyword-only argument (Python 3 only) words = [t.text for t in self] @@ -292,7 +292,7 @@ cdef class Span: RETURNS (np.array[ndim=2, dtype=numpy.int32]): LCA matrix with shape (n, n), where n = len(self). - DOCS: https://spacy.io/api/span#get_lca_matrix + DOCS: https://nightly.spacy.io/api/span#get_lca_matrix """ return numpy.asarray(_get_lca_matrix(self.doc, self.start, self.end)) @@ -304,7 +304,7 @@ cdef class Span: `Span`, `Token` and `Lexeme` objects. RETURNS (float): A scalar similarity score. Higher is more similar. - DOCS: https://spacy.io/api/span#similarity + DOCS: https://nightly.spacy.io/api/span#similarity """ if "similarity" in self.doc.user_span_hooks: return self.doc.user_span_hooks["similarity"](self, other) @@ -400,7 +400,7 @@ cdef class Span: RETURNS (tuple): Entities in the span, one `Span` per entity. - DOCS: https://spacy.io/api/span#ents + DOCS: https://nightly.spacy.io/api/span#ents """ ents = [] for ent in self.doc.ents: @@ -415,7 +415,7 @@ cdef class Span: RETURNS (bool): Whether a word vector is associated with the object. - DOCS: https://spacy.io/api/span#has_vector + DOCS: https://nightly.spacy.io/api/span#has_vector """ if "has_vector" in self.doc.user_span_hooks: return self.doc.user_span_hooks["has_vector"](self) @@ -434,7 +434,7 @@ cdef class Span: RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array representing the span's semantics. - DOCS: https://spacy.io/api/span#vector + DOCS: https://nightly.spacy.io/api/span#vector """ if "vector" in self.doc.user_span_hooks: return self.doc.user_span_hooks["vector"](self) @@ -448,7 +448,7 @@ cdef class Span: RETURNS (float): The L2 norm of the vector representation. - DOCS: https://spacy.io/api/span#vector_norm + DOCS: https://nightly.spacy.io/api/span#vector_norm """ if "vector_norm" in self.doc.user_span_hooks: return self.doc.user_span_hooks["vector"](self) @@ -508,7 +508,7 @@ cdef class Span: YIELDS (Span): Base noun-phrase `Span` objects. - DOCS: https://spacy.io/api/span#noun_chunks + DOCS: https://nightly.spacy.io/api/span#noun_chunks """ if not self.doc.is_parsed: raise ValueError(Errors.E029) @@ -533,7 +533,7 @@ cdef class Span: RETURNS (Token): The root token. - DOCS: https://spacy.io/api/span#root + DOCS: https://nightly.spacy.io/api/span#root """ self._recalculate_indices() if "root" in self.doc.user_span_hooks: @@ -590,7 +590,7 @@ cdef class Span: RETURNS (tuple): A tuple of Token objects. - DOCS: https://spacy.io/api/span#lefts + DOCS: https://nightly.spacy.io/api/span#lefts """ return self.root.conjuncts @@ -601,7 +601,7 @@ cdef class Span: YIELDS (Token):A left-child of a token of the span. - DOCS: https://spacy.io/api/span#lefts + DOCS: https://nightly.spacy.io/api/span#lefts """ for token in reversed(self): # Reverse, so we get tokens in order for left in token.lefts: @@ -615,7 +615,7 @@ cdef class Span: YIELDS (Token): A right-child of a token of the span. 
- DOCS: https://spacy.io/api/span#rights + DOCS: https://nightly.spacy.io/api/span#rights """ for token in self: for right in token.rights: @@ -630,7 +630,7 @@ cdef class Span: RETURNS (int): The number of leftward immediate children of the span, in the syntactic dependency parse. - DOCS: https://spacy.io/api/span#n_lefts + DOCS: https://nightly.spacy.io/api/span#n_lefts """ return len(list(self.lefts)) @@ -642,7 +642,7 @@ cdef class Span: RETURNS (int): The number of rightward immediate children of the span, in the syntactic dependency parse. - DOCS: https://spacy.io/api/span#n_rights + DOCS: https://nightly.spacy.io/api/span#n_rights """ return len(list(self.rights)) @@ -652,7 +652,7 @@ cdef class Span: YIELDS (Token): A token within the span, or a descendant from it. - DOCS: https://spacy.io/api/span#subtree + DOCS: https://nightly.spacy.io/api/span#subtree """ for word in self.lefts: yield from word.subtree diff --git a/spacy/tokens/token.pyx b/spacy/tokens/token.pyx index 8afde60ee..50f1c5da3 100644 --- a/spacy/tokens/token.pyx +++ b/spacy/tokens/token.pyx @@ -30,7 +30,7 @@ cdef class Token: """An individual token – i.e. a word, punctuation symbol, whitespace, etc. - DOCS: https://spacy.io/api/token + DOCS: https://nightly.spacy.io/api/token """ @classmethod def set_extension(cls, name, **kwargs): @@ -43,8 +43,8 @@ cdef class Token: method (callable): Optional method for method extension. force (bool): Force overwriting existing attribute. - DOCS: https://spacy.io/api/token#set_extension - USAGE: https://spacy.io/usage/processing-pipelines#custom-components-attributes + DOCS: https://nightly.spacy.io/api/token#set_extension + USAGE: https://nightly.spacy.io/usage/processing-pipelines#custom-components-attributes """ if cls.has_extension(name) and not kwargs.get("force", False): raise ValueError(Errors.E090.format(name=name, obj="Token")) @@ -57,7 +57,7 @@ cdef class Token: name (str): Name of the extension. RETURNS (tuple): A `(default, method, getter, setter)` tuple. - DOCS: https://spacy.io/api/token#get_extension + DOCS: https://nightly.spacy.io/api/token#get_extension """ return Underscore.token_extensions.get(name) @@ -68,7 +68,7 @@ cdef class Token: name (str): Name of the extension. RETURNS (bool): Whether the extension has been registered. - DOCS: https://spacy.io/api/token#has_extension + DOCS: https://nightly.spacy.io/api/token#has_extension """ return name in Underscore.token_extensions @@ -80,7 +80,7 @@ cdef class Token: RETURNS (tuple): A `(default, method, getter, setter)` tuple of the removed extension. - DOCS: https://spacy.io/api/token#remove_extension + DOCS: https://nightly.spacy.io/api/token#remove_extension """ if not cls.has_extension(name): raise ValueError(Errors.E046.format(name=name)) @@ -93,7 +93,7 @@ cdef class Token: doc (Doc): The parent document. offset (int): The index of the token within the document. - DOCS: https://spacy.io/api/token#init + DOCS: https://nightly.spacy.io/api/token#init """ self.vocab = vocab self.doc = doc @@ -108,7 +108,7 @@ cdef class Token: RETURNS (int): The number of unicode characters in the token. - DOCS: https://spacy.io/api/token#len + DOCS: https://nightly.spacy.io/api/token#len """ return self.c.lex.length @@ -171,7 +171,7 @@ cdef class Token: flag_id (int): The ID of the flag attribute. RETURNS (bool): Whether the flag is set. 
- DOCS: https://spacy.io/api/token#check_flag + DOCS: https://nightly.spacy.io/api/token#check_flag """ return Lexeme.c_check_flag(self.c.lex, flag_id) @@ -181,7 +181,7 @@ cdef class Token: i (int): The relative position of the token to get. Defaults to 1. RETURNS (Token): The token at position `self.doc[self.i+i]`. - DOCS: https://spacy.io/api/token#nbor + DOCS: https://nightly.spacy.io/api/token#nbor """ if self.i+i < 0 or (self.i+i >= len(self.doc)): raise IndexError(Errors.E042.format(i=self.i, j=i, length=len(self.doc))) @@ -195,7 +195,7 @@ cdef class Token: `Span`, `Token` and `Lexeme` objects. RETURNS (float): A scalar similarity score. Higher is more similar. - DOCS: https://spacy.io/api/token#similarity + DOCS: https://nightly.spacy.io/api/token#similarity """ if "similarity" in self.doc.user_token_hooks: return self.doc.user_token_hooks["similarity"](self, other) @@ -373,7 +373,7 @@ cdef class Token: RETURNS (bool): Whether a word vector is associated with the object. - DOCS: https://spacy.io/api/token#has_vector + DOCS: https://nightly.spacy.io/api/token#has_vector """ if "has_vector" in self.doc.user_token_hooks: return self.doc.user_token_hooks["has_vector"](self) @@ -388,7 +388,7 @@ cdef class Token: RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array representing the token's semantics. - DOCS: https://spacy.io/api/token#vector + DOCS: https://nightly.spacy.io/api/token#vector """ if "vector" in self.doc.user_token_hooks: return self.doc.user_token_hooks["vector"](self) @@ -403,7 +403,7 @@ cdef class Token: RETURNS (float): The L2 norm of the vector representation. - DOCS: https://spacy.io/api/token#vector_norm + DOCS: https://nightly.spacy.io/api/token#vector_norm """ if "vector_norm" in self.doc.user_token_hooks: return self.doc.user_token_hooks["vector_norm"](self) @@ -426,7 +426,7 @@ cdef class Token: RETURNS (int): The number of leftward immediate children of the word, in the syntactic dependency parse. - DOCS: https://spacy.io/api/token#n_lefts + DOCS: https://nightly.spacy.io/api/token#n_lefts """ return self.c.l_kids @@ -438,7 +438,7 @@ cdef class Token: RETURNS (int): The number of rightward immediate children of the word, in the syntactic dependency parse. - DOCS: https://spacy.io/api/token#n_rights + DOCS: https://nightly.spacy.io/api/token#n_rights """ return self.c.r_kids @@ -470,7 +470,7 @@ cdef class Token: RETURNS (bool / None): Whether the token starts a sentence. None if unknown. - DOCS: https://spacy.io/api/token#is_sent_start + DOCS: https://nightly.spacy.io/api/token#is_sent_start """ def __get__(self): if self.c.sent_start == 0: @@ -499,7 +499,7 @@ cdef class Token: RETURNS (bool / None): Whether the token ends a sentence. None if unknown. - DOCS: https://spacy.io/api/token#is_sent_end + DOCS: https://nightly.spacy.io/api/token#is_sent_end """ def __get__(self): if self.i + 1 == len(self.doc): @@ -521,7 +521,7 @@ cdef class Token: YIELDS (Token): A left-child of the token. - DOCS: https://spacy.io/api/token#lefts + DOCS: https://nightly.spacy.io/api/token#lefts """ cdef int nr_iter = 0 cdef const TokenC* ptr = self.c - (self.i - self.c.l_edge) @@ -541,7 +541,7 @@ cdef class Token: YIELDS (Token): A right-child of the token. - DOCS: https://spacy.io/api/token#rights + DOCS: https://nightly.spacy.io/api/token#rights """ cdef const TokenC* ptr = self.c + (self.c.r_edge - self.i) tokens = [] @@ -563,7 +563,7 @@ cdef class Token: YIELDS (Token): A child token such that `child.head==self`. 
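Most Token attributes above are views onto the parent Doc; len(token) counts unicode characters, and nbor() steps to a neighbouring token, raising IndexError past the document bounds. For example:

    import spacy

    nlp = spacy.blank("en")
    doc = nlp("Give it back")
    token = doc[0]
    print(len(token))          # 4 characters in 'Give'
    print(token.nbor(1).text)  # 'it'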
@@ -563,7 +563,7 @@ cdef class Token:

        YIELDS (Token): A child token such that `child.head==self`.

- DOCS: https://spacy.io/api/token#children
+ DOCS: https://nightly.spacy.io/api/token#children
        """
        yield from self.lefts
        yield from self.rights
@@ -576,7 +576,7 @@ cdef class Token:
        YIELDS (Token): A descendent token such that
            `self.is_ancestor(descendent) or token == self`.

- DOCS: https://spacy.io/api/token#subtree
+ DOCS: https://nightly.spacy.io/api/token#subtree
        """
        for word in self.lefts:
            yield from word.subtree
@@ -607,7 +607,7 @@ cdef class Token:
        YIELDS (Token): A sequence of ancestor tokens such that
            `ancestor.is_ancestor(self)`.

- DOCS: https://spacy.io/api/token#ancestors
+ DOCS: https://nightly.spacy.io/api/token#ancestors
        """
        cdef const TokenC* head_ptr = self.c
        # Guard against infinite loop, no token can have
@@ -625,7 +625,7 @@ cdef class Token:
        descendant (Token): Another token.
        RETURNS (bool): Whether this token is the ancestor of the descendant.

- DOCS: https://spacy.io/api/token#is_ancestor
+ DOCS: https://nightly.spacy.io/api/token#is_ancestor
        """
        if self.doc is not descendant.doc:
            return False
@@ -729,7 +729,7 @@ cdef class Token:

        RETURNS (tuple): The coordinated tokens.

- DOCS: https://spacy.io/api/token#conjuncts
+ DOCS: https://nightly.spacy.io/api/token#conjuncts
        """
        cdef Token word, child
        if "conjuncts" in self.doc.user_token_hooks:
diff --git a/spacy/vectors.pyx b/spacy/vectors.pyx
index bcea87e67..ae2508c87 100644
--- a/spacy/vectors.pyx
+++ b/spacy/vectors.pyx
@@ -44,7 +44,7 @@ cdef class Vectors:
    the table need to be assigned - so len(list(vectors.keys())) may be
    greater or smaller than vectors.shape[0].

- DOCS: https://spacy.io/api/vectors
+ DOCS: https://nightly.spacy.io/api/vectors
    """
    cdef public object name
    cdef public object data
@@ -59,7 +59,7 @@ cdef class Vectors:
        keys (iterable): A sequence of keys, aligned with the data.
        name (str): A name to identify the vectors table.

- DOCS: https://spacy.io/api/vectors#init
+ DOCS: https://nightly.spacy.io/api/vectors#init
        """
        self.name = name
        if data is None:
@@ -83,7 +83,7 @@ cdef class Vectors:

        RETURNS (tuple): A `(rows, dims)` pair.

- DOCS: https://spacy.io/api/vectors#shape
+ DOCS: https://nightly.spacy.io/api/vectors#shape
        """
        return self.data.shape

@@ -93,7 +93,7 @@ cdef class Vectors:

        RETURNS (int): The vector size.

- DOCS: https://spacy.io/api/vectors#size
+ DOCS: https://nightly.spacy.io/api/vectors#size
        """
        return self.data.shape[0] * self.data.shape[1]

@@ -103,7 +103,7 @@ cdef class Vectors:

        RETURNS (bool): `True` if no slots are available for new keys.

- DOCS: https://spacy.io/api/vectors#is_full
+ DOCS: https://nightly.spacy.io/api/vectors#is_full
        """
        return self._unset.size() == 0

@@ -114,7 +114,7 @@ cdef class Vectors:

        RETURNS (int): The number of keys in the table.

- DOCS: https://spacy.io/api/vectors#n_keys
+ DOCS: https://nightly.spacy.io/api/vectors#n_keys
        """
        return len(self.key2row)

@@ -127,7 +127,7 @@ cdef class Vectors:
        key (int): The key to get the vector for.
        RETURNS (ndarray): The vector for the key.

- DOCS: https://spacy.io/api/vectors#getitem
+ DOCS: https://nightly.spacy.io/api/vectors#getitem
        """
        i = self.key2row[key]
        if i is None:
@@ -141,7 +141,7 @@ cdef class Vectors:
        key (int): The key to set the vector for.
        vector (ndarray): The vector to set.

- DOCS: https://spacy.io/api/vectors#setitem
+ DOCS: https://nightly.spacy.io/api/vectors#setitem
        """
        i = self.key2row[key]
        self.data[i] = vector
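The `Vectors` hunks above describe a key-to-row table over a 2D array: `shape` is `(rows, dims)`, `size` is their product, `n_keys` counts table entries rather than rows, and `is_full` reports whether unassigned rows remain. A sketch of that bookkeeping; the words and dimensions are made up for illustration:

    import numpy
    from spacy.strings import get_string_id
    from spacy.vectors import Vectors

    data = numpy.zeros((3, 5), dtype="f")
    keys = [get_string_id(word) for word in ("apple", "orange", "pear")]
    vectors = Vectors(data=data, keys=keys, name="demo_vectors")

    assert vectors.shape == (3, 5)  # (rows, dims)
    assert vectors.size == 15       # rows * dims
    assert vectors.n_keys == 3      # entries in the key table, not rows
    assert vectors.is_full          # every row has a key assigned
    vectors[keys[0]] = numpy.ones((5,), dtype="f")  # __setitem__ writes a row
    assert vectors[keys[0]].sum() == 5.0            # __getitem__ reads it back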
@@ -153,7 +153,7 @@ cdef class Vectors:

        YIELDS (int): A key in the table.

- DOCS: https://spacy.io/api/vectors#iter
+ DOCS: https://nightly.spacy.io/api/vectors#iter
        """
        yield from self.key2row

@@ -162,7 +162,7 @@ cdef class Vectors:

        RETURNS (int): The number of vectors in the data.

- DOCS: https://spacy.io/api/vectors#len
+ DOCS: https://nightly.spacy.io/api/vectors#len
        """
        return self.data.shape[0]

@@ -172,7 +172,7 @@ cdef class Vectors:
        key (int): The key to check.
        RETURNS (bool): Whether the key has a vector entry.

- DOCS: https://spacy.io/api/vectors#contains
+ DOCS: https://nightly.spacy.io/api/vectors#contains
        """
        return key in self.key2row

@@ -189,7 +189,7 @@ cdef class Vectors:
        inplace (bool): Reallocate the memory.
        RETURNS (list): The removed items as a list of `(key, row)` tuples.

- DOCS: https://spacy.io/api/vectors#resize
+ DOCS: https://nightly.spacy.io/api/vectors#resize
        """
        xp = get_array_module(self.data)
        if inplace:
@@ -224,7 +224,7 @@ cdef class Vectors:

        YIELDS (ndarray): A vector in the table.

- DOCS: https://spacy.io/api/vectors#values
+ DOCS: https://nightly.spacy.io/api/vectors#values
        """
        for row, vector in enumerate(range(self.data.shape[0])):
            if not self._unset.count(row):
@@ -235,7 +235,7 @@ cdef class Vectors:

        YIELDS (tuple): A key/vector pair.

- DOCS: https://spacy.io/api/vectors#items
+ DOCS: https://nightly.spacy.io/api/vectors#items
        """
        for key, row in self.key2row.items():
            yield key, self.data[row]
@@ -281,7 +281,7 @@ cdef class Vectors:
        row (int / None): The row number of a vector to map the key to.
        RETURNS (int): The row the vector was added to.

- DOCS: https://spacy.io/api/vectors#add
+ DOCS: https://nightly.spacy.io/api/vectors#add
        """
        # use int for all keys and rows in key2row for more efficient access
        # and serialization
@@ -368,7 +368,7 @@ cdef class Vectors:
        path (str / Path): A path to a directory, which will be created if
            it doesn't exists.

- DOCS: https://spacy.io/api/vectors#to_disk
+ DOCS: https://nightly.spacy.io/api/vectors#to_disk
        """
        xp = get_array_module(self.data)
        if xp is numpy:
@@ -396,7 +396,7 @@ cdef class Vectors:
        path (str / Path): Directory path, string or Path-like object.
        RETURNS (Vectors): The modified object.

- DOCS: https://spacy.io/api/vectors#from_disk
+ DOCS: https://nightly.spacy.io/api/vectors#from_disk
        """
        def load_key2row(path):
            if path.exists():
@@ -432,7 +432,7 @@ cdef class Vectors:
        exclude (list): String names of serialization fields to exclude.
        RETURNS (bytes): The serialized form of the `Vectors` object.

- DOCS: https://spacy.io/api/vectors#to_bytes
+ DOCS: https://nightly.spacy.io/api/vectors#to_bytes
        """
        def serialize_weights():
            if hasattr(self.data, "to_bytes"):
@@ -453,7 +453,7 @@ cdef class Vectors:
        exclude (list): String names of serialization fields to exclude.
        RETURNS (Vectors): The `Vectors` object.

- DOCS: https://spacy.io/api/vectors#from_bytes
+ DOCS: https://nightly.spacy.io/api/vectors#from_bytes
        """
        def deserialize_weights(b):
            if hasattr(self.data, "from_bytes"):
diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx
index 9e14f37d2..ef0847e54 100644
--- a/spacy/vocab.pyx
+++ b/spacy/vocab.pyx
@@ -54,7 +54,7 @@ cdef class Vocab:
    instance also provides access to the `StringStore`, and owns underlying
    C-data that is shared between `Doc` objects.

- DOCS: https://spacy.io/api/vocab
+ DOCS: https://nightly.spacy.io/api/vocab
    """
    def __init__(self, lex_attr_getters=None, strings=tuple(), lookups=None,
                 oov_prob=-20., vectors_name=None, writing_system={},
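The `vectors.pyx` hunks above also cover growth and serialization: `add` returns the row a key was mapped to, `resize` returns the dropped `(key, row)` pairs, and `to_bytes`/`from_bytes` round-trip the whole table. A hedged sketch of that lifecycle; the key and the shapes are illustrative:

    import numpy
    from spacy.strings import get_string_id
    from spacy.vectors import Vectors

    vectors = Vectors(shape=(2, 4))  # two unassigned rows of width 4
    row = vectors.add("cat", vector=numpy.ones((4,), dtype="f"))
    assert get_string_id("cat") in vectors  # keys are stored as hashes
    assert not vectors.is_full              # one row is still unassigned
    assert len(vectors) == 2                # __len__ counts rows, not keys

    data = vectors.to_bytes()
    restored = Vectors(shape=(2, 4)).from_bytes(data)
    assert restored.n_keys == 1
    assert restored[get_string_id("cat")].sum() == 4.0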
@@ -117,7 +117,7 @@ cdef class Vocab:
            available bit will be chosen.
        RETURNS (int): The integer ID by which the flag value can be checked.

- DOCS: https://spacy.io/api/vocab#add_flag
+ DOCS: https://nightly.spacy.io/api/vocab#add_flag
        """
        if flag_id == -1:
            for bit in range(1, 64):
@@ -201,7 +201,7 @@ cdef class Vocab:
        string (unicode): The ID string.
        RETURNS (bool) Whether the string has an entry in the vocabulary.

- DOCS: https://spacy.io/api/vocab#contains
+ DOCS: https://nightly.spacy.io/api/vocab#contains
        """
        cdef hash_t int_key
        if isinstance(key, bytes):
@@ -218,7 +218,7 @@ cdef class Vocab:

        YIELDS (Lexeme): An entry in the vocabulary.

- DOCS: https://spacy.io/api/vocab#iter
+ DOCS: https://nightly.spacy.io/api/vocab#iter
        """
        cdef attr_t key
        cdef size_t addr
@@ -241,7 +241,7 @@ cdef class Vocab:
        >>> apple = nlp.vocab.strings["apple"]
        >>> assert nlp.vocab[apple] == nlp.vocab[u"apple"]

- DOCS: https://spacy.io/api/vocab#getitem
+ DOCS: https://nightly.spacy.io/api/vocab#getitem
        """
        cdef attr_t orth
        if isinstance(id_or_string, unicode):
@@ -309,7 +309,7 @@ cdef class Vocab:
            word was mapped to, and `score` the similarity score between the
            two words.

- DOCS: https://spacy.io/api/vocab#prune_vectors
+ DOCS: https://nightly.spacy.io/api/vocab#prune_vectors
        """
        xp = get_array_module(self.vectors.data)
        # Make prob negative so it sorts by rank ascending
@@ -349,7 +349,7 @@ cdef class Vocab:
            and shape determined by the `vocab.vectors` instance. Usually, a
            numpy ndarray of shape (300,) and dtype float32.

- DOCS: https://spacy.io/api/vocab#get_vector
+ DOCS: https://nightly.spacy.io/api/vocab#get_vector
        """
        if isinstance(orth, str):
            orth = self.strings.add(orth)
@@ -396,7 +396,7 @@ cdef class Vocab:
        orth (int / unicode): The word.
        vector (numpy.ndarray[ndim=1, dtype='float32']): The vector to set.

- DOCS: https://spacy.io/api/vocab#set_vector
+ DOCS: https://nightly.spacy.io/api/vocab#set_vector
        """
        if isinstance(orth, str):
            orth = self.strings.add(orth)
@@ -418,7 +418,7 @@ cdef class Vocab:
        orth (int / unicode): The word.
        RETURNS (bool): Whether the word has a vector.

- DOCS: https://spacy.io/api/vocab#has_vector
+ DOCS: https://nightly.spacy.io/api/vocab#has_vector
        """
        if isinstance(orth, str):
            orth = self.strings.add(orth)
@@ -431,7 +431,7 @@ cdef class Vocab:
            it doesn't exist.
        exclude (list): String names of serialization fields to exclude.

- DOCS: https://spacy.io/api/vocab#to_disk
+ DOCS: https://nightly.spacy.io/api/vocab#to_disk
        """
        path = util.ensure_path(path)
        if not path.exists():
@@ -452,7 +452,7 @@ cdef class Vocab:
        exclude (list): String names of serialization fields to exclude.
        RETURNS (Vocab): The modified `Vocab` object.

- DOCS: https://spacy.io/api/vocab#to_disk
+ DOCS: https://nightly.spacy.io/api/vocab#from_disk
        """
        path = util.ensure_path(path)
        getters = ["strings", "vectors"]
@@ -477,7 +477,7 @@ cdef class Vocab:
        exclude (list): String names of serialization fields to exclude.
        RETURNS (bytes): The serialized form of the `Vocab` object.

- DOCS: https://spacy.io/api/vocab#to_bytes
+ DOCS: https://nightly.spacy.io/api/vocab#to_bytes
        """
        def deserialize_vectors():
            if self.vectors is None:
@@ -499,7 +499,7 @@ cdef class Vocab:
        exclude (list): String names of serialization fields to exclude.
        RETURNS (Vocab): The `Vocab` object.

- DOCS: https://spacy.io/api/vocab#from_bytes
+ DOCS: https://nightly.spacy.io/api/vocab#from_bytes
        """
        def serialize_vectors(b):
            if self.vectors is None:
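The `Vocab` hunks tie the same ideas together at the container level: `__getitem__` and `__contains__` accept strings or hashes, the vector accessors delegate to the shared vectors table, and `to_bytes`/`from_bytes` serialize the whole object. A minimal sketch against a blank pipeline; the 300-dimensional width is an illustrative assumption:

    import numpy
    import spacy

    nlp = spacy.blank("en")
    vocab = nlp.vocab

    lexeme = vocab["apple"]     # __getitem__ takes a string or a hash
    assert "apple" in vocab     # __contains__ hashes string keys first
    assert vocab[lexeme.orth].orth_ == "apple"

    # set_vector grows the underlying vectors table as needed.
    vocab.set_vector("apple", numpy.ones((300,), dtype="f"))
    assert vocab.has_vector("apple")
    assert vocab.get_vector("apple").shape == (300,)

    data = vocab.to_bytes()     # byte-level round trip
    vocab.from_bytes(data)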