diff --git a/spacy/cli/find_threshold.py b/spacy/cli/find_threshold.py index 6d591053d..875978eee 100644 --- a/spacy/cli/find_threshold.py +++ b/spacy/cli/find_threshold.py @@ -39,7 +39,7 @@ def find_threshold_cli( # fmt: on ): """ - Runs prediction trials for a trained model with varying tresholds to maximize + Runs prediction trials for a trained model with varying thresholds to maximize the specified metric. The search space for the threshold is traversed linearly from 0 to 1 in `n_trials` steps. Results are displayed in a table on `stdout` (the corresponding API call to `spacy.cli.find_threshold.find_threshold()` @@ -81,7 +81,7 @@ def find_threshold( silent: bool = True, ) -> Tuple[float, float, Dict[float, float]]: """ - Runs prediction trials for models with varying tresholds to maximize the specified metric. + Runs prediction trials for models with varying thresholds to maximize the specified metric. model (Union[str, Path]): Pipeline to evaluate. Can be a package or a path to a data directory. data_path (Path): Path to file with DocBin with docs to use for threshold search. pipe_name (str): Name of pipe to examine thresholds for. diff --git a/spacy/tests/test_language.py b/spacy/tests/test_language.py index 236856dad..ce3fbb576 100644 --- a/spacy/tests/test_language.py +++ b/spacy/tests/test_language.py @@ -327,7 +327,7 @@ def test_language_pipe_error_handler(n_process): nlp.set_error_handler(raise_error) with pytest.raises(ValueError): list(nlp.pipe(texts, n_process=n_process)) - # set explicitely to ignoring + # set explicitly to ignoring nlp.set_error_handler(ignore_error) docs = list(nlp.pipe(texts, n_process=n_process)) assert len(docs) == 0 diff --git a/website/docs/api/attributes.mdx b/website/docs/api/attributes.mdx index 3142b741d..c2030fa33 100644 --- a/website/docs/api/attributes.mdx +++ b/website/docs/api/attributes.mdx @@ -49,7 +49,7 @@ appending `_` as in `token.dep_`. 
| ------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `DEP` | The token's dependency label. ~~str~~ | | `ENT_ID` | The token's entity ID (`ent_id`). ~~str~~ | -| `ENT_IOB` | The IOB part of the token's entity tag. Uses custom integer vaues rather than the string store: unset is `0`, `I` is `1`, `O` is `2`, and `B` is `3`. ~~str~~ | +| `ENT_IOB` | The IOB part of the token's entity tag. Uses custom integer values rather than the string store: unset is `0`, `I` is `1`, `O` is `2`, and `B` is `3`. ~~str~~ | | `ENT_KB_ID` | The token's entity knowledge base ID. ~~str~~ | | `ENT_TYPE` | The token's entity label. ~~str~~ | | `IS_ALPHA` | Token text consists of alphabetic characters. ~~bool~~ | diff --git a/website/docs/api/cli.mdx b/website/docs/api/cli.mdx index 5b4bca1ce..c3aac6ce2 100644 --- a/website/docs/api/cli.mdx +++ b/website/docs/api/cli.mdx @@ -521,7 +521,7 @@ New: 'ORG' (23860), 'PERSON' (21395), 'GPE' (21193), 'DATE' (18080), 'CARDINAL' 'LOC' (2113), 'TIME' (1616), 'WORK_OF_ART' (1229), 'QUANTITY' (1150), 'FAC' (1134), 'EVENT' (974), 'PRODUCT' (935), 'LAW' (444), 'LANGUAGE' (338) ✔ Good amount of examples for all labels -✔ Examples without occurences available for all labels +✔ Examples without occurrences available for all labels ✔ No entities consisting of or starting/ending with whitespace =========================== Part-of-speech Tagging =========================== @@ -1233,7 +1233,7 @@ $ python -m spacy apply [model] [data-path] [output-file] [--code] [--text-key] ## find-threshold {id="find-threshold",version="3.5",tag="command"} -Runs prediction trials for a trained model with varying tresholds to maximize +Runs prediction trials for a trained model with varying thresholds to maximize the specified metric. The search space for the threshold is traversed linearly from 0 to 1 in `n_trials` steps. 
Results are displayed in a table on `stdout` (the corresponding API call to `spacy.cli.find_threshold.find_threshold()` diff --git a/website/docs/api/entitylinker.mdx b/website/docs/api/entitylinker.mdx index 21d2e9015..b57ecd85d 100644 --- a/website/docs/api/entitylinker.mdx +++ b/website/docs/api/entitylinker.mdx @@ -67,7 +67,7 @@ architectures and their arguments and hyperparameters. | `generate_empty_kb` 3.5.1 | Function that generates an empty `KnowledgeBase` object. Defaults to [`spacy.EmptyKB.v2`](/api/architectures#EmptyKB), which generates an empty [`InMemoryLookupKB`](/api/inmemorylookupkb). ~~Callable[[Vocab, int], KnowledgeBase]~~ | | `overwrite` 3.2 | Whether existing annotation is overwritten. Defaults to `True`. ~~bool~~ | | `scorer` 3.2 | The scoring method. Defaults to [`Scorer.score_links`](/api/scorer#score_links). ~~Optional[Callable]~~ | -| `threshold` 3.4 | Confidence threshold for entity predictions. The default of `None` implies that all predictions are accepted, otherwise those with a score beneath the treshold are discarded. If there are no predictions with scores above the threshold, the linked entity is `NIL`. ~~Optional[float]~~ | +| `threshold` 3.4 | Confidence threshold for entity predictions. The default of `None` implies that all predictions are accepted, otherwise those with a score beneath the threshold are discarded. If there are no predictions with scores above the threshold, the linked entity is `NIL`. ~~Optional[float]~~ | ```python %%GITHUB_SPACY/spacy/pipeline/entity_linker.py @@ -114,7 +114,7 @@ custom knowledge base, you should either call | `incl_context` | Whether or not to include the local context in the model. ~~bool~~ | | `overwrite` 3.2 | Whether existing annotation is overwritten. Defaults to `True`. ~~bool~~ | | `scorer` 3.2 | The scoring method. Defaults to [`Scorer.score_links`](/api/scorer#score_links). ~~Optional[Callable]~~ | -| `threshold` 3.4 | Confidence threshold for entity predictions. 
The default of `None` implies that all predictions are accepted, otherwise those with a score beneath the treshold are discarded. If there are no predictions with scores above the threshold, the linked entity is `NIL`. ~~Optional[float]~~ | +| `threshold` 3.4 | Confidence threshold for entity predictions. The default of `None` implies that all predictions are accepted, otherwise those with a score beneath the threshold are discarded. If there are no predictions with scores above the threshold, the linked entity is `NIL`. ~~Optional[float]~~ | ## EntityLinker.\_\_call\_\_ {id="call",tag="method"} diff --git a/website/docs/api/entityruler.mdx b/website/docs/api/entityruler.mdx index 27624398e..58f66906d 100644 --- a/website/docs/api/entityruler.mdx +++ b/website/docs/api/entityruler.mdx @@ -173,7 +173,7 @@ happens automatically after the component has been added to the pipeline using [`nlp.add_pipe`](/api/language#add_pipe). If the entity ruler was initialized with `overwrite_ents=True`, existing entities will be replaced if they overlap with the matches. When matches overlap in a Doc, the entity ruler prioritizes -longer patterns over shorter, and if equal the match occuring first in the Doc +longer patterns over shorter, and if equal the match occurring first in the Doc is chosen. > #### Example diff --git a/website/docs/api/span.mdx b/website/docs/api/span.mdx index 41422a5b4..225ff6e6a 100644 --- a/website/docs/api/span.mdx +++ b/website/docs/api/span.mdx @@ -288,7 +288,7 @@ it – so no NP-level coordination, no prepositional phrases, and no relative clauses. If the `noun_chunk` [syntax iterator](/usage/linguistic-features#language-data) -has not been implemeted for the given language, a `NotImplementedError` is +has not been implemented for the given language, a `NotImplementedError` is raised. 
> #### Example diff --git a/website/docs/api/transformer.mdx b/website/docs/api/transformer.mdx index ad8ecce54..d3a4457e1 100644 --- a/website/docs/api/transformer.mdx +++ b/website/docs/api/transformer.mdx @@ -405,7 +405,7 @@ by this class. Instances of this class are typically assigned to the | `align` | Alignment from the `Doc`'s tokenization to the wordpieces. This is a ragged array, where `align.lengths[i]` indicates the number of wordpiece tokens that token `i` aligns against. The actual indices are provided at `align[i].dataXd`. ~~Ragged~~ | | `width` | The width of the last hidden layer. ~~int~~ | -### TransformerData.empty {id="transformerdata-emoty",tag="classmethod"} +### TransformerData.empty {id="transformerdata-empty",tag="classmethod"} Create an empty `TransformerData` container. diff --git a/website/docs/api/vectors.mdx b/website/docs/api/vectors.mdx index d6033c096..fcb505501 100644 --- a/website/docs/api/vectors.mdx +++ b/website/docs/api/vectors.mdx @@ -440,7 +440,7 @@ Load state from a binary string. > #### Example > > ```python -> fron spacy.vectors import Vectors +> from spacy.vectors import Vectors > vectors_bytes = vectors.to_bytes() > new_vectors = Vectors(StringStore()) > new_vectors.from_bytes(vectors_bytes) diff --git a/website/docs/usage/layers-architectures.mdx b/website/docs/usage/layers-architectures.mdx index 8f6bf3a20..e1c5d2bf6 100644 --- a/website/docs/usage/layers-architectures.mdx +++ b/website/docs/usage/layers-architectures.mdx @@ -830,7 +830,7 @@ retrieve and add to them. After creation, the component needs to be [initialized](/usage/training#initialization). 
This method can define the -relevant labels in two ways: explicitely by setting the `labels` argument in the +relevant labels in two ways: explicitly by setting the `labels` argument in the [`initialize` block](/api/data-formats#config-initialize) of the config, or -implicately by deducing them from the `get_examples` callback that generates the +implicitly by deducing them from the `get_examples` callback that generates the full **training data set**, or a representative sample. diff --git a/website/docs/usage/linguistic-features.mdx b/website/docs/usage/linguistic-features.mdx index 55d5680fe..f930174ab 100644 --- a/website/docs/usage/linguistic-features.mdx +++ b/website/docs/usage/linguistic-features.mdx @@ -1900,7 +1900,7 @@ the two words. "Shore": ("coast", 0.732257), "Precautionary": ("caution", 0.490973), "hopelessness": ("sadness", 0.742366), - "Continous": ("continuous", 0.732549), + "Continuous": ("continuous", 0.732549), "Disemboweled": ("corpse", 0.499432), "biostatistician": ("scientist", 0.339724), "somewheres": ("somewheres", 0.402736), diff --git a/website/docs/usage/projects.mdx b/website/docs/usage/projects.mdx index f3cca8013..c25a54ff5 100644 --- a/website/docs/usage/projects.mdx +++ b/website/docs/usage/projects.mdx @@ -173,7 +173,7 @@ detected, a corresponding warning is displayed. If you'd like to disable the dependency check, set `check_requirements: false` in your project's `project.yml`. -### 4. Run a workflow {id="run-workfow"} +### 4. Run a workflow {id="run-workflow"} > #### project.yml > @@ -286,7 +286,7 @@ pipelines. 
| --------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | `title` | An optional project title used in `--help` message and [auto-generated docs](#custom-docs). | | `description` | An optional project description used in [auto-generated docs](#custom-docs). | -| `vars` | A dictionary of variables that can be referenced in paths, URLs and scripts and overriden on the CLI, just like [`config.cfg` variables](/usage/training#config-interpolation). For example, `${vars.name}` will use the value of the variable `name`. Variables need to be defined in the section `vars`, but can be a nested dict, so you're able to reference `${vars.model.name}`. | +| `vars` | A dictionary of variables that can be referenced in paths, URLs and scripts and overridden on the CLI, just like [`config.cfg` variables](/usage/training#config-interpolation). For example, `${vars.name}` will use the value of the variable `name`. Variables need to be defined in the section `vars`, but can be a nested dict, so you're able to reference `${vars.model.name}`. | | `env` | A dictionary of variables, mapped to the names of environment variables that will be read in when running the project. For example, `${env.name}` will use the value of the environment variable defined as `name`. | | `directories` | An optional list of [directories](#project-files) that should be created in the project for assets, training outputs, metrics etc. spaCy will make sure that these directories always exist. 
| | `assets` | A list of assets that can be fetched with the [`project assets`](/api/cli#project-assets) command. `url` defines a URL or local path, `dest` is the destination file relative to the project directory, and an optional `checksum` ensures that an error is raised if the file's checksum doesn't match. Instead of `url`, you can also provide a `git` block with the keys `repo`, `branch` and `path`, to download from a Git repo. | diff --git a/website/docs/usage/saving-loading.mdx b/website/docs/usage/saving-loading.mdx index aad8ea353..4dfc73eca 100644 --- a/website/docs/usage/saving-loading.mdx +++ b/website/docs/usage/saving-loading.mdx @@ -346,7 +346,7 @@ them**! To stick with the theme of [this entry points blog post](https://amir.rachum.com/blog/2017/07/28/python-entry-points/), consider the following custom spaCy -[pipeline component](/usage/processing-pipelines#custom-coponents) that prints a +[pipeline component](/usage/processing-pipelines#custom-components) that prints a snake when it's called: > #### Package directory structure diff --git a/website/docs/usage/v2-2.mdx b/website/docs/usage/v2-2.mdx index 84129657d..cf4f7c5bf 100644 --- a/website/docs/usage/v2-2.mdx +++ b/website/docs/usage/v2-2.mdx @@ -185,7 +185,7 @@ New: 'ORG' (23860), 'PERSON' (21395), 'GPE' (21193), 'DATE' (18080), 'CARDINAL' 'LOC' (2113), 'TIME' (1616), 'WORK_OF_ART' (1229), 'QUANTITY' (1150), 'FAC' (1134), 'EVENT' (974), 'PRODUCT' (935), 'LAW' (444), 'LANGUAGE' (338) ✔ Good amount of examples for all labels -✔ Examples without occurences available for all labels +✔ Examples without occurrences available for all labels ✔ No entities consisting of or starting/ending with whitespace =========================== Part-of-speech Tagging =========================== diff --git a/website/docs/usage/v3-2.mdx b/website/docs/usage/v3-2.mdx index b4a4ef672..b3ffd5d68 100644 --- a/website/docs/usage/v3-2.mdx +++ b/website/docs/usage/v3-2.mdx @@ -138,7 +138,7 @@ backwards compatibility, the 
tuple format remains available under `TransformerData.tensors` and `FullTransformerBatch.tensors`. See more details in the [transformer API docs](/api/architectures#TransformerModel). -`spacy-transfomers` v1.1 also adds support for `transformer_config` settings +`spacy-transformers` v1.1 also adds support for `transformer_config` settings such as `output_attentions`. Additional output is stored under `TransformerData.model_output`. More details are in the [TransformerModel docs](/api/architectures#TransformerModel). The training speed