mirror of https://github.com/explosion/spaCy.git (synced 2026-01-09 18:21:14 +03:00)
DOC: Correct grammar issues regarding a/an usage
This commit is contained in:
parent c1e7cb2ebf
commit 9ae2498c94
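Every change in this commit applies the same rule: the article is chosen by the *sound* of the word that follows, not its spelling, which is why initialisms such as "HTML", "XLM" and "AWS" take "an". A minimal sketch of that heuristic (a hypothetical helper written for illustration, not part of this commit; it does not handle sound-based exceptions like "a university" or "an hour"):

```python
def article_for(word: str) -> str:
    """Pick 'a' or 'an' by the sound of the word that follows.

    Initialisms read letter-by-letter ("HTML" -> "aitch-tee-em-ell") take
    'an' when the first letter's spoken *name* begins with a vowel sound:
    A, E, F, H, I, L, M, N, O, R, S, X.
    """
    letter_names_with_vowel_sound = set("AEFHILMNORSX")
    first, second = word[0], word[1:2]
    if word.isupper() or (first.isupper() and second and not second.islower()):
        # Treat as an initialism: judge by the first letter's spoken name.
        return "an" if first in letter_names_with_vowel_sound else "a"
    # Ordinary word: a plain vowel-letter check covers the cases in this diff.
    return "an" if first.lower() in "aeiou" else "a"

for word in ["HTML", "XLM-RoBERTa", "AWS", "Adaptation", "embedding", "index"]:
    print(article_for(word), word)
```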
@@ -230,7 +230,7 @@ subject to and limited by the following restrictions:
 (e.g., "French translation of the Work by Original Author," or
 "Screenplay based on original Work by Original Author"). The credit
 required by this Section 4(c) may be implemented in any reasonable
-manner; provided, however, that in the case of a Adaptation or
+manner; provided, however, that in the case of an Adaptation or
 Collection, at a minimum such credit will appear, if a credit for all
 contributing authors of the Adaptation or Collection appears, then as
 part of these credits and in a manner at least as prominent as the
@@ -38,7 +38,7 @@ def evaluate_cli(
 predictions. Gold preprocessing helps the annotations align to the
 tokenization, and may result in sequences of more consistent length. However,
 it may reduce runtime accuracy due to train/test skew. To render a sample of
-dependency parses in a HTML file, set as output directory as the
+dependency parses in an HTML file, set as output directory as the
 displacy_path argument.

 DOCS: https://spacy.io/api/cli#benchmark-accuracy
@@ -377,7 +377,7 @@ cdef class DependencyMatcher:

 def _resolve_node_operator(self, cache, doc, node, operator):
 """
-Given a doc, a node (as a index in the doc) and a REL_OP operator
+Given a doc, a node (as an index in the doc) and a REL_OP operator
 returns the list of nodes from the doc that belong to node+operator.
 """
 key = (node, operator)
@@ -216,7 +216,7 @@ def CharacterEmbed(
 ensures that the final character is always in the last position, instead
 of being in an arbitrary position depending on the word length.

-The characters are embedded in a embedding table with a given number of rows,
+The characters are embedded in an embedding table with a given number of rows,
 and the vectors concatenated. A hash-embedded vector of the LOWER of the word is
 also concatenated on, and the result is then passed through a feed-forward
 network to construct a single vector to represent the information.
@@ -194,7 +194,7 @@ characters would be `"jumpping"`: 4 from the start, 4 from the end. This ensures
 that the final character is always in the last position, instead of being in an
 arbitrary position depending on the word length.

-The characters are embedded in a embedding table with a given number of rows,
+The characters are embedded in an embedding table with a given number of rows,
 and the vectors concatenated. A hash-embedded vector of the `NORM` of the word
 is also concatenated on, and the result is then passed through a feed-forward
 network to construct a single vector to represent the information.
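The passage above describes the CharacterEmbed mechanism: embed a fixed window of characters per word, concatenate, append a hash-embedded vector of the normalized word, and pass the result through a feed-forward layer. A rough NumPy sketch of the idea (the dimensions, the hashing scheme, and the single ReLU layer are simplifying assumptions, not spaCy's actual implementation):

```python
import numpy as np

rng = np.random.default_rng(0)

N_CHARS, CHAR_DIM = 256, 16     # rows/width of the character embedding table
NORM_ROWS, NORM_DIM = 1000, 32  # rows/width of the hashed NORM table
WINDOW = 4                      # characters taken from each end of the word
OUT_DIM = 64

char_table = rng.normal(size=(N_CHARS, CHAR_DIM))
norm_table = rng.normal(size=(NORM_ROWS, NORM_DIM))
W = rng.normal(size=(2 * WINDOW * CHAR_DIM + NORM_DIM, OUT_DIM))

def embed_word(word: str) -> np.ndarray:
    # Take WINDOW characters from the start and WINDOW from the end, so the
    # final character always lands in the last slot regardless of word length.
    padded = word.ljust(2 * WINDOW)
    window = padded[:WINDOW] + padded[-WINDOW:]
    char_vecs = char_table[[ord(c) % N_CHARS for c in window]].ravel()
    # Stand-in for the hash-embedded vector of the word's NORM attribute.
    norm_vec = norm_table[hash(word.lower()) % NORM_ROWS]
    # Concatenate and pass through a single feed-forward (ReLU) layer.
    return np.maximum(np.concatenate([char_vecs, norm_vec]) @ W, 0.0)

vec = embed_word("jumping")  # one fixed-size vector per word
```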
@@ -599,7 +599,7 @@ Construct a RoBERTa transformer model.

 ### spacy-curated-transformers.XlmrTransformer.v1

-Construct a XLM-RoBERTa transformer model.
+Construct an XLM-RoBERTa transformer model.

 | Name | Description |
 | ------------------------------ | ---------------------------------------------------------------------------------------- |
@@ -1238,7 +1238,7 @@ Evaluate the accuracy of a trained pipeline. Expects a loadable spaCy pipeline
 sentences and tokens for the predictions. Gold preprocessing helps the
 annotations align to the tokenization, and may result in sequences of more
 consistent length. However, it may reduce runtime accuracy due to train/test
-skew. To render a sample of dependency parses in a HTML file using the
+skew. To render a sample of dependency parses in an HTML file using the
 [displaCy visualizations](/usage/visualizers), set as output directory as the
 `--displacy-path` argument.

@@ -537,7 +537,7 @@ Construct a callback that initializes a character piece encoder model.
 | ----------- | --------------------------------------------------------------------------- |
 | `path` | Path to the serialized character model. ~~Path~~ |
 | `bos_piece` | Piece used as a beginning-of-sentence token. Defaults to `"[BOS]"`. ~~str~~ |
-| `eos_piece` | Piece used as a end-of-sentence token. Defaults to `"[EOS]"`. ~~str~~ |
+| `eos_piece` | Piece used as an end-of-sentence token. Defaults to `"[EOS]"`. ~~str~~ |
 | `unk_piece` | Piece used as a stand-in for unknown tokens. Defaults to `"[UNK]"`. ~~str~~ |
 | `normalize` | Unicode normalization form to use. Defaults to `"NFKC"`. ~~str~~ |

@@ -80,7 +80,7 @@ architectures and their arguments and hyperparameters.
 | Setting | Description |
 | --------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `suggester` | A function that [suggests spans](#suggesters). Spans are returned as a ragged array with two integer columns, for the start and end positions. Defaults to [`ngram_suggester`](#ngram_suggester). ~~Callable[[Iterable[Doc], Optional[Ops]], Ragged]~~ |
-| `model` | A model instance that is given a a list of documents and `(start, end)` indices representing candidate span offsets. The model predicts a probability for each category for each span. Defaults to [SpanCategorizer](/api/architectures#SpanCategorizer). ~~Model[Tuple[List[Doc], Ragged], Floats2d]~~ |
+| `model` | A model instance that is given a list of documents and `(start, end)` indices representing candidate span offsets. The model predicts a probability for each category for each span. Defaults to [SpanCategorizer](/api/architectures#SpanCategorizer). ~~Model[Tuple[List[Doc], Ragged], Floats2d]~~ |
 | `spans_key` | Key of the [`Doc.spans`](/api/doc#spans) dict to save the spans under. During initialization and training, the component will look for spans on the reference document under the same key. Defaults to `"sc"`. ~~str~~ |
 | `threshold` | Minimum probability to consider a prediction positive. Spans with a positive prediction will be saved on the Doc. Meant to be used in combination with the multi-class `spancat` component with a `Logistic` scoring layer. Defaults to `0.5`. ~~float~~ |
 | `max_positive` | Maximum number of labels to consider positive per span. Defaults to `None`, indicating no limit. Meant to be used together with the `spancat` component and defaults to 0 with `spancat_singlelabel`. ~~Optional[int]~~ |
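For orientation, the `suggester`, `spans_key`, `threshold`, and `max_positive` settings in this table map onto the training config; a minimal fragment using the documented defaults (illustrative only, not part of this commit):

```ini
[components.spancat]
factory = "spancat"
spans_key = "sc"
threshold = 0.5
max_positive = null

[components.spancat.suggester]
@misc = "spacy.ngram_suggester.v1"
sizes = [1, 2, 3]
```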
@@ -133,7 +133,7 @@ shortcut for this and instantiate the component using its string name and
 | Name | Description |
 | --------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `vocab` | The shared vocabulary. ~~Vocab~~ |
-| `model` | A model instance that is given a a list of documents and `(start, end)` indices representing candidate span offsets. The model predicts a probability for each category for each span. ~~Model[Tuple[List[Doc], Ragged], Floats2d]~~ |
+| `model` | A model instance that is given a list of documents and `(start, end)` indices representing candidate span offsets. The model predicts a probability for each category for each span. ~~Model[Tuple[List[Doc], Ragged], Floats2d]~~ |
 | `suggester` | A function that [suggests spans](#suggesters). Spans are returned as a ragged array with two integer columns, for the start and end positions. ~~Callable[[Iterable[Doc], Optional[Ops]], Ragged]~~ |
 | `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ |
 | _keyword-only_ | |
@@ -294,7 +294,7 @@ if all of your models are up to date, you can run the
 ```

 - The [`spacy init-model`](/api/cli#init-model) command now uses a `--jsonl-loc`
-  argument to pass in a a newline-delimited JSON (JSONL) file containing one
+  argument to pass in a newline-delimited JSON (JSONL) file containing one
   lexical entry per line instead of a separate `--freqs-loc` and
   `--clusters-loc`.

@@ -1192,7 +1192,7 @@
 "tags": [
 "AWS"
 ],
-"slogan": "spaCy as a AWS Lambda Layer",
+"slogan": "spaCy as an AWS Lambda Layer",
 "description": "A collection of Python Packages as AWS Lambda(λ) Layers",
 "github": "keithrozario/Klayers",
 "pip": "",
@@ -3647,7 +3647,7 @@
 "id": "spacysetfit",
 "title": "spaCy-SetFit",
 "slogan": "An an easy and intuitive approach to use SetFit in combination with spaCy.",
-"description": "spaCy-SetFit is a Python library that extends spaCy's text categorization capabilities by incorporating SetFit for few-shot classification. It allows you to train a text categorizer using a intuitive dictionary. \n\nThe library integrates with spaCy's pipeline architecture, enabling easy integration and configuration of the text categorizer component. You can provide a training dataset containing inlier and outlier examples, and spaCy-SetFit will use the paraphrase-MiniLM-L3-v2 model for training the text categorizer with SetFit. Once trained, you can use the categorizer to classify new text and obtain category probabilities.",
+"description": "spaCy-SetFit is a Python library that extends spaCy's text categorization capabilities by incorporating SetFit for few-shot classification. It allows you to train a text categorizer using an intuitive dictionary. \n\nThe library integrates with spaCy's pipeline architecture, enabling easy integration and configuration of the text categorizer component. You can provide a training dataset containing inlier and outlier examples, and spaCy-SetFit will use the paraphrase-MiniLM-L3-v2 model for training the text categorizer with SetFit. Once trained, you can use the categorizer to classify new text and obtain category probabilities.",
 "github": "davidberenstein1957/spacy-setfit",
 "pip": "spacy-setfit",
 "thumb": "https://raw.githubusercontent.com/davidberenstein1957/spacy-setfit/main/logo.png",