From 774c10fa39fb52ee23bd65faafc8eea0ad1f180e Mon Sep 17 00:00:00 2001
From: Simon Gurcke <simon@gurcke.de>
Date: Fri, 27 Jan 2023 20:43:40 +1000
Subject: [PATCH 1/8] Add alignment_mode argument to Span.char_span() (#12145)

* Add alignment_mode argument to Span.char_span()

* Update website

* Update spacy/tokens/span.pyx

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>

* Add test

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
---
 spacy/tests/doc/test_span.py |  8 ++++++++
 spacy/tokens/span.pyi        |  1 +
 spacy/tokens/span.pyx        | 11 ++++++++---
 website/docs/api/span.mdx    | 17 +++++++++--------
 4 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/spacy/tests/doc/test_span.py b/spacy/tests/doc/test_span.py
index 3676b35af..d02f305f4 100644
--- a/spacy/tests/doc/test_span.py
+++ b/spacy/tests/doc/test_span.py
@@ -367,6 +367,14 @@ def test_spans_by_character(doc):
             span1.start_char + 1, span1.end_char, label="GPE", alignment_mode="unk"
         )
 
+    # Span.char_span + alignment mode "contract"
+    span2 = doc[0:2].char_span(
+        span1.start_char - 3, span1.end_char, label="GPE", alignment_mode="contract"
+    )
+    assert span1.start_char == span2.start_char
+    assert span1.end_char == span2.end_char
+    assert span2.label_ == "GPE"
+
 
 def test_span_to_array(doc):
     span = doc[1:-2]
diff --git a/spacy/tokens/span.pyi b/spacy/tokens/span.pyi
index 9986a90e6..00226098a 100644
--- a/spacy/tokens/span.pyi
+++ b/spacy/tokens/span.pyi
@@ -98,6 +98,7 @@ class Span:
         label: Union[int, str] = ...,
         kb_id: Union[int, str] = ...,
         vector: Optional[Floats1d] = ...,
+        alignment_mode: str = ...,
     ) -> Span: ...
     @property
     def conjuncts(self) -> Tuple[Token]: ...
diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx
index 99a5f43bd..2912dd705 100644
--- a/spacy/tokens/span.pyx
+++ b/spacy/tokens/span.pyx
@@ -362,7 +362,7 @@ cdef class Span:
         result = xp.dot(vector, other.vector) / (self.vector_norm * other.vector_norm)
         # ensure we get a scalar back (numpy does this automatically but cupy doesn't)
         return result.item()
-    
+
     cpdef np.ndarray to_array(self, object py_attr_ids):
         """Given a list of M attribute IDs, export the tokens to a numpy
         `ndarray` of shape `(N, M)`, where `N` is the length of the document.
@@ -639,7 +639,7 @@ cdef class Span:
         else:
             return self.doc[root]
 
-    def char_span(self, int start_idx, int end_idx, label=0, kb_id=0, vector=None, id=0):
+    def char_span(self, int start_idx, int end_idx, label=0, kb_id=0, vector=None, id=0, alignment_mode="strict"):
         """Create a `Span` object from the slice `span.text[start : end]`.
 
         start (int): The index of the first character of the span.
@@ -649,11 +649,16 @@ cdef class Span:
         kb_id (uint64 or string):  An ID from a KB to capture the meaning of a named entity.
         vector (ndarray[ndim=1, dtype='float32']): A meaning representation of
             the span.
+        alignment_mode (str): How character indices are aligned to token
+            boundaries. Options: "strict" (character indices must be aligned
+            with token boundaries), "contract" (span of all tokens completely
+            within the character span), "expand" (span of all tokens at least
+            partially covered by the character span). Defaults to "strict".
         RETURNS (Span): The newly constructed object.
         """
         start_idx += self.c.start_char
         end_idx += self.c.start_char
-        return self.doc.char_span(start_idx, end_idx, label=label, kb_id=kb_id, vector=vector)
+        return self.doc.char_span(start_idx, end_idx, label=label, kb_id=kb_id, vector=vector, alignment_mode=alignment_mode)
 
     @property
     def conjuncts(self):
diff --git a/website/docs/api/span.mdx b/website/docs/api/span.mdx
index bd7794edc..a135f5ec9 100644
--- a/website/docs/api/span.mdx
+++ b/website/docs/api/span.mdx
@@ -186,14 +186,15 @@ the character indices don't map to a valid span.
 > assert span.text == "New York"
 > ```
 
-| Name        | Description                                                                               |
-| ----------- | ----------------------------------------------------------------------------------------- |
-| `start`     | The index of the first character of the span. ~~int~~                                     |
-| `end`       | The index of the last character after the span. ~~int~~                                   |
-| `label`     | A label to attach to the span, e.g. for named entities. ~~Union[int, str]~~               |
-| `kb_id`     | An ID from a knowledge base to capture the meaning of a named entity. ~~Union[int, str]~~ |
-| `vector`    | A meaning representation of the span. ~~numpy.ndarray[ndim=1, dtype=float32]~~            |
-| **RETURNS** | The newly constructed object or `None`. ~~Optional[Span]~~                                |
+| Name                                            | Description                                                                                                                                                                                                                                                                  |
+| ----------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `start`                                         | The index of the first character of the span. ~~int~~                                                                                                                                                                                                                        |
+| `end`                                           | The index of the last character after the span. ~~int~~                                                                                                                                                                                                                      |
+| `label`                                         | A label to attach to the span, e.g. for named entities. ~~Union[int, str]~~                                                                                                                                                                                                  |
+| `kb_id`                                         | An ID from a knowledge base to capture the meaning of a named entity. ~~Union[int, str]~~                                                                                                                                                                                    |
+| `vector`                                        | A meaning representation of the span. ~~numpy.ndarray[ndim=1, dtype=float32]~~                                                                                                                                                                                               |
+| `alignment_mode` <Tag variant="new">3.5.1</Tag> | How character indices snap to token boundaries. Options: `"strict"` (no snapping), `"contract"` (span of all tokens completely within the character span), `"expand"` (span of all tokens at least partially covered by the character span). Defaults to `"strict"`. ~~str~~ |
+| **RETURNS**                                     | The newly constructed object or `None`. ~~Optional[Span]~~                                                                                                                                                                                                                   |
 
 ## Span.similarity {id="similarity",tag="method",model="vectors"}
 

From 5f8a398bb9d12e65069442de28fe1b9036ff119f Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Fri, 27 Jan 2023 15:09:17 +0100
Subject: [PATCH 2/8] Add span_id to Span.char_span, update Doc/Span.char_span
 docs (#12196)

* Add span_id to Span.char_span, update Doc/Span.char_span docs

`Span.char_span(id=)` should be removed in the future.

* Also use Union[int, str] in Doc docstring
---
 spacy/tests/doc/test_span.py | 12 ++++++++++++
 spacy/tokens/doc.pyi         |  1 +
 spacy/tokens/doc.pyx         |  5 +++--
 spacy/tokens/span.pyi        |  2 ++
 spacy/tokens/span.pyx        | 10 ++++++----
 website/docs/api/doc.mdx     | 19 ++++++++++---------
 website/docs/api/span.mdx    |  2 ++
 7 files changed, 36 insertions(+), 15 deletions(-)

diff --git a/spacy/tests/doc/test_span.py b/spacy/tests/doc/test_span.py
index d02f305f4..b4631037a 100644
--- a/spacy/tests/doc/test_span.py
+++ b/spacy/tests/doc/test_span.py
@@ -163,6 +163,18 @@ def test_char_span(doc, i_sent, i, j, text):
         assert span.text == text
 
 
+def test_char_span_attributes(doc):
+    label = "LABEL"
+    kb_id = "KB_ID"
+    span_id = "SPAN_ID"
+    span1 = doc.char_span(20, 45, label=label, kb_id=kb_id, span_id=span_id)
+    span2 = doc[1:].char_span(15, 40, label=label, kb_id=kb_id, span_id=span_id)
+    assert span1.text == span2.text
+    assert span1.label_ == span2.label_ == label
+    assert span1.kb_id_ == span2.kb_id_ == kb_id
+    assert span1.id_ == span2.id_ == span_id
+
+
 def test_spans_sent_spans(doc):
     sents = list(doc.sents)
     assert sents[0].start == 0
diff --git a/spacy/tokens/doc.pyi b/spacy/tokens/doc.pyi
index f0cdaee87..9d45960ab 100644
--- a/spacy/tokens/doc.pyi
+++ b/spacy/tokens/doc.pyi
@@ -108,6 +108,7 @@ class Doc:
         kb_id: Union[int, str] = ...,
         vector: Optional[Floats1d] = ...,
         alignment_mode: str = ...,
+        span_id: Union[int, str] = ...,
     ) -> Span: ...
     def similarity(self, other: Union[Doc, Span, Token, Lexeme]) -> float: ...
     @property
diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index 075bc4d15..7dfe0ca9f 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -528,9 +528,9 @@ cdef class Doc:
         doc (Doc): The parent document.
         start_idx (int): The index of the first character of the span.
         end_idx (int): The index of the first character after the span.
-        label (uint64 or string): A label to attach to the Span, e.g. for
+        label (Union[int, str]): A label to attach to the Span, e.g. for
             named entities.
-        kb_id (uint64 or string):  An ID from a KB to capture the meaning of a
+        kb_id (Union[int, str]):  An ID from a KB to capture the meaning of a
             named entity.
         vector (ndarray[ndim=1, dtype='float32']): A meaning representation of
             the span.
@@ -539,6 +539,7 @@ cdef class Doc:
             with token boundaries), "contract" (span of all tokens completely
             within the character span), "expand" (span of all tokens at least
             partially covered by the character span). Defaults to "strict".
+        span_id (Union[int, str]): An identifier to associate with the span.
         RETURNS (Span): The newly constructed object.
 
         DOCS: https://spacy.io/api/doc#char_span
diff --git a/spacy/tokens/span.pyi b/spacy/tokens/span.pyi
index 00226098a..a92f19e20 100644
--- a/spacy/tokens/span.pyi
+++ b/spacy/tokens/span.pyi
@@ -98,7 +98,9 @@ class Span:
         label: Union[int, str] = ...,
         kb_id: Union[int, str] = ...,
         vector: Optional[Floats1d] = ...,
+        id: Union[int, str] = ...,
         alignment_mode: str = ...,
+        span_id: Union[int, str] = ...,
     ) -> Span: ...
     @property
     def conjuncts(self) -> Tuple[Token]: ...
diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx
index 2912dd705..cfe1236df 100644
--- a/spacy/tokens/span.pyx
+++ b/spacy/tokens/span.pyx
@@ -639,26 +639,28 @@ cdef class Span:
         else:
             return self.doc[root]
 
-    def char_span(self, int start_idx, int end_idx, label=0, kb_id=0, vector=None, id=0, alignment_mode="strict"):
+    def char_span(self, int start_idx, int end_idx, label=0, kb_id=0, vector=None, id=0, alignment_mode="strict", span_id=0):
         """Create a `Span` object from the slice `span.text[start : end]`.
 
         start (int): The index of the first character of the span.
         end (int): The index of the first character after the span.
-        label (uint64 or string): A label to attach to the Span, e.g. for
+        label (Union[int, str]): A label to attach to the Span, e.g. for
             named entities.
-        kb_id (uint64 or string):  An ID from a KB to capture the meaning of a named entity.
+        kb_id (Union[int, str]):  An ID from a KB to capture the meaning of a named entity.
         vector (ndarray[ndim=1, dtype='float32']): A meaning representation of
             the span.
+        id (Union[int, str]): Unused.
         alignment_mode (str): How character indices are aligned to token
             boundaries. Options: "strict" (character indices must be aligned
             with token boundaries), "contract" (span of all tokens completely
             within the character span), "expand" (span of all tokens at least
             partially covered by the character span). Defaults to "strict".
+        span_id (Union[int, str]): An identifier to associate with the span.
         RETURNS (Span): The newly constructed object.
         """
         start_idx += self.c.start_char
         end_idx += self.c.start_char
-        return self.doc.char_span(start_idx, end_idx, label=label, kb_id=kb_id, vector=vector, alignment_mode=alignment_mode)
+        return self.doc.char_span(start_idx, end_idx, label=label, kb_id=kb_id, vector=vector, alignment_mode=alignment_mode, span_id=span_id)
 
     @property
     def conjuncts(self):
diff --git a/website/docs/api/doc.mdx b/website/docs/api/doc.mdx
index a5f3de6be..13c59c4af 100644
--- a/website/docs/api/doc.mdx
+++ b/website/docs/api/doc.mdx
@@ -209,15 +209,16 @@ alignment mode `"strict".
 > assert span.text == "New York"
 > ```
 
-| Name             | Description                                                                                                                                                                                                                                                                  |
-| ---------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `start`          | The index of the first character of the span. ~~int~~                                                                                                                                                                                                                        |
-| `end`            | The index of the last character after the span. ~~int~~                                                                                                                                                                                                                      |
-| `label`          | A label to attach to the span, e.g. for named entities. ~~Union[int, str]~~                                                                                                                                                                                                  |
-| `kb_id`          | An ID from a knowledge base to capture the meaning of a named entity. ~~Union[int, str]~~                                                                                                                                                                                    |
-| `vector`         | A meaning representation of the span. ~~numpy.ndarray[ndim=1, dtype=float32]~~                                                                                                                                                                                               |
-| `alignment_mode` | How character indices snap to token boundaries. Options: `"strict"` (no snapping), `"contract"` (span of all tokens completely within the character span), `"expand"` (span of all tokens at least partially covered by the character span). Defaults to `"strict"`. ~~str~~ |
-| **RETURNS**      | The newly constructed object or `None`. ~~Optional[Span]~~                                                                                                                                                                                                                   |
+| Name                                     | Description                                                                                                                                                                                                                                                                  |
+| ---------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `start`                                  | The index of the first character of the span. ~~int~~                                                                                                                                                                                                                        |
+| `end`                                    | The index of the last character after the span. ~~int~~                                                                                                                                                                                                                      |
+| `label`                                  | A label to attach to the span, e.g. for named entities. ~~Union[int, str]~~                                                                                                                                                                                                  |
+| `kb_id`                                  | An ID from a knowledge base to capture the meaning of a named entity. ~~Union[int, str]~~                                                                                                                                                                                    |
+| `vector`                                 | A meaning representation of the span. ~~numpy.ndarray[ndim=1, dtype=float32]~~                                                                                                                                                                                               |
+| `alignment_mode`                         | How character indices snap to token boundaries. Options: `"strict"` (no snapping), `"contract"` (span of all tokens completely within the character span), `"expand"` (span of all tokens at least partially covered by the character span). Defaults to `"strict"`. ~~str~~ |
+| `span_id` <Tag variant="new">3.3.1</Tag> | An identifier to associate with the span. ~~Union[int, str]~~                                                                                                                                                                                                                |
+| **RETURNS**                              | The newly constructed object or `None`. ~~Optional[Span]~~                                                                                                                                                                                                                   |
 
 ## Doc.set_ents {id="set_ents",tag="method",version="3"}
 
diff --git a/website/docs/api/span.mdx b/website/docs/api/span.mdx
index a135f5ec9..41422a5b4 100644
--- a/website/docs/api/span.mdx
+++ b/website/docs/api/span.mdx
@@ -193,7 +193,9 @@ the character indices don't map to a valid span.
 | `label`                                         | A label to attach to the span, e.g. for named entities. ~~Union[int, str]~~                                                                                                                                                                                                  |
 | `kb_id`                                         | An ID from a knowledge base to capture the meaning of a named entity. ~~Union[int, str]~~                                                                                                                                                                                    |
 | `vector`                                        | A meaning representation of the span. ~~numpy.ndarray[ndim=1, dtype=float32]~~                                                                                                                                                                                               |
+| `id`                                            | Unused. ~~Union[int, str]~~                                                                                                                                                                                                                                                  |
 | `alignment_mode` <Tag variant="new">3.5.1</Tag> | How character indices snap to token boundaries. Options: `"strict"` (no snapping), `"contract"` (span of all tokens completely within the character span), `"expand"` (span of all tokens at least partially covered by the character span). Defaults to `"strict"`. ~~str~~ |
+| `span_id` <Tag variant="new">3.5.1</Tag>        | An identifier to associate with the span. ~~Union[int, str]~~                                                                                                                                                                                                                |
 | **RETURNS**                                     | The newly constructed object or `None`. ~~Optional[Span]~~                                                                                                                                                                                                                   |
 
 ## Span.similarity {id="similarity",tag="method",model="vectors"}

From bd739e67d6e730d21a65c616917de24e148b5382 Mon Sep 17 00:00:00 2001
From: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
Date: Fri, 27 Jan 2023 15:13:20 +0100
Subject: [PATCH 3/8] explain KB change and how to remedy (#12189)

---
 website/docs/usage/v3-5.mdx | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/website/docs/usage/v3-5.mdx b/website/docs/usage/v3-5.mdx
index ac61338e3..3ca64f8a2 100644
--- a/website/docs/usage/v3-5.mdx
+++ b/website/docs/usage/v3-5.mdx
@@ -155,6 +155,21 @@ An error is now raised when unsupported values are given as input to train a
 `textcat` or `textcat_multilabel` model - ensure that values are `0.0` or `1.0`
 as explained in the [docs](/api/textcategorizer#assigned-attributes).
 
+### Using the default knowledge base
+
+As `KnowledgeBase` is now an abstract class, you should call the constructor of
+the new `InMemoryLookupKB` instead when you want to use spaCy's default KB
+implementation:
+
+```diff
+- kb = KnowledgeBase()
++ kb = InMemoryLookupKB()
+```
+
+If you've written a custom KB that inherits from `KnowledgeBase`, you'll need to
+implement its abstract methods, or alternatively inherit from `InMemoryLookupKB`
+instead.
+
 ### Updated scorers for tokenization and textcat {id="scores"}
 
 We fixed a bug that inflated the `token_acc` scores in v3.0-v3.4. The reported

From 606273f7e47678996cc2d93fe79c5b12f2de1ca5 Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Fri, 27 Jan 2023 16:13:34 +0100
Subject: [PATCH 4/8] Normalize whitespace in evaluate CLI output test (#12157)

* Normalize whitespace in evaluate CLI output test

Depending on terminal settings, lines may be padded to the screen width
so the comparison is too strict with only the command string replacement.

* Move to test util method

* Change to normalization method
---
 spacy/tests/test_cli_app.py | 6 +++---
 spacy/tests/util.py         | 5 +++++
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/spacy/tests/test_cli_app.py b/spacy/tests/test_cli_app.py
index 80da5a447..40100412a 100644
--- a/spacy/tests/test_cli_app.py
+++ b/spacy/tests/test_cli_app.py
@@ -4,7 +4,7 @@ from typer.testing import CliRunner
 from spacy.tokens import DocBin, Doc
 
 from spacy.cli._util import app
-from .util import make_tempdir
+from .util import make_tempdir, normalize_whitespace
 
 
 def test_convert_auto():
@@ -38,8 +38,8 @@ def test_benchmark_accuracy_alias():
     # Verify that the `evaluate` alias works correctly.
     result_benchmark = CliRunner().invoke(app, ["benchmark", "accuracy", "--help"])
     result_evaluate = CliRunner().invoke(app, ["evaluate", "--help"])
-    assert result_benchmark.stdout == result_evaluate.stdout.replace(
-        "spacy evaluate", "spacy benchmark accuracy"
+    assert normalize_whitespace(result_benchmark.stdout) == normalize_whitespace(
+        result_evaluate.stdout.replace("spacy evaluate", "spacy benchmark accuracy")
     )
 
 
diff --git a/spacy/tests/util.py b/spacy/tests/util.py
index d5f3c39ff..c2647558d 100644
--- a/spacy/tests/util.py
+++ b/spacy/tests/util.py
@@ -1,6 +1,7 @@
 import numpy
 import tempfile
 import contextlib
+import re
 import srsly
 from spacy.tokens import Doc
 from spacy.vocab import Vocab
@@ -95,3 +96,7 @@ def assert_packed_msg_equal(b1, b2):
     for (k1, v1), (k2, v2) in zip(sorted(msg1.items()), sorted(msg2.items())):
         assert k1 == k2
         assert v1 == v2
+
+
+def normalize_whitespace(s):
+    return re.sub(r"\s+", " ", s)

From 8932f4dc350ae49b02d6caee5e524e5f48345516 Mon Sep 17 00:00:00 2001
From: Paul O'Leary McCann <polm@dampfkraft.com>
Date: Mon, 30 Jan 2023 18:05:23 +0900
Subject: [PATCH 5/8] Add extra flag to assets docs (#12194)

* Add extra flag to assets docs

For some reason this wasn't included.

* Add new tag to docs
---
 website/docs/api/cli.mdx | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/website/docs/api/cli.mdx b/website/docs/api/cli.mdx
index f7315bb2c..bd966015e 100644
--- a/website/docs/api/cli.mdx
+++ b/website/docs/api/cli.mdx
@@ -1410,12 +1410,13 @@ $ python -m spacy project assets [project_dir]
 > $ python -m spacy project assets [--sparse]
 > ```
 
-| Name             | Description                                                                                                                                               |
-| ---------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `project_dir`    | Path to project directory. Defaults to current working directory. ~~Path (positional)~~                                                                   |
-| `--sparse`, `-S` | Enable [sparse checkout](https://git-scm.com/docs/git-sparse-checkout) to only check out and download what's needed. Requires Git v22.2+. ~~bool (flag)~~ |
-| `--help`, `-h`   | Show help message and available arguments. ~~bool (flag)~~                                                                                                |
-| **CREATES**      | Downloaded or copied assets defined in the `project.yml`.                                                                                                 |
+| Name                                           | Description                                                                                                                                               |
+| ---------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `project_dir`                                  | Path to project directory. Defaults to current working directory. ~~Path (positional)~~                                                                   |
+| `--extra`, `-e` <Tag variant="new">3.3.1</Tag> | Download assets marked as "extra". Default false. ~~bool (flag)~~                                                                                         |
+| `--sparse`, `-S`                               | Enable [sparse checkout](https://git-scm.com/docs/git-sparse-checkout) to only check out and download what's needed. Requires Git v22.2+. ~~bool (flag)~~ |
+| `--help`, `-h`                                 | Show help message and available arguments. ~~bool (flag)~~                                                                                                |
+| **CREATES**                                    | Downloaded or copied assets defined in the `project.yml`.                                                                                                 |
 
 ### project run {id="project-run",tag="command"}
 

From 0e51c918ae2fbcaec875367e1d331e4366fdfe64 Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Mon, 30 Jan 2023 17:51:27 +0100
Subject: [PATCH 6/8] Normalize whitespace in evaluate CLI output test (#12157)

* Normalize whitespace in evaluate CLI output test

Depending on terminal settings, lines may be padded to the screen width
so the comparison is too strict with only the command string replacement.

* Move to test util method

* Change to normalization method

From 02af17a5c8861e4fdc9790aa197e40b7b428e7b4 Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Tue, 31 Jan 2023 16:52:06 +0100
Subject: [PATCH 7/8] Remove flaky assertions. (#12210)

---
 spacy/tests/test_cli.py | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/spacy/tests/test_cli.py b/spacy/tests/test_cli.py
index 42ffae22d..dc7ce46fe 100644
--- a/spacy/tests/test_cli.py
+++ b/spacy/tests/test_cli.py
@@ -1017,8 +1017,6 @@ def test_local_remote_storage_pull_missing():
 
 
 def test_cli_find_threshold(capsys):
-    thresholds = numpy.linspace(0, 1, 10)
-
     def make_examples(nlp: Language) -> List[Example]:
         docs: List[Example] = []
 
@@ -1082,8 +1080,6 @@ def test_cli_find_threshold(capsys):
                 scores_key="cats_macro_f",
                 silent=True,
             )
-            assert best_threshold != thresholds[0]
-            assert thresholds[0] < best_threshold < thresholds[9]
             assert best_score == max(res.values())
             assert res[1.0] == 0.0
 
@@ -1091,7 +1087,7 @@ def test_cli_find_threshold(capsys):
         nlp, _ = init_nlp((("spancat", {}),))
         with make_tempdir() as nlp_dir:
             nlp.to_disk(nlp_dir)
-            res = find_threshold(
+            best_threshold, best_score, res = find_threshold(
                 model=nlp_dir,
                 data_path=docs_dir / "docs.spacy",
                 pipe_name="spancat",
@@ -1099,10 +1095,8 @@ def test_cli_find_threshold(capsys):
                 scores_key="spans_sc_f",
                 silent=True,
             )
-            assert res[0] != thresholds[0]
-            assert thresholds[0] < res[0] < thresholds[8]
-            assert res[1] >= 0.6
-            assert res[2][1.0] == 0.0
+            assert best_score == max(res.values())
+            assert res[1.0] == 0.0
 
         # Having multiple textcat_multilabel components should work, since the name has to be specified.
         nlp, _ = init_nlp((("textcat_multilabel", {}),))

From 4c60afb946f35e2675a5e21880ca3a09633d0bfa Mon Sep 17 00:00:00 2001
From: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
Date: Wed, 1 Feb 2023 10:15:38 +0100
Subject: [PATCH 8/8] Backslash fixes in docs (#12213)

* backslash fixes

* revert unrelated change
---
 website/docs/api/doc.mdx      | 2 +-
 website/docs/models/index.mdx | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/website/docs/api/doc.mdx b/website/docs/api/doc.mdx
index 13c59c4af..0a5826500 100644
--- a/website/docs/api/doc.mdx
+++ b/website/docs/api/doc.mdx
@@ -37,7 +37,7 @@ Construct a `Doc` object. The most common way to get a `Doc` object is via the
 | `words`                                  | A list of strings or integer hash values to add to the document as words. ~~Optional[List[Union[str,int]]]~~                                                                                            |
 | `spaces`                                 | A list of boolean values indicating whether each word has a subsequent space. Must have the same length as `words`, if specified. Defaults to a sequence of `True`. ~~Optional[List[bool]]~~            |
 | _keyword-only_                           |                                                                                                                                                                                                         |
-| `user\_data`                             | Optional extra data to attach to the Doc. ~~Dict~~                                                                                                                                                      |
+| `user_data`                              | Optional extra data to attach to the Doc. ~~Dict~~                                                                                                                                                      |
 | `tags` <Tag variant="new">3</Tag>        | A list of strings, of the same length as `words`, to assign as `token.tag` for each word. Defaults to `None`. ~~Optional[List[str]]~~                                                                   |
 | `pos` <Tag variant="new">3</Tag>         | A list of strings, of the same length as `words`, to assign as `token.pos` for each word. Defaults to `None`. ~~Optional[List[str]]~~                                                                   |
 | `morphs` <Tag variant="new">3</Tag>      | A list of strings, of the same length as `words`, to assign as `token.morph` for each word. Defaults to `None`. ~~Optional[List[str]]~~                                                                 |
diff --git a/website/docs/models/index.mdx b/website/docs/models/index.mdx
index 371e4460f..366d44f0e 100644
--- a/website/docs/models/index.mdx
+++ b/website/docs/models/index.mdx
@@ -21,8 +21,8 @@ menu:
 ## Package naming conventions {id="conventions"}
 
 In general, spaCy expects all pipeline packages to follow the naming convention
-of `[lang]\_[name]`. For spaCy's pipelines, we also chose to divide the name
-into three components:
+of `[lang]_[name]`. For spaCy's pipelines, we also chose to divide the name into
+three components:
 
 1. **Type:** Capabilities (e.g. `core` for general-purpose pipeline with
    tagging, parsing, lemmatization and named entity recognition, or `dep` for