From 0e51c918ae2fbcaec875367e1d331e4366fdfe64 Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Mon, 30 Jan 2023 17:51:27 +0100
Subject: [PATCH 1/2] Normalize whitespace in evaluate CLI output test (#12157)

* Normalize whitespace in evaluate CLI output test

Depending on terminal settings, lines may be padded to the screen width
so the comparison is too strict with only the command string replacement.

* Move to test util method

* Change to normalization method

From 02af17a5c8861e4fdc9790aa197e40b7b428e7b4 Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Tue, 31 Jan 2023 16:52:06 +0100
Subject: [PATCH 2/2] Remove flaky assertions. (#12210)

---
 spacy/tests/test_cli.py | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/spacy/tests/test_cli.py b/spacy/tests/test_cli.py
index 42ffae22d..dc7ce46fe 100644
--- a/spacy/tests/test_cli.py
+++ b/spacy/tests/test_cli.py
@@ -1017,8 +1017,6 @@ def test_local_remote_storage_pull_missing():
 
 
 def test_cli_find_threshold(capsys):
-    thresholds = numpy.linspace(0, 1, 10)
-
     def make_examples(nlp: Language) -> List[Example]:
         docs: List[Example] = []
 
@@ -1082,8 +1080,6 @@ def test_cli_find_threshold(capsys):
                 scores_key="cats_macro_f",
                 silent=True,
             )
-            assert best_threshold != thresholds[0]
-            assert thresholds[0] < best_threshold < thresholds[9]
             assert best_score == max(res.values())
             assert res[1.0] == 0.0
 
@@ -1091,7 +1087,7 @@ def test_cli_find_threshold(capsys):
         nlp, _ = init_nlp((("spancat", {}),))
         with make_tempdir() as nlp_dir:
             nlp.to_disk(nlp_dir)
-            res = find_threshold(
+            best_threshold, best_score, res = find_threshold(
                 model=nlp_dir,
                 data_path=docs_dir / "docs.spacy",
                 pipe_name="spancat",
@@ -1099,10 +1095,8 @@ def test_cli_find_threshold(capsys):
                 scores_key="spans_sc_f",
                 silent=True,
             )
-            assert res[0] != thresholds[0]
-            assert thresholds[0] < res[0] < thresholds[8]
-            assert res[1] >= 0.6
-            assert res[2][1.0] == 0.0
+            assert best_score == max(res.values())
+            assert res[1.0] == 0.0
 
         # Having multiple textcat_multilabel components should work, since the name has to be specified.
         nlp, _ = init_nlp((("textcat_multilabel", {}),))