From 64d2d27c5dbf8e5657187975d2c9627f30e108a2 Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Thu, 22 Dec 2022 10:53:16 +0100
Subject: [PATCH 1/7] Add classifier for python 3.11 (#12013)

---
 setup.cfg | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.cfg b/setup.cfg
index cf6e6f84b..d290d706c 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -22,6 +22,7 @@ classifiers =
     Programming Language :: Python :: 3.8
     Programming Language :: Python :: 3.9
     Programming Language :: Python :: 3.10
+    Programming Language :: Python :: 3.11
     Topic :: Scientific/Engineering
 project_urls =
     Release notes = https://github.com/explosion/spaCy/releases

From 90896504a5dba1babac04a2b88662179409ae006 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Fri, 23 Dec 2022 12:44:07 +0100
Subject: [PATCH 2/7] Auto-format code with black (#12019)

Co-authored-by: explosion-bot <explosion-bot@users.noreply.github.com>
---
 spacy/cli/apply.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/spacy/cli/apply.py b/spacy/cli/apply.py
index 9d170bc95..f0df4e757 100644
--- a/spacy/cli/apply.py
+++ b/spacy/cli/apply.py
@@ -53,9 +53,7 @@ def _stream_jsonl(path: Path, field: str) -> Iterable[str]:
     """
     for entry in srsly.read_jsonl(path):
         if field not in entry:
-            msg.fail(
-                f"{path} does not contain the required '{field}' field.", exits=1
-            )
+            msg.fail(f"{path} does not contain the required '{field}' field.", exits=1)
         else:
             yield entry[field]
 
@@ -118,8 +116,10 @@ def apply(
     paths = walk_directory(data_path)
     if len(paths) == 0:
         docbin.to_disk(output_file)
-        msg.warn("Did not find data to process,"
-                 f" {data_path} seems to be an empty directory.")
+        msg.warn(
+            "Did not find data to process,"
+            f" {data_path} seems to be an empty directory."
+        )
         return
     nlp = load_model(model)
     msg.good(f"Loaded model {model}")

From aa2b471a6e289d1c1bb51558df779ae028671225 Mon Sep 17 00:00:00 2001
From: Madeesh Kannan <shadeMe@users.noreply.github.com>
Date: Fri, 23 Dec 2022 15:21:44 +0100
Subject: [PATCH 3/7] New console logger with expanded progress tracking
 (#11972)

* Add `ConsoleLogger.v3`

This addition expands the progress bar feature to count up the training/distillation steps to either the next evaluation pass or the maximum number of steps.

* Rename progress bar types

* Add defaults to docs
Minor fixes

* Move comment

* Minor punctuation fixes

* Explicitly check for `None` when validating progress bar type

Co-authored-by: Paul O'Leary McCann <polm@dampfkraft.com>
---
 spacy/errors.py               |  1 +
 spacy/training/loggers.py     | 48 ++++++++++++++++++++++++++++++++---
 website/docs/api/top-level.md | 34 ++++++++++++++++++++-----
 3 files changed, 74 insertions(+), 9 deletions(-)

diff --git a/spacy/errors.py b/spacy/errors.py
index 0e5ef91ed..cd9281e91 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -962,6 +962,7 @@ class Errors(metaclass=ErrorsWithCodes):
     E1046 = ("{cls_name} is an abstract class and cannot be instantiated. If you are looking for spaCy's default "
              "knowledge base, use `InMemoryLookupKB`.")
     E1047 = ("`find_threshold()` only supports components with a `scorer` attribute.")
+    E1048 = ("Got '{unexpected}' as console progress bar type, but expected one of the following: {expected}")
 
 
 # Deprecated model shortcuts, only used in errors and warnings
diff --git a/spacy/training/loggers.py b/spacy/training/loggers.py
index 408ea7140..7de31822e 100644
--- a/spacy/training/loggers.py
+++ b/spacy/training/loggers.py
@@ -26,6 +26,8 @@ def setup_table(
     return final_cols, final_widths, ["r" for _ in final_widths]
 
 
+# We cannot rename this function as it's directly imported
+# and used by external packages such as spacy-loggers.
 @registry.loggers("spacy.ConsoleLogger.v2")
 def console_logger(
     progress_bar: bool = False,
@@ -33,7 +35,27 @@ def console_logger(
     output_file: Optional[Union[str, Path]] = None,
 ):
     """The ConsoleLogger.v2 prints out training logs in the console and/or saves them to a jsonl file.
-    progress_bar (bool): Whether the logger should print the progress bar.
+    progress_bar (bool): Whether the logger should print a progress bar tracking the steps till the next evaluation pass.
+    console_output (bool): Whether the logger should print the logs on the console.
+    output_file (Optional[Union[str, Path]]): The file to save the training logs to.
+    """
+    return console_logger_v3(
+        progress_bar=None if progress_bar is False else "eval",
+        console_output=console_output,
+        output_file=output_file,
+    )
+
+
+@registry.loggers("spacy.ConsoleLogger.v3")
+def console_logger_v3(
+    progress_bar: Optional[str] = None,
+    console_output: bool = True,
+    output_file: Optional[Union[str, Path]] = None,
+):
+    """The ConsoleLogger.v3 prints out training logs in the console and/or saves them to a jsonl file.
+    progress_bar (Optional[str]): Type of progress bar to show in the console. Allowed values:
+        train - Tracks the number of steps from the beginning of training until the full training run is complete (training.max_steps is reached).
+        eval - Tracks the number of steps between the previous and next evaluation (training.eval_frequency is reached).
     console_output (bool): Whether the logger should print the logs on the console.
     output_file (Optional[Union[str, Path]]): The file to save the training logs to.
     """
@@ -70,6 +92,7 @@ def console_logger(
             for name, proc in nlp.pipeline
             if hasattr(proc, "is_trainable") and proc.is_trainable
         ]
+        max_steps = nlp.config["training"]["max_steps"]
         eval_frequency = nlp.config["training"]["eval_frequency"]
         score_weights = nlp.config["training"]["score_weights"]
         score_cols = [col for col, value in score_weights.items() if value is not None]
@@ -84,6 +107,13 @@ def console_logger(
             write(msg.row(table_header, widths=table_widths, spacing=spacing))
             write(msg.row(["-" * width for width in table_widths], spacing=spacing))
         progress = None
+        expected_progress_types = ("train", "eval")
+        if progress_bar is not None and progress_bar not in expected_progress_types:
+            raise ValueError(
+                Errors.E1048.format(
+                    unexpected=progress_bar, expected=expected_progress_types
+                )
+            )
 
         def log_step(info: Optional[Dict[str, Any]]) -> None:
             nonlocal progress
@@ -141,11 +171,23 @@ def console_logger(
                     )
                 )
                 if progress_bar:
+                    if progress_bar == "train":
+                        total = max_steps
+                        desc = f"Last Eval Epoch: {info['epoch']}"
+                        initial = info["step"]
+                    else:
+                        total = eval_frequency
+                        desc = f"Epoch {info['epoch']+1}"
+                        initial = 0
                     # Set disable=None, so that it disables on non-TTY
                     progress = tqdm.tqdm(
-                        total=eval_frequency, disable=None, leave=False, file=stderr
+                        total=total,
+                        disable=None,
+                        leave=False,
+                        file=stderr,
+                        initial=initial,
                     )
-                    progress.set_description(f"Epoch {info['epoch']+1}")
+                    progress.set_description(desc)
 
         def finalize() -> None:
             if output_stream:
diff --git a/website/docs/api/top-level.md b/website/docs/api/top-level.md
index 26a5d42f4..883c5e3b9 100644
--- a/website/docs/api/top-level.md
+++ b/website/docs/api/top-level.md
@@ -513,7 +513,7 @@ a [Weights & Biases](https://www.wandb.com/) dashboard.
 Instead of using one of the built-in loggers, you can
 [implement your own](/usage/training#custom-logging).
 
-#### spacy.ConsoleLogger.v2 {#ConsoleLogger tag="registered function"}
+#### spacy.ConsoleLogger.v2 {tag="registered function"}
 
 > #### Example config
 >
@@ -564,11 +564,33 @@ start decreasing across epochs.
 
  </Accordion>
 
-| Name             | Description                                                           |
-| ---------------- | --------------------------------------------------------------------- |
-| `progress_bar`   | Whether the logger should print the progress bar ~~bool~~             |
-| `console_output` | Whether the logger should print the logs on the console. ~~bool~~     |
-| `output_file`    | The file to save the training logs to. ~~Optional[Union[str, Path]]~~ |
+| Name             | Description                                                                                                                  |
+| ---------------- | ---------------------------------------------------------------------------------------------------------------------------- |
+| `progress_bar`   | Whether the logger should print a progress bar tracking the steps till the next evaluation pass (default: `False`). ~~bool~~ |
+| `console_output` | Whether the logger should print the logs in the console (default: `True`). ~~bool~~                                          |
+| `output_file`    | The file to save the training logs to (default: `None`). ~~Optional[Union[str, Path]]~~                                      |
+
+#### spacy.ConsoleLogger.v3 {#ConsoleLogger tag="registered function"}
+
+> #### Example config
+>
+> ```ini
+> [training.logger]
+> @loggers = "spacy.ConsoleLogger.v3"
+> progress_bar = "eval"
+> console_output = true
+> output_file = "training_log.jsonl"
+> ```
+
+Writes the results of a training step to the console in a tabular format and
+optionally saves them to a `jsonl` file.
+
+| Name             | Description                                                                                                                                               |
+| ---------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `progress_bar`   | Type of progress bar to show in the console: `"train"`, `"eval"` or `None`.                                                                               |
+|                  | The bar tracks the number of steps until `training.max_steps` and `training.eval_frequency` are reached respectively (default: `None`). ~~Optional[str]~~ |
+| `console_output` | Whether the logger should print the logs in the console (default: `True`). ~~bool~~                                                                       |
+| `output_file`    | The file to save the training logs to (default: `None`). ~~Optional[Union[str, Path]]~~                                                                   |
 
 ## Readers {#readers}
 

From 933b54ac798a7d64f9cde4d85b55556e84e44bd6 Mon Sep 17 00:00:00 2001
From: kadarakos <kadar.akos@gmail.com>
Date: Mon, 26 Dec 2022 13:26:35 +0100
Subject: [PATCH 4/7] typo fix (#11995)

---
 spacy/pipeline/span_ruler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/pipeline/span_ruler.py b/spacy/pipeline/span_ruler.py
index 807a4ffe5..0e7e9ebf7 100644
--- a/spacy/pipeline/span_ruler.py
+++ b/spacy/pipeline/span_ruler.py
@@ -170,7 +170,7 @@ def prioritize_existing_ents_filter(
 
 
 @registry.misc("spacy.prioritize_existing_ents_filter.v1")
-def make_preverse_existing_ents_filter():
+def make_preserve_existing_ents_filter():
     return prioritize_existing_ents_filter
 
 

From ef9e504eacc806162666c964bd00d152fc15f9e3 Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Thu, 29 Dec 2022 14:01:08 +0100
Subject: [PATCH 5/7] Rename modified textcat scorer to v2 (#11971)

As a follow-up to #11696, rename the modified scorer to v2 and move the
v1 scorer to `spacy-legacy`.
---
 requirements.txt                     |  2 +-
 setup.cfg                            |  2 +-
 spacy/pipeline/textcat.py            |  4 ++--
 spacy/tests/pipeline/test_textcat.py | 17 +++++++++++++++++
 4 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 0440835f2..5bc1c8684 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 # Our libraries
-spacy-legacy>=3.0.10,<3.1.0
+spacy-legacy>=3.0.11,<3.1.0
 spacy-loggers>=1.0.0,<2.0.0
 cymem>=2.0.2,<2.1.0
 preshed>=3.0.2,<3.1.0
diff --git a/setup.cfg b/setup.cfg
index d290d706c..cee8c0c33 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -42,7 +42,7 @@ setup_requires =
     thinc>=8.1.0,<8.2.0
 install_requires =
     # Our libraries
-    spacy-legacy>=3.0.10,<3.1.0
+    spacy-legacy>=3.0.11,<3.1.0
     spacy-loggers>=1.0.0,<2.0.0
     murmurhash>=0.28.0,<1.1.0
     cymem>=2.0.2,<2.1.0
diff --git a/spacy/pipeline/textcat.py b/spacy/pipeline/textcat.py
index 65121114d..650a01949 100644
--- a/spacy/pipeline/textcat.py
+++ b/spacy/pipeline/textcat.py
@@ -74,7 +74,7 @@ subword_features = true
     default_config={
         "threshold": 0.0,
         "model": DEFAULT_SINGLE_TEXTCAT_MODEL,
-        "scorer": {"@scorers": "spacy.textcat_scorer.v1"},
+        "scorer": {"@scorers": "spacy.textcat_scorer.v2"},
     },
     default_score_weights={
         "cats_score": 1.0,
@@ -117,7 +117,7 @@ def textcat_score(examples: Iterable[Example], **kwargs) -> Dict[str, Any]:
     )
 
 
-@registry.scorers("spacy.textcat_scorer.v1")
+@registry.scorers("spacy.textcat_scorer.v2")
 def make_textcat_scorer():
     return textcat_score
 
diff --git a/spacy/tests/pipeline/test_textcat.py b/spacy/tests/pipeline/test_textcat.py
index 155ce99a2..eafe4c128 100644
--- a/spacy/tests/pipeline/test_textcat.py
+++ b/spacy/tests/pipeline/test_textcat.py
@@ -895,3 +895,20 @@ def test_textcat_multi_threshold():
 
     scores = nlp.evaluate(train_examples, scorer_cfg={"threshold": 0})
     assert scores["cats_f_per_type"]["POSITIVE"]["r"] == 1.0
+
+
+@pytest.mark.parametrize("component_name,scorer", [("textcat", "spacy.textcat_scorer.v1")])
+def test_textcat_legacy_scorers(component_name, scorer):
+    """Check that legacy scorers are registered and produce the expected score
+    keys."""
+    nlp = English()
+    nlp.add_pipe(component_name, config={"scorer": {"@scorers": scorer}})
+
+    train_examples = []
+    for text, annotations in TRAIN_DATA_SINGLE_LABEL:
+        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
+    nlp.initialize(get_examples=lambda: train_examples)
+
+    # score the model (it's not actually trained but that doesn't matter)
+    scores = nlp.evaluate(train_examples)
+    assert 0 <= scores["cats_score"] <= 1

From abb0ab109d33d2deaa6155a61fad649a25472f9c Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Mon, 2 Jan 2023 11:59:57 +0100
Subject: [PATCH 6/7] Auto-format code with black (#12035)

Co-authored-by: explosion-bot <explosion-bot@users.noreply.github.com>
---
 spacy/tests/pipeline/test_textcat.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/spacy/tests/pipeline/test_textcat.py b/spacy/tests/pipeline/test_textcat.py
index eafe4c128..048586cec 100644
--- a/spacy/tests/pipeline/test_textcat.py
+++ b/spacy/tests/pipeline/test_textcat.py
@@ -897,7 +897,9 @@ def test_textcat_multi_threshold():
     assert scores["cats_f_per_type"]["POSITIVE"]["r"] == 1.0
 
 
-@pytest.mark.parametrize("component_name,scorer", [("textcat", "spacy.textcat_scorer.v1")])
+@pytest.mark.parametrize(
+    "component_name,scorer", [("textcat", "spacy.textcat_scorer.v1")]
+)
 def test_textcat_legacy_scorers(component_name, scorer):
     """Check that legacy scorers are registered and produce the expected score
     keys."""

From 31c1beba787446059de58a1478e6aec197fd0bbb Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun <wannaphong@yahoo.com>
Date: Tue, 3 Jan 2023 15:03:59 +0700
Subject: [PATCH 7/7] Add spacy-pythainlp (#12038)

* Add spacy-pythainlp

* Move submission to right section

* Minor cleanup

* Remove extra list call

* Update universe.json

Co-authored-by: Paul O'Leary McCann <polm@dampfkraft.com>
---
 website/meta/universe.json | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/website/meta/universe.json b/website/meta/universe.json
index db533c3b2..99d121507 100644
--- a/website/meta/universe.json
+++ b/website/meta/universe.json
@@ -4062,6 +4062,33 @@
             "author_links": {
                 "github": "yasufumy"
             }
+        },
+        {
+            "id": "spacy-pythainlp",
+            "title": "spaCy-PyThaiNLP",
+            "slogan": "PyThaiNLP for spaCy",
+            "description": "This package wraps the PyThaiNLP library to add support for Thai to spaCy.",
+            "github": "PyThaiNLP/spaCy-PyThaiNLP",
+            "code_example": [
+                "import spacy",
+                "import spacy_pythainlp.core",
+                "",
+                "nlp = spacy.blank('th')",
+                "nlp.add_pipe('pythainlp')",
+                "doc = nlp('ผมเป็นคนไทย   แต่มะลิอยากไปโรงเรียนส่วนผมจะไปไหน  ผมอยากไปเที่ยว')",
+                "",
+                "print(list(doc.sents))",
+                "# output: [ผมเป็นคนไทย   แต่มะลิอยากไปโรงเรียนส่วนผมจะไปไหน  , ผมอยากไปเที่ยว]"
+            ],
+            "code_language": "python",
+            "author": "Wannaphong Phatthiyaphaibun",
+            "author_links": {
+                "twitter": "@wannaphong_p",
+                "github": "wannaphong",
+                "website": "https://iam.wannaphong.com/"
+            },
+            "category": ["pipeline", "research"],
+            "tags": ["Thai"]
         }
 
     ],