From 8e6a3d58d8fa092eede0fe323441b2aaa3c2042e Mon Sep 17 00:00:00 2001
From: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
Date: Wed, 19 Apr 2023 10:59:33 +0200
Subject: [PATCH 1/3] fix typo (#12543)

---
 spacy/cli/debug_data.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py
index 97b4db285..2826cd084 100644
--- a/spacy/cli/debug_data.py
+++ b/spacy/cli/debug_data.py
@@ -337,7 +337,7 @@ def debug_data(
                 show=verbose,
             )
         else:
-            msg.good("Examples without ocurrences available for all labels")
+            msg.good("Examples without occurrences available for all labels")
 
     if "ner" in factory_names:
         # Get all unique NER labels present in the data

From dc0a1a98086ac038bf62221d0483b9933d5d0260 Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Thu, 20 Apr 2023 11:30:34 +0200
Subject: [PATCH 2/3] Load exceptions last in Tokenizer.from_bytes (#12553)

In `Tokenizer.from_bytes`, the exceptions should be loaded last so that
they are only processed once as part of loading the model.

The exceptions are tokenized as phrase matcher patterns in the
background and the internal tokenization needs to be synced with all the
remaining tokenizer settings. If the exceptions are not loaded last,
there are speed regressions for `Tokenizer.from_bytes/disk` vs.
`Tokenizer.add_special_case` as the caches are reloaded more than
necessary during deserialization.
---
 spacy/tokenizer.pyx | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx
index 0e75b5f7a..a4a68ae8e 100644
--- a/spacy/tokenizer.pyx
+++ b/spacy/tokenizer.pyx
@@ -834,10 +834,12 @@ cdef class Tokenizer:
             self.token_match = re.compile(data["token_match"]).match
         if "url_match" in data and isinstance(data["url_match"], str):
             self.url_match = re.compile(data["url_match"]).match
-        if "rules" in data and isinstance(data["rules"], dict):
-            self.rules = data["rules"]
         if "faster_heuristics" in data:
             self.faster_heuristics = data["faster_heuristics"]
+        # always load rules last so that all other settings are set before the
+        # internal tokenization for the phrase matcher
+        if "rules" in data and isinstance(data["rules"], dict):
+            self.rules = data["rules"]
         return self
 
 

From b60b027927d734db627cf12b040fb75d9cb8894a Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Thu, 20 Apr 2023 14:06:32 +0200
Subject: [PATCH 3/3] Add default option to MorphAnalysis.get (#12545)

* Add default to MorphAnalysis.get

Similar to `dict`, allow a `default` option for `MorphAnalysis.get` for
the user to provide a default return value if the field is not found.
The default return value remains `[]`, which is not the same as
`dict.get`, but is already established as this method's default return
value with the return type `List[str]`. However, the new `default` option
does not enforce that the user-provided default is actually `List[str]`.

* Restore test case
---
 spacy/tests/doc/test_morphanalysis.py | 2 ++
 spacy/tokens/morphanalysis.pyi        | 4 ++--
 spacy/tokens/morphanalysis.pyx        | 6 +++++-
 website/docs/api/morphology.mdx       | 9 +++++----
 4 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/spacy/tests/doc/test_morphanalysis.py b/spacy/tests/doc/test_morphanalysis.py
index 918d4acdc..49e32b936 100644
--- a/spacy/tests/doc/test_morphanalysis.py
+++ b/spacy/tests/doc/test_morphanalysis.py
@@ -33,6 +33,8 @@ def test_token_morph_key(i_has):
 def test_morph_props(i_has):
     assert i_has[0].morph.get("PronType") == ["prs"]
     assert i_has[1].morph.get("PronType") == []
+    assert i_has[1].morph.get("AsdfType", ["asdf"]) == ["asdf"]
+    assert i_has[1].morph.get("AsdfType", default=["asdf", "qwer"]) == ["asdf", "qwer"]
 
 
 def test_morph_iter(i_has):
diff --git a/spacy/tokens/morphanalysis.pyi b/spacy/tokens/morphanalysis.pyi
index b86203cc4..a5376e80d 100644
--- a/spacy/tokens/morphanalysis.pyi
+++ b/spacy/tokens/morphanalysis.pyi
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Iterator, List, Union
+from typing import Any, Dict, Iterator, List, Optional, Union
 from ..vocab import Vocab
 
 class MorphAnalysis:
@@ -13,7 +13,7 @@ class MorphAnalysis:
     def __hash__(self) -> int: ...
     def __eq__(self, other: MorphAnalysis) -> bool: ...  # type: ignore[override]
     def __ne__(self, other: MorphAnalysis) -> bool: ...  # type: ignore[override]
-    def get(self, field: Any) -> List[str]: ...
+    def get(self, field: Any, default: Optional[List[str]] = None) -> List[str]: ...
     def to_json(self) -> str: ...
     def to_dict(self) -> Dict[str, str]: ...
     def __str__(self) -> str: ...
diff --git a/spacy/tokens/morphanalysis.pyx b/spacy/tokens/morphanalysis.pyx
index a7d1f2e44..baa3800a1 100644
--- a/spacy/tokens/morphanalysis.pyx
+++ b/spacy/tokens/morphanalysis.pyx
@@ -58,10 +58,14 @@ cdef class MorphAnalysis:
     def __ne__(self, other):
         return self.key != other.key
 
-    def get(self, field):
+    def get(self, field, default=None):
         """Retrieve feature values by field."""
         cdef attr_t field_id = self.vocab.strings.as_int(field)
         cdef np.ndarray results = get_by_field(&self.c, field_id)
+        if len(results) == 0:
+            if default is None:
+                default = []
+            return default
         features = [self.vocab.strings[result] for result in results]
         return [f.split(Morphology.FIELD_SEP)[1] for f in features]
 
diff --git a/website/docs/api/morphology.mdx b/website/docs/api/morphology.mdx
index 68d80b814..5d4affafe 100644
--- a/website/docs/api/morphology.mdx
+++ b/website/docs/api/morphology.mdx
@@ -213,10 +213,11 @@ Retrieve values for a feature by field.
 > assert morph.get("Feat1") == ["Val1", "Val2"]
 > ```
 
-| Name        | Description                                      |
-| ----------- | ------------------------------------------------ |
-| `field`     | The field to retrieve. ~~str~~                   |
-| **RETURNS** | A list of the individual features. ~~List[str]~~ |
+| Name                                   | Description                                                                                                                    |
+| -------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------ |
+| `field`                                | The field to retrieve. ~~str~~                                                                                                 |
+| `default` <Tag variant="new">3.6</Tag> | The value to return if the field is not present. If unset or `None`, the default return value is `[]`. ~~Optional[List[str]]~~ |
+| **RETURNS**                            | A list of the individual features. ~~List[str]~~                                                                               |
 
 ### MorphAnalysis.to_dict {id="morphanalysis-to_dict",tag="method"}