From f22fc7a1138545a2a75975909b5af554e8e1d616 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Fri, 9 Dec 2022 10:15:52 +0100
Subject: [PATCH 1/6] Auto-format code with black (#11955)

Co-authored-by: explosion-bot <explosion-bot@users.noreply.github.com>
---
 spacy/tests/test_cli.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/tests/test_cli.py b/spacy/tests/test_cli.py
index 3104b49ff..42af08749 100644
--- a/spacy/tests/test_cli.py
+++ b/spacy/tests/test_cli.py
@@ -140,7 +140,7 @@ def test_issue11235():
         assert os.path.exists(d / "cfg")
         assert os.path.exists(d / f"{lang_var}_model")
     assert cfg["commands"][0]["script"][0] == f"hello {lang_var}"
-    
+
 
 def test_cli_info():
     nlp = Dutch()

From 8c291ace0c0978e70257906438d3585022090e9f Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Mon, 12 Dec 2022 08:38:36 +0100
Subject: [PATCH 2/6] Extend to wasabi v1.1 (#11945)

* Extend to wasabi v1.1

* Temporarily run mypy and tests with newest wasabi

* Temporarily skip check requirements test

* Revert "Temporarily skip check requirements test"

This reverts commit 44f4ce20a8e8c92e8bfc8042cc68333589a96253.

* Revert "Temporarily run mypy and tests with newest wasabi"

This reverts commit e677a2257ced55e696cafc3a8e89eb2f7ddfc369.
---
 requirements.txt | 2 +-
 setup.cfg        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 778c05e21..0440835f2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,7 +6,7 @@ preshed>=3.0.2,<3.1.0
 thinc>=8.1.0,<8.2.0
 ml_datasets>=0.2.0,<0.3.0
 murmurhash>=0.28.0,<1.1.0
-wasabi>=0.9.1,<1.1.0
+wasabi>=0.9.1,<1.2.0
 srsly>=2.4.3,<3.0.0
 catalogue>=2.0.6,<2.1.0
 typer>=0.3.0,<0.8.0
diff --git a/setup.cfg b/setup.cfg
index 5768c9d3e..cf6e6f84b 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -47,7 +47,7 @@ install_requires =
     cymem>=2.0.2,<2.1.0
     preshed>=3.0.2,<3.1.0
     thinc>=8.1.0,<8.2.0
-    wasabi>=0.9.1,<1.1.0
+    wasabi>=0.9.1,<1.2.0
     srsly>=2.4.3,<3.0.0
     catalogue>=2.0.6,<2.1.0
     # Third-party dependencies

From 0591e67265d7378769c0fc0df4020817f2d514ec Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Mon, 12 Dec 2022 08:45:35 +0100
Subject: [PATCH 3/6] Cast to uint64 for all array-based doc representations
 (#11933)

* Convert all individual values explicitly to uint64 for array-based doc representations

* Temporarily test with latest numpy v1.24.0rc

* Remove unnecessary conversion from attr_t

* Reduce number of individual casts

* Convert specifically from int32 to uint64

* Revert "Temporarily test with latest numpy v1.24.0rc"

This reverts commit eb0e3c5006515b9a7ff52bae59484c909b8a3f65.

* Also use int32 in tests
---
 spacy/tests/doc/test_array.py |  4 ++--
 spacy/tokens/doc.pyx          |  2 ++
 spacy/tokens/span.pyx         |  4 ++--
 spacy/training/example.pyx    | 15 ++++++++-------
 4 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/spacy/tests/doc/test_array.py b/spacy/tests/doc/test_array.py
index c334cc6eb..1f2d7d999 100644
--- a/spacy/tests/doc/test_array.py
+++ b/spacy/tests/doc/test_array.py
@@ -123,14 +123,14 @@ def test_doc_from_array_heads_in_bounds(en_vocab):
 
     # head before start
     arr = doc.to_array(["HEAD"])
-    arr[0] = -1
+    arr[0] = numpy.int32(-1).astype(numpy.uint64)
     doc_from_array = Doc(en_vocab, words=words)
     with pytest.raises(ValueError):
         doc_from_array.from_array(["HEAD"], arr)
 
     # head after end
     arr = doc.to_array(["HEAD"])
-    arr[0] = 5
+    arr[0] = numpy.int32(5).astype(numpy.uint64)
     doc_from_array = Doc(en_vocab, words=words)
     with pytest.raises(ValueError):
         doc_from_array.from_array(["HEAD"], arr)
diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index f2621292c..075bc4d15 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -359,6 +359,7 @@ cdef class Doc:
             for annot in annotations:
                 if annot:
                     if annot is heads or annot is sent_starts or annot is ent_iobs:
+                        annot = numpy.array(annot, dtype=numpy.int32).astype(numpy.uint64)
                         for i in range(len(words)):
                             if attrs.ndim == 1:
                                 attrs[i] = annot[i]
@@ -1558,6 +1559,7 @@ cdef class Doc:
 
             for j, (attr, annot) in enumerate(token_annotations.items()):
                 if attr is HEAD:
+                    annot = numpy.array(annot, dtype=numpy.int32).astype(numpy.uint64)
                     for i in range(len(words)):
                         array[i, j] = annot[i]
                 elif attr is MORPH:
diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx
index c3495f497..99a5f43bd 100644
--- a/spacy/tokens/span.pyx
+++ b/spacy/tokens/span.pyx
@@ -299,7 +299,7 @@ cdef class Span:
                     for ancestor in ancestors:
                         ancestor_i = ancestor.i - self.c.start
                         if ancestor_i in range(length):
-                            array[i, head_col] = ancestor_i - i
+                            array[i, head_col] = numpy.int32(ancestor_i - i).astype(numpy.uint64)
 
                 # if there is no appropriate ancestor, define a new artificial root
                 value = array[i, head_col]
@@ -307,7 +307,7 @@ cdef class Span:
                     new_root = old_to_new_root.get(ancestor_i, None)
                     if new_root is not None:
                         # take the same artificial root as a previous token from the same sentence
-                        array[i, head_col] = new_root - i
+                        array[i, head_col] = numpy.int32(new_root - i).astype(numpy.uint64)
                     else:
                         # set this token as the new artificial root
                         array[i, head_col] = 0
diff --git a/spacy/training/example.pyx b/spacy/training/example.pyx
index dfd337b9e..95b0f0de9 100644
--- a/spacy/training/example.pyx
+++ b/spacy/training/example.pyx
@@ -443,26 +443,27 @@ def _annot2array(vocab, tok_annot, doc_annot):
         if key not in IDS:
             raise ValueError(Errors.E974.format(obj="token", key=key))
         elif key in ["ORTH", "SPACY"]:
-            pass
+            continue
         elif key == "HEAD":
             attrs.append(key)
-            values.append([h-i if h is not None else 0 for i, h in enumerate(value)])
+            row = [h-i if h is not None else 0 for i, h in enumerate(value)]
         elif key == "DEP":
             attrs.append(key)
-            values.append([vocab.strings.add(h) if h is not None else MISSING_DEP for h in value])
+            row = [vocab.strings.add(h) if h is not None else MISSING_DEP for h in value]
         elif key == "SENT_START":
             attrs.append(key)
-            values.append([to_ternary_int(v) for v in value])
+            row = [to_ternary_int(v) for v in value]
         elif key == "MORPH":
             attrs.append(key)
-            values.append([vocab.morphology.add(v) for v in value])
+            row = [vocab.morphology.add(v) for v in value]
         else:
             attrs.append(key)
             if not all(isinstance(v, str) for v in value):
                 types = set([type(v) for v in value])
                 raise TypeError(Errors.E969.format(field=key, types=types)) from None
-            values.append([vocab.strings.add(v) for v in value])
-    array = numpy.asarray(values, dtype="uint64")
+            row = [vocab.strings.add(v) for v in value]
+        values.append([numpy.array(v, dtype=numpy.int32).astype(numpy.uint64) if v < 0 else v for v in row])
+    array = numpy.array(values, dtype=numpy.uint64)
     return attrs, array.T
 
 

From e5c7f3b0776d49c4f6aab7e02b503cdb84fb2134 Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Mon, 12 Dec 2022 10:13:10 +0100
Subject: [PATCH 4/6] CI: Install thinc-apple-ops through extra (#11963)

---
 .github/azure-steps.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/azure-steps.yml b/.github/azure-steps.yml
index 2f77706b8..d0db75f9a 100644
--- a/.github/azure-steps.yml
+++ b/.github/azure-steps.yml
@@ -107,7 +107,7 @@ steps:
     displayName: "Run CPU tests"
 
   - script: |
-      python -m pip install --pre thinc-apple-ops
+      python -m pip install 'spacy[apple]'
       python -m pytest --pyargs spacy
     displayName: "Run CPU tests with thinc-apple-ops"
     condition: and(startsWith(variables['imageName'], 'macos'), eq(variables['python.version'], '3.11'))

From c9d9d6847f9685c21eeec01f4b8cd053cadf8bf5 Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Thu, 15 Dec 2022 10:55:01 +0100
Subject: [PATCH 5/6] Update build constraints for python 3.11 (#11981)

---
 build-constraints.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/build-constraints.txt b/build-constraints.txt
index 956973abf..c1e82f1b0 100644
--- a/build-constraints.txt
+++ b/build-constraints.txt
@@ -5,4 +5,5 @@ numpy==1.17.3; python_version=='3.8' and platform_machine!='aarch64'
 numpy==1.19.2; python_version=='3.8' and platform_machine=='aarch64'
 numpy==1.19.3; python_version=='3.9'
 numpy==1.21.3; python_version=='3.10'
-numpy; python_version>='3.11'
+numpy==1.23.2; python_version=='3.11'
+numpy; python_version>='3.12'

From 3a2b655a29203d1c181a2c14d230b3f9cf8dd54a Mon Sep 17 00:00:00 2001
From: cfuerbachersparks <119413757+cfuerbachersparks@users.noreply.github.com>
Date: Mon, 19 Dec 2022 10:33:38 +0100
Subject: [PATCH 6/6] Update lexeme.md (#11994)

Fix the `suffix_` description: the substring is taken from the end of the word, not the start.
---
 website/docs/api/lexeme.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/website/docs/api/lexeme.md b/website/docs/api/lexeme.md
index eb76afa90..557d04cce 100644
--- a/website/docs/api/lexeme.md
+++ b/website/docs/api/lexeme.md
@@ -138,7 +138,7 @@ The L2 norm of the lexeme's vector representation.
 | `prefix`         | Length-N substring from the start of the word. Defaults to `N=1`. ~~int~~                                                                                                                                                                                            |
 | `prefix_`        | Length-N substring from the start of the word. Defaults to `N=1`. ~~str~~                                                                                                                                                                                            |
 | `suffix`         | Length-N substring from the end of the word. Defaults to `N=3`. ~~int~~                                                                                                                                                                                              |
-| `suffix_`        | Length-N substring from the start of the word. Defaults to `N=3`. ~~str~~                                                                                                                                                                                            |
+| `suffix_`        | Length-N substring from the end of the word. Defaults to `N=3`. ~~str~~                                                                                                                                                                                              |
 | `is_alpha`       | Does the lexeme consist of alphabetic characters? Equivalent to `lexeme.text.isalpha()`. ~~bool~~                                                                                                                                                                    |
 | `is_ascii`       | Does the lexeme consist of ASCII characters? Equivalent to `[any(ord(c) >= 128 for c in lexeme.text)]`. ~~bool~~                                                                                                                                                     |
 | `is_digit`       | Does the lexeme consist of digits? Equivalent to `lexeme.text.isdigit()`. ~~bool~~                                                                                                                                                                                   |