From 59b0b26cc6a973e528bd190e8f19397ceab81ee6 Mon Sep 17 00:00:00 2001
From: richardpaulhudson <richard@explosion.ai>
Date: Fri, 9 Dec 2022 21:06:16 +0100
Subject: [PATCH] Changes based on review comments

---
 spacy/ml/models/lemmatizer.py          |  5 ++++-
 spacy/pipeline/edit_tree_lemmatizer.py | 13 ++++++++-----
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/spacy/ml/models/lemmatizer.py b/spacy/ml/models/lemmatizer.py
index c42e0756e..f537983b2 100644
--- a/spacy/ml/models/lemmatizer.py
+++ b/spacy/ml/models/lemmatizer.py
@@ -13,6 +13,7 @@ def build_lemmatizer_model(
     nO: Optional[int] = None,
     normalize=False,
     lowercasing=True,
+    lowercasing_relu_width: Optional[int] = 50,
 ) -> Model[List[Doc], Union[List[Floats2d]]]:
     """Build a model for the edit-tree lemmatizer, using a provided token-to-vector component.
     A linear layer with softmax activation is added to predict scores
@@ -38,7 +39,9 @@ def build_lemmatizer_model(
         model = tok2vec >> with_array(softmax)
         if lowercasing:
             lowercasing_output = Sigmoid(1)
-            sigmoid_appendage = Relu(50) >> Dropout(0.2) >> lowercasing_output
+            sigmoid_appendage = (
+                Relu(lowercasing_relu_width) >> Dropout() >> lowercasing_output
+            )
             model |= tok2vec >> with_array(sigmoid_appendage)
             model.set_ref("lowercasing_output", lowercasing_output)
         return model
diff --git a/spacy/pipeline/edit_tree_lemmatizer.py b/spacy/pipeline/edit_tree_lemmatizer.py
index b19511790..58fc1ad87 100644
--- a/spacy/pipeline/edit_tree_lemmatizer.py
+++ b/spacy/pipeline/edit_tree_lemmatizer.py
@@ -25,6 +25,7 @@ default_model_config = """
 [model]
 @architectures = "spacy.Lemmatizer.v1"
 lowercasing = true
+lowercasing_relu_width = 50
 
 [model.tok2vec]
 @architectures = "spacy.HashEmbedCNN.v2"
@@ -204,7 +205,7 @@ class EditTreeLemmatizer(TrainablePipe):
         self, docs, scores, lowercasing_flags: Optional[List[Floats2d]]
     ):
         guesses = []
-        for i, (doc, doc_scores) in enumerate(zip(docs, scores)):
+        for (i, doc, doc_scores) in zip(range(len(docs)), docs, scores):
             if self.top_k == 1:
                 doc_guesses = doc_scores.argmax(axis=1).reshape(-1, 1)
             else:
@@ -214,11 +215,13 @@ class EditTreeLemmatizer(TrainablePipe):
                 doc_guesses = doc_guesses.get()
 
             doc_compat_guesses = []
-            for j, (token, candidates) in enumerate(zip(doc, doc_guesses)):
-                to_lowercase = False
+            for (j, token, candidates) in zip(range(len(doc)), doc, doc_guesses):
                 if lowercasing_flags is not None and lowercasing_flags[i][j] > 0.5:
-                    to_lowercase = True
-                text = token.lower_ if to_lowercase else token.text
+                    to_lowercase = 1
+                    text = token.lower_
+                else:
+                    to_lowercase = 0
+                    text = token.text
                 tree_id = -1
                 for candidate in candidates:
                     candidate_tree_id = self.cfg["labels"][candidate]