Minor NEL type fixes (#10860)

* Fix TODO about typing

  The fix was simple: just request an array2f.

* Add type ignore

  Maxout has a more restrictive type than the residual layer expects (only Floats2d vs. any Floats).

* Various cleanup

  This moves a lot of lines around but doesn't change any functionality. Details:

  1. Use `continue` to reduce indentation.
  2. Move sentence doc building inside the conditional, since it's otherwise unused.
  3. Reduce some temporary assignments.
2025-12-23 01:53:17 +03:00 · 2022-06-01 07:41:28 +09:00 · 2022-06-01 07:41:28 +09:00 · dca2e8c644
commit dca2e8c644
parent 56d4055d96
2 changed files with 56 additions and 56 deletions
--- a/spacy/ml/models/entity_linker.py
+++ b/spacy/ml/models/entity_linker.py
@@ -23,7 +23,7 @@ def build_nel_encoder(
            ((tok2vec >> list2ragged()) & build_span_maker())
            >> extract_spans()
            >> reduce_mean()
-            >> residual(Maxout(nO=token_width, nI=token_width, nP=2, dropout=0.0))
+            >> residual(Maxout(nO=token_width, nI=token_width, nP=2, dropout=0.0)) # type: ignore
            >> output_layer
        )
        model.set_ref("output_layer", output_layer)
--- a/spacy/pipeline/entity_linker.py
+++ b/spacy/pipeline/entity_linker.py
@@ -355,7 +355,7 @@ class EntityLinker(TrainablePipe):
                    keep_ents.append(eidx)

                eidx += 1
-        entity_encodings = self.model.ops.asarray(entity_encodings, dtype="float32")
+        entity_encodings = self.model.ops.asarray2f(entity_encodings, dtype="float32")
        selected_encodings = sentence_encodings[keep_ents]

        # if there are no matches, short circuit
@@ -368,13 +368,12 @@ class EntityLinker(TrainablePipe):
                method="get_loss", msg="gold entities do not match up"
            )
            raise RuntimeError(err)
-        # TODO: fix typing issue here
-        gradients = self.distance.get_grad(selected_encodings, entity_encodings)  # type: ignore
+        gradients = self.distance.get_grad(selected_encodings, entity_encodings)
        # to match the input size, we need to give a zero gradient for items not in the kb
        out = self.model.ops.alloc2f(*sentence_encodings.shape)
        out[keep_ents] = gradients

-        loss = self.distance.get_loss(selected_encodings, entity_encodings)  # type: ignore
+        loss = self.distance.get_loss(selected_encodings, entity_encodings)
        loss = loss / len(entity_encodings)
        return float(loss), out

@@ -391,18 +390,21 @@ class EntityLinker(TrainablePipe):
        self.validate_kb()
        entity_count = 0
        final_kb_ids: List[str] = []
+        xp = self.model.ops.xp
        if not docs:
            return final_kb_ids
        if isinstance(docs, Doc):
            docs = [docs]
        for i, doc in enumerate(docs):
+            if len(doc) == 0:
+                continue
            sentences = [s for s in doc.sents]
-            if len(doc) > 0:
            # Looping through each entity (TODO: rewrite)
            for ent in doc.ents:
-                    sent = ent.sent
-                    sent_index = sentences.index(sent)
+                sent_index = sentences.index(ent.sent)
                assert sent_index >= 0
+
+                if self.incl_context:
                    # get n_neighbour sentences, clipped to the length of the document
                    start_sentence = max(0, sent_index - self.n_sents)
                    end_sentence = min(len(sentences) - 1, sent_index + self.n_sents)
@@ -410,8 +412,6 @@ class EntityLinker(TrainablePipe):
                    end_token = sentences[end_sentence].end
                    sent_doc = doc[start_token:end_token].as_doc()
                    # currently, the context is the same for each entity in a sentence (should be refined)
-                    xp = self.model.ops.xp
-                    if self.incl_context:
                    sentence_encoding = self.model.predict([sent_doc])[0]
                    sentence_encoding_t = sentence_encoding.T
                    sentence_norm = xp.linalg.norm(sentence_encoding_t)