Clean up unused functions

`make_clean_doc` is not needed and was removed. `logsumexp` may be needed if I misunderstood the loss calculation, so I left it in for now with a note.
2025-10-27 14:11:04 +03:00 · 2021-05-28 15:56:20 +09:00 · 2021-05-28 15:56:20 +09:00 · 4a4ef72191
commit 4a4ef72191
parent 0aa1083ce8
1 changed file with 3 additions and 12 deletions
--- a/spacy/ml/models/coref_util.py
+++ b/spacy/ml/models/coref_util.py
@@ -44,11 +44,13 @@ def topk(xp, arr, k, axis=None):
 def logsumexp(xp, arr, axis=None):
    """Emulate torch.logsumexp by returning the log of summed exponentials
    along each row in the given dimension.
-    TODO: currently not used?

    Reduces a 2d array to 1d."""
    # from slide 5 here:
    # https://www.slideshare.net/ryokuta/cupy
+
+    # Note: this was added to reproduce loss calculation in coref-hoi. If loss
+    # can be calculated using another method this is not necessary.
    hi = arr.max(axis=axis)
    hi = xp.expand_dims(hi, 1)
    return hi.squeeze() + xp.log(xp.exp(arr - hi).sum(axis=axis))
@@ -215,17 +217,6 @@ def get_clusters_from_doc(doc) -> List[List[Tuple[int, int]]]:
    return out


-def make_clean_doc(nlp, doc):
-    """Return a doc with raw data but not span annotations."""
-    # Surely there is a better way to do this?
-    # TODO: currently not used?
-
-    sents = [tok.is_sent_start for tok in doc]
-    words = [tok.text for tok in doc]
-    out = Doc(nlp.vocab, words=words, sent_starts=sents)
-    return out
-
-
 def create_gold_scores(
    ments: Ints2d, clusters: List[List[Tuple[int, int]]]
 ) -> List[List[bool]]: