Change mention limit to match reference implementations

This generally means fewer spans are considered, which makes individual training steps faster but can make training take longer to find good spans.
2025-07-18 20:22:25 +03:00 · 2021-08-08 19:55:52 +09:00 · 2021-08-08 19:55:52 +09:00 · 56803d3909
commit 56803d3909
parent 1d1679d431
1 changed files with 2 additions and 2 deletions
--- a/spacy/ml/models/coref.py
+++ b/spacy/ml/models/coref.py
@ -237,8 +237,8 @@ def coarse_prune(
        # calculate the doc length
        doclen = ends[-1] - starts[0]
        # XXX seems to make more sense to use menlen than doclen here?
-        # mlimit = min(mention_limit, int(mention_limit_ratio * doclen))
+        # coref-hoi uses doclen (number of words). 
-        mlimit = min(mention_limit, int(mention_limit_ratio * menlen))
+        mlimit = min(mention_limit, int(mention_limit_ratio * doclen))
        # csel is a 1d integer list
        csel = select_non_crossing_spans(tops, starts, ends, mlimit)
        # add the offset so these indices are absolute