From 56803d3909a4c9e56b816da7cf583a6a549baf98 Mon Sep 17 00:00:00 2001 From: Paul O'Leary McCann Date: Sun, 8 Aug 2021 19:55:52 +0900 Subject: [PATCH] Change mention limit to match reference implementations This generally means fewer spans are considered, which makes individual steps in training faster but can make training take longer to find the good spans. --- spacy/ml/models/coref.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spacy/ml/models/coref.py b/spacy/ml/models/coref.py index e6cfd1773..3b14e6ecb 100644 --- a/spacy/ml/models/coref.py +++ b/spacy/ml/models/coref.py @@ -237,8 +237,8 @@ def coarse_prune( # calculate the doc length doclen = ends[-1] - starts[0] # XXX seems to make more sense to use menlen than doclen here? - # mlimit = min(mention_limit, int(mention_limit_ratio * doclen)) - mlimit = min(mention_limit, int(mention_limit_ratio * menlen)) + # coref-hoi uses doclen (number of words). + mlimit = min(mention_limit, int(mention_limit_ratio * doclen)) # csel is a 1d integer list csel = select_non_crossing_spans(tops, starts, ends, mlimit) # add the offset so these indices are absolute