From 5db28ec2fd47276fa2a2f460d10d6ec61760ed6e Mon Sep 17 00:00:00 2001 From: Paul O'Leary McCann Date: Sat, 3 Jul 2021 21:13:32 +0900 Subject: [PATCH] Tweak mention limit calculation The calculation of this in the coref-hoi code is hard to follow. Based on comments and variable names it sounds like it's using the doc length, but it might actually be the number of mentions? Number of mentions should be much larger and seems more correct, but might want to revisit this. --- spacy/ml/models/coref.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/spacy/ml/models/coref.py b/spacy/ml/models/coref.py index 9a3081bd8..2545f7325 100644 --- a/spacy/ml/models/coref.py +++ b/spacy/ml/models/coref.py @@ -267,7 +267,9 @@ def coarse_prune( # calculate the doc length doclen = ends[-1] - starts[0] - mlimit = min(mention_limit, int(mention_limit_ratio * doclen)) + # XXX seems to make more sense to use menlen than doclen here? + #mlimit = min(mention_limit, int(mention_limit_ratio * doclen)) + mlimit = min(mention_limit, int(mention_limit_ratio * menlen)) # csel is a 1d integer list csel = select_non_crossing_spans(tops, starts, ends, mlimit) # add the offset so these indices are absolute