From 53b644e821e8f1fed813f60d99590147b20bd4f9 Mon Sep 17 00:00:00 2001
From: Adriane Boyd
Date: Thu, 6 Jul 2023 08:48:32 +0200
Subject: [PATCH] SpanFinder: set default max_length to 25

When the default `max_length` is not set and there are longer training
documents, it can be difficult to train and evaluate the span finder due
to memory limits and the time it takes to evaluate a huge number of
predicted spans.
---
 spacy/cli/templates/quickstart_training.jinja | 4 ++--
 spacy/pipeline/span_finder.py                 | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/spacy/cli/templates/quickstart_training.jinja b/spacy/cli/templates/quickstart_training.jinja
index e3ca73cfb..1937ea935 100644
--- a/spacy/cli/templates/quickstart_training.jinja
+++ b/spacy/cli/templates/quickstart_training.jinja
@@ -130,7 +130,7 @@ grad_factor = 1.0
 {% if "span_finder" in components -%}
 [components.span_finder]
 factory = "span_finder"
-max_length = null
+max_length = 25
 min_length = null
 scorer = {"@scorers":"spacy.span_finder_scorer.v1"}
 spans_key = "sc"
@@ -419,7 +419,7 @@ width = ${components.tok2vec.model.encode.width}
 {% if "span_finder" in components %}
 [components.span_finder]
 factory = "span_finder"
-max_length = null
+max_length = 25
 min_length = null
 scorer = {"@scorers":"spacy.span_finder_scorer.v1"}
 spans_key = "sc"
diff --git a/spacy/pipeline/span_finder.py b/spacy/pipeline/span_finder.py
index 53f5c55be..a12d52911 100644
--- a/spacy/pipeline/span_finder.py
+++ b/spacy/pipeline/span_finder.py
@@ -48,7 +48,7 @@ DEFAULT_SPAN_FINDER_MODEL = Config().from_str(span_finder_default_config)["model
         "threshold": 0.5,
         "model": DEFAULT_SPAN_FINDER_MODEL,
         "spans_key": DEFAULT_SPANS_KEY,
-        "max_length": None,
+        "max_length": 25,
         "min_length": None,
         "scorer": {"@scorers": "spacy.span_finder_scorer.v1"},
     },