From 0a22fed6341f1e26500b5d5420b46b96197b933e Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Thu, 6 May 2021 10:42:44 +0200 Subject: [PATCH] Fix span offsets for Matcher(as_spans) on spans (#7992) Fix returned span offsets for `matcher(span, as_spans=True)`. When the matcher is called on a `Span`, the match offsets are shifted by `doclike.start` before the returned `Span` objects are built. --- spacy/matcher/matcher.pyx | 8 +++++++- spacy/tests/matcher/test_matcher_api.py | 6 ++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/spacy/matcher/matcher.pyx b/spacy/matcher/matcher.pyx index dae12c3f6..f389b4abd 100644 --- a/spacy/matcher/matcher.pyx +++ b/spacy/matcher/matcher.pyx @@ -284,7 +284,13 @@ cdef class Matcher: if on_match is not None: on_match(self, doc, i, final_matches) if as_spans: - return [Span(doc, start, end, label=key) for key, start, end in final_matches] + spans = [] + for key, start, end in final_matches: + if isinstance(doclike, Span): + start += doclike.start + end += doclike.start + spans.append(Span(doc, start, end, label=key)) + return spans elif with_alignments: # convert alignments List[Dict[str, int]] --> List[int] final_matches = [] diff --git a/spacy/tests/matcher/test_matcher_api.py b/spacy/tests/matcher/test_matcher_api.py index 094bf22a6..548da7dc6 100644 --- a/spacy/tests/matcher/test_matcher_api.py +++ b/spacy/tests/matcher/test_matcher_api.py @@ -513,6 +513,12 @@ def test_matcher_as_spans(matcher): assert matches[1].text == "Java" assert matches[1].label_ == "Java" + matches = matcher(doc[1:], as_spans=True) + assert len(matches) == 1 + assert isinstance(matches[0], Span) + assert matches[0].text == "Java" + assert matches[0].label_ == "Java" + def test_matcher_deprecated(matcher): doc = Doc(matcher.vocab, words=["hello", "world"])