mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 18:26:30 +03:00
Fix span offsets for Matcher(as_spans) on spans (#7992)
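Fix returned span offsets for `Matcher(as_spans=True)(span)`. When the matcher is called on a `Span`, the start and end offsets of each match are now shifted by the span's start before the returned `Span` objects are created from the underlying `Doc`.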
This commit is contained in:
parent
7d5db41ac3
commit
0a22fed634
|
@ -284,7 +284,13 @@ cdef class Matcher:
|
||||||
if on_match is not None:
|
if on_match is not None:
|
||||||
on_match(self, doc, i, final_matches)
|
on_match(self, doc, i, final_matches)
|
||||||
if as_spans:
|
if as_spans:
|
||||||
return [Span(doc, start, end, label=key) for key, start, end in final_matches]
|
spans = []
|
||||||
|
for key, start, end in final_matches:
|
||||||
|
if isinstance(doclike, Span):
|
||||||
|
start += doclike.start
|
||||||
|
end += doclike.start
|
||||||
|
spans.append(Span(doc, start, end, label=key))
|
||||||
|
return spans
|
||||||
elif with_alignments:
|
elif with_alignments:
|
||||||
# convert alignments List[Dict[str, int]] --> List[int]
|
# convert alignments List[Dict[str, int]] --> List[int]
|
||||||
final_matches = []
|
final_matches = []
|
||||||
|
|
|
@ -513,6 +513,12 @@ def test_matcher_as_spans(matcher):
|
||||||
assert matches[1].text == "Java"
|
assert matches[1].text == "Java"
|
||||||
assert matches[1].label_ == "Java"
|
assert matches[1].label_ == "Java"
|
||||||
|
|
||||||
|
matches = matcher(doc[1:], as_spans=True)
|
||||||
|
assert len(matches) == 1
|
||||||
|
assert isinstance(matches[0], Span)
|
||||||
|
assert matches[0].text == "Java"
|
||||||
|
assert matches[0].label_ == "Java"
|
||||||
|
|
||||||
|
|
||||||
def test_matcher_deprecated(matcher):
|
def test_matcher_deprecated(matcher):
|
||||||
doc = Doc(matcher.vocab, words=["hello", "world"])
|
doc = Doc(matcher.vocab, words=["hello", "world"])
|
||||||
|
|
Loading…
Reference in New Issue
Block a user