mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 10:16:27 +03:00
Fix span offsets for Matcher(as_spans) on spans (#7992)
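Fix returned span offsets for `matcher(span, as_spans=True)`: when the matcher is called on a `Span`, the match offsets are now shifted by the span's `start` before the returned `Span` objects are created on the parent `doc`.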
This commit is contained in:
parent
7d5db41ac3
commit
0a22fed634
|
@ -284,7 +284,13 @@ cdef class Matcher:
|
|||
if on_match is not None:
|
||||
on_match(self, doc, i, final_matches)
|
||||
if as_spans:
|
||||
return [Span(doc, start, end, label=key) for key, start, end in final_matches]
|
||||
spans = []
|
||||
for key, start, end in final_matches:
|
||||
if isinstance(doclike, Span):
|
||||
start += doclike.start
|
||||
end += doclike.start
|
||||
spans.append(Span(doc, start, end, label=key))
|
||||
return spans
|
||||
elif with_alignments:
|
||||
# convert alignments List[Dict[str, int]] --> List[int]
|
||||
final_matches = []
|
||||
|
|
|
@ -513,6 +513,12 @@ def test_matcher_as_spans(matcher):
|
|||
assert matches[1].text == "Java"
|
||||
assert matches[1].label_ == "Java"
|
||||
|
||||
matches = matcher(doc[1:], as_spans=True)
|
||||
assert len(matches) == 1
|
||||
assert isinstance(matches[0], Span)
|
||||
assert matches[0].text == "Java"
|
||||
assert matches[0].label_ == "Java"
|
||||
|
||||
|
||||
def test_matcher_deprecated(matcher):
|
||||
doc = Doc(matcher.vocab, words=["hello", "world"])
|
||||
|
|
Loading…
Reference in New Issue
Block a user