mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 01:46:28 +03:00
Return doc offsets in Matcher on spans (#10576)
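Previously, when the `Matcher` was run on a `Span`, the returned match offsets were converted to doc offsets only for `as_spans`; in all other cases they were left relative to the start of the span. Because the `on_match` callbacks are always applied to the doc, `Matcher` matches on spans should consistently use doc offsets.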
This commit is contained in:
parent
75f7c15187
commit
0e71bd973f
|
@ -252,6 +252,10 @@ cdef class Matcher:
|
|||
# non-overlapping ones this `match` can be either (start, end) or
|
||||
# (start, end, alignments) depending on `with_alignments=` option.
|
||||
for key, *match in matches:
|
||||
# Adjust span matches to doc offsets
|
||||
if isinstance(doclike, Span):
|
||||
match[0] += doclike.start
|
||||
match[1] += doclike.start
|
||||
span_filter = self._filter.get(key)
|
||||
if span_filter is not None:
|
||||
pairs = pairs_by_id.get(key, [])
|
||||
|
@ -282,9 +286,6 @@ cdef class Matcher:
|
|||
if as_spans:
|
||||
final_results = []
|
||||
for key, start, end, *_ in final_matches:
|
||||
if isinstance(doclike, Span):
|
||||
start += doclike.start
|
||||
end += doclike.start
|
||||
final_results.append(Span(doc, start, end, label=key))
|
||||
elif with_alignments:
|
||||
# convert alignments List[Dict[str, int]] --> List[int]
|
||||
|
|
|
@ -591,9 +591,16 @@ def test_matcher_span(matcher):
|
|||
doc = Doc(matcher.vocab, words=text.split())
|
||||
span_js = doc[:3]
|
||||
span_java = doc[4:]
|
||||
assert len(matcher(doc)) == 2
|
||||
assert len(matcher(span_js)) == 1
|
||||
assert len(matcher(span_java)) == 1
|
||||
doc_matches = matcher(doc)
|
||||
span_js_matches = matcher(span_js)
|
||||
span_java_matches = matcher(span_java)
|
||||
assert len(doc_matches) == 2
|
||||
assert len(span_js_matches) == 1
|
||||
assert len(span_java_matches) == 1
|
||||
|
||||
# match offsets always refer to the doc
|
||||
assert doc_matches[0] == span_js_matches[0]
|
||||
assert doc_matches[1] == span_java_matches[0]
|
||||
|
||||
|
||||
def test_matcher_as_spans(matcher):
|
||||
|
|
Loading…
Reference in New Issue
Block a user