Fix PhraseMatcher callback and add tests (#4399)

* Fix callback lookup in PhraseMatcher (string key rather than hash key)
* Add callback tests for Matcher and PhraseMatcher
This commit is contained in:
adrianeboyd 2019-10-08 12:07:02 +02:00 committed by Ines Montani
parent fd4a5341b0
commit 14841d0aa6
3 changed files with 23 additions and 1 deletions

View File

@ -225,7 +225,7 @@ cdef class PhraseMatcher:
for i in range(c_matches.size()): for i in range(c_matches.size()):
matches.append((c_matches[i].match_id, c_matches[i].start, c_matches[i].end)) matches.append((c_matches[i].match_id, c_matches[i].start, c_matches[i].end))
for i, (ent_id, start, end) in enumerate(matches): for i, (ent_id, start, end) in enumerate(matches):
on_match = self._callbacks.get(ent_id) on_match = self._callbacks.get(self.vocab.strings[ent_id])
if on_match is not None: if on_match is not None:
on_match(self, doc, i, matches) on_match(self, doc, i, matches)
return matches return matches

View File

@ -3,6 +3,7 @@ from __future__ import unicode_literals
import pytest import pytest
import re import re
from mock import Mock
from spacy.matcher import Matcher, DependencyMatcher from spacy.matcher import Matcher, DependencyMatcher
from spacy.tokens import Doc, Token from spacy.tokens import Doc, Token
@ -418,3 +419,13 @@ def test_matcher_valid_callback(en_vocab):
with pytest.raises(ValueError): with pytest.raises(ValueError):
matcher.add("TEST", [], [{"TEXT": "test"}]) matcher.add("TEST", [], [{"TEXT": "test"}])
matcher(Doc(en_vocab, words=["test"])) matcher(Doc(en_vocab, words=["test"]))
def test_matcher_callback(en_vocab):
    """Check that Matcher fires a rule's on_match callback exactly once.

    A single-token pattern is registered together with a mock callback; after
    running the matcher over a doc, the callback must have been invoked once
    with the matcher, the doc, match index 0 and the returned match list.
    """
    on_match = Mock()
    matcher = Matcher(en_vocab)
    matcher.add("Rule", on_match, [{"ORTH": "test"}])
    doc = Doc(en_vocab, words=["This", "is", "a", "test", "."])
    found = matcher(doc)
    # The callback receives the same list object that the matcher returns.
    on_match.assert_called_once_with(matcher, doc, 0, found)

View File

@ -2,6 +2,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import pytest import pytest
from mock import Mock
from spacy.matcher import PhraseMatcher from spacy.matcher import PhraseMatcher
from spacy.tokens import Doc from spacy.tokens import Doc
from ..util import get_doc from ..util import get_doc
@ -215,3 +216,13 @@ def test_attr_pipeline_checks(en_vocab):
matcher.add("TEST3", None, doc3) matcher.add("TEST3", None, doc3)
matcher = PhraseMatcher(en_vocab, attr="TEXT") matcher = PhraseMatcher(en_vocab, attr="TEXT")
matcher.add("TEST3", None, doc3) matcher.add("TEST3", None, doc3)
def test_phrase_matcher_callback(en_vocab):
    """Check that PhraseMatcher fires a rule's on_match callback exactly once.

    A two-token phrase is registered under the key "COMPANY" together with a
    mock callback; after matching a doc, the callback must have been invoked
    once with the matcher, the doc, match index 0 and the returned match list.
    """
    on_match = Mock()
    text = Doc(en_vocab, words=["I", "like", "Google", "Now", "best"])
    phrase = Doc(en_vocab, words=["Google", "Now"])
    matcher = PhraseMatcher(en_vocab)
    matcher.add("COMPANY", on_match, phrase)
    found = matcher(text)
    # The callback receives the same list object that the matcher returns.
    on_match.assert_called_once_with(matcher, text, 0, found)