mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 01:04:34 +03:00
Fix PhraseMatcher.remove for overlapping patterns (#4437)
This commit is contained in:
parent
f8f68bb062
commit
98a961a60e
|
@@ -102,8 +102,10 @@ cdef class PhraseMatcher:
|
||||||
cdef vector[MapStruct*] path_nodes
|
cdef vector[MapStruct*] path_nodes
|
||||||
cdef vector[key_t] path_keys
|
cdef vector[key_t] path_keys
|
||||||
cdef key_t key_to_remove
|
cdef key_t key_to_remove
|
||||||
for keyword in self._docs[key]:
|
for keyword in sorted(self._docs[key], key=lambda x: len(x), reverse=True):
|
||||||
current_node = self.c_map
|
current_node = self.c_map
|
||||||
|
path_nodes.clear()
|
||||||
|
path_keys.clear()
|
||||||
for token in keyword:
|
for token in keyword:
|
||||||
result = map_get(current_node, token)
|
result = map_get(current_node, token)
|
||||||
if result:
|
if result:
|
||||||
|
|
|
@@ -226,3 +226,13 @@ def test_phrase_matcher_callback(en_vocab):
|
||||||
matcher.add("COMPANY", mock, pattern)
|
matcher.add("COMPANY", mock, pattern)
|
||||||
matches = matcher(doc)
|
matches = matcher(doc)
|
||||||
mock.assert_called_once_with(matcher, doc, 0, matches)
|
mock.assert_called_once_with(matcher, doc, 0, matches)
|
||||||
|
|
||||||
|
|
||||||
|
def test_phrase_matcher_remove_overlapping_patterns(en_vocab):
    """Regression test for removing a key whose patterns overlap (#4437).

    Four patterns are registered under the single key "THIS"; each one is
    a strict prefix of the next ("this", "this is", "this is a",
    "this is a word"). The test checks that removing the key completes
    without raising and leaves the matcher without that key.
    """
    matcher = PhraseMatcher(en_vocab)
    pattern1 = Doc(en_vocab, words=["this"])
    pattern2 = Doc(en_vocab, words=["this", "is"])
    pattern3 = Doc(en_vocab, words=["this", "is", "a"])
    pattern4 = Doc(en_vocab, words=["this", "is", "a", "word"])
    matcher.add("THIS", None, pattern1, pattern2, pattern3, pattern4)
    # Sanity check: the key must be registered before we try to remove it,
    # otherwise a passing remove() would prove nothing.
    assert "THIS" in matcher
    # The call itself is the core of the regression: it must not raise
    # when the patterns share a common prefix.
    matcher.remove("THIS")
    # Removal must actually unregister the key, not merely return quietly.
    assert "THIS" not in matcher
    assert len(matcher) == 0
|
||||||
|
|
Loading…
Reference in New Issue
Block a user