2019-06-12 14:38:30 +03:00
|
|
|
# coding: utf8
|
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
|
|
|
from spacy.matcher import Matcher
|
|
|
|
from spacy.tokens import Doc
|
|
|
|
|
|
|
|
|
|
|
|
def test_issue3839(en_vocab):
    """Check that the ID of the first match equals the string-store value of
    the key the pattern was registered under.

    Two patterns are exercised, each on its own Matcher: one with a single
    {"OP": "?"} token between "terrific" and "group", and one with two.
    """
    key = "PATTERN"
    words = ["terrific", "group", "of", "people"]
    doc = Doc(en_vocab, words=words)
    one_gap = [{"LOWER": "terrific"}, {"OP": "?"}, {"LOWER": "group"}]
    two_gaps = [{"LOWER": "terrific"}, {"OP": "?"}, {"OP": "?"}, {"LOWER": "group"}]
    for pattern in (one_gap, two_gaps):
        # A new Matcher per pattern, so each pattern is checked in isolation
        # while still being registered under the same key.
        matcher = Matcher(en_vocab)
        matcher.add(key, [pattern])
        first_match = matcher(doc)[0]
        assert first_match[0] == en_vocab.strings[key]
|