2018-08-15 16:54:33 +03:00
|
|
|
# coding: utf-8
|
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
2018-09-27 17:41:57 +03:00
|
|
|
from spacy.lang.en import English
|
|
|
|
from spacy.matcher import Matcher
|
2018-08-15 16:54:33 +03:00
|
|
|
|
|
|
|
|
|
|
|
def test_issue2671():
    """Ensure the correct entity ID is returned for matches with quantifiers.

    A single pattern containing an optional ("?") punctuation token is
    registered under ``pattern_id`` and run over two sentences, one with a
    hyphen between "high" and "adrenaline" and one with a space. Every match
    reported for either sentence must resolve back to ``pattern_id``.

    See also #2675
    """
    nlp = English()
    matcher = Matcher(nlp.vocab)
    pattern_id = "test_pattern"
    pattern = [
        {"LOWER": "high"},
        # Optional token: presumably matches the hyphen in "high-adrenaline"
        # and is skipped for "high adrenaline" -- depends on the tokenizer.
        {"IS_PUNCT": True, "OP": "?"},
        {"LOWER": "adrenaline"},
    ]
    matcher.add(pattern_id, None, pattern)
    # Build both docs before matching, in the same order as before.
    docs = [
        nlp("This is a high-adrenaline situation."),
        nlp("This is a high adrenaline situation."),
    ]
    for doc in docs:
        matches = matcher(doc)
        # Without this check the loop below would pass vacuously if the
        # matcher returned nothing, hiding a regression.
        assert matches, "expected at least one match for: %s" % doc.text
        for match_id, _start, _end in matches:
            # The match ID is a hash; resolve it through the string store.
            assert nlp.vocab.strings[match_id] == pattern_id
|