# spaCy/spacy/tests/regression/test_issue2671.py
# (web-view metadata: 31 lines, 968 B, Python)

# coding: utf-8
from __future__ import unicode_literals
# (web-view history timestamp: 2018-09-27 17:41:57 +03:00)
import pytest
from spacy.lang.en import English
from spacy.matcher import Matcher
def test_issue2671():
    """Ensure the correct entity ID is returned for matches with quantifiers.

    See also #2675
    """

    def get_rule_id(nlp, matcher, doc):
        """Return the rule name of the first match in ``doc``.

        The numeric match ID produced by ``matcher`` is looked up in
        ``nlp.vocab.strings`` to recover the string it was registered
        under. Returns ``None`` when the matcher yields no matches, so the
        calling assertion fails with a clear ``None != pattern_id``.
        """
        for match_id, _start, _end in matcher(doc):
            # Only the first match is inspected, as in the original test.
            return nlp.vocab.strings[match_id]
        return None

    nlp = English()
    matcher = Matcher(nlp.vocab)
    pattern_id = 'test_pattern'
    # The middle token carries the '?' quantifier, which is the case this
    # regression test is about: one pattern is checked against both the
    # hyphenated and the unhyphenated sentence below.
    pattern = [{'LOWER': 'high'},
               {'IS_PUNCT': True, 'OP': '?'},
               {'LOWER': 'adrenaline'}]
    # NOTE(review): the ``None`` is presumably the on_match callback slot
    # of the v2-style ``Matcher.add(key, on_match, *patterns)`` signature
    # -- confirm against the spaCy version under test.
    matcher.add(pattern_id, None, pattern)
    doc1 = nlp("This is a high-adrenaline situation.")
    doc2 = nlp("This is a high adrenaline situation.")
    assert get_rule_id(nlp, matcher, doc1) == pattern_id
    assert get_rule_id(nlp, matcher, doc2) == pattern_id