* Add test for matcher end-point problem

This commit is contained in:
Matthew Honnibal 2015-11-12 04:56:07 +11:00
parent c288414968
commit cfa4062147

View File

@ -1,6 +1,10 @@
from __future__ import unicode_literals, print_function from __future__ import unicode_literals, print_function
import pytest import pytest
from spacy.attrs import LOWER
from spacy.matcher import Matcher
@pytest.mark.models @pytest.mark.models
def test_simple_types(EN): def test_simple_types(EN):
tokens = EN(u'Mr. Best flew to New York on Saturday morning.') tokens = EN(u'Mr. Best flew to New York on Saturday morning.')
@ -21,3 +25,28 @@ def test_consistency_bug(EN):
tokens = EN(u'''Charity and other short-term aid have buoyed them so far, and a tax-relief bill working its way through Congress would help. But the September 11 Victim Compensation Fund, enacted by Congress to discourage people from filing lawsuits, will determine the shape of their lives for years to come.\n\n''', entity=False) tokens = EN(u'''Charity and other short-term aid have buoyed them so far, and a tax-relief bill working its way through Congress would help. But the September 11 Victim Compensation Fund, enacted by Congress to discourage people from filing lawsuits, will determine the shape of their lives for years to come.\n\n''', entity=False)
ents = EN.matcher(tokens) ents = EN.matcher(tokens)
EN.entity(tokens) EN.entity(tokens)
@pytest.mark.models
def test_unit_end_gazetteer(EN):
    '''Test a bug in the interaction between the NER model and the gazetteer.

    A Matcher is built with two PERSON patterns that share a first token
    ("cal" and "cal henderson"). It is run over a sentence that contains only
    "cal", and then the entity recognizer is run over the same document.

    EN is the English pipeline fixture; its vocab, entity recognizer and
    call interface are used here.
    '''
    matcher = Matcher(EN.vocab,
                      {'MemberNames':
                          ('PERSON', {},
                           [
                               [{LOWER: 'cal'}],
                               [{LOWER: 'cal'}, {LOWER: 'henderson'}],
                           ]
                          )
                      }
                     )
    doc = EN(u'who is cal the manager of?')
    # The checks below only run when the pipeline did not tag any entity
    # on its own.
    if not list(doc.ents):
        ents = matcher(doc)
        # Only the single-token pattern can match this sentence.
        assert len(ents) == 1
        # Run the recognizer from the EN fixture on the same doc and read the
        # entities back from that doc (there is no `nlp` name in this module's
        # visible scope).
        EN.entity(doc)
        assert list(doc.ents)[0].text == 'cal'