2017-01-12 19:51:46 +03:00
|
|
|
# coding: utf-8
|
2016-10-23 15:04:01 +03:00
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
2017-01-12 19:51:46 +03:00
|
|
|
from ...matcher import Matcher
|
|
|
|
from ...attrs import ORTH
|
|
|
|
from ..util import get_doc
|
2016-10-23 15:04:01 +03:00
|
|
|
|
2017-01-12 19:51:46 +03:00
|
|
|
import pytest
|
2016-10-23 15:04:01 +03:00
|
|
|
|
|
|
|
|
2017-01-12 19:51:46 +03:00
|
|
|
@pytest.mark.parametrize(
    "words,entity",
    [(["Test", "Entity"], "TestEntity")])
def test_matcher_add_empty_entity(en_vocab, words, entity):
    """Register an entity without any patterns and check nothing matches.

    After ``add_entity`` alone, the matcher must report zero patterns and
    return an empty match list for a doc built from ``words``.
    """
    entity_matcher = Matcher(en_vocab)
    entity_matcher.add_entity(entity)
    tokens = get_doc(en_vocab, words)

    # Only the entity was registered; no pattern was ever attached to it.
    assert entity_matcher.n_patterns == 0

    found = entity_matcher(tokens)
    assert found == []
|
2016-10-23 15:04:01 +03:00
|
|
|
|
|
|
|
|
2017-01-12 19:51:46 +03:00
|
|
|
@pytest.mark.parametrize(
    "entity1,entity2,attrs",
    [("TestEntity", "TestEntity2", {"Hello": "World"})])
def test_matcher_get_entity_attrs(en_vocab, entity1, entity2, attrs):
    """Check that ``get_entity`` returns the attrs given to ``add_entity``.

    ``entity1`` is added without attrs and must yield an empty dict, both
    before and after ``entity2`` is added with ``attrs``; ``entity2`` must
    yield exactly ``attrs``.
    """
    entity_matcher = Matcher(en_vocab)

    entity_matcher.add_entity(entity1)
    attrs_before = entity_matcher.get_entity(entity1)
    assert attrs_before == {}

    entity_matcher.add_entity(entity2, attrs=attrs)
    second_attrs = entity_matcher.get_entity(entity2)
    assert second_attrs == attrs

    # Adding the second entity must not have changed what the first returns.
    attrs_after = entity_matcher.get_entity(entity1)
    assert attrs_after == {}
|
2016-10-23 15:04:01 +03:00
|
|
|
|
|
|
|
|
2017-01-12 19:51:46 +03:00
|
|
|
@pytest.mark.parametrize(
    "words,entity,attrs",
    [(["Test", "Entity"], "TestEntity", {"Hello": "World"})])
def test_matcher_get_entity_via_match(en_vocab, words, entity, attrs):
    """Look up an entity's attrs through the ID returned by a match.

    The entity is added with ``attrs`` but no patterns, so the doc does not
    match at first. Once a two-token ORTH pattern is added, the matcher must
    return a single 4-tuple whose first item can be passed to ``get_entity``
    to recover ``attrs``.
    """
    entity_matcher = Matcher(en_vocab)
    entity_matcher.add_entity(entity, attrs=attrs)
    tokens = get_doc(en_vocab, words)

    # Entity registered, but without patterns there is nothing to match.
    assert entity_matcher.n_patterns == 0
    assert entity_matcher(tokens) == []

    # One token spec per word, each matching on the exact text.
    first_word, second_word = words[0], words[1]
    token_specs = [{ORTH: first_word}, {ORTH: second_word}]
    entity_matcher.add_pattern(entity, token_specs)
    assert entity_matcher.n_patterns == 1

    found = entity_matcher(tokens)
    assert len(found) == 1
    only_match = found[0]
    assert len(only_match) == 4

    ent_id, label, start, end = only_match
    expected_id = entity_matcher.vocab.strings[entity]
    assert ent_id == expected_id
    assert label == 0
    assert start == 0
    assert end == 2

    # The ID taken from the match is accepted by get_entity.
    assert entity_matcher.get_entity(ent_id) == attrs
|