# spaCy/spacy/tests/matcher/test_entity_id.py
# 2017-01-12 17:51:46 +01:00
#
# 54 lines
# 1.6 KiB
# Python

# coding: utf-8
from __future__ import unicode_literals
from ...matcher import Matcher
from ...attrs import ORTH
from ..util import get_doc
import pytest
@pytest.mark.parametrize('words,entity', [
    (["Test", "Entity"], "TestEntity")])
def test_matcher_add_empty_entity(en_vocab, words, entity):
    """Check that an entity registered without patterns never matches.

    en_vocab: vocabulary handed to both ``Matcher`` and ``get_doc``
        (presumably the shared English vocab fixture -- confirm in conftest).
    words: tokens used to build the document that is matched against.
    entity: name of the entity registered on the matcher.
    """
    matcher = Matcher(en_vocab)
    matcher.add_entity(entity)
    doc = get_doc(en_vocab, words)
    # Only the entity was registered; no pattern was added for it, so the
    # pattern count stays at zero and running the matcher yields nothing.
    assert matcher.n_patterns == 0
    assert matcher(doc) == []


@pytest.mark.parametrize('entity1,entity2,attrs', [
    ("TestEntity", "TestEntity2", {"Hello": "World"})])
def test_matcher_get_entity_attrs(en_vocab, entity1, entity2, attrs):
    """Check that ``get_entity`` returns the attrs stored for each entity.

    en_vocab: vocabulary handed to ``Matcher``
        (presumably the shared English vocab fixture -- confirm in conftest).
    entity1: name of an entity registered without attributes.
    entity2: name of an entity registered with ``attrs``.
    attrs: attribute dict attached to ``entity2``.
    """
    matcher = Matcher(en_vocab)
    matcher.add_entity(entity1)
    # An entity added without attrs is expected to report an empty dict.
    assert matcher.get_entity(entity1) == {}
    matcher.add_entity(entity2, attrs=attrs)
    assert matcher.get_entity(entity2) == attrs
    # Re-check the first entity: registering a second entity with attrs
    # must not change what is stored for the first one.
    assert matcher.get_entity(entity1) == {}


@pytest.mark.parametrize('words,entity,attrs',
                         [(["Test", "Entity"], "TestEntity", {"Hello": "World"})])
def test_matcher_get_entity_via_match(en_vocab, words, entity, attrs):
    """Check that a match's entity ID can be used to look up its attrs.

    en_vocab: vocabulary handed to both ``Matcher`` and ``get_doc``
        (presumably the shared English vocab fixture -- confirm in conftest).
    words: two tokens; they form the document and the ORTH pattern.
    entity: name of the entity registered on the matcher.
    attrs: attribute dict attached to the entity.
    """
    matcher = Matcher(en_vocab)
    matcher.add_entity(entity, attrs=attrs)
    doc = get_doc(en_vocab, words)

    # Before any pattern is added the entity exists but cannot match.
    assert matcher.n_patterns == 0
    assert matcher(doc) == []

    # Add one pattern that matches the two words by their exact text.
    matcher.add_pattern(entity, [{ORTH: words[0]}, {ORTH: words[1]}])
    assert matcher.n_patterns == 1

    matches = matcher(doc)
    assert len(matches) == 1
    # Each match is expected to be a 4-tuple; verify before unpacking so a
    # shape change fails with a clear assertion instead of a ValueError.
    assert len(matches[0]) == 4
    ent_id, label, start, end = matches[0]
    # The returned ID is the entity name's entry in the vocab string store.
    assert ent_id == matcher.vocab.strings[entity]
    assert label == 0
    # The match spans the whole two-token document.
    assert start == 0
    assert end == 2
    # The ID taken from the match resolves back to the registered attrs.
    assert matcher.get_entity(ent_id) == attrs