Modernise matcher tests and split into two files

This commit is contained in:
Ines Montani 2017-01-12 17:51:46 +01:00
parent 27482ebed8
commit b438cfddbc
2 changed files with 49 additions and 40 deletions

View File

@ -1,59 +1,53 @@
# coding: utf-8
from __future__ import unicode_literals
import spacy
from spacy.vocab import Vocab
from spacy.matcher import Matcher
from spacy.tokens.doc import Doc
from spacy.attrs import *
from ...matcher import Matcher
from ...attrs import ORTH
from ..util import get_doc
import pytest
@pytest.fixture
def en_vocab():
return spacy.get_lang_class('en').Defaults.create_vocab()
def test_init_matcher(en_vocab):
@pytest.mark.parametrize('words,entity', [
(["Test", "Entity"], "TestEntity")])
def test_matcher_add_empty_entity(en_vocab, words, entity):
matcher = Matcher(en_vocab)
matcher.add_entity(entity)
doc = get_doc(en_vocab, words)
assert matcher.n_patterns == 0
assert matcher(Doc(en_vocab, words=[u'Some', u'words'])) == []
assert matcher(doc) == []
def test_add_empty_entity(en_vocab):
@pytest.mark.parametrize('entity1,entity2,attrs', [
("TestEntity", "TestEntity2", {"Hello": "World"})])
def test_matcher_get_entity_attrs(en_vocab, entity1, entity2, attrs):
matcher = Matcher(en_vocab)
matcher.add_entity('TestEntity')
matcher.add_entity(entity1)
assert matcher.get_entity(entity1) == {}
matcher.add_entity(entity2, attrs=attrs)
assert matcher.get_entity(entity2) == attrs
assert matcher.get_entity(entity1) == {}
@pytest.mark.parametrize('words,entity,attrs',
[(["Test", "Entity"], "TestEntity", {"Hello": "World"})])
def test_matcher_get_entity_via_match(en_vocab, words, entity, attrs):
matcher = Matcher(en_vocab)
matcher.add_entity(entity, attrs=attrs)
doc = get_doc(en_vocab, words)
assert matcher.n_patterns == 0
assert matcher(Doc(en_vocab, words=[u'Test', u'Entity'])) == []
assert matcher(doc) == []
def test_get_entity_attrs(en_vocab):
matcher = Matcher(en_vocab)
matcher.add_entity('TestEntity')
entity = matcher.get_entity('TestEntity')
assert entity == {}
matcher.add_entity('TestEntity2', attrs={'Hello': 'World'})
entity = matcher.get_entity('TestEntity2')
assert entity == {'Hello': 'World'}
assert matcher.get_entity('TestEntity') == {}
def test_get_entity_via_match(en_vocab):
matcher = Matcher(en_vocab)
matcher.add_entity('TestEntity', attrs={u'Hello': u'World'})
assert matcher.n_patterns == 0
assert matcher(Doc(en_vocab, words=[u'Test', u'Entity'])) == []
matcher.add_pattern(u'TestEntity', [{ORTH: u'Test'}, {ORTH: u'Entity'}])
matcher.add_pattern(entity, [{ORTH: words[0]}, {ORTH: words[1]}])
assert matcher.n_patterns == 1
matches = matcher(Doc(en_vocab, words=[u'Test', u'Entity']))
matches = matcher(doc)
assert len(matches) == 1
assert len(matches[0]) == 4
ent_id, label, start, end = matches[0]
assert ent_id == matcher.vocab.strings[u'TestEntity']
assert ent_id == matcher.vocab.strings[entity]
assert label == 0
assert start == 0
assert end == 2
attrs = matcher.get_entity(ent_id)
assert attrs == {u'Hello': u'World'}
assert matcher.get_entity(ent_id) == attrs

View File

@ -0,0 +1,15 @@
# coding: utf-8
from __future__ import unicode_literals
from ...matcher import Matcher
from ..util import get_doc
import pytest
@pytest.mark.parametrize('words', [["Some", "words"]])
def test_matcher_init(en_vocab, words):
matcher = Matcher(en_vocab)
doc = get_doc(en_vocab, words)
assert matcher.n_patterns == 0
assert matcher(doc) == []