Modernise matcher tests and split into two files

This commit is contained in:
Ines Montani 2017-01-12 17:51:46 +01:00
parent 27482ebed8
commit b438cfddbc
2 changed files with 49 additions and 40 deletions

View File

@ -1,59 +1,53 @@
# coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import spacy
from spacy.vocab import Vocab from ...matcher import Matcher
from spacy.matcher import Matcher from ...attrs import ORTH
from spacy.tokens.doc import Doc from ..util import get_doc
from spacy.attrs import *
import pytest import pytest
@pytest.fixture @pytest.mark.parametrize('words,entity', [
def en_vocab(): (["Test", "Entity"], "TestEntity")])
return spacy.get_lang_class('en').Defaults.create_vocab() def test_matcher_add_empty_entity(en_vocab, words, entity):
def test_init_matcher(en_vocab):
matcher = Matcher(en_vocab) matcher = Matcher(en_vocab)
matcher.add_entity(entity)
doc = get_doc(en_vocab, words)
assert matcher.n_patterns == 0 assert matcher.n_patterns == 0
assert matcher(Doc(en_vocab, words=[u'Some', u'words'])) == [] assert matcher(doc) == []
def test_add_empty_entity(en_vocab): @pytest.mark.parametrize('entity1,entity2,attrs', [
("TestEntity", "TestEntity2", {"Hello": "World"})])
def test_matcher_get_entity_attrs(en_vocab, entity1, entity2, attrs):
matcher = Matcher(en_vocab) matcher = Matcher(en_vocab)
matcher.add_entity('TestEntity') matcher.add_entity(entity1)
assert matcher.get_entity(entity1) == {}
matcher.add_entity(entity2, attrs=attrs)
assert matcher.get_entity(entity2) == attrs
assert matcher.get_entity(entity1) == {}
@pytest.mark.parametrize('words,entity,attrs',
[(["Test", "Entity"], "TestEntity", {"Hello": "World"})])
def test_matcher_get_entity_via_match(en_vocab, words, entity, attrs):
matcher = Matcher(en_vocab)
matcher.add_entity(entity, attrs=attrs)
doc = get_doc(en_vocab, words)
assert matcher.n_patterns == 0 assert matcher.n_patterns == 0
assert matcher(Doc(en_vocab, words=[u'Test', u'Entity'])) == [] assert matcher(doc) == []
matcher.add_pattern(entity, [{ORTH: words[0]}, {ORTH: words[1]}])
def test_get_entity_attrs(en_vocab):
matcher = Matcher(en_vocab)
matcher.add_entity('TestEntity')
entity = matcher.get_entity('TestEntity')
assert entity == {}
matcher.add_entity('TestEntity2', attrs={'Hello': 'World'})
entity = matcher.get_entity('TestEntity2')
assert entity == {'Hello': 'World'}
assert matcher.get_entity('TestEntity') == {}
def test_get_entity_via_match(en_vocab):
matcher = Matcher(en_vocab)
matcher.add_entity('TestEntity', attrs={u'Hello': u'World'})
assert matcher.n_patterns == 0
assert matcher(Doc(en_vocab, words=[u'Test', u'Entity'])) == []
matcher.add_pattern(u'TestEntity', [{ORTH: u'Test'}, {ORTH: u'Entity'}])
assert matcher.n_patterns == 1 assert matcher.n_patterns == 1
matches = matcher(Doc(en_vocab, words=[u'Test', u'Entity']))
matches = matcher(doc)
assert len(matches) == 1 assert len(matches) == 1
assert len(matches[0]) == 4 assert len(matches[0]) == 4
ent_id, label, start, end = matches[0] ent_id, label, start, end = matches[0]
assert ent_id == matcher.vocab.strings[u'TestEntity'] assert ent_id == matcher.vocab.strings[entity]
assert label == 0 assert label == 0
assert start == 0 assert start == 0
assert end == 2 assert end == 2
attrs = matcher.get_entity(ent_id) assert matcher.get_entity(ent_id) == attrs
assert attrs == {u'Hello': u'World'}

View File

@ -0,0 +1,15 @@
# coding: utf-8
from __future__ import unicode_literals
from ...matcher import Matcher
from ..util import get_doc
import pytest
@pytest.mark.parametrize('words', [["Some", "words"]])
def test_matcher_init(en_vocab, words):
    """Check that a newly built Matcher has no patterns and yields no matches."""
    fresh_matcher = Matcher(en_vocab)
    sample_doc = get_doc(en_vocab, words)

    # No patterns have been registered at this point.
    pattern_count = fresh_matcher.n_patterns
    assert pattern_count == 0

    # Running the matcher over the document must return an empty list.
    found = fresh_matcher(sample_doc)
    assert found == []