Modernise test for adding entities

This commit is contained in:
Ines Montani 2017-01-11 18:54:24 +01:00
parent 6e883f4c00
commit 05447be884

View File

@@ -1,42 +1,23 @@
# coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import spacy
from spacy.vocab import Vocab from ...pipeline import EntityRecognizer
from spacy.matcher import Matcher from ..util import get_doc
from spacy.tokens.doc import Doc
from spacy.attrs import *
from spacy.pipeline import EntityRecognizer
import pytest import pytest
@pytest.fixture(scope="module") def test_add_entities_set_ents_iob(en_vocab):
def en_vocab(): text = ["This", "is", "a", "lion"]
return spacy.get_lang_class('en').Defaults.create_vocab() doc = get_doc(en_vocab, text)
ner = EntityRecognizer(en_vocab, features=[(2,), (3,)])
ner(doc)
@pytest.fixture(scope="module")
def entity_recognizer(en_vocab):
return EntityRecognizer(en_vocab, features=[(2,), (3,)])
@pytest.fixture
def animal(en_vocab):
return nlp.vocab.strings[u"ANIMAL"]
@pytest.fixture
def doc(en_vocab, entity_recognizer):
doc = Doc(en_vocab, words=[u"this", u"is", u"a", u"lion"])
entity_recognizer(doc)
return doc
def test_set_ents_iob(doc):
assert len(list(doc.ents)) == 0 assert len(list(doc.ents)) == 0
tags = [w.ent_iob_ for w in doc] assert [w.ent_iob_ for w in doc] == (['O'] * len(doc))
assert tags == (['O'] * len(doc))
doc.ents = [(doc.vocab.strings['ANIMAL'], 3, 4)] doc.ents = [(doc.vocab.strings['ANIMAL'], 3, 4)]
tags = [w.ent_iob_ for w in doc] assert [w.ent_iob_ for w in doc] == ['O', 'O', 'O', 'B']
assert tags == ['O', 'O', 'O', 'B']
doc.ents = [(doc.vocab.strings['WORD'], 0, 2)] doc.ents = [(doc.vocab.strings['WORD'], 0, 2)]
tags = [w.ent_iob_ for w in doc] assert [w.ent_iob_ for w in doc] == ['B', 'I', 'O', 'O']
assert tags == ['B', 'I', 'O', 'O']