2014-11-10 08:29:19 +03:00
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
|
|
|
from spacy.ner.pystate import PyState
|
|
|
|
import pytest
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
def labels():
    """Provide the list of entity label names handed to PyState in these tests."""
    entity_labels = ['LOC', 'MISC', 'ORG', 'PER']
    return entity_labels
|
|
|
|
|
|
|
|
|
|
|
|
def test_n_moves(labels):
    """Check that n_classes counts one B, I, L and U move per label plus one O move."""
    s = PyState(labels, 5)
    n_labels = len(labels)
    # The B-, I-, L- and U- moves each come once per label; O carries no label.
    labelled_moves = 4 * n_labels
    unlabelled_moves = 1
    assert s.n_classes == labelled_moves + unlabelled_moves
|
|
|
|
|
|
|
|
@pytest.fixture
def sentence():
    """Provide the example sentence as a list of whitespace-separated tokens."""
    text = "Ms. Haag plays Elianti ."
    return text.split()
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
def state(labels, sentence):
    """Build a fresh PyState from the label set and the sentence's token count."""
    n_tokens = len(sentence)
    return PyState(labels, n_tokens)
|
|
|
|
|
|
|
|
|
|
|
|
def test_begin(state, sentence):
    """Check the state after a single B-PER transition from the start.

    The B move must advance ``i`` to 1 and open an entity without adding to
    ``n_ents``; afterwards only I-PER and L-PER are expected to be valid.
    """
    # Fresh state: nothing recognised, positioned at the first token.
    assert state.n_ents == 0
    assert state.i == 0

    state.transition('B-PER')

    assert state.n_ents == 0
    assert state.i == 1
    assert state.open_entity
    expected_ent = {'start': 0, 'label': 3, 'end': 0}
    assert state.ent == expected_ent

    # (move, should it be valid?) -- checked in this exact order.
    validity = [
        ('I-PER', True),
        ('I-LOC', False),
        ('L-PER', True),
        ('L-LOC', False),
        ('O', False),
        ('U-PER', False),
    ]
    for move, should_be_valid in validity:
        assert bool(state.is_valid(move)) is should_be_valid
|
|
|
|
|
|
|
|
def test_in(state, sentence):
    """Check the state after B-PER followed by I-PER.

    ``n_ents`` must stay at 0 after each move, ``i`` must reach 2, and only
    I-PER and L-PER are expected to be valid next moves.
    """
    for move in ('B-PER', 'I-PER'):
        state.transition(move)
        assert state.n_ents == 0
    assert state.i == 2

    # (move, should it be valid?) -- checked in this exact order.
    validity = [
        ('I-PER', True),
        ('L-PER', True),
        ('B-PER', False),
        ('I-LOC', False),
        ('L-LOC', False),
        ('U-PER', False),
        ('O', False),
    ]
    for move, should_be_valid in validity:
        assert bool(state.is_valid(move)) is should_be_valid
|
|
|
|
|
|
|
|
|
|
|
|
def test_last(state, sentence):
    """Check the state after B-PER followed by L-PER.

    ``n_ents`` is 0 after the B move and 1 after the L move, which also leaves
    no open entity; B, U and O moves are then expected to be valid, I and L not.
    """
    state.transition('B-PER')
    assert state.n_ents == 0

    state.transition('L-PER')
    assert state.n_ents == 1
    assert state.i == 2
    assert not state.open_entity

    # (move, should it be valid?) -- checked in this exact order.
    validity = [
        ('B-PER', True),
        ('B-LOC', True),
        ('U-PER', True),
        ('U-LOC', True),
        ('O', True),
        ('L-PER', False),
        ('I-PER', False),
    ]
    for move, should_be_valid in validity:
        assert bool(state.is_valid(move)) is should_be_valid
|
|
|
|
|
|
|
|
|
|
|
|
def test_unit(state, sentence):
    """Check the state after a single U-PER transition from the start.

    The U move must raise ``n_ents`` to 1, advance ``i`` to 1 and leave no
    open entity; B, U and O moves are then expected to be valid, I and L not.
    """
    assert state.n_ents == 0

    state.transition('U-PER')

    assert state.n_ents == 1
    assert state.i == 1
    assert not state.open_entity

    # (move, should it be valid?) -- checked in this exact order.
    validity = [
        ('B-PER', True),
        ('B-LOC', True),
        ('U-PER', True),
        ('U-LOC', True),
        ('O', True),
        ('I-PER', False),
        ('L-PER', False),
    ]
    for move, should_be_valid in validity:
        assert bool(state.is_valid(move)) is should_be_valid
|
|
|
|
|
|
|
|
|
|
|
|
def test_out(state, sentence):
    """Check the state after U-PER followed by O.

    The U move brings ``n_ents`` to 1 and ``i`` to 1; the O move then advances
    ``i`` to 2 with no open entity. B, U and O moves are expected to be valid
    afterwards, I and L not.
    """
    assert state.n_ents == 0

    state.transition('U-PER')
    assert state.n_ents == 1
    assert state.i == 1

    state.transition('O')
    assert state.i == 2
    assert not state.open_entity

    # (move, should it be valid?) -- checked in this exact order.
    validity = [
        ('B-PER', True),
        ('B-LOC', True),
        ('U-PER', True),
        ('U-LOC', True),
        ('O', True),
        ('I-PER', False),
        ('L-PER', False),
    ]
    for move, should_be_valid in validity:
        assert bool(state.is_valid(move)) is should_be_valid
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
def golds(sentence):
    """Provide the gold move sequence for the example sentence, one move per token."""
    gold_moves = ['B-PER', 'L-PER', 'O', 'U-PER', 'O']
    # Guard against the sentence fixture and the gold sequence drifting apart.
    assert len(gold_moves) == len(sentence)
    return gold_moves
|
|
|
|
|
|
|
|
|
|
|
|
def test_oracle_gold(state, sentence, golds):
    """Follow the gold sequence move by move and check ``is_gold`` at each step.

    Each script entry lists the ``(move, expected is_gold)`` checks made at the
    current position, in order, and then the gold move that is applied. After
    the final move ``i`` must equal the sentence length.
    """
    state.set_golds(golds)

    script = [
        (
            [
                ('B-PER', True),
                ('B-LOC', False),
                ('I-PER', False),
                ('L-PER', False),
                ('U-PER', False),
                ('O', False),
            ],
            'B-PER',
        ),
        ([('L-PER', True)], 'L-PER'),
        ([('O', True), ('B-PER', False)], 'O'),
        ([('B-PER', False), ('O', False), ('U-PER', True)], 'U-PER'),
        ([('O', True)], 'O'),
    ]
    for checks, next_move in script:
        for move, should_be_gold in checks:
            assert bool(state.is_gold(move)) is should_be_gold
        state.transition(next_move)

    assert state.i == len(sentence)
|
|
|
|
|
|
|
|
|
|
|
|
def test_oracle_miss_entity(state, sentence, golds):
    """Check ``is_gold`` after taking O where the gold sequence has B-PER.

    Once the first token is passed over with O, no PER move may be gold at the
    second token but O must be; after two further O moves, U-PER must be gold.
    """
    state.set_golds(golds)
    state.transition('O')

    # (move, should it be gold?) -- checked in this exact order.
    checks = [
        ('L-PER', False),
        ('U-PER', False),
        ('I-PER', False),
        ('B-PER', False),
        ('O', True),
    ]
    for move, should_be_gold in checks:
        assert bool(state.is_gold(move)) is should_be_gold

    for _ in range(2):
        state.transition('O')
    assert state.is_gold('U-PER')
|
|
|
|
|
|
|
|
|
|
|
|
def test_oracle_extend_entity(state, sentence, golds):
    """Check ``is_gold`` when an entity is extended with I-PER against the gold.

    After B-PER, I-PER must not be gold. Once I-PER is taken anyway, L-PER must
    be gold and a further I-PER must not be.
    """
    state.set_golds(golds)

    state.transition('B-PER')
    i_is_gold = state.is_gold('I-PER')
    assert not i_is_gold

    state.transition('I-PER')
    # (move, should it be gold?) -- checked in this exact order.
    for move, should_be_gold in [('L-PER', True), ('I-PER', False)]:
        assert bool(state.is_gold(move)) is should_be_gold
|