mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 18:07:26 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			157 lines
		
	
	
		
			4.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			157 lines
		
	
	
		
			4.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
from __future__ import unicode_literals
 | 
						|
 | 
						|
from spacy.ner.pystate import PyState
 | 
						|
import pytest
 | 
						|
 | 
						|
 | 
						|
@pytest.fixture
 | 
						|
def labels():
 | 
						|
    ent_types = ['LOC', 'MISC', 'ORG', 'PER']
 | 
						|
    moves = ['B', 'I', 'L', 'U']
 | 
						|
    labels = ['NULL', 'EOL', 'O']
 | 
						|
    for move in moves:
 | 
						|
        for ent_type in ent_types:
 | 
						|
            labels.append('%s-%s' % (move, ent_type))
 | 
						|
    return labels
 | 
						|
 | 
						|
 | 
						|
@pytest.fixture
 | 
						|
def sentence():
 | 
						|
    return "Ms. Haag plays Elianti .".split()
 | 
						|
 | 
						|
 | 
						|
@pytest.fixture
 | 
						|
def state(labels, sentence):
 | 
						|
    return PyState(labels, len(sentence))
 | 
						|
 | 
						|
 | 
						|
def test_begin(state, sentence):
 | 
						|
    assert state.n_ents == 0
 | 
						|
    assert state.i == 0
 | 
						|
    state.transition('B-PER')
 | 
						|
    assert state.n_ents == 0
 | 
						|
    assert state.i == 1
 | 
						|
    assert state.open_entity
 | 
						|
    assert state.ent == {'start': 0, 'label': 4, 'end': 0}
 | 
						|
    assert state.is_valid('I-PER')
 | 
						|
    assert not state.is_valid('I-LOC')
 | 
						|
    assert state.is_valid('L-PER')
 | 
						|
    assert not state.is_valid('L-LOC')
 | 
						|
    assert not state.is_valid('O')
 | 
						|
    assert not state.is_valid('U-PER')
 | 
						|
 | 
						|
 | 
						|
def test_in(state, sentence):
 | 
						|
    state.transition('B-PER')
 | 
						|
    assert state.n_ents == 0
 | 
						|
    state.transition('I-PER')
 | 
						|
    assert state.n_ents == 0
 | 
						|
    assert state.i == 2
 | 
						|
    assert state.is_valid('I-PER')
 | 
						|
    assert state.is_valid('L-PER')
 | 
						|
    assert not state.is_valid('B-PER')
 | 
						|
    assert not state.is_valid('I-LOC')
 | 
						|
    assert not state.is_valid('L-LOC')
 | 
						|
    assert not state.is_valid('U-PER')
 | 
						|
    assert not state.is_valid('O')
 | 
						|
 | 
						|
 | 
						|
def test_last(state, sentence):
 | 
						|
    state.transition('B-PER')
 | 
						|
    assert state.n_ents == 0
 | 
						|
    state.transition('L-PER')
 | 
						|
    assert state.n_ents == 1
 | 
						|
    assert state.i == 2
 | 
						|
    assert not state.open_entity
 | 
						|
    assert state.is_valid('B-PER')
 | 
						|
    assert state.is_valid('B-LOC')
 | 
						|
    assert state.is_valid('U-PER')
 | 
						|
    assert state.is_valid('U-LOC')
 | 
						|
    assert state.is_valid('O')
 | 
						|
    assert not state.is_valid('L-PER')
 | 
						|
    assert not state.is_valid('I-PER')
 | 
						|
 | 
						|
 | 
						|
def test_unit(state, sentence):
 | 
						|
    assert state.n_ents == 0
 | 
						|
    state.transition('U-PER')
 | 
						|
    assert state.n_ents == 1
 | 
						|
    assert state.i == 1
 | 
						|
    assert not state.open_entity
 | 
						|
    assert state.is_valid('B-PER')
 | 
						|
    assert state.is_valid('B-LOC')
 | 
						|
    assert state.is_valid('U-PER')
 | 
						|
    assert state.is_valid('U-LOC')
 | 
						|
    assert state.is_valid('O')
 | 
						|
    assert not state.is_valid('I-PER')
 | 
						|
    assert not state.is_valid('L-PER')
 | 
						|
 | 
						|
 | 
						|
def test_out(state, sentence):
 | 
						|
    assert state.n_ents == 0
 | 
						|
    state.transition('U-PER')
 | 
						|
    assert state.n_ents == 1
 | 
						|
    assert state.i == 1
 | 
						|
    state.transition('O')
 | 
						|
    assert state.i == 2
 | 
						|
    assert not state.open_entity
 | 
						|
    assert state.is_valid('B-PER')
 | 
						|
    assert state.is_valid('B-LOC')
 | 
						|
    assert state.is_valid('U-PER')
 | 
						|
    assert state.is_valid('U-LOC')
 | 
						|
    assert state.is_valid('O')
 | 
						|
    assert not state.is_valid('I-PER')
 | 
						|
    assert not state.is_valid('L-PER')
 | 
						|
 | 
						|
 | 
						|
@pytest.fixture
 | 
						|
def golds(sentence):
 | 
						|
    g = ['B-PER', 'L-PER', 'O', 'U-PER', 'O']
 | 
						|
    assert len(g) == len(sentence)
 | 
						|
    return g
 | 
						|
 | 
						|
 | 
						|
def test_oracle_gold(state, sentence, golds):
 | 
						|
    state.set_golds(golds)
 | 
						|
    assert state.is_gold('B-PER')
 | 
						|
    assert not state.is_gold('B-LOC')
 | 
						|
    assert not state.is_gold('I-PER')
 | 
						|
    assert not state.is_gold('L-PER')
 | 
						|
    assert not state.is_gold('U-PER')
 | 
						|
    assert not state.is_gold('O')
 | 
						|
    state.transition('B-PER')
 | 
						|
    assert state.is_gold('L-PER')
 | 
						|
    state.transition('L-PER')
 | 
						|
    assert state.is_gold('O')
 | 
						|
    assert not state.is_gold('B-PER')
 | 
						|
    state.transition('O')
 | 
						|
    assert not state.is_gold('B-PER')
 | 
						|
    assert not state.is_gold('O')
 | 
						|
    assert state.is_gold('U-PER')
 | 
						|
    state.transition('U-PER')
 | 
						|
    assert state.is_gold('O')
 | 
						|
    state.transition('O')
 | 
						|
    assert state.i == len(sentence)
 | 
						|
 | 
						|
 | 
						|
def test_oracle_miss_entity(state, sentence, golds):
 | 
						|
    state.set_golds(golds)
 | 
						|
    state.transition('O')
 | 
						|
    assert not state.is_gold('L-PER')
 | 
						|
    assert not state.is_gold('U-PER')
 | 
						|
    assert not state.is_gold('I-PER')
 | 
						|
    assert not state.is_gold('B-PER')
 | 
						|
    assert state.is_gold('O')
 | 
						|
    state.transition('O')
 | 
						|
    state.transition('O')
 | 
						|
    assert state.is_gold('U-PER')
 | 
						|
 | 
						|
 | 
						|
def test_oracle_extend_entity(state, sentence, golds):
 | 
						|
    state.set_golds(golds)
 | 
						|
    state.transition('B-PER')
 | 
						|
    assert not state.is_gold('I-PER')
 | 
						|
    state.transition('I-PER')
 | 
						|
    assert state.is_gold('L-PER')
 | 
						|
    assert not state.is_gold('I-PER')
 |