mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 18:26:30 +03:00
* Reorganize tests
This commit is contained in:
parent
2ef3555d88
commit
d37dca72dd
|
@ -1,35 +0,0 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import pytest
|
||||
|
||||
from spacy import en
|
||||
from spacy.lexeme import lex_of
|
||||
|
||||
from spacy import LEX, NORM, SHAPE, LAST3
|
||||
|
||||
|
||||
def test_group_by_lex():
    """Group a sentence with repeated words by LEX and check the groups.

    The first two groups are expected to hold the two occurrences of 'I'
    and of 'like' respectively, with matching entries in ``names``.
    """
    tokens = en.tokenize("I like the red one and I like the blue one")
    names, hashes, groups = tokens.group_by(LEX)

    assert len(groups[0]) == 2
    assert en.unhash(lex_of(groups[0][0])) == 'I'
    assert names[0] == 'I'
    assert len(groups[1]) == 2
    assert en.unhash(lex_of(groups[1][0])) == 'like'
    assert names[1] == "like"
    assert len(groups[2]) == 2
    assert len(groups[3]) == 1
|
||||
|
||||
|
||||
def test_group_by_last3():
    """Group tokens by LAST3 and check the size of each of the first five groups.

    Only the first member of groups 0 and 1 is checked by text ('I' and
    'the'); the remaining groups are checked by size alone.
    """
    tokens = en.tokenize("I the blithe swarthy mate ate on the filthy deck")
    names, hashes, groups = tokens.group_by(LAST3)

    assert len(groups[0]) == 1
    assert en.unhash(lex_of(groups[0][0])) == 'I'
    assert len(groups[1]) == 3
    assert en.unhash(lex_of(groups[1][0])) == 'the'
    assert len(groups[2]) == 2
    assert len(groups[3]) == 2
    assert len(groups[4]) == 1
|
|
@ -1,156 +0,0 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
from spacy.ner.pystate import PyState
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
def labels():
    """Return the label inventory used to build the parser state.

    The list starts with the three special labels 'NULL', 'EOL' and 'O',
    followed by every '<move>-<entity type>' combination, ordered by move
    first (B, I, L, U) and entity type second (LOC, MISC, ORG, PER).
    """
    ent_types = ['LOC', 'MISC', 'ORG', 'PER']
    moves = ['B', 'I', 'L', 'U']
    labels = ['NULL', 'EOL', 'O']
    # Outer loop over moves, inner over entity types: B-LOC, B-MISC, ...
    labels.extend(
        '%s-%s' % (move, ent_type)
        for move in moves
        for ent_type in ent_types
    )
    return labels
|
||||
|
||||
|
||||
@pytest.fixture
def sentence():
    """Return the five-token example sentence as a list of strings."""
    return "Ms. Haag plays Elianti .".split()
|
||||
|
||||
|
||||
@pytest.fixture
def state(labels, sentence):
    """Return a fresh PyState built from the label list and sentence length."""
    return PyState(labels, len(sentence))
|
||||
|
||||
|
||||
def test_begin(state, sentence):
    """A 'B-PER' transition opens an entity and advances one token.

    While the entity is open, only 'I-PER' and 'L-PER' are expected to be
    valid; moves with a different entity type, 'O' and 'U-PER' are not.
    """
    assert state.n_ents == 0
    assert state.i == 0
    state.transition('B-PER')
    # The entity is open but not yet complete, so the count stays at zero.
    assert state.n_ents == 0
    assert state.i == 1
    assert state.open_entity
    # NOTE(review): 'label': 4 is presumably an internal id for PER -- confirm
    # against PyState's label encoding.
    assert state.ent == {'start': 0, 'label': 4, 'end': 0}
    assert state.is_valid('I-PER')
    assert not state.is_valid('I-LOC')
    assert state.is_valid('L-PER')
    assert not state.is_valid('L-LOC')
    assert not state.is_valid('O')
    assert not state.is_valid('U-PER')
|
||||
|
||||
|
||||
def test_in(state, sentence):
    """An 'I-PER' transition keeps the entity open without completing it.

    After 'B-PER' then 'I-PER' the state is at token 2, no entity has been
    counted, and only 'I-PER' and 'L-PER' are expected to be valid.
    """
    state.transition('B-PER')
    assert state.n_ents == 0
    state.transition('I-PER')
    assert state.n_ents == 0
    assert state.i == 2
    assert state.is_valid('I-PER')
    assert state.is_valid('L-PER')
    assert not state.is_valid('B-PER')
    assert not state.is_valid('I-LOC')
    assert not state.is_valid('L-LOC')
    assert not state.is_valid('U-PER')
    assert not state.is_valid('O')
|
||||
|
||||
|
||||
def test_last(state, sentence):
    """An 'L-PER' transition closes the open entity and counts it.

    After 'B-PER' then 'L-PER' there is one entity, none is open, and the
    moves that start something new ('B-*', 'U-*', 'O') are valid while
    'I-PER' and 'L-PER' are not.
    """
    state.transition('B-PER')
    assert state.n_ents == 0
    state.transition('L-PER')
    assert state.n_ents == 1
    assert state.i == 2
    assert not state.open_entity
    assert state.is_valid('B-PER')
    assert state.is_valid('B-LOC')
    assert state.is_valid('U-PER')
    assert state.is_valid('U-LOC')
    assert state.is_valid('O')
    assert not state.is_valid('L-PER')
    assert not state.is_valid('I-PER')
|
||||
|
||||
|
||||
def test_unit(state, sentence):
    """A 'U-PER' transition records a complete entity in a single step.

    Afterwards one entity is counted, none is open, and 'B-*', 'U-*' and
    'O' are valid while 'I-PER' and 'L-PER' are not.
    """
    assert state.n_ents == 0
    state.transition('U-PER')
    assert state.n_ents == 1
    assert state.i == 1
    assert not state.open_entity
    assert state.is_valid('B-PER')
    assert state.is_valid('B-LOC')
    assert state.is_valid('U-PER')
    assert state.is_valid('U-LOC')
    assert state.is_valid('O')
    assert not state.is_valid('I-PER')
    assert not state.is_valid('L-PER')
|
||||
|
||||
|
||||
def test_out(state, sentence):
    """An 'O' transition advances one token without opening an entity.

    After 'U-PER' then 'O' the state is at token 2 with no open entity;
    'B-*', 'U-*' and 'O' are valid while 'I-PER' and 'L-PER' are not.
    """
    assert state.n_ents == 0
    state.transition('U-PER')
    assert state.n_ents == 1
    assert state.i == 1
    state.transition('O')
    assert state.i == 2
    assert not state.open_entity
    assert state.is_valid('B-PER')
    assert state.is_valid('B-LOC')
    assert state.is_valid('U-PER')
    assert state.is_valid('U-LOC')
    assert state.is_valid('O')
    assert not state.is_valid('I-PER')
    assert not state.is_valid('L-PER')
|
||||
|
||||
|
||||
@pytest.fixture
def golds(sentence):
    """Return the gold move sequence for the example sentence.

    There is one move per token; the assertion guards against the fixture
    and the sentence drifting out of sync.
    """
    g = ['B-PER', 'L-PER', 'O', 'U-PER', 'O']
    assert len(g) == len(sentence)
    return g
|
||||
|
||||
|
||||
def test_oracle_gold(state, sentence, golds):
    """Following the gold moves, the oracle marks each gold move as gold.

    The state is stepped through the whole gold sequence; before each step
    the gold move is checked with ``is_gold`` and selected alternatives are
    checked to be rejected. The state must end at the end of the sentence.
    """
    state.set_golds(golds)
    # Token 0: only 'B-PER' is gold.
    assert state.is_gold('B-PER')
    assert not state.is_gold('B-LOC')
    assert not state.is_gold('I-PER')
    assert not state.is_gold('L-PER')
    assert not state.is_gold('U-PER')
    assert not state.is_gold('O')
    state.transition('B-PER')
    assert state.is_gold('L-PER')
    state.transition('L-PER')
    assert state.is_gold('O')
    assert not state.is_gold('B-PER')
    state.transition('O')
    assert not state.is_gold('B-PER')
    assert not state.is_gold('O')
    assert state.is_gold('U-PER')
    state.transition('U-PER')
    assert state.is_gold('O')
    state.transition('O')
    assert state.i == len(sentence)
|
||||
|
||||
|
||||
def test_oracle_miss_entity(state, sentence, golds):
    """Oracle behaviour after the first gold entity has been missed.

    The first move taken is 'O' where the gold move is 'B-PER'. At the
    next token only 'O' is expected to be gold, and after two more 'O'
    moves the oracle is expected to accept 'U-PER' for the later entity.
    """
    state.set_golds(golds)
    state.transition('O')
    assert not state.is_gold('L-PER')
    assert not state.is_gold('U-PER')
    assert not state.is_gold('I-PER')
    assert not state.is_gold('B-PER')
    assert state.is_gold('O')
    state.transition('O')
    state.transition('O')
    assert state.is_gold('U-PER')
|
||||
|
||||
|
||||
def test_oracle_extend_entity(state, sentence, golds):
    """Oracle behaviour when an entity is extended past its gold end.

    After 'B-PER', 'I-PER' is not gold (the gold move is 'L-PER'). If
    'I-PER' is taken anyway, 'L-PER' is expected to be gold at the next
    token and 'I-PER' is still not.
    """
    state.set_golds(golds)
    state.transition('B-PER')
    assert not state.is_gold('I-PER')
    state.transition('I-PER')
    assert state.is_gold('L-PER')
    assert not state.is_gold('I-PER')
|
|
@ -4,22 +4,25 @@ from __future__ import unicode_literals
|
|||
from spacy.en import English
|
||||
import pytest
|
||||
|
||||
NLU = English()
|
||||
|
||||
def orths(tokens):
    """Return the ``orth_`` attribute of every item in *tokens* as a list."""
    return [t.orth_ for t in tokens]
|
||||
|
||||
|
||||
def test_simple_two():
    """Two coordinated nouns report the same conjunct list.

    In 'I lost money and pride.', both 'money' and 'pride' are expected to
    list ['money', 'pride'] as their conjuncts.
    """
    nlp = English()
    tokens = nlp('I lost money and pride.', tag=True, parse=True)
    pride = tokens[4]
    # Debug output: one line per token (text, tag, head text). The
    # single-argument parenthesised form prints the same line under both the
    # Python 2 print statement and the Python 3 print function.
    for t in tokens:
        print('%s %s %s' % (t.orth_, t.tag_, t.head.orth_))
    assert orths(pride.conjuncts) == ['money', 'pride']
    money = tokens[2]
    assert orths(money.conjuncts) == ['money', 'pride']
|
||||
|
||||
|
||||
def test_comma_three():
|
||||
tokens = NLU('I found my wallet, phone and keys.')
|
||||
nlp = English()
|
||||
tokens = nlp('I found my wallet, phone and keys.')
|
||||
keys = tokens[-2]
|
||||
assert orths(keys.conjuncts) == ['wallet', 'phone', 'keys']
|
||||
wallet = tokens[3]
|
Loading…
Reference in New Issue
Block a user