Update tests

Remove beam test

Update test

Unskip

Unskip tests
Matthew Honnibal 2020-06-21 01:12:28 +02:00
parent 5ca4c19ef2
commit 6fa9a19e52
5 changed files with 6 additions and 124 deletions

View File

@@ -44,8 +44,6 @@ def _train_parser(parser):
     return parser


-# Segfaulting due to refactor. Need to fix.
-@pytest.mark.skip
 def test_add_label(parser):
     parser = _train_parser(parser)
     parser.add_label("right")
@@ -64,8 +62,6 @@ def test_add_label(parser):
     assert doc[2].dep_ == "left"


-# segfaulting due to refactor. need to fix.
-@pytest.mark.skip
 def test_add_label_deserializes_correctly():
     config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
     ner1 = EntityRecognizer(Vocab(), default_ner(), **config)
@@ -82,8 +78,6 @@ def test_add_label_deserializes_correctly():
     for i in range(ner1.moves.n_moves):
         assert ner1.moves.get_class_name(i) == ner2.moves.get_class_name(i)

-# segfaulting due to refactor. need to fix.
-@pytest.mark.skip
 @pytest.mark.parametrize(
     "pipe_cls,n_moves,model",
     [(DependencyParser, 5, default_parser()), (EntityRecognizer, 4, default_ner())],

View File

@@ -13,8 +13,9 @@ from spacy.syntax.arc_eager import ArcEager
 def get_sequence_costs(M, words, heads, deps, transitions):
     doc = Doc(Vocab(), words=words)
     example = Example.from_dict(doc, {"heads": heads, "deps": deps})
-    state = StateClass(doc)
-    M.preprocess_gold(example)
+    states, golds, _ = M.init_gold_batch([example])
+    state = states[0]
+    gold = golds[0]
     cost_history = []
     for gold_action in transitions:
         state_costs = {}
@@ -23,6 +24,7 @@ def get_sequence_costs(M, words, heads, deps, transitions):
             state_costs[name] = M.get_cost(state, gold, i)
         M.transition(state, gold_action)
         cost_history.append(state_costs)
+        gold.update(state)
     return state, cost_history


@@ -59,7 +61,6 @@ def gold(doc, words):
     raise NotImplementedError


-@pytest.mark.xfail
 def test_oracle_four_words(arc_eager, vocab):
     words = ["a", "b", "c", "d"]
     heads = [1, 1, 3, 3]
@@ -144,12 +145,11 @@ def test_get_oracle_actions():
     parser.moves.add_action(1, "")
     parser.moves.add_action(1, "")
     parser.moves.add_action(4, "ROOT")
+    heads, deps = projectivize(heads, deps)
     for i, (head, dep) in enumerate(zip(heads, deps)):
         if head > i:
             parser.moves.add_action(2, dep)
         elif head < i:
             parser.moves.add_action(3, dep)
-    heads, deps = projectivize(heads, deps)
     example = Example.from_dict(doc, {"words": words, "tags": tags, "heads": heads, "deps": deps})
-    parser.moves.preprocess_gold(example)
     parser.moves.get_oracle_sequence(example)

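The change above swaps the old per-doc setup (StateClass(doc) plus M.preprocess_gold(example)) for batch initialization via M.init_gold_batch, which returns parallel lists of states and golds, and adds a gold.update(state) call so the gold annotations stay in sync as the state advances. As a rough sketch, this is how the updated helper reads end to end. It uses the internal transition-system APIs visible in this diff (init_gold_batch, get_cost, transition, n_moves); the get_class_name accessor and the import paths are assumptions based on the surrounding tests, not confirmed by this diff.

from spacy.vocab import Vocab
from spacy.tokens import Doc
from spacy.gold import Example  # import path assumed for this dev snapshot


def get_sequence_costs(M, words, heads, deps, transitions):
    # Wrap the raw annotations in an Example, as the updated test does
    doc = Doc(Vocab(), words=words)
    example = Example.from_dict(doc, {"heads": heads, "deps": deps})
    # init_gold_batch replaces StateClass(doc) + M.preprocess_gold(example)
    states, golds, _ = M.init_gold_batch([example])
    state, gold = states[0], golds[0]
    cost_history = []
    for gold_action in transitions:
        # Record the cost of every possible move from the current state
        state_costs = {}
        for i in range(M.n_moves):
            name = M.get_class_name(i)  # accessor assumed; this line is elided in the hunk
            state_costs[name] = M.get_cost(state, gold, i)
        M.transition(state, gold_action)
        cost_history.append(state_costs)
        gold.update(state)  # keep the gold aligned with the advanced state
    return state, cost_history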
View File

@@ -1,100 +0,0 @@
-import pytest
-import numpy
-from spacy.vocab import Vocab
-from spacy.language import Language
-from spacy.pipeline.defaults import default_parser
-from spacy.pipeline import DependencyParser
-from spacy.syntax.arc_eager import ArcEager
-from spacy.tokens import Doc
-from spacy.syntax.stateclass import StateClass
-
-
-@pytest.fixture
-def vocab():
-    return Vocab()
-
-
-@pytest.fixture
-def moves(vocab):
-    aeager = ArcEager(vocab.strings, {})
-    aeager.add_action(2, "nsubj")
-    aeager.add_action(3, "dobj")
-    aeager.add_action(2, "aux")
-    return aeager
-
-
-@pytest.fixture
-def docs(vocab):
-    return [Doc(vocab, words=["Rats", "bite", "things"])]
-
-
-@pytest.fixture
-def states(docs):
-    return [StateClass(doc) for doc in docs]
-
-
-@pytest.fixture
-def tokvecs(docs, vector_size):
-    output = []
-    for doc in docs:
-        vec = numpy.random.uniform(-0.1, 0.1, (len(doc), vector_size))
-        output.append(numpy.asarray(vec))
-    return output
-
-
-@pytest.fixture
-def batch_size(docs):
-    return len(docs)
-
-
-@pytest.fixture
-def beam_width():
-    return 4
-
-
-@pytest.fixture
-def vector_size():
-    return 6
-
-
-@pytest.fixture
-def beam(moves, states, golds, beam_width):
-    return ParserBeam(moves, states, golds, width=beam_width, density=0.0)
-
-
-@pytest.fixture
-def scores(moves, batch_size, beam_width):
-    return [
-        numpy.asarray(
-            numpy.random.uniform(-0.1, 0.1, (batch_size, moves.n_moves)), dtype="f"
-        )
-        for _ in range(batch_size)
-    ]
-
-
-# All tests below are skipped after removing Beam stuff during the Example/GoldParse refactor
-@pytest.mark.skip
-def test_create_beam(beam):
-    pass
-
-
-@pytest.mark.skip
-def test_beam_advance(beam, scores):
-    beam.advance(scores)
-
-
-@pytest.mark.skip
-def test_beam_advance_too_few_scores(beam, scores):
-    with pytest.raises(IndexError):
-        beam.advance(scores[:-1])
-
-
-@pytest.mark.skip
-def test_beam_parse():
-    nlp = Language()
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
-    nlp.add_pipe(DependencyParser(nlp.vocab, default_parser(), **config), name="parser")
-    nlp.parser.add_label("nsubj")
-    nlp.parser.begin_training([], token_vector_width=8, hidden_width=8)
-    doc = nlp.make_doc("Australia is a country")
-    nlp.parser(doc, beam_width=2)

View File

@@ -22,7 +22,6 @@ TRAIN_DATA = [
 ]


-@pytest.mark.skip # Segfault
 def test_parser_root(en_tokenizer):
     text = "i don't have other assistance"
     heads = [3, 2, 1, 0, 1, -2]
@@ -33,9 +32,8 @@ def test_parser_root(en_tokenizer):
         assert t.dep != 0, t.text


-#@pytest.mark.xfail
+@pytest.mark.xfail
 #@pytest.mark.parametrize("text", ["Hello"])
-@pytest.mark.skip # Segfault
 def test_parser_parse_one_word_sentence(en_tokenizer, en_parser, text):
     tokens = en_tokenizer(text)
     doc = get_doc(
@@ -48,7 +46,6 @@ def test_parser_parse_one_word_sentence(en_tokenizer, en_parser, text):
     assert doc[0].dep != 0


-@pytest.mark.skip # Segfault
 def test_parser_initial(en_tokenizer, en_parser):
     text = "I ate the pizza with anchovies."
     # heads = [1, 0, 1, -2, -3, -1, -5]
@@ -61,7 +58,6 @@ def test_parser_initial(en_tokenizer, en_parser):
     assert tokens[3].head.i == 3


-@pytest.mark.skip # Segfault
 def test_parser_parse_subtrees(en_tokenizer, en_parser):
     text = "The four wheels on the bus turned quickly"
     heads = [2, 1, 4, -1, 1, -2, 0, -1]
@@ -76,7 +72,6 @@ def test_parser_parse_subtrees(en_tokenizer, en_parser):
     assert len(list(doc[2].subtree)) == 6


-@pytest.mark.skip # Segfault
 def test_parser_merge_pp(en_tokenizer):
     text = "A phrase with another phrase occurs"
     heads = [1, 4, -1, 1, -2, 0]
@@ -95,7 +90,6 @@ def test_parser_merge_pp(en_tokenizer):
     assert doc[3].text == "occurs"


-@pytest.mark.skip # Segfault
 def test_parser_arc_eager_finalize_state(en_tokenizer, en_parser):
     text = "a b c d e"

@@ -170,7 +164,6 @@ def test_parser_arc_eager_finalize_state(en_tokenizer, en_parser):
     assert tokens[4].head.i == 4


-@pytest.mark.skip # Segfault
 def test_parser_set_sent_starts(en_vocab):
     # fmt: off
     words = ['Ein', 'Satz', '.', 'Außerdem', 'ist', 'Zimmer', 'davon', 'überzeugt', ',', 'dass', 'auch', 'epige-', '\n', 'netische', 'Mechanismen', 'eine', 'Rolle', 'spielen', ',', 'also', 'Vorgänge', ',', 'die', '\n', 'sich', 'darauf', 'auswirken', ',', 'welche', 'Gene', 'abgelesen', 'werden', 'und', '\n', 'welche', 'nicht', '.', '\n']
@@ -187,7 +180,6 @@ def test_parser_set_sent_starts(en_vocab):
         for token in sent:
             assert token.head in sent

-@pytest.mark.skip
 def test_overfitting_IO():
     # Simple test to try and quickly overfit the dependency parser - ensuring the ML models work correctly
     nlp = English()

View File

@@ -33,14 +33,12 @@ def parser(vocab):
     return parser


-@pytest.mark.skip # Segfaults
 def test_no_sentences(parser):
     doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
     doc = parser(doc)
     assert len(list(doc.sents)) >= 1


-@pytest.mark.skip # Segfaults
 def test_sents_1(parser):
     doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
     doc[2].sent_start = True
@@ -54,7 +52,6 @@ def test_sents_1(parser):
     assert len(list(doc.sents)) == 2


-@pytest.mark.skip # Segfaults
 def test_sents_1_2(parser):
     doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
     doc[1].sent_start = True
@@ -63,7 +60,6 @@ def test_sents_1_2(parser):
     assert len(list(doc.sents)) >= 3


-@pytest.mark.skip # Segfaults
 def test_sents_1_3(parser):
     doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
     doc[1].sent_start = True