Mirror of https://github.com/explosion/spaCy.git

Commit 6fa9a19e52 (parent 5ca4c19ef2)

Update tests

Remove the beam test, update the arc-eager oracle test, and unskip previously skipped parser tests.

@@ -44,8 +44,6 @@ def _train_parser(parser):
     return parser


-# Segfaulting due to refactor. Need to fix.
-@pytest.mark.skip
 def test_add_label(parser):
     parser = _train_parser(parser)
     parser.add_label("right")
@@ -64,8 +62,6 @@ def test_add_label(parser):
     assert doc[2].dep_ == "left"


-# segfaulting due to refactor. need to fix.
-@pytest.mark.skip
 def test_add_label_deserializes_correctly():
     config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
     ner1 = EntityRecognizer(Vocab(), default_ner(), **config)
@@ -82,8 +78,6 @@ def test_add_label_deserializes_correctly():
     for i in range(ner1.moves.n_moves):
         assert ner1.moves.get_class_name(i) == ner2.moves.get_class_name(i)

-# segfaulting due to refactor. need to fix.
-@pytest.mark.skip
 @pytest.mark.parametrize(
     "pipe_cls,n_moves,model",
     [(DependencyParser, 5, default_parser()), (EntityRecognizer, 4, default_ner())],

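For context: add_label() on a parser or entity recognizer registers new transition moves, and the now-unskipped test_add_label_deserializes_correctly checks that those moves survive a to_bytes()/from_bytes() round-trip. Below is a minimal sketch of the first part only; the "PERSON" label is illustrative and not taken from this diff.

    from spacy.vocab import Vocab
    from spacy.pipeline import EntityRecognizer
    from spacy.pipeline.defaults import default_ner

    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
    ner = EntityRecognizer(Vocab(), default_ner(), **config)
    n_before = ner.moves.n_moves
    ner.add_label("PERSON")  # illustrative label, not from the diff
    # Each added label contributes new transition moves (e.g. B-/I-/L-/U-PERSON).
    assert ner.moves.n_moves > n_before
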
@@ -13,8 +13,9 @@ from spacy.syntax.arc_eager import ArcEager
 def get_sequence_costs(M, words, heads, deps, transitions):
     doc = Doc(Vocab(), words=words)
     example = Example.from_dict(doc, {"heads": heads, "deps": deps})
-    state = StateClass(doc)
-    M.preprocess_gold(example)
+    states, golds, _ = M.init_gold_batch([example])
+    state = states[0]
+    gold = golds[0]
     cost_history = []
     for gold_action in transitions:
         state_costs = {}
@@ -23,6 +24,7 @@ def get_sequence_costs(M, words, heads, deps, transitions):
             state_costs[name] = M.get_cost(state, gold, i)
         M.transition(state, gold_action)
         cost_history.append(state_costs)
+        gold.update(state)
     return state, cost_history


@@ -59,7 +61,6 @@ def gold(doc, words):
     raise NotImplementedError


-@pytest.mark.xfail
 def test_oracle_four_words(arc_eager, vocab):
     words = ["a", "b", "c", "d"]
     heads = [1, 1, 3, 3]
@@ -144,12 +145,11 @@ def test_get_oracle_actions():
     parser.moves.add_action(1, "")
     parser.moves.add_action(1, "")
     parser.moves.add_action(4, "ROOT")
+    heads, deps = projectivize(heads, deps)
     for i, (head, dep) in enumerate(zip(heads, deps)):
         if head > i:
             parser.moves.add_action(2, dep)
         elif head < i:
             parser.moves.add_action(3, dep)
-    heads, deps = projectivize(heads, deps)
     example = Example.from_dict(doc, {"words": words, "tags": tags, "heads": heads, "deps": deps})
-    parser.moves.preprocess_gold(example)
     parser.moves.get_oracle_sequence(example)

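The reordering in test_get_oracle_actions appears to ensure the transition actions are registered from the projectivized heads and labels, which is what the oracle sequence is computed against. A minimal, self-contained sketch of what projectivize() does, assuming it is imported from spacy.syntax.nonproj; the dep labels below are illustrative:

    from spacy.syntax.nonproj import projectivize

    heads = [1, 1, 3, 3]                     # same toy tree as test_oracle_four_words
    deps = ["left", "ROOT", "left", "ROOT"]  # illustrative labels
    proj_heads, proj_deps = projectivize(heads, deps)
    # This tree is already projective, so the heads come back unchanged;
    # non-projective arcs would be reattached and their labels decorated.
    assert proj_heads == heads
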
@@ -1,100 +0,0 @@
-import pytest
-import numpy
-from spacy.vocab import Vocab
-from spacy.language import Language
-from spacy.pipeline.defaults import default_parser
-from spacy.pipeline import DependencyParser
-from spacy.syntax.arc_eager import ArcEager
-from spacy.tokens import Doc
-from spacy.syntax.stateclass import StateClass
-
-
-@pytest.fixture
-def vocab():
-    return Vocab()
-
-
-@pytest.fixture
-def moves(vocab):
-    aeager = ArcEager(vocab.strings, {})
-    aeager.add_action(2, "nsubj")
-    aeager.add_action(3, "dobj")
-    aeager.add_action(2, "aux")
-    return aeager
-
-
-@pytest.fixture
-def docs(vocab):
-    return [Doc(vocab, words=["Rats", "bite", "things"])]
-
-
-@pytest.fixture
-def states(docs):
-    return [StateClass(doc) for doc in docs]
-
-
-@pytest.fixture
-def tokvecs(docs, vector_size):
-    output = []
-    for doc in docs:
-        vec = numpy.random.uniform(-0.1, 0.1, (len(doc), vector_size))
-        output.append(numpy.asarray(vec))
-    return output
-
-
-@pytest.fixture
-def batch_size(docs):
-    return len(docs)
-
-
-@pytest.fixture
-def beam_width():
-    return 4
-
-
-@pytest.fixture
-def vector_size():
-    return 6
-
-
-@pytest.fixture
-def beam(moves, states, golds, beam_width):
-    return ParserBeam(moves, states, golds, width=beam_width, density=0.0)
-
-
-@pytest.fixture
-def scores(moves, batch_size, beam_width):
-    return [
-        numpy.asarray(
-            numpy.random.uniform(-0.1, 0.1, (batch_size, moves.n_moves)), dtype="f"
-        )
-        for _ in range(batch_size)
-    ]
-
-
-# All tests below are skipped after removing Beam stuff during the Example/GoldParse refactor
-@pytest.mark.skip
-def test_create_beam(beam):
-    pass
-
-
-@pytest.mark.skip
-def test_beam_advance(beam, scores):
-    beam.advance(scores)
-
-
-@pytest.mark.skip
-def test_beam_advance_too_few_scores(beam, scores):
-    with pytest.raises(IndexError):
-        beam.advance(scores[:-1])
-
-
-@pytest.mark.skip
-def test_beam_parse():
-    nlp = Language()
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
-    nlp.add_pipe(DependencyParser(nlp.vocab, default_parser(), **config), name="parser")
-    nlp.parser.add_label("nsubj")
-    nlp.parser.begin_training([], token_vector_width=8, hidden_width=8)
-    doc = nlp.make_doc("Australia is a country")
-    nlp.parser(doc, beam_width=2)

@@ -22,7 +22,6 @@ TRAIN_DATA = [
 ]


-@pytest.mark.skip # Segfault
 def test_parser_root(en_tokenizer):
     text = "i don't have other assistance"
     heads = [3, 2, 1, 0, 1, -2]
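These tests specify heads as relative offsets: -2 means the head is two tokens to the left, and 0 marks a token as its own head, i.e. the root. A minimal sketch of that convention using the get_doc test helper; the import path, the en_tokenizer fixture, and the dep labels are assumptions for illustration:

    from spacy.tests.util import get_doc  # assumed import path for the test helper

    tokens = en_tokenizer("i don't have other assistance")
    heads = [3, 2, 1, 0, 1, -2]  # relative offsets, as in test_parser_root
    deps = ["nsubj", "aux", "neg", "ROOT", "amod", "dobj"]  # illustrative labels
    doc = get_doc(tokens.vocab, words=[t.text for t in tokens], heads=heads, deps=deps)
    assert doc[3].head.i == 3  # offset 0: "have" is the sentence root
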
@@ -33,9 +32,8 @@ def test_parser_root(en_tokenizer):
         assert t.dep != 0, t.text


-#@pytest.mark.xfail
+@pytest.mark.xfail
 #@pytest.mark.parametrize("text", ["Hello"])
-@pytest.mark.skip # Segfault
 def test_parser_parse_one_word_sentence(en_tokenizer, en_parser, text):
     tokens = en_tokenizer(text)
     doc = get_doc(
@@ -48,7 +46,6 @@ def test_parser_parse_one_word_sentence(en_tokenizer, en_parser, text):
     assert doc[0].dep != 0


-@pytest.mark.skip # Segfault
 def test_parser_initial(en_tokenizer, en_parser):
     text = "I ate the pizza with anchovies."
     # heads = [1, 0, 1, -2, -3, -1, -5]
@@ -61,7 +58,6 @@ def test_parser_initial(en_tokenizer, en_parser):
     assert tokens[3].head.i == 3


-@pytest.mark.skip # Segfault
 def test_parser_parse_subtrees(en_tokenizer, en_parser):
     text = "The four wheels on the bus turned quickly"
     heads = [2, 1, 4, -1, 1, -2, 0, -1]
@@ -76,7 +72,6 @@ def test_parser_parse_subtrees(en_tokenizer, en_parser):
     assert len(list(doc[2].subtree)) == 6


-@pytest.mark.skip # Segfault
 def test_parser_merge_pp(en_tokenizer):
     text = "A phrase with another phrase occurs"
     heads = [1, 4, -1, 1, -2, 0]
@@ -95,7 +90,6 @@ def test_parser_merge_pp(en_tokenizer):
     assert doc[3].text == "occurs"


-@pytest.mark.skip # Segfault
 def test_parser_arc_eager_finalize_state(en_tokenizer, en_parser):
     text = "a b c d e"

@@ -170,7 +164,6 @@ def test_parser_arc_eager_finalize_state(en_tokenizer, en_parser):
     assert tokens[4].head.i == 4


-@pytest.mark.skip # Segfault
 def test_parser_set_sent_starts(en_vocab):
     # fmt: off
     words = ['Ein', 'Satz', '.', 'Außerdem', 'ist', 'Zimmer', 'davon', 'überzeugt', ',', 'dass', 'auch', 'epige-', '\n', 'netische', 'Mechanismen', 'eine', 'Rolle', 'spielen', ',', 'also', 'Vorgänge', ',', 'die', '\n', 'sich', 'darauf', 'auswirken', ',', 'welche', 'Gene', 'abgelesen', 'werden', 'und', '\n', 'welche', 'nicht', '.', '\n']
@@ -187,7 +180,6 @@ def test_parser_set_sent_starts(en_vocab):
         for token in sent:
             assert token.head in sent

-@pytest.mark.skip
 def test_overfitting_IO():
     # Simple test to try and quickly overfit the dependency parser - ensuring the ML models work correctly
     nlp = English()

@@ -33,14 +33,12 @@ def parser(vocab):
     return parser


-@pytest.mark.skip # Segfaults
 def test_no_sentences(parser):
     doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
     doc = parser(doc)
     assert len(list(doc.sents)) >= 1


-@pytest.mark.skip # Segfaults
 def test_sents_1(parser):
     doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
     doc[2].sent_start = True
@@ -54,7 +52,6 @@ def test_sents_1(parser):
     assert len(list(doc.sents)) == 2


-@pytest.mark.skip # Segfaults
 def test_sents_1_2(parser):
     doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
     doc[1].sent_start = True
@@ -63,7 +60,6 @@ def test_sents_1_2(parser):
     assert len(list(doc.sents)) >= 3


-@pytest.mark.skip # Segfaults
 def test_sents_1_3(parser):
     doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
     doc[1].sent_start = True
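
Taken together, the unskipped tests in this last file check that sentence boundaries preset on the Doc survive parsing. A minimal sketch of that flow, assuming the parser fixture constructed at the top of the file:

    from spacy.tokens import Doc

    doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
    doc[2].sent_start = True  # preset a boundary before parsing
    doc = parser(doc)
    # The parser should respect the preset boundary and yield two sentences.
    assert len(list(doc.sents)) == 2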