Mirror of https://github.com/explosion/spaCy.git

Commit 6fa9a19e52 (parent 5ca4c19ef2)

Update tests

Remove the beam test, update the arc-eager oracle test, and unskip previously skipped parser tests.

@@ -44,8 +44,6 @@ def _train_parser(parser):
     return parser


-# Segfaulting due to refactor. Need to fix.
-@pytest.mark.skip
 def test_add_label(parser):
     parser = _train_parser(parser)
     parser.add_label("right")
@@ -64,8 +62,6 @@ def test_add_label(parser):
     assert doc[2].dep_ == "left"


-# segfaulting due to refactor. need to fix.
-@pytest.mark.skip
 def test_add_label_deserializes_correctly():
     config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
     ner1 = EntityRecognizer(Vocab(), default_ner(), **config)
@@ -82,8 +78,6 @@ def test_add_label_deserializes_correctly():
     for i in range(ner1.moves.n_moves):
         assert ner1.moves.get_class_name(i) == ner2.moves.get_class_name(i)

-# segfaulting due to refactor. need to fix.
-@pytest.mark.skip
 @pytest.mark.parametrize(
     "pipe_cls,n_moves,model",
     [(DependencyParser, 5, default_parser()), (EntityRecognizer, 4, default_ner())],

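For context: add_label() on a parser or entity recognizer registers new transition moves, and the now-unskipped test_add_label_deserializes_correctly checks that those moves survive a to_bytes()/from_bytes() round-trip. Below is a minimal sketch of the first part only; the "PERSON" label is illustrative and not taken from this diff.

    from spacy.vocab import Vocab
    from spacy.pipeline import EntityRecognizer
    from spacy.pipeline.defaults import default_ner

    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
    ner = EntityRecognizer(Vocab(), default_ner(), **config)
    n_before = ner.moves.n_moves
    ner.add_label("PERSON")  # illustrative label, not from the diff
    # Each added label contributes new transition moves (e.g. B-/I-/L-/U-PERSON).
    assert ner.moves.n_moves > n_before
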
@@ -13,8 +13,9 @@ from spacy.syntax.arc_eager import ArcEager
 def get_sequence_costs(M, words, heads, deps, transitions):
     doc = Doc(Vocab(), words=words)
     example = Example.from_dict(doc, {"heads": heads, "deps": deps})
-    state = StateClass(doc)
-    M.preprocess_gold(example)
+    states, golds, _ = M.init_gold_batch([example])
+    state = states[0]
+    gold = golds[0]
     cost_history = []
     for gold_action in transitions:
         state_costs = {}
@@ -23,6 +24,7 @@ def get_sequence_costs(M, words, heads, deps, transitions):
             state_costs[name] = M.get_cost(state, gold, i)
         M.transition(state, gold_action)
         cost_history.append(state_costs)
+        gold.update(state)
     return state, cost_history


@@ -59,7 +61,6 @@ def gold(doc, words):
     raise NotImplementedError


-@pytest.mark.xfail
 def test_oracle_four_words(arc_eager, vocab):
     words = ["a", "b", "c", "d"]
     heads = [1, 1, 3, 3]
@@ -144,12 +145,11 @@ def test_get_oracle_actions():
     parser.moves.add_action(1, "")
     parser.moves.add_action(1, "")
     parser.moves.add_action(4, "ROOT")
+    heads, deps = projectivize(heads, deps)
     for i, (head, dep) in enumerate(zip(heads, deps)):
         if head > i:
             parser.moves.add_action(2, dep)
         elif head < i:
             parser.moves.add_action(3, dep)
-    heads, deps = projectivize(heads, deps)
     example = Example.from_dict(doc, {"words": words, "tags": tags, "heads": heads, "deps": deps})
-    parser.moves.preprocess_gold(example)
     parser.moves.get_oracle_sequence(example)

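The reordering in test_get_oracle_actions appears to ensure the transition actions are registered from the projectivized heads and labels, which is what the oracle sequence is computed against. A minimal, self-contained sketch of what projectivize() does, assuming it is imported from spacy.syntax.nonproj; the dep labels below are illustrative:

    from spacy.syntax.nonproj import projectivize

    heads = [1, 1, 3, 3]                     # same toy tree as test_oracle_four_words
    deps = ["left", "ROOT", "left", "ROOT"]  # illustrative labels
    proj_heads, proj_deps = projectivize(heads, deps)
    # This tree is already projective, so the heads come back unchanged;
    # non-projective arcs would be reattached and their labels decorated.
    assert proj_heads == heads
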
@@ -1,100 +0,0 @@
-import pytest
-import numpy
-from spacy.vocab import Vocab
-from spacy.language import Language
-from spacy.pipeline.defaults import default_parser
-from spacy.pipeline import DependencyParser
-from spacy.syntax.arc_eager import ArcEager
-from spacy.tokens import Doc
-from spacy.syntax.stateclass import StateClass
-
-
-@pytest.fixture
-def vocab():
-    return Vocab()
-
-
-@pytest.fixture
-def moves(vocab):
-    aeager = ArcEager(vocab.strings, {})
-    aeager.add_action(2, "nsubj")
-    aeager.add_action(3, "dobj")
-    aeager.add_action(2, "aux")
-    return aeager
-
-
-@pytest.fixture
-def docs(vocab):
-    return [Doc(vocab, words=["Rats", "bite", "things"])]
-
-
-@pytest.fixture
-def states(docs):
-    return [StateClass(doc) for doc in docs]
-
-
-@pytest.fixture
-def tokvecs(docs, vector_size):
-    output = []
-    for doc in docs:
-        vec = numpy.random.uniform(-0.1, 0.1, (len(doc), vector_size))
-        output.append(numpy.asarray(vec))
-    return output
-
-
-@pytest.fixture
-def batch_size(docs):
-    return len(docs)
-
-
-@pytest.fixture
-def beam_width():
-    return 4
-
-
-@pytest.fixture
-def vector_size():
-    return 6
-
-
-@pytest.fixture
-def beam(moves, states, golds, beam_width):
-    return ParserBeam(moves, states, golds, width=beam_width, density=0.0)
-
-
-@pytest.fixture
-def scores(moves, batch_size, beam_width):
-    return [
-        numpy.asarray(
-            numpy.random.uniform(-0.1, 0.1, (batch_size, moves.n_moves)), dtype="f"
-        )
-        for _ in range(batch_size)
-    ]
-
-
-# All tests below are skipped after removing Beam stuff during the Example/GoldParse refactor
-@pytest.mark.skip
-def test_create_beam(beam):
-    pass
-
-
-@pytest.mark.skip
-def test_beam_advance(beam, scores):
-    beam.advance(scores)
-
-
-@pytest.mark.skip
-def test_beam_advance_too_few_scores(beam, scores):
-    with pytest.raises(IndexError):
-        beam.advance(scores[:-1])
-
-
-@pytest.mark.skip
-def test_beam_parse():
-    nlp = Language()
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
-    nlp.add_pipe(DependencyParser(nlp.vocab, default_parser(), **config), name="parser")
-    nlp.parser.add_label("nsubj")
-    nlp.parser.begin_training([], token_vector_width=8, hidden_width=8)
-    doc = nlp.make_doc("Australia is a country")
-    nlp.parser(doc, beam_width=2)

@@ -22,7 +22,6 @@ TRAIN_DATA = [
 ]


-@pytest.mark.skip # Segfault
 def test_parser_root(en_tokenizer):
     text = "i don't have other assistance"
     heads = [3, 2, 1, 0, 1, -2]
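These tests specify heads as relative offsets: -2 means the head is two tokens to the left, and 0 marks a token as its own head, i.e. the root. A minimal sketch of that convention using the get_doc test helper; the import path, the en_tokenizer fixture, and the dep labels are assumptions for illustration:

    from spacy.tests.util import get_doc  # assumed import path for the test helper

    tokens = en_tokenizer("i don't have other assistance")
    heads = [3, 2, 1, 0, 1, -2]  # relative offsets, as in test_parser_root
    deps = ["nsubj", "aux", "neg", "ROOT", "amod", "dobj"]  # illustrative labels
    doc = get_doc(tokens.vocab, words=[t.text for t in tokens], heads=heads, deps=deps)
    assert doc[3].head.i == 3  # offset 0: "have" is the sentence root
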
@@ -33,9 +32,8 @@ def test_parser_root(en_tokenizer):
         assert t.dep != 0, t.text


-#@pytest.mark.xfail
+@pytest.mark.xfail
 #@pytest.mark.parametrize("text", ["Hello"])
-@pytest.mark.skip # Segfault
 def test_parser_parse_one_word_sentence(en_tokenizer, en_parser, text):
     tokens = en_tokenizer(text)
     doc = get_doc(
@@ -48,7 +46,6 @@ def test_parser_parse_one_word_sentence(en_tokenizer, en_parser, text):
     assert doc[0].dep != 0


-@pytest.mark.skip # Segfault
 def test_parser_initial(en_tokenizer, en_parser):
     text = "I ate the pizza with anchovies."
     # heads = [1, 0, 1, -2, -3, -1, -5]
@@ -61,7 +58,6 @@ def test_parser_initial(en_tokenizer, en_parser):
     assert tokens[3].head.i == 3


-@pytest.mark.skip # Segfault
 def test_parser_parse_subtrees(en_tokenizer, en_parser):
     text = "The four wheels on the bus turned quickly"
     heads = [2, 1, 4, -1, 1, -2, 0, -1]
@@ -76,7 +72,6 @@ def test_parser_parse_subtrees(en_tokenizer, en_parser):
     assert len(list(doc[2].subtree)) == 6


-@pytest.mark.skip # Segfault
 def test_parser_merge_pp(en_tokenizer):
     text = "A phrase with another phrase occurs"
     heads = [1, 4, -1, 1, -2, 0]
@@ -95,7 +90,6 @@ def test_parser_merge_pp(en_tokenizer):
     assert doc[3].text == "occurs"


-@pytest.mark.skip # Segfault
 def test_parser_arc_eager_finalize_state(en_tokenizer, en_parser):
     text = "a b c d e"

@@ -170,7 +164,6 @@ def test_parser_arc_eager_finalize_state(en_tokenizer, en_parser):
     assert tokens[4].head.i == 4


-@pytest.mark.skip # Segfault
 def test_parser_set_sent_starts(en_vocab):
     # fmt: off
     words = ['Ein', 'Satz', '.', 'Außerdem', 'ist', 'Zimmer', 'davon', 'überzeugt', ',', 'dass', 'auch', 'epige-', '\n', 'netische', 'Mechanismen', 'eine', 'Rolle', 'spielen', ',', 'also', 'Vorgänge', ',', 'die', '\n', 'sich', 'darauf', 'auswirken', ',', 'welche', 'Gene', 'abgelesen', 'werden', 'und', '\n', 'welche', 'nicht', '.', '\n']
@@ -187,7 +180,6 @@ def test_parser_set_sent_starts(en_vocab):
         for token in sent:
             assert token.head in sent

-@pytest.mark.skip
 def test_overfitting_IO():
     # Simple test to try and quickly overfit the dependency parser - ensuring the ML models work correctly
     nlp = English()

@@ -33,14 +33,12 @@ def parser(vocab):
     return parser


-@pytest.mark.skip # Segfaults
 def test_no_sentences(parser):
     doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
     doc = parser(doc)
     assert len(list(doc.sents)) >= 1


-@pytest.mark.skip # Segfaults
 def test_sents_1(parser):
     doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
     doc[2].sent_start = True
@@ -54,7 +52,6 @@ def test_sents_1(parser):
     assert len(list(doc.sents)) == 2


-@pytest.mark.skip # Segfaults
 def test_sents_1_2(parser):
     doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
     doc[1].sent_start = True
@@ -63,7 +60,6 @@ def test_sents_1_2(parser):
     assert len(list(doc.sents)) >= 3


-@pytest.mark.skip # Segfaults
 def test_sents_1_3(parser):
     doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
     doc[1].sent_start = True
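
Taken together, the unskipped tests in this last file check that sentence boundaries preset on the Doc survive parsing. A minimal sketch of that flow, assuming the parser fixture constructed at the top of the file:

    from spacy.tokens import Doc

    doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
    doc[2].sent_start = True  # preset a boundary before parsing
    doc = parser(doc)
    # The parser should respect the preset boundary and yield two sentences.
    assert len(list(doc.sents)) == 2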