diff --git a/spacy/tests/parser/test_add_label.py b/spacy/tests/parser/test_add_label.py
index 093d4e266..4afa11963 100644
--- a/spacy/tests/parser/test_add_label.py
+++ b/spacy/tests/parser/test_add_label.py
@@ -44,8 +44,6 @@ def _train_parser(parser):
     return parser
 
 
-# Segfaulting due to refactor. Need to fix.
-@pytest.mark.skip
 def test_add_label(parser):
     parser = _train_parser(parser)
     parser.add_label("right")
@@ -64,8 +62,6 @@ def test_add_label(parser):
     assert doc[2].dep_ == "left"
 
 
-# segfaulting due to refactor. need to fix.
-@pytest.mark.skip
 def test_add_label_deserializes_correctly():
     config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
     ner1 = EntityRecognizer(Vocab(), default_ner(), **config)
@@ -82,8 +78,6 @@ def test_add_label_deserializes_correctly():
     for i in range(ner1.moves.n_moves):
         assert ner1.moves.get_class_name(i) == ner2.moves.get_class_name(i)
 
-# segfaulting due to refactor. need to fix.
-@pytest.mark.skip
 @pytest.mark.parametrize(
     "pipe_cls,n_moves,model",
     [(DependencyParser, 5, default_parser()), (EntityRecognizer, 4, default_ner())],
diff --git a/spacy/tests/parser/test_arc_eager_oracle.py b/spacy/tests/parser/test_arc_eager_oracle.py
index 39f682a34..c2ab94500 100644
--- a/spacy/tests/parser/test_arc_eager_oracle.py
+++ b/spacy/tests/parser/test_arc_eager_oracle.py
@@ -13,8 +13,9 @@ from spacy.syntax.arc_eager import ArcEager
 def get_sequence_costs(M, words, heads, deps, transitions):
     doc = Doc(Vocab(), words=words)
     example = Example.from_dict(doc, {"heads": heads, "deps": deps})
-    state = StateClass(doc)
-    M.preprocess_gold(example)
+    states, golds, _ = M.init_gold_batch([example])
+    state = states[0]
+    gold = golds[0]
     cost_history = []
     for gold_action in transitions:
         state_costs = {}
@@ -23,6 +24,7 @@ def get_sequence_costs(M, words, heads, deps, transitions):
             state_costs[name] = M.get_cost(state, gold, i)
         M.transition(state, gold_action)
         cost_history.append(state_costs)
+        gold.update(state)
     return state, cost_history
 
 
@@ -59,7 +61,6 @@ def gold(doc, words):
     raise NotImplementedError
 
 
-@pytest.mark.xfail
 def test_oracle_four_words(arc_eager, vocab):
     words = ["a", "b", "c", "d"]
     heads = [1, 1, 3, 3]
@@ -144,12 +145,11 @@ def test_get_oracle_actions():
     parser.moves.add_action(1, "")
     parser.moves.add_action(1, "")
     parser.moves.add_action(4, "ROOT")
+    heads, deps = projectivize(heads, deps)
     for i, (head, dep) in enumerate(zip(heads, deps)):
         if head > i:
             parser.moves.add_action(2, dep)
         elif head < i:
             parser.moves.add_action(3, dep)
-    heads, deps = projectivize(heads, deps)
     example = Example.from_dict(doc, {"words": words, "tags": tags, "heads": heads, "deps": deps})
-    parser.moves.preprocess_gold(example)
     parser.moves.get_oracle_sequence(example)
diff --git a/spacy/tests/parser/test_nn_beam.py b/spacy/tests/parser/test_nn_beam.py
deleted file mode 100644
index 30e0264f4..000000000
--- a/spacy/tests/parser/test_nn_beam.py
+++ /dev/null
@@ -1,100 +0,0 @@
-import pytest
-import numpy
-from spacy.vocab import Vocab
-from spacy.language import Language
-from spacy.pipeline.defaults import default_parser
-from spacy.pipeline import DependencyParser
-from spacy.syntax.arc_eager import ArcEager
-from spacy.tokens import Doc
-from spacy.syntax.stateclass import StateClass
-
-
-@pytest.fixture
-def vocab():
-    return Vocab()
-
-
-@pytest.fixture
-def moves(vocab):
-    aeager = ArcEager(vocab.strings, {})
-    aeager.add_action(2, "nsubj")
-    aeager.add_action(3, "dobj")
-    aeager.add_action(2, "aux")
-    return aeager
-
-
-@pytest.fixture
-def docs(vocab):
-    return [Doc(vocab, words=["Rats", "bite", "things"])]
-
-
-@pytest.fixture
-def states(docs):
-    return [StateClass(doc) for doc in docs]
-
-
-@pytest.fixture
-def tokvecs(docs, vector_size):
-    output = []
-    for doc in docs:
-        vec = numpy.random.uniform(-0.1, 0.1, (len(doc), vector_size))
-        output.append(numpy.asarray(vec))
-    return output
-
-
-@pytest.fixture
-def batch_size(docs):
-    return len(docs)
-
-
-@pytest.fixture
-def beam_width():
-    return 4
-
-
-@pytest.fixture
-def vector_size():
-    return 6
-
-
-@pytest.fixture
-def beam(moves, states, golds, beam_width):
-    return ParserBeam(moves, states, golds, width=beam_width, density=0.0)
-
-
-@pytest.fixture
-def scores(moves, batch_size, beam_width):
-    return [
-        numpy.asarray(
-            numpy.random.uniform(-0.1, 0.1, (batch_size, moves.n_moves)), dtype="f"
-        )
-        for _ in range(batch_size)
-    ]
-
-
-# All tests below are skipped after removing Beam stuff during the Example/GoldParse refactor
-@pytest.mark.skip
-def test_create_beam(beam):
-    pass
-
-
-@pytest.mark.skip
-def test_beam_advance(beam, scores):
-    beam.advance(scores)
-
-
-@pytest.mark.skip
-def test_beam_advance_too_few_scores(beam, scores):
-    with pytest.raises(IndexError):
-        beam.advance(scores[:-1])
-
-
-@pytest.mark.skip
-def test_beam_parse():
-    nlp = Language()
-    config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
-    nlp.add_pipe(DependencyParser(nlp.vocab, default_parser(), **config), name="parser")
-    nlp.parser.add_label("nsubj")
-    nlp.parser.begin_training([], token_vector_width=8, hidden_width=8)
-    doc = nlp.make_doc("Australia is a country")
-    nlp.parser(doc, beam_width=2)
diff --git a/spacy/tests/parser/test_parse.py b/spacy/tests/parser/test_parse.py
index 80d91e7ae..0d9e257b9 100644
--- a/spacy/tests/parser/test_parse.py
+++ b/spacy/tests/parser/test_parse.py
@@ -22,7 +22,6 @@ TRAIN_DATA = [
 ]
 
 
-@pytest.mark.skip  # Segfault
 def test_parser_root(en_tokenizer):
     text = "i don't have other assistance"
     heads = [3, 2, 1, 0, 1, -2]
@@ -33,9 +32,8 @@ def test_parser_root(en_tokenizer):
         assert t.dep != 0, t.text
 
 
-#@pytest.mark.xfail
+@pytest.mark.xfail
 #@pytest.mark.parametrize("text", ["Hello"])
-@pytest.mark.skip  # Segfault
 def test_parser_parse_one_word_sentence(en_tokenizer, en_parser, text):
     tokens = en_tokenizer(text)
     doc = get_doc(
@@ -48,7 +46,6 @@ def test_parser_parse_one_word_sentence(en_tokenizer, en_parser, text):
     assert doc[0].dep != 0
 
 
-@pytest.mark.skip  # Segfault
 def test_parser_initial(en_tokenizer, en_parser):
     text = "I ate the pizza with anchovies."
     # heads = [1, 0, 1, -2, -3, -1, -5]
@@ -61,7 +58,6 @@ def test_parser_initial(en_tokenizer, en_parser):
     assert tokens[3].head.i == 3
 
 
-@pytest.mark.skip  # Segfault
 def test_parser_parse_subtrees(en_tokenizer, en_parser):
     text = "The four wheels on the bus turned quickly"
     heads = [2, 1, 4, -1, 1, -2, 0, -1]
@@ -76,7 +72,6 @@ def test_parser_parse_subtrees(en_tokenizer, en_parser):
     assert len(list(doc[2].subtree)) == 6
 
 
-@pytest.mark.skip  # Segfault
 def test_parser_merge_pp(en_tokenizer):
     text = "A phrase with another phrase occurs"
     heads = [1, 4, -1, 1, -2, 0]
@@ -95,7 +90,6 @@ def test_parser_merge_pp(en_tokenizer):
     assert doc[3].text == "occurs"
 
 
-@pytest.mark.skip  # Segfault
 def test_parser_arc_eager_finalize_state(en_tokenizer, en_parser):
     text = "a b c d e"
 
@@ -170,7 +164,6 @@ def test_parser_arc_eager_finalize_state(en_tokenizer, en_parser):
     assert tokens[4].head.i == 4
 
 
-@pytest.mark.skip  # Segfault
 def test_parser_set_sent_starts(en_vocab):
     # fmt: off
     words = ['Ein', 'Satz', '.', 'Außerdem', 'ist', 'Zimmer', 'davon', 'überzeugt', ',', 'dass', 'auch', 'epige-', '\n', 'netische', 'Mechanismen', 'eine', 'Rolle', 'spielen', ',', 'also', 'Vorgänge', ',', 'die', '\n', 'sich', 'darauf', 'auswirken', ',', 'welche', 'Gene', 'abgelesen', 'werden', 'und', '\n', 'welche', 'nicht', '.', '\n']
@@ -187,7 +180,6 @@ def test_parser_set_sent_starts(en_vocab):
         for token in sent:
             assert token.head in sent
 
-@pytest.mark.skip
 def test_overfitting_IO():
     # Simple test to try and quickly overfit the dependency parser - ensuring the ML models work correctly
     nlp = English()
diff --git a/spacy/tests/parser/test_preset_sbd.py b/spacy/tests/parser/test_preset_sbd.py
index 9a2e1cfe8..5a29d84f4 100644
--- a/spacy/tests/parser/test_preset_sbd.py
+++ b/spacy/tests/parser/test_preset_sbd.py
@@ -33,14 +33,12 @@ def parser(vocab):
     return parser
 
 
-@pytest.mark.skip  # Segfaults
 def test_no_sentences(parser):
     doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
     doc = parser(doc)
     assert len(list(doc.sents)) >= 1
 
 
-@pytest.mark.skip  # Segfaults
 def test_sents_1(parser):
     doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
     doc[2].sent_start = True
@@ -54,7 +52,6 @@ def test_sents_1(parser):
     assert len(list(doc.sents)) == 2
 
 
-@pytest.mark.skip  # Segfaults
 def test_sents_1_2(parser):
     doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
     doc[1].sent_start = True
@@ -63,7 +60,6 @@ def test_sents_1_2(parser):
     assert len(list(doc.sents)) >= 3
 
 
-@pytest.mark.skip  # Segfaults
 def test_sents_1_3(parser):
     doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
     doc[1].sent_start = True