spaCy/spacy/tests/parser/test_state.py
Matthew Honnibal 8656a08777
Add beam_parser and beam_ner components for v3 (#6369)
* Get basic beam tests working

* Get basic beam tests working

* Compile _beam_utils

* Remove prints

* Test beam density

* Beam parser seems to train

* Draft beam NER

* Upd beam

* Add hypothesis as dev dependency

* Implement missing is-gold-parse method

* Implement early update

* Fix state hashing

* Fix test

* Fix test

* Default to non-beam in parser constructor

* Improve oracle for beam

* Start refactoring beam

* Update test

* Refactor beam

* Update nn

* Refactor beam and weight by cost

* Update ner beam settings

* Update test

* Add __init__.pxd

* Upd test

* Fix test

* Upd test

* Fix test

* Remove ring buffer history from StateC

* WIP change arc-eager transitions

* Add state tests

* Support ternary sent start values

* Fix arc eager

* Fix NER

* Pass oracle cut size for beam

* Fix ner test

* Fix beam

* Improve StateC.clone

* Improve StateClass.borrow

* Work directly with StateC, not StateClass

* Remove print statements

* Fix state copy

* Improve state class

* Refactor parser oracles

* Fix arc eager oracle

* Fix arc eager oracle

* Use a vector to implement the stack

* Refactor state data structure

* Fix alignment of sent start

* Add get_aligned_sent_starts method

* Add test for ae oracle when bad sentence starts

* Fix sentence segment handling

* Avoid Reduce that inserts illegal sentence

* Update preset SBD test

* Fix test

* Remove prints

* Fix sent starts in Example

* Improve python API of StateClass

* Tweak comments and debug output of arc eager

* Upd test

* Fix state test

* Fix state test
2020-12-13 09:08:32 +08:00

75 lines
1.8 KiB
Python

import pytest
from spacy.tokens.doc import Doc
from spacy.vocab import Vocab
from spacy.pipeline._parser_internals.stateclass import StateClass
@pytest.fixture
def vocab():
return Vocab()
@pytest.fixture
def doc(vocab):
return Doc(vocab, words=["a", "b", "c", "d"])
def test_init_state(doc):
state = StateClass(doc)
assert state.stack == []
assert state.queue == list(range(len(doc)))
assert not state.is_final()
assert state.buffer_length() == 4
def test_push_pop(doc):
state = StateClass(doc)
state.push()
assert state.buffer_length() == 3
assert state.stack == [0]
assert 0 not in state.queue
state.push()
assert state.stack == [1, 0]
assert 1 not in state.queue
assert state.buffer_length() == 2
state.pop()
assert state.stack == [0]
assert 1 not in state.queue
def test_stack_depth(doc):
state = StateClass(doc)
assert state.stack_depth() == 0
assert state.buffer_length() == len(doc)
state.push()
assert state.buffer_length() == 3
assert state.stack_depth() == 1
def test_H(doc):
state = StateClass(doc)
assert state.H(0) == -1
state.add_arc(1, 0, 0)
assert state.arcs == [{"head": 1, "child": 0, "label": 0}]
assert state.H(0) == 1
state.add_arc(3, 1, 0)
assert state.H(1) == 3
def test_L(doc):
state = StateClass(doc)
assert state.L(2, 1) == -1
state.add_arc(2, 1, 0)
assert state.arcs == [{"head": 2, "child": 1, "label": 0}]
assert state.L(2, 1) == 1
state.add_arc(2, 0, 0)
assert state.L(2, 1) == 0
assert state.n_L(2) == 2
def test_R(doc):
state = StateClass(doc)
assert state.R(0, 1) == -1
state.add_arc(0, 1, 0)
assert state.arcs == [{"head": 0, "child": 1, "label": 0}]
assert state.R(0, 1) == 1
state.add_arc(0, 2, 0)
assert state.R(0, 1) == 2
assert state.n_R(0) == 2