mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-30 23:47:31 +03:00 
			
		
		
		
	* Get basic beam tests working * Get basic beam tests working * Compile _beam_utils * Remove prints * Test beam density * Beam parser seems to train * Draft beam NER * Upd beam * Add hypothesis as dev dependency * Implement missing is-gold-parse method * Implement early update * Fix state hashing * Fix test * Fix test * Default to non-beam in parser constructor * Improve oracle for beam * Start refactoring beam * Update test * Refactor beam * Update nn * Refactor beam and weight by cost * Update ner beam settings * Update test * Add __init__.pxd * Upd test * Fix test * Upd test * Fix test * Remove ring buffer history from StateC * WIP change arc-eager transitions * Add state tests * Support ternary sent start values * Fix arc eager * Fix NER * Pass oracle cut size for beam * Fix ner test * Fix beam * Improve StateC.clone * Improve StateClass.borrow * Work directly with StateC, not StateClass * Remove print statements * Fix state copy * Improve state class * Refactor parser oracles * Fix arc eager oracle * Fix arc eager oracle * Use a vector to implement the stack * Refactor state data structure * Fix alignment of sent start * Add get_aligned_sent_starts method * Add test for ae oracle when bad sentence starts * Fix sentence segment handling * Avoid Reduce that inserts illegal sentence * Update preset SBD test * Fix test * Remove prints * Fix sent starts in Example * Improve python API of StateClass * Tweak comments and debug output of arc eager * Upd test * Fix state test * Fix state test
		
			
				
	
	
		
			75 lines
		
	
	
		
			1.8 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			75 lines
		
	
	
		
			1.8 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import pytest
 | |
| 
 | |
| from spacy.tokens.doc import Doc
 | |
| from spacy.vocab import Vocab
 | |
| from spacy.pipeline._parser_internals.stateclass import StateClass
 | |
| 
 | |
@pytest.fixture
def vocab():
    """Return a default-constructed ``Vocab`` for the ``doc`` fixture to use."""
    empty_vocab = Vocab()
    return empty_vocab
 | |
| 
 | |
@pytest.fixture
def doc(vocab):
    """Return a four-token ``Doc`` ("a", "b", "c", "d") built on ``vocab``."""
    tokens = ["a", "b", "c", "d"]
    return Doc(vocab, words=tokens)
 | |
| 
 | |
def test_init_state(doc):
    """A freshly built state has an empty stack and all tokens on the queue."""
    fresh = StateClass(doc)
    # Nothing has been pushed yet.
    assert fresh.stack == []
    # Every token index of the doc is still waiting in the queue, in order.
    every_token = list(range(len(doc)))
    assert fresh.queue == every_token
    assert not fresh.is_final()
    # The ``doc`` fixture holds exactly four words.
    assert fresh.buffer_length() == 4
 | |
| 
 | |
def test_push_pop(doc):
    """Pushing moves tokens from the queue to the stack; popping drops the top."""
    parse_state = StateClass(doc)

    # First push: token 0 leaves the buffer and lands on the stack.
    parse_state.push()
    assert parse_state.buffer_length() == 3
    assert parse_state.stack == [0]
    assert 0 not in parse_state.queue

    # Second push: the stack lists the most recently pushed token (1) first.
    parse_state.push()
    assert parse_state.stack == [1, 0]
    assert 1 not in parse_state.queue
    assert parse_state.buffer_length() == 2

    # Pop removes token 1 from the stack; it is not back in the queue.
    parse_state.pop()
    assert parse_state.stack == [0]
    assert 1 not in parse_state.queue
 | |
| 
 | |
def test_stack_depth(doc):
    """``stack_depth`` grows and ``buffer_length`` shrinks by one per push."""
    parse_state = StateClass(doc)

    # Before any transition: empty stack, full buffer.
    assert parse_state.stack_depth() == 0
    assert parse_state.buffer_length() == len(doc)

    # After one push, one token has moved from the buffer to the stack.
    parse_state.push()
    assert parse_state.buffer_length() == 3
    assert parse_state.stack_depth() == 1
 | |
| 
 | |
| 
 | |
def test_H(doc):
    """``H(i)`` is -1 before token ``i`` has an arc, then reports its head."""
    parse_state = StateClass(doc)

    # Token 0 has no head yet.
    assert parse_state.H(0) == -1

    # Attach token 0 under head 1 with label 0; the arc is recorded as a dict.
    parse_state.add_arc(1, 0, 0)
    recorded = [{"head": 1, "child": 0, "label": 0}]
    assert parse_state.arcs == recorded
    assert parse_state.H(0) == 1

    # Attach token 1 under head 3.
    parse_state.add_arc(3, 1, 0)
    assert parse_state.H(1) == 3
 | |
| 
 | |
| 
 | |
def test_L(doc):
    """``L(head, 1)`` and ``n_L(head)`` track arcs to tokens left of the head."""
    parse_state = StateClass(doc)

    # No arcs yet, so the lookup yields -1.
    assert parse_state.L(2, 1) == -1

    # Head 2 gets token 1 as a child.
    parse_state.add_arc(2, 1, 0)
    recorded = [{"head": 2, "child": 1, "label": 0}]
    assert parse_state.arcs == recorded
    assert parse_state.L(2, 1) == 1

    # Adding token 0 under the same head changes L(2, 1) to 0 and brings
    # the left-child count to two.
    parse_state.add_arc(2, 0, 0)
    assert parse_state.L(2, 1) == 0
    assert parse_state.n_L(2) == 2
 | |
| 
 | |
| 
 | |
def test_R(doc):
    """``R(head, 1)`` and ``n_R(head)`` track arcs to tokens right of the head."""
    parse_state = StateClass(doc)

    # No arcs yet, so the lookup yields -1.
    assert parse_state.R(0, 1) == -1

    # Head 0 gets token 1 as a child.
    parse_state.add_arc(0, 1, 0)
    recorded = [{"head": 0, "child": 1, "label": 0}]
    assert parse_state.arcs == recorded
    assert parse_state.R(0, 1) == 1

    # Adding token 2 under the same head changes R(0, 1) to 2 and brings
    # the right-child count to two.
    parse_state.add_arc(0, 2, 0)
    assert parse_state.R(0, 1) == 2
    assert parse_state.n_R(0) == 2
 |