mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-13 08:34:57 +03:00
Add state tests, esp. for split function
This commit is contained in:
parent
e826b85cf0
commit
e0375132bd
68
spacy/tests/parser/test_split_word.py
Normal file
68
spacy/tests/parser/test_split_word.py
Normal file
|
@ -0,0 +1,68 @@
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from ...tokens.doc import Doc
|
||||||
|
from ...vocab import Vocab
|
||||||
|
from ...syntax.stateclass import StateClass
|
||||||
|
|
||||||
|
|
||||||
|
def get_doc(words, vocab=None):
    """Build a Doc from an iterable of words, creating a fresh Vocab if none given."""
    vocab = vocab if vocab is not None else Vocab()
    return Doc(vocab, words=list(words))
|
||||||
|
|
||||||
|
def test_push():
    """state.push_stack() should move the first word of the queue (aka buffer)
    onto the stack, advancing the front of the queue by one."""
    state = StateClass(get_doc('abcd'))
    # Queue initially starts at token 0.
    assert state.get_B(0) == 0
    state.push_stack()
    # After one push, token 0 has left the queue; token 1 is now at the front.
    assert state.get_B(0) == 1
|
||||||
|
|
||||||
|
def test_pop():
    """state.pop_stack() should discard the top word of the stack, exposing
    the word that was pushed before it."""
    st = StateClass(get_doc('abcd'))
    assert st.get_B(0) == 0
    # Push tokens 0 and 1; the most recent push (1) sits on top.
    for _ in range(2):
        st.push_stack()
    assert (st.get_S(0), st.get_S(1)) == (1, 0)
    st.pop_stack()
    # Token 1 is gone; token 0 is the top of the stack again.
    assert st.get_S(0) == 0
|
||||||
|
|
||||||
|
|
||||||
|
def toy_split(pad=2):
    """Toy model of the buffer arithmetic behind splitting a token.

    Sketches how a padded token array would be reallocated to make room for
    ``n`` extra slots at position ``i``.  The original version referenced an
    undefined name ``pad`` (a guaranteed NameError) and returned nothing;
    ``pad`` is now a parameter and the computed state is returned so the
    arithmetic can be inspected.

    pad (int): padding width on each side of the sentence array.
        NOTE(review): the intended value isn't recoverable from this sketch —
        confirm against the real state implementation's padding constant.

    RETURNS (tuple): ``(sent, ptr, i, n_moved)`` after the simulated realloc.
    """
    def _realloc(data, new_size):
        # Grow the buffer to new_size, filling new trailing slots with ''.
        # Assumes new_size >= len(data); shrinking is not modelled here.
        additions = new_size - len(data)
        return data + [''] * additions

    length = 10
    sent = list(range(length))
    sent = [None] * pad + sent + [None] * pad  # pad both ends
    ptr = pad
    i = 5   # position of the token being split
    n = 2   # number of extra pieces to make room for

    # Switch from padded to unpadded coordinates for the pointer, and the
    # reverse for the split index.
    ptr -= pad
    i += pad
    sent = _realloc(sent, length + n + (pad * 2))
    # Count of trailing elements that would need shifting to open the gap.
    # NOTE(review): `- i + 1` (not `- (i + 1)`) is kept exactly as in the
    # original sketch — verify the off-by-one against the real split code.
    n_moved = (length + (pad * 2)) - i + 1
    return sent, ptr, i, n_moved
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def test_split():
    '''state.split_token should take the ith word of the buffer, and split it
    into n+1 pieces. n is 0-indexed, i.e. split(i, 0) is a noop, and split(i, 1)
    creates 1 new token.'''
    doc = get_doc('abcd')
    state = StateClass(doc)
    assert len(state) == len(doc)
    # Split token 1 ('b') into three pieces: 2 new tokens are created.
    state.split_token(1, 2)
    assert len(state) == len(doc) + 2
    stdoc = state.get_doc(doc.vocab)
    # 'b' now appears three times; its neighbours are untouched.
    for idx, expected in enumerate(['a', 'b', 'b', 'b', 'c', 'd']):
        assert stdoc[idx].text == expected
|
Loading…
Reference in New Issue
Block a user