mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-10 15:14:56 +03:00
Add state tests, esp. for split function
This commit is contained in:
parent
e826b85cf0
commit
e0375132bd
68
spacy/tests/parser/test_split_word.py
Normal file
68
spacy/tests/parser/test_split_word.py
Normal file
|
@@ -0,0 +1,68 @@
|
|||
import pytest
|
||||
|
||||
from ...tokens.doc import Doc
|
||||
from ...vocab import Vocab
|
||||
from ...syntax.stateclass import StateClass
|
||||
|
||||
|
||||
def get_doc(words, vocab=None):
    '''Build a Doc whose tokens are the items of `words`.

    words: any iterable; it is materialised with list() before being handed
        to Doc, so a plain string yields one token per character.
    vocab: the Vocab to attach; a fresh Vocab() is created when None.
    '''
    vocab = Vocab() if vocab is None else vocab
    return Doc(vocab, words=list(words))
|
||||
|
||||
def test_push():
    '''push_stack() should move the first word of the queue (aka buffer) onto
    the stack, so the next word becomes the front of the queue.'''
    state = StateClass(get_doc('abcd'))
    # Before any transition the front of the buffer is token 0.
    assert state.get_B(0) == 0
    state.push_stack()
    # One push later the buffer front has advanced to token 1.
    assert state.get_B(0) == 1
|
||||
|
||||
def test_pop():
    '''pop_stack() should discard the word currently on top of the stack.'''
    state = StateClass(get_doc('abcd'))
    assert state.get_B(0) == 0
    # Push tokens 0 and 1 so the stack holds two entries.
    for _ in range(2):
        state.push_stack()
    assert state.get_S(0) == 1
    assert state.get_S(1) == 0
    state.pop_stack()
    # Token 1 is gone, which exposes token 0 as the new stack top.
    assert state.get_S(0) == 0
|
||||
|
||||
|
||||
def toy_split(pad=5):
    '''Pure-Python sketch of the array bookkeeping behind splitting a token.

    A 10-item "sentence" is padded with `pad` None entries on each side, then
    grown by n=2 slots as if the item at index i=5 were being split. Nothing
    is actually shifted; the function only works out the new layout.

    pad: number of None entries placed on each side of the sentence. The
        original code read `pad` without ever defining it, so calling the
        function raised NameError. NOTE(review): the default of 5 is an
        illustrative choice -- confirm against the padding the real state
        implementation uses.

    Returns a tuple (sent, n_moved): the padded list extended with n
    empty-string slots, and the computed count of entries that would have to
    move to make room for the new pieces.
    '''
    def _realloc(data, new_size):
        '''Return a copy of `data` extended with '' entries up to `new_size`.'''
        additions = new_size - len(data)
        return data + ['']*additions

    length = 10
    sent = list(range(length))
    sent = [None]*pad + sent + [None]*pad  # pad both ends
    # NOTE(review): ptr is never read afterwards; it presumably mimics moving
    # a pointer between the first real item and the start of the allocation.
    ptr = pad
    i = 5
    n = 2

    ptr -= pad
    i += pad  # translate the sentence index into the padded list
    sent = _realloc(sent, length+n+(pad*2))
    n_moved = (length + (pad*2)) - i+1
    return sent, n_moved
|
||||
|
||||
|
||||
|
||||
def test_split():
    '''split_token(i, n) should split the ith word of the buffer into n+1
    pieces. n counts the extra tokens: split(i, 0) is a noop and split(i, 1)
    creates 1 new token.'''
    doc = get_doc('abcd')
    state = StateClass(doc)
    assert len(state) == len(doc)
    state.split_token(1, 2)
    # Two extra tokens were requested, so the state grows by two.
    assert len(state) == len(doc)+2
    stdoc = state.get_doc(doc.vocab)
    # The split word 'b' now occupies three consecutive positions.
    expected_texts = ['a', 'b', 'b', 'b', 'c', 'd']
    for position, text in enumerate(expected_texts):
        assert stdoc[position].text == text
|
Loading…
Reference in New Issue
Block a user