Add tests for Doc creation

This commit is contained in:
Matthew Honnibal 2017-10-11 03:21:23 +02:00
parent d70fba6807
commit 2c118ab3a6

View File

@@ -0,0 +1,37 @@
'''Test Doc sets up tokens correctly.'''
from __future__ import unicode_literals
import pytest
from ...vocab import Vocab
from ...tokens.doc import Doc
from ...lemmatizerlookup import Lemmatizer
@pytest.fixture
def lemmatizer():
    '''Build a Lemmatizer from a small word-to-lemma mapping.'''
    lemma_table = {
        'dogs': 'dog',
        'boxen': 'box',
        'mice': 'mouse',
    }
    return Lemmatizer(lemma_table)
@pytest.fixture
def vocab(lemmatizer):
    '''Build a Vocab that is handed the ``lemmatizer`` fixture.'''
    vocabulary = Vocab(lemmatizer=lemmatizer)
    return vocabulary
def test_empty_doc(vocab):
    '''A Doc constructed from a vocab alone is expected to have length 0.'''
    empty_doc = Doc(vocab)
    token_count = len(empty_doc)
    assert token_count == 0
def test_single_word(vocab):
    '''Check the text of a one-word Doc with and without ``spaces``.'''
    # No ``spaces`` argument: the text is expected to end with a space.
    implicit_spaces = Doc(vocab, words=['a'])
    assert implicit_spaces.text == 'a '

    # spaces=[False]: the text is expected to be the bare word.
    no_trailing_space = Doc(vocab, words=['a'], spaces=[False])
    assert no_trailing_space.text == 'a'
def test_lookup_lemmatization(vocab):
    '''Check each token's text and lemma for a mapped and an unmapped word.

    'dogs' is expected to lemmatize to 'dog'; 'dogses' is expected to come
    back unchanged as its own lemma.
    '''
    # (expected text, expected lemma) for each token, in document order.
    expectations = [
        ('dogs', 'dog'),
        ('dogses', 'dogses'),
    ]
    doc = Doc(vocab, words=['dogs', 'dogses'])
    for position, (expected_text, expected_lemma) in enumerate(expectations):
        token = doc[position]
        assert token.text == expected_text
        assert token.lemma_ == expected_lemma