'''Check that Doc builds its tokens correctly from a list of words.'''
from __future__ import unicode_literals
import pytest

from ...vocab import Vocab
from ...tokens.doc import Doc
from ...lemmatizerlookup import Lemmatizer


@pytest.fixture
def lemmatizer():
    '''Provide a Lemmatizer constructed from a three-entry lookup dict.'''
    lookup_table = {'dogs': 'dog', 'boxen': 'box', 'mice': 'mouse'}
    return Lemmatizer(lookup_table)


@pytest.fixture
def vocab(lemmatizer):
    '''Provide a Vocab that is handed the lookup lemmatizer fixture.'''
    shared_vocab = Vocab(lemmatizer=lemmatizer)
    return shared_vocab


def test_empty_doc(vocab):
    '''A Doc created without any words has length zero.'''
    empty = Doc(vocab)
    assert len(empty) == 0


def test_single_word(vocab):
    '''A one-word Doc ends in a space unless spaces=[False] is passed.'''
    # No `spaces` argument: the text is expected to carry a trailing space.
    spaced = Doc(vocab, words=['a'])
    assert spaced.text == 'a '
    # Explicit spaces=[False]: the text is expected to be the bare word.
    unspaced = Doc(vocab, words=['a'], spaces=[False])
    assert unspaced.text == 'a'


def test_lookup_lemmatization(vocab):
    '''Tokens expose the lemma from the lookup table, else their own text.'''
    # (word, expected lemma) pairs; 'dogses' is absent from the fixture's
    # lookup dict, so its lemma is expected to equal the word itself.
    expected = [
        ('dogs', 'dog'),
        ('dogses', 'dogses'),
    ]
    doc = Doc(vocab, words=[word for word, _ in expected])
    for index, (word, lemma) in enumerate(expected):
        assert doc[index].text == word
        assert doc[index].lemma_ == lemma