spaCy/spacy/tests/doc/test_creation.py
Ines Montani db55577c45
Drop Python 2.7 and 3.5 (#4828)
* Remove unicode declarations

* Remove Python 3.5 and 2.7 from CI

* Don't require pathlib

* Replace compat helpers

* Remove OrderedDict

* Use f-strings

* Set Cython compiler language level

* Fix typo

* Re-add OrderedDict for Table

* Update setup.cfg

* Revert CONTRIBUTING.md

* Revert lookups.md

* Revert top-level.md

* Small adjustments and docs [ci skip]
2019-12-22 01:53:56 +01:00

38 lines
878 B
Python

import pytest
from spacy.vocab import Vocab
from spacy.tokens import Doc
from spacy.lemmatizer import Lemmatizer
from spacy.lookups import Lookups
@pytest.fixture
def lemmatizer():
lookups = Lookups()
lookups.add_table("lemma_lookup", {"dogs": "dog", "boxen": "box", "mice": "mouse"})
return Lemmatizer(lookups)
@pytest.fixture
def vocab(lemmatizer):
return Vocab(lemmatizer=lemmatizer)
def test_empty_doc(vocab):
doc = Doc(vocab)
assert len(doc) == 0
def test_single_word(vocab):
doc = Doc(vocab, words=["a"])
assert doc.text == "a "
doc = Doc(vocab, words=["a"], spaces=[False])
assert doc.text == "a"
def test_lookup_lemmatization(vocab):
doc = Doc(vocab, words=["dogs", "dogses"])
assert doc[0].text == "dogs"
assert doc[0].lemma_ == "dog"
assert doc[1].text == "dogses"
assert doc[1].lemma_ == "dogses"