diff --git a/tests/test_unicode_lemmas.py b/tests/test_unicode_lemmas.py
new file mode 100644
index 000000000..42e5866d6
--- /dev/null
+++ b/tests/test_unicode_lemmas.py
@@ -0,0 +1,19 @@
+# encoding=utf8
+from __future__ import unicode_literals
+
+from spacy.en import English
+import pytest
+
+
+@pytest.fixture
+def tokens():
+    """Build an English instance and call it on a two-word non-ASCII string."""
+    nlp = English()
+    return nlp(u'ćode codé')
+
+
+def test_unicode(tokens):
+    """Assert tokens 0 and 1 carry a lemma_ equal to the two input words."""
+    expected_lemmas = (u'ćode', u'codé')
+    for position, lemma in enumerate(expected_lemmas):
+        assert tokens[position].lemma_ == lemma