From ec25976416131c50f235d2e9ba63c8180252281a Mon Sep 17 00:00:00 2001
From: Someon
Date: Wed, 11 Mar 2015 01:23:06 +0200
Subject: [PATCH] Test lemma_ with unicode input

---
Notes (free-form commentary, not part of the commit message or the diff):
  This patch creates tests/test_unicode_lemmas.py.
  - The `tokens` fixture calls English() on the text 'ćode codé' and
    returns the result.
  - test_unicode asserts that lemma_ of tokens[0] and tokens[1] equals
    'ćode' and 'codé' respectively, i.e. the words exactly as they
    appear in the input text.

 tests/test_unicode_lemmas.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 tests/test_unicode_lemmas.py

diff --git a/tests/test_unicode_lemmas.py b/tests/test_unicode_lemmas.py
new file mode 100644
index 000000000..42e5866d6
--- /dev/null
+++ b/tests/test_unicode_lemmas.py
@@ -0,0 +1,15 @@
+# encoding=utf8
+from __future__ import unicode_literals
+
+from spacy.en import English
+import pytest
+
+
+@pytest.fixture
+def tokens():
+    return English()(u'ćode codé')
+
+
+def test_unicode(tokens):
+    assert tokens[0].lemma_ == u'ćode'
+    assert tokens[1].lemma_ == u'codé'