diff --git a/spacy/tests/lang/en/test_lemmatizer.py b/spacy/tests/lang/en/test_lemmatizer.py
index d02ae1700..00f02ccb4 100644
--- a/spacy/tests/lang/en/test_lemmatizer.py
+++ b/spacy/tests/lang/en/test_lemmatizer.py
@@ -2,12 +2,19 @@
 from __future__ import unicode_literals
 
 import pytest
+from ....tokens.doc import Doc
 
 
 @pytest.fixture
 def en_lemmatizer(EN):
     return EN.Defaults.create_lemmatizer()
 
+@pytest.mark.models('en')
+def test_doc_lemmatization(EN):
+    """A 'bleed' token tagged VBP must lemmatize to 'bleed'."""
+    doc = Doc(EN.vocab, words=['bleed'])
+    doc[0].tag_ = 'VBP'
+    assert doc[0].lemma_ == 'bleed'
 
 @pytest.mark.models('en')
 @pytest.mark.parametrize('text,lemmas', [("aardwolves", ["aardwolf"]),
@@ -19,6 +26,18 @@ def test_en_lemmatizer_noun_lemmas(en_lemmatizer, text, lemmas):
     assert en_lemmatizer.noun(text) == set(lemmas)
 
 
+# Named separately from test_en_lemmatizer_noun_lemmas: reusing that name
+# would rebind it and silently drop the earlier parametrized cases.
+@pytest.mark.models('en')
+@pytest.mark.parametrize('text,lemmas', [("bleed", ["bleed"]),
+                                         ("feed", ["feed"]),
+                                         ("need", ["need"]),
+                                         ("ring", ["ring"]),
+                                         ("axes", ["axis", "axe", "ax"])])
+def test_en_lemmatizer_noun_lemmas_ambiguous(en_lemmatizer, text, lemmas):
+    assert en_lemmatizer.noun(text) == set(lemmas)
+
+
 @pytest.mark.xfail
 @pytest.mark.models('en')
 def test_en_lemmatizer_base_forms(en_lemmatizer):