Improve lemmatization tests, re #1296

2025-12-10 19:54:17 +03:00 · 2017-09-04 15:17:44 +02:00 · 2017-09-04 15:17:44 +02:00 · 644d6c9e1a
commit 644d6c9e1a
parent c0eaba8b28
1 changed files with 16 additions and 0 deletions
--- a/spacy/tests/lang/en/test_lemmatizer.py
+++ b/spacy/tests/lang/en/test_lemmatizer.py
@ -2,12 +2,18 @@
 from __future__ import unicode_literals
 import pytest
 from ....tokens.doc import Doc
@pytest.fixture
 def en_lemmatizer(EN):
    return EN.Defaults.create_lemmatizer()
@pytest.mark.models('en')
 def test_doc_lemmatization(EN):
    doc = Doc(EN.vocab, words=['bleed'])
    doc[0].tag_ = 'VBP'
    assert doc[0].lemma_ == 'bleed'
@pytest.mark.models('en')
@pytest.mark.parametrize('text,lemmas', [("aardwolves", ["aardwolf"]),
@ -19,6 +25,16 @@ def test_en_lemmatizer_noun_lemmas(en_lemmatizer, text, lemmas):
    assert en_lemmatizer.noun(text) == set(lemmas)
@pytest.mark.models('en')
@pytest.mark.parametrize('text,lemmas', [("bleed", ["bleed"]),
                                         ("feed", ["feed"]),
                                         ("need", ["need"]),
                                         ("ring", ["ring"]),
                                         ("axes", ["axis", "axe", "ax"])])
 def test_en_lemmatizer_noun_lemmas(en_lemmatizer, text, lemmas):
    assert en_lemmatizer.noun(text) == set(lemmas)
@pytest.mark.xfail
@pytest.mark.models('en')
 def test_en_lemmatizer_base_forms(en_lemmatizer):