spaCy/spacy/tests/regression/test_issue4104.py
AJ Rader 2f3648700c Correction of default lemmatizer lookup in English (Issue # 4104) (#4110)
* pytest file for issue4104 established

* edited default English lookup lemmatizer for spun; fixes issue 4102

* eliminated parametrization and sorted dictionary dependency in issue 4104 test

* added contributor agreement
2019-08-15 11:39:10 +02:00

15 lines
500 B
Python

# coding: utf8
from __future__ import unicode_literals
from ..util import get_doc
def test_issue4104(en_vocab):
    """Regression test for issue 4104: English lookup lemmas of spun & dry.

    Builds a doc from the words "dry", "spun" and "spun-dry" and checks that
    their lemmas are "dry", "spin" and "spin-dry" respectively.
    """
    words = "dry spun spun-dry".split(" ")
    doc = get_doc(en_vocab, words)
    lemmas = [token.lemma_ for token in doc]
    # Compared as an ordered list so each lemma must line up with its word.
    expected_lemmas = ["dry", "spin", "spin-dry"]
    assert lemmas == expected_lemmas