mirror of https://github.com/explosion/spaCy.git
synced 2025-01-10 17:26:42 +03:00
2f3648700c
* pytest file for issue4104 established
* edited default lookup English lemmatizer for "spun"; fixes issue 4102
* eliminated parameterization and sorted dictionary dependency in issue4104 test
* added contributor agreement
15 lines
500 B
Python
# coding: utf8
from __future__ import unicode_literals

from ..util import get_doc


def test_issue4104(en_vocab):
    """Test that English lookup lemmatization of spun & dry is correct.

    expected mapping = {'dry': 'dry', 'spun': 'spin', 'spun-dry': 'spin-dry'}
    """
    text = 'dry spun spun-dry'
    doc = get_doc(en_vocab, [t for t in text.split(" ")])
    # using a simple list to preserve order
    expected = ['dry', 'spin', 'spin-dry']
    assert [token.lemma_ for token in doc] == expected
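For readers unfamiliar with how a lookup lemmatizer produces these lemmas, the sketch below is a minimal, self-contained illustration of the idea the commit adjusts: a plain dictionary from surface form to lemma, with the form returned unchanged when no entry exists. The table contents and the lookup_lemmatize helper are assumptions made for illustration, not spaCy's actual data files or API.

# Minimal lookup-lemmatizer sketch (illustrative; not spaCy's internal table or API).
# The fix behind this test amounts to the English lookup table mapping
# "spun" -> "spin" rather than leaving the form untouched.
LEMMA_LOOKUP = {
    "spun": "spin",        # assumed entry; the real table is much larger
    "spun-dry": "spin-dry",
}


def lookup_lemmatize(word, table=LEMMA_LOOKUP):
    """Return the lemma for `word`, falling back to the word itself."""
    return table.get(word, word)


if __name__ == "__main__":
    tokens = "dry spun spun-dry".split(" ")
    assert [lookup_lemmatize(t) for t in tokens] == ["dry", "spin", "spin-dry"]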