spaCy/tests/regression/test_issue4104.py

16 lines
504 B
Python
Raw Normal View History

# coding: utf8
from __future__ import unicode_literals
from ..util import get_doc
2019-08-18 16:09:16 +03:00
def test_issue4104(en_vocab):
    """Test that English lookup lemmatization of spun & dry is correct.

    Regression test for issue #4104.

    Expected mapping:
        {'dry': 'dry', 'spun': 'spin', 'spun-dry': 'spin-dry'}
    """
    text = "dry spun spun-dry"
    # str.split already returns a fresh list of words, so it is handed to
    # get_doc directly instead of being copied through a comprehension.
    doc = get_doc(en_vocab, text.split(" "))
    # A plain list (rather than a dict) keeps the expected lemmas in the
    # same order as the tokens they are compared against.
    expected = ["dry", "spin", "spin-dry"]
    assert [token.lemma_ for token in doc] == expected