spaCy/spacy/tests/regression/test_issue595.py

44 lines
899 B
Python

from __future__ import unicode_literals
import pytest
from ...symbols import POS, VERB, VerbForm_inf
from ...tokens import Doc
from ...vocab import Vocab
from ...lemmatizer import Lemmatizer
@pytest.fixture
def index():
return {'verb': {}}
@pytest.fixture
def exceptions():
return {'verb': {}}
@pytest.fixture
def rules():
return {"verb": [["ed", "e"]]}
@pytest.fixture
def lemmatizer(index, exceptions, rules):
return Lemmatizer(index, exceptions, rules)
@pytest.fixture
def tag_map():
return {'VB': {POS: VERB, 'morph': VerbForm_inf}}
@pytest.fixture
def vocab(lemmatizer, tag_map):
return Vocab(lemmatizer=lemmatizer, tag_map=tag_map)
def test_not_lemmatize_base_forms(vocab, lemmatizer):
doc = Doc(vocab, words=["Do", "n't", "feed", "the", "dog"])
feed = doc[2]
feed.tag_ = u'VB'
assert feed.text == u'feed'
assert feed.lemma_ == u'feed'