spaCy/tests/test_lemmatizer.py

35 lines
871 B
Python
Raw Normal View History

2014-12-21 13:02:44 +03:00
from spacy.en.lemmatizer import Lemmatizer, read_index, read_exc
2014-12-07 17:39:13 +03:00
from spacy.util import DATA_DIR
from os import path
import pytest
def test_read_index():
    """Load ``wordnet/index.noun`` via read_index and check its membership.

    'man' and 'plant' must be present in the returned index, while the
    non-word 'plantes' must be absent.
    """
    noun_index_path = path.join(DATA_DIR, 'wordnet', 'index.noun')
    noun_index = read_index(noun_index_path)
    assert 'man' in noun_index
    assert 'plantes' not in noun_index
    assert 'plant' in noun_index
def test_read_exc():
    """Load ``wordnet/verb.exc`` via read_exc and check the entry for 'was'.

    The entry is expected to be the one-element tuple ``('be',)``.
    """
    verb_exc_path = path.join(DATA_DIR, 'wordnet', 'verb.exc')
    exceptions = read_exc(verb_exc_path)
    assert exceptions['was'] == ('be',)
@pytest.fixture
def lemmatizer():
    """Provide a Lemmatizer built from the ``wordnet`` directory under DATA_DIR."""
    wordnet_dir = path.join(DATA_DIR, 'wordnet')
    # NOTE(review): the three positional zeros are passed through unchanged;
    # their meaning is defined by Lemmatizer's signature, which is not
    # visible from this file -- confirm against spacy.en.lemmatizer.
    return Lemmatizer(wordnet_dir, 0, 0, 0)
2014-12-07 17:39:13 +03:00
def test_noun_lemmas(lemmatizer):
    """Check ``lemmatizer.noun`` against a table of word -> expected lemma set.

    Each call is compared by equality with a set, so the order of lemmas
    in the result does not matter.
    """
    noun_lemmas = lemmatizer.noun
    cases = [
        ('aardwolves', {'aardwolf'}),
        ('aardwolf', {'aardwolf'}),
        ('planets', {'planet'}),
        ('ring', {'ring'}),
        ('axes', {'axis', 'axe', 'ax'}),
    ]
    for word, expected in cases:
        assert noun_lemmas(word) == expected