from __future__ import unicode_literals

from spacy.en.lemmatizer import Lemmatizer, read_index, read_exc
from spacy.en import LOCAL_DATA_DIR
from os import path

import pytest

def test_read_index():
    """read_index loads the WordNet noun index with expected membership.

    Checks a known lemma is present, a misspelling is absent, and the
    correctly spelled stem is present.
    """
    wordnet_dir = path.join(LOCAL_DATA_DIR, 'wordnet')
    index = read_index(path.join(wordnet_dir, 'index.noun'))
    for word, expected in (('man', True), ('plantes', False), ('plant', True)):
        assert (word in index) == expected


def test_read_exc():
    """read_exc loads the verb exception table; 'was' lemmatizes to 'be'."""
    wordnet_dir = path.join(LOCAL_DATA_DIR, 'wordnet')
    exceptions = read_exc(path.join(wordnet_dir, 'verb.exc'))
    assert exceptions['was'] == ('be',)


@pytest.fixture
def lemmatizer():
    """Provide a Lemmatizer backed by the bundled WordNet data.

    The three trailing zeros match the original call's positional
    arguments to the Lemmatizer constructor.
    """
    wordnet_dir = path.join(LOCAL_DATA_DIR, 'wordnet')
    return Lemmatizer(wordnet_dir, 0, 0, 0)
|
2014-12-07 17:39:13 +03:00
|
|
|
|
|
|
|
|
|
|
|
def test_noun_lemmas(lemmatizer):
    """Noun lemmatization covers exceptions, regular plurals, identity,
    and ambiguous forms.

    Uses set literals (Python 2.7+) instead of set([...]) calls — same
    values, idiomatic form.
    """
    do = lemmatizer.noun

    # Irregular plural resolved via the WordNet exception table.
    assert do('aardwolves') == {'aardwolf'}
    # A base form maps to itself.
    assert do('aardwolf') == {'aardwolf'}
    # Regular plural stripped by rule.
    assert do('planets') == {'planet'}
    # Word that is already a lemma is returned unchanged.
    assert do('ring') == {'ring'}
    # Ambiguous surface form yields every candidate lemma.
    assert do('axes') == {'axis', 'axe', 'ax'}