spaCy/spacy/tests/morphology/test_morph_features.py
Ines Montani cf65a80f36 Refactor lemmatizer and data table integration (#4353)
* Move test

* Allow default in Lookups.get_table

* Start with blank tables in Lookups.from_bytes

* Refactor lemmatizer to hold instance of Lookups

* Get lookups table within the lemmatization methods to make sure it references the correct table (even if the table was replaced or modified, e.g. when loading a model from disk)
* Deprecate other arguments on Lemmatizer.__init__ and expect Lookups for consistency
* Remove old and unsupported Lemmatizer.load classmethod
* Refactor language-specific lemmatizers to inherit as much as possible from base class and override only what they need

* Update tests and docs

* Fix more tests

* Fix lemmatizer

* Upgrade pytest to try and fix weird CI errors

* Try pytest 4.6.5
2019-10-01 21:36:03 +02:00

51 lines
1.4 KiB
Python

# coding: utf-8
from __future__ import unicode_literals
import pytest
from spacy.morphology import Morphology
from spacy.strings import StringStore, get_string_id
from spacy.lemmatizer import Lemmatizer
from spacy.lookups import Lookups
@pytest.fixture
def morphology():
lemmatizer = Lemmatizer(Lookups())
return Morphology(StringStore(), {}, lemmatizer)
def test_init(morphology):
pass
def test_add_morphology_with_string_names(morphology):
morphology.add({"Case_gen", "Number_sing"})
def test_add_morphology_with_int_ids(morphology):
morphology.add({get_string_id("Case_gen"), get_string_id("Number_sing")})
def test_add_morphology_with_mix_strings_and_ints(morphology):
morphology.add({get_string_id("PunctSide_ini"), "VerbType_aux"})
def test_morphology_tags_hash_distinctly(morphology):
tag1 = morphology.add({"PunctSide_ini", "VerbType_aux"})
tag2 = morphology.add({"Case_gen", "Number_sing"})
assert tag1 != tag2
def test_morphology_tags_hash_independent_of_order(morphology):
tag1 = morphology.add({"Case_gen", "Number_sing"})
tag2 = morphology.add({"Number_sing", "Case_gen"})
assert tag1 == tag2
def test_update_morphology_tag(morphology):
tag1 = morphology.add({"Case_gen"})
tag2 = morphology.update(tag1, {"Number_sing"})
assert tag1 != tag2
tag3 = morphology.add({"Number_sing", "Case_gen"})
assert tag2 == tag3