Add tests for some rule-based lemmatizers

This commit is contained in:
Adriane Boyd 2023-02-16 10:31:54 +01:00
parent e4af62c89f
commit 0bf99827b1

View File

@ -1,8 +1,10 @@
import pytest import pytest
import pickle import pickle
import spacy
from spacy import util, registry from spacy import util, registry
from spacy.about import __lookups_url__
from spacy.lang.en import English from spacy.lang.en import English
from spacy.lookups import Lookups from spacy.lookups import Lookups, load_lookups_data_from_url
from ..util import make_tempdir from ..util import make_tempdir
@ -111,3 +113,15 @@ def test_lemmatizer_serialize(nlp):
# Make sure that lemmatizer cache can be pickled # Make sure that lemmatizer cache can be pickled
pickle.dumps(lemmatizer2) pickle.dumps(lemmatizer2)
@pytest.mark.parametrize("lang", ("ca", "en"))
def test_lemmatizer_load_lookups_from_url(lang):
    """Initialize a blank pipeline's lemmatizer from URL-loaded lookups.

    For each parametrized language code, the tables reported by the
    lemmatizer's own lookups config (required plus optional) are requested
    via ``load_lookups_data_from_url`` using ``__lookups_url__``, and the
    lemmatizer is initialized with the result. The test passes when the
    lemmatizer ends up holding exactly the requested set of tables.
    """
    # NOTE(review): presumably needs network access to reach __lookups_url__
    # -- confirm how this is handled in offline CI runs.
    pipeline = spacy.blank(lang)
    component = pipeline.add_pipe("lemmatizer")

    # Ask the component which tables its current mode needs.
    required, optional = component.get_lookups_config(mode=component.mode)
    wanted_tables = required + optional

    fetched = load_lookups_data_from_url(
        pipeline.lang, wanted_tables, __lookups_url__
    )
    component.initialize(lookups=fetched)

    # Order is irrelevant; only the set of table names must match.
    assert set(component.lookups.tables) == set(wanted_tables)