Use morph hash in lemmatizer cache key (#7690)

Use the morph hash (`token.morph.key`) rather than the `MorphAnalysis` object
itself in the cache key so that the `Lemmatizer`, including its cache, can be
pickled.
This commit is contained in:
Adriane Boyd 2021-04-08 13:22:38 +02:00 committed by GitHub
parent 3e5bd5055e
commit 8008e2f75b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 6 additions and 2 deletions

View File

@ -175,7 +175,7 @@ class Lemmatizer(Pipe):
DOCS: https://spacy.io/api/lemmatizer#rule_lemmatize DOCS: https://spacy.io/api/lemmatizer#rule_lemmatize
""" """
cache_key = (token.orth, token.pos, token.morph) cache_key = (token.orth, token.pos, token.morph.key)
if cache_key in self.cache: if cache_key in self.cache:
return self.cache[cache_key] return self.cache[cache_key]
string = token.text string = token.text

View File

@ -1,6 +1,7 @@
import pytest import pytest
import logging import logging
import mock import mock
import pickle
from spacy import util, registry from spacy import util, registry
from spacy.lang.en import English from spacy.lang.en import English
from spacy.lookups import Lookups from spacy.lookups import Lookups
@ -106,6 +107,9 @@ def test_lemmatizer_serialize(nlp):
doc2 = nlp2.make_doc("coping") doc2 = nlp2.make_doc("coping")
doc2[0].pos_ = "VERB" doc2[0].pos_ = "VERB"
assert doc2[0].lemma_ == "" assert doc2[0].lemma_ == ""
doc2 = lemmatizer(doc2) doc2 = lemmatizer2(doc2)
assert doc2[0].text == "coping" assert doc2[0].text == "coping"
assert doc2[0].lemma_ == "cope" assert doc2[0].lemma_ == "cope"
# Make sure that lemmatizer cache can be pickled
b = pickle.dumps(lemmatizer2)