Use morph hash in lemmatizer cache key (#7690)

Use the morph hash rather than the `MorphAnalysis` object in the cache
key so that the `Lemmatizer` can be pickled.
This commit is contained in:
Adriane Boyd 2021-04-08 13:22:38 +02:00 committed by GitHub
parent 3e5bd5055e
commit 8008e2f75b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 6 additions and 2 deletions

View File

@@ -175,7 +175,7 @@ class Lemmatizer(Pipe):
DOCS: https://spacy.io/api/lemmatizer#rule_lemmatize
"""
- cache_key = (token.orth, token.pos, token.morph)
+ cache_key = (token.orth, token.pos, token.morph.key)
if cache_key in self.cache:
return self.cache[cache_key]
string = token.text

View File

@@ -1,6 +1,7 @@
import pytest
import logging
import mock
+ import pickle
from spacy import util, registry
from spacy.lang.en import English
from spacy.lookups import Lookups
@@ -106,6 +107,9 @@ def test_lemmatizer_serialize(nlp):
doc2 = nlp2.make_doc("coping")
doc2[0].pos_ = "VERB"
assert doc2[0].lemma_ == ""
- doc2 = lemmatizer(doc2)
+ doc2 = lemmatizer2(doc2)
assert doc2[0].text == "coping"
assert doc2[0].lemma_ == "cope"
+ # Make sure that lemmatizer cache can be pickled
+ b = pickle.dumps(lemmatizer2)