From 3ee1b2bd1f2a2e5f2ff45041a44328deeaaf0069 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Wed, 4 Sep 2024 14:29:34 +0200
Subject: [PATCH] Fix Spanish lemmatizer

---
 spacy/lang/es/lemmatizer.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/spacy/lang/es/lemmatizer.py b/spacy/lang/es/lemmatizer.py
index ee5d38e84..77f45681f 100644
--- a/spacy/lang/es/lemmatizer.py
+++ b/spacy/lang/es/lemmatizer.py
@@ -63,13 +63,16 @@ class SpanishLemmatizer(Lemmatizer):
         self.cache[cache_key] = lemmas
         return lemmas
 
-    def select_rule(self, pos: str, features: List[str]) -> Optional[str]:
+    def select_rule(self, pos: str, features: List[str]) -> str:
         groups = self.lookups.get_table("lemma_rules_groups")
         if pos in groups:
             for group in groups[pos]:
                 if set(group[1]).issubset(features):
                     return group[0]
-        return None
+        # In v3, returning None here apparently took advantage of a bug in the string store
+        # that didn't raise an error on None as a value to decode. We emulate the previous
+        # behaviour by returning "" here, which should not match any lookups as before.
+        return ""
 
     def lemmatize_adj(
         self, word: str, features: List[str], rule: str, index: List[str]