mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-29 11:26:28 +03:00
Fix apparent bug in Spanish lemmatizer. Not sure why this emerges in v4 but not in v3.
This commit is contained in:
parent
4eec3bfad1
commit
64b22be76e
|
@ -53,12 +53,15 @@ class SpanishLemmatizer(Lemmatizer):
|
||||||
else:
|
else:
|
||||||
rule_pos = pos
|
rule_pos = pos
|
||||||
rule = self.select_rule(rule_pos, list(features))
|
rule = self.select_rule(rule_pos, list(features))
|
||||||
index = self.lookups.get_table("lemma_index").get(rule_pos, [])
|
if rule is None:
|
||||||
lemmas = getattr(self, "lemmatize_" + rule_pos)(
|
lemmas = [string]
|
||||||
string, features, rule, index
|
else:
|
||||||
)
|
index = self.lookups.get_table("lemma_index").get(rule_pos, [])
|
||||||
# Remove duplicates but preserve the ordering
|
lemmas = getattr(self, "lemmatize_" + rule_pos)(
|
||||||
lemmas = list(dict.fromkeys(lemmas))
|
string, features, rule, index
|
||||||
|
)
|
||||||
|
# Remove duplicates but preserve the ordering
|
||||||
|
lemmas = list(dict.fromkeys(lemmas))
|
||||||
|
|
||||||
self.cache[cache_key] = lemmas
|
self.cache[cache_key] = lemmas
|
||||||
return lemmas
|
return lemmas
|
||||||
|
@ -203,6 +206,7 @@ class SpanishLemmatizer(Lemmatizer):
|
||||||
word (str): The word to lemmatize.
|
word (str): The word to lemmatize.
|
||||||
features (List[str]): The morphological features as a list of Feat=Val
|
features (List[str]): The morphological features as a list of Feat=Val
|
||||||
pairs.
|
pairs.
|
||||||
|
rule (str): The rule ID to use
|
||||||
index (List[str]): The POS-specific lookup list.
|
index (List[str]): The POS-specific lookup list.
|
||||||
|
|
||||||
RETURNS (List[str]): The list of lemmas.
|
RETURNS (List[str]): The list of lemmas.
|
||||||
|
|
Loading…
Reference in New Issue
Block a user