mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-12 17:22:25 +03:00
Remove is_base_form from French lemmatizer (#5733)
Remove English-specific is_base_form from French lemmatizer.
This commit is contained in:
parent
3d83721551
commit
923affd091
|
@ -45,9 +45,6 @@ class FrenchLemmatizer(Lemmatizer):
|
||||||
univ_pos = "sconj"
|
univ_pos = "sconj"
|
||||||
else:
|
else:
|
||||||
return [self.lookup(string)]
|
return [self.lookup(string)]
|
||||||
# See Issue #435 for example of where this logic is required.
|
|
||||||
if self.is_base_form(univ_pos, morphology):
|
|
||||||
return list(set([string.lower()]))
|
|
||||||
index_table = self.lookups.get_table("lemma_index", {})
|
index_table = self.lookups.get_table("lemma_index", {})
|
||||||
exc_table = self.lookups.get_table("lemma_exc", {})
|
exc_table = self.lookups.get_table("lemma_exc", {})
|
||||||
rules_table = self.lookups.get_table("lemma_rules", {})
|
rules_table = self.lookups.get_table("lemma_rules", {})
|
||||||
|
@ -59,43 +56,6 @@ class FrenchLemmatizer(Lemmatizer):
|
||||||
)
|
)
|
||||||
return lemmas
|
return lemmas
|
||||||
|
|
||||||
def is_base_form(self, univ_pos, morphology=None):
|
|
||||||
"""
|
|
||||||
Check whether we're dealing with an uninflected paradigm, so we can
|
|
||||||
avoid lemmatization entirely.
|
|
||||||
"""
|
|
||||||
morphology = {} if morphology is None else morphology
|
|
||||||
others = [
|
|
||||||
key
|
|
||||||
for key in morphology
|
|
||||||
if key not in (POS, "Number", "POS", "VerbForm", "Tense")
|
|
||||||
]
|
|
||||||
if univ_pos == "noun" and morphology.get("Number") == "sing":
|
|
||||||
return True
|
|
||||||
elif univ_pos == "verb" and morphology.get("VerbForm") == "inf":
|
|
||||||
return True
|
|
||||||
# This maps 'VBP' to base form -- probably just need 'IS_BASE'
|
|
||||||
# morphology
|
|
||||||
elif univ_pos == "verb" and (
|
|
||||||
morphology.get("VerbForm") == "fin"
|
|
||||||
and morphology.get("Tense") == "pres"
|
|
||||||
and morphology.get("Number") is None
|
|
||||||
and not others
|
|
||||||
):
|
|
||||||
return True
|
|
||||||
elif univ_pos == "adj" and morphology.get("Degree") == "pos":
|
|
||||||
return True
|
|
||||||
elif VerbForm_inf in morphology:
|
|
||||||
return True
|
|
||||||
elif VerbForm_none in morphology:
|
|
||||||
return True
|
|
||||||
elif Number_sing in morphology:
|
|
||||||
return True
|
|
||||||
elif Degree_pos in morphology:
|
|
||||||
return True
|
|
||||||
else:
|
|
||||||
return False
|
|
||||||
|
|
||||||
def noun(self, string, morphology=None):
|
def noun(self, string, morphology=None):
|
||||||
return self(string, "noun", morphology)
|
return self(string, "noun", morphology)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user