mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
Fix inconsistent lemmas (#9405)
* Add util function to deduplicate lists while preserving order

* Use the unique function instead of list(set())

  list(set()) has the issue that its ordering is not consistent between runs of the Python interpreter, so the order of the results can vary.

  list(set()) calls were left in a few places where they sit behind calls to sorted(). I think in these cases the calls to list() can be removed, but this commit doesn't do that.

* Use the existing pattern for this (list(dict.fromkeys(...)))
This commit is contained in:
parent
fd7edbc645
commit
fd759a881b
|
@ -76,6 +76,6 @@ class CatalanLemmatizer(Lemmatizer):
|
|||
forms.append(self.lookup_lemmatize(token)[0])
|
||||
if not forms:
|
||||
forms.append(string)
|
||||
forms = list(set(forms))
|
||||
forms = list(dict.fromkeys(forms))
|
||||
self.cache[cache_key] = forms
|
||||
return forms
|
||||
|
|
|
@ -75,6 +75,6 @@ class FrenchLemmatizer(Lemmatizer):
|
|||
forms.append(self.lookup_lemmatize(token)[0])
|
||||
if not forms:
|
||||
forms.append(string)
|
||||
forms = list(set(forms))
|
||||
forms = list(dict.fromkeys(forms))
|
||||
self.cache[cache_key] = forms
|
||||
return forms
|
||||
|
|
|
@ -97,7 +97,7 @@ class DutchLemmatizer(Lemmatizer):
|
|||
return forms
|
||||
else:
|
||||
oov_forms.append(form)
|
||||
forms = list(set(oov_forms))
|
||||
forms = list(dict.fromkeys(oov_forms))
|
||||
# Back-off through remaining return value candidates.
|
||||
if forms:
|
||||
for form in forms:
|
||||
|
|
|
@ -56,7 +56,7 @@ class RussianLemmatizer(Lemmatizer):
|
|||
if not len(filtered_analyses):
|
||||
return [string.lower()]
|
||||
if morphology is None or (len(morphology) == 1 and POS in morphology):
|
||||
return list(set([analysis.normal_form for analysis in filtered_analyses]))
|
||||
return list(dict.fromkeys([analysis.normal_form for analysis in filtered_analyses]))
|
||||
if univ_pos in ("ADJ", "DET", "NOUN", "PROPN"):
|
||||
features_to_compare = ["Case", "Number", "Gender"]
|
||||
elif univ_pos == "NUM":
|
||||
|
@ -87,7 +87,7 @@ class RussianLemmatizer(Lemmatizer):
|
|||
filtered_analyses.append(analysis)
|
||||
if not len(filtered_analyses):
|
||||
return [string.lower()]
|
||||
return list(set([analysis.normal_form for analysis in filtered_analyses]))
|
||||
return list(dict.fromkeys([analysis.normal_form for analysis in filtered_analyses]))
|
||||
|
||||
def pymorphy2_lookup_lemmatize(self, token: Token) -> List[str]:
|
||||
string = token.text
|
||||
|
|
|
@ -1403,7 +1403,7 @@ def get_arg_names(func: Callable) -> List[str]:
|
|||
RETURNS (List[str]): The argument names.
|
||||
"""
|
||||
argspec = inspect.getfullargspec(func)
|
||||
return list(set([*argspec.args, *argspec.kwonlyargs]))
|
||||
return list(dict.fromkeys([*argspec.args, *argspec.kwonlyargs]))
|
||||
|
||||
|
||||
def combine_score_weights(
|
||||
|
|
Loading…
Reference in New Issue
Block a user