Fix non-deterministic deduplication in Greek lemmatizer (#8421)

This commit is contained in:
Adriane Boyd 2021-06-17 09:11:01 +02:00 committed by GitHub
parent 994bed2fe2
commit 02bac8f269
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -57,6 +57,6 @@ class GreekLemmatizer(Lemmatizer):
forms.extend(oov_forms)
if not forms:
forms.append(string)
-forms = list(set(forms))
+forms = list(dict.fromkeys(forms))
self.cache[cache_key] = forms
return forms