fix: match hyphenated words to lemmas in index_table (e.g. "co-authored" -> "co-author") (#13816)

This commit is contained in:
d0ngw 2025-05-27 07:20:26 +08:00 committed by GitHub
parent b205ff65e6
commit 46613e27cf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -218,6 +218,9 @@ class Lemmatizer(Pipe):
if not form:
pass
elif form in index or not form.isalpha():
if form in index:
forms.insert(0, form)
else:
forms.append(form)
else:
oov_forms.append(form)