fix: match hyphenated words to lemmas in index_table (e.g. "co-authored" -> "co-author")

This commit is contained in:
d0ngw 2025-05-09 18:42:08 +08:00 committed by GitHub
parent 98a19df91a
commit 12292b36f9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -241,6 +241,9 @@ class Lemmatizer(Pipe):
if not form: if not form:
pass pass
elif form in index or not form.isalpha(): elif form in index or not form.isalpha():
if form in index:
forms.insert(0, form)
else:
forms.append(form) forms.append(form)
else: else:
oov_forms.append(form) oov_forms.append(form)