From 12292b36f97d9a0a510eb982cda104cd0d63499f Mon Sep 17 00:00:00 2001 From: d0ngw Date: Fri, 9 May 2025 18:42:08 +0800 Subject: [PATCH] fix: match hyphenated words to lemmas in index_table (e.g. "co-authored" -> "co-author") --- spacy/pipeline/lemmatizer.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/spacy/pipeline/lemmatizer.py b/spacy/pipeline/lemmatizer.py index 09e501595..0580b7b61 100644 --- a/spacy/pipeline/lemmatizer.py +++ b/spacy/pipeline/lemmatizer.py @@ -241,7 +241,10 @@ class Lemmatizer(Pipe): if not form: pass elif form in index or not form.isalpha(): - forms.append(form) + if form in index: + forms.insert(0, form) + else: + forms.append(form) else: oov_forms.append(form) # Remove duplicates but preserve the ordering of applied "rules"