From 46613e27cf59357720333195db5729147eecee9b Mon Sep 17 00:00:00 2001 From: d0ngw Date: Tue, 27 May 2025 07:20:26 +0800 Subject: [PATCH] fix: match hyphenated words to lemmas in index_table (e.g. "co-authored" -> "co-author") (#13816) --- spacy/pipeline/lemmatizer.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/spacy/pipeline/lemmatizer.py b/spacy/pipeline/lemmatizer.py index c08d59a3b..26867b473 100644 --- a/spacy/pipeline/lemmatizer.py +++ b/spacy/pipeline/lemmatizer.py @@ -218,7 +218,10 @@ class Lemmatizer(Pipe): if not form: pass elif form in index or not form.isalpha(): - forms.append(form) + if form in index: + forms.insert(0, form) + else: + forms.append(form) else: oov_forms.append(form) # Remove duplicates but preserve the ordering of applied "rules"