mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-23 14:39:52 +03:00
fix: match hyphenated words to lemmas in index_table (e.g. "co-authored" -> "co-author") (#13816)
This commit is contained in:
parent
b205ff65e6
commit
46613e27cf
|
@@ -218,7 +218,10 @@ class Lemmatizer(Pipe):
|
||||||
if not form:
|
if not form:
|
||||||
pass
|
pass
|
||||||
elif form in index or not form.isalpha():
|
elif form in index or not form.isalpha():
|
||||||
forms.append(form)
|
if form in index:
|
||||||
|
forms.insert(0, form)
|
||||||
|
else:
|
||||||
|
forms.append(form)
|
||||||
else:
|
else:
|
||||||
oov_forms.append(form)
|
oov_forms.append(form)
|
||||||
# Remove duplicates but preserve the ordering of applied "rules"
|
# Remove duplicates but preserve the ordering of applied "rules"
|
||||||
|
|
Loading…
Reference in New Issue
Block a user