mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-05 04:40:20 +03:00
fix: match hyphenated words to lemmas in index_table (e.g. "co-authored" -> "co-author")
This commit is contained in:
parent
98a19df91a
commit
12292b36f9
|
@@ -241,7 +241,10 @@ class Lemmatizer(Pipe):
|
||||||
if not form:
|
if not form:
|
||||||
pass
|
pass
|
||||||
elif form in index or not form.isalpha():
|
elif form in index or not form.isalpha():
|
||||||
forms.append(form)
|
if form in index:
|
||||||
|
forms.insert(0, form)
|
||||||
|
else:
|
||||||
|
forms.append(form)
|
||||||
else:
|
else:
|
||||||
oov_forms.append(form)
|
oov_forms.append(form)
|
||||||
# Remove duplicates but preserve the ordering of applied "rules"
|
# Remove duplicates but preserve the ordering of applied "rules"
|
||||||
|
|
Loading…
Reference in New Issue
Block a user