mirror of
https://github.com/explosion/spaCy.git
synced 2025-06-04 13:13:10 +03:00
Lemmatizer honors exceptions: Fix #1387
This commit is contained in:
parent
e81a608173
commit
ffb50d21a0
|
@ -78,15 +78,16 @@ def lemmatize(string, index, exceptions, rules):
|
||||||
# forms.append(string)
|
# forms.append(string)
|
||||||
forms.extend(exceptions.get(string, []))
|
forms.extend(exceptions.get(string, []))
|
||||||
oov_forms = []
|
oov_forms = []
|
||||||
for old, new in rules:
|
if not forms:
|
||||||
if string.endswith(old):
|
for old, new in rules:
|
||||||
form = string[:len(string) - len(old)] + new
|
if string.endswith(old):
|
||||||
if not form:
|
form = string[:len(string) - len(old)] + new
|
||||||
pass
|
if not form:
|
||||||
elif form in index or not form.isalpha():
|
pass
|
||||||
forms.append(form)
|
elif form in index or not form.isalpha():
|
||||||
else:
|
forms.append(form)
|
||||||
oov_forms.append(form)
|
else:
|
||||||
|
oov_forms.append(form)
|
||||||
if not forms:
|
if not forms:
|
||||||
forms.extend(oov_forms)
|
forms.extend(oov_forms)
|
||||||
if not forms:
|
if not forms:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user