mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-10 19:57:17 +03:00
Lemmatizer honors exceptions: Fix #1387
This commit is contained in:
parent
e81a608173
commit
ffb50d21a0
|
@ -78,15 +78,16 @@ def lemmatize(string, index, exceptions, rules):
|
|||
# forms.append(string)
|
||||
forms.extend(exceptions.get(string, []))
|
||||
oov_forms = []
|
||||
for old, new in rules:
|
||||
if string.endswith(old):
|
||||
form = string[:len(string) - len(old)] + new
|
||||
if not form:
|
||||
pass
|
||||
elif form in index or not form.isalpha():
|
||||
forms.append(form)
|
||||
else:
|
||||
oov_forms.append(form)
|
||||
if not forms:
|
||||
for old, new in rules:
|
||||
if string.endswith(old):
|
||||
form = string[:len(string) - len(old)] + new
|
||||
if not form:
|
||||
pass
|
||||
elif form in index or not form.isalpha():
|
||||
forms.append(form)
|
||||
else:
|
||||
oov_forms.append(form)
|
||||
if not forms:
|
||||
forms.extend(oov_forms)
|
||||
if not forms:
|
||||
|
|
Loading…
Reference in New Issue
Block a user