mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 01:04:34 +03:00
apply patch
This commit is contained in:
parent
a8cfde46d3
commit
25c29f072d
|
@ -86,13 +86,16 @@ def lemmatize(string, index, exceptions, rules):
|
||||||
#if string in index:
|
#if string in index:
|
||||||
# forms.append(string)
|
# forms.append(string)
|
||||||
forms.extend(exceptions.get(string, []))
|
forms.extend(exceptions.get(string, []))
|
||||||
|
oov_forms = []
|
||||||
for old, new in rules:
|
for old, new in rules:
|
||||||
if string.endswith(old):
|
if string.endswith(old):
|
||||||
form = string[:len(string) - len(old)] + new
|
form = string[:len(string) - len(old)] + new
|
||||||
if form in index or not form.isalpha():
|
if form in index or not form.isalpha():
|
||||||
forms.append(form)
|
forms.append(form)
|
||||||
|
else:
|
||||||
|
oov_forms.append(form)
|
||||||
if not forms:
|
if not forms:
|
||||||
forms.append(string)
|
forms.extend(oov_forms)
|
||||||
return set(forms)
|
return set(forms)
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user