Port over changes from #1389

This commit is contained in:
ines 2017-10-14 13:32:55 +02:00
parent cd6a29dce7
commit 3516aa0cea
2 changed files with 32 additions and 9 deletions

View File

@@ -94,15 +94,16 @@ def lemmatize(string, index, exceptions, rules):
# forms.append(string)
forms.extend(exceptions.get(string, []))
oov_forms = []
for old, new in rules:
if string.endswith(old):
form = string[:len(string) - len(old)] + new
if not form:
pass
elif form in index or not form.isalpha():
forms.append(form)
else:
oov_forms.append(form)
if not forms:
for old, new in rules:
if string.endswith(old):
form = string[:len(string) - len(old)] + new
if not form:
pass
elif form in index or not form.isalpha():
forms.append(form)
else:
oov_forms.append(form)
if not forms:
forms.extend(oov_forms)
if not forms:

View File

@@ -0,0 +1,22 @@
# coding: utf-8
from __future__ import unicode_literals
from ...symbols import POS, VERB, VerbForm_part
from ...vocab import Vocab
from ...lemmatizer import Lemmatizer
from ..util import get_doc
import pytest
def test_issue1387():
    """Regression test for issue #1387.

    Builds a lemmatizer whose verb tables offer two candidate lemmas for
    "coping": the exception entry gives "cope", while stripping the "ing"
    suffix gives "cop", which is also listed in the index. The test expects
    the exception entry to be the resulting lemma.
    """
    # Lemmatizer tables, all keyed by the "verb" part-of-speech.
    verb_index = {"verb": ("cope", "cop")}
    verb_exceptions = {"verb": {"coping": ("cope",)}}
    verb_rules = {"verb": [["ing", ""]]}
    # Map the fine-grained VBG tag onto the verb part-of-speech.
    vbg_tag_map = {'VBG': {POS: VERB, VerbForm_part: True}}

    vocab = Vocab(
        lemmatizer=Lemmatizer(verb_index, verb_exceptions, verb_rules),
        tag_map=vbg_tag_map,
    )
    doc = get_doc(vocab, ["coping"])
    # The tag is assigned before the lemma is read below.
    doc[0].tag_ = 'VBG'

    assert doc[0].text == "coping"
    assert doc[0].lemma_ == "cope"