Lemmatizer: skip suffix rules when the exceptions table already gave a form.

lemmatize() used to run the suffix rules even after the exceptions table had
supplied forms. In the regression test, "coping" has the exception "cope",
but the rule "ing" -> "" also yields "cop", which is in the index, so both
candidates ended up in the result. The rule loop is now guarded by
`if not forms:`. Adds a regression test for issue 1387.

diff --git a/spacy/lemmatizer.py b/spacy/lemmatizer.py
index 53519e4f1..bd2ca766a 100644
--- a/spacy/lemmatizer.py
+++ b/spacy/lemmatizer.py
@@ -94,15 +94,16 @@ def lemmatize(string, index, exceptions, rules):
     #    forms.append(string)
     forms.extend(exceptions.get(string, []))
     oov_forms = []
-    for old, new in rules:
-        if string.endswith(old):
-            form = string[:len(string) - len(old)] + new
-            if not form:
-                pass
-            elif form in index or not form.isalpha():
-                forms.append(form)
-            else:
-                oov_forms.append(form)
+    if not forms:
+        for old, new in rules:
+            if string.endswith(old):
+                form = string[:len(string) - len(old)] + new
+                if not form:
+                    pass
+                elif form in index or not form.isalpha():
+                    forms.append(form)
+                else:
+                    oov_forms.append(form)
     if not forms:
         forms.extend(oov_forms)
     if not forms:
diff --git a/spacy/tests/regression/test_issue1387.py b/spacy/tests/regression/test_issue1387.py
new file mode 100644
index 000000000..4bd0092d0
--- /dev/null
+++ b/spacy/tests/regression/test_issue1387.py
@@ -0,0 +1,22 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from ...symbols import POS, VERB, VerbForm_part
+from ...vocab import Vocab
+from ...lemmatizer import Lemmatizer
+from ..util import get_doc
+
+import pytest
+
+
+def test_issue1387():
+    tag_map = {'VBG': {POS: VERB, VerbForm_part: True}}
+    index = {"verb": ("cope","cop")}
+    exc = {"verb": {"coping": ("cope",)}}
+    rules = {"verb": [["ing", ""]]}
+    lemmatizer = Lemmatizer(index, exc, rules)
+    vocab = Vocab(lemmatizer=lemmatizer, tag_map=tag_map)
+    doc = get_doc(vocab, ["coping"])
+    doc[0].tag_ = 'VBG'
+    assert doc[0].text == "coping"
+    assert doc[0].lemma_ == "cope"