diff --git a/spacy/lemmatizer.py b/spacy/lemmatizer.py index d7541c56b..1112bcee3 100644 --- a/spacy/lemmatizer.py +++ b/spacy/lemmatizer.py @@ -78,15 +78,16 @@ def lemmatize(string, index, exceptions, rules): # forms.append(string) forms.extend(exceptions.get(string, [])) oov_forms = [] - for old, new in rules: - if string.endswith(old): - form = string[:len(string) - len(old)] + new - if not form: - pass - elif form in index or not form.isalpha(): - forms.append(form) - else: - oov_forms.append(form) + if not forms: + for old, new in rules: + if string.endswith(old): + form = string[:len(string) - len(old)] + new + if not form: + pass + elif form in index or not form.isalpha(): + forms.append(form) + else: + oov_forms.append(form) if not forms: forms.extend(oov_forms) if not forms: diff --git a/spacy/tests/regression/test_issue1387.py b/spacy/tests/regression/test_issue1387.py new file mode 100644 index 000000000..c5f01d145 --- /dev/null +++ b/spacy/tests/regression/test_issue1387.py @@ -0,0 +1,22 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from ...symbols import POS, VERB, VerbForm_part +from ...vocab import Vocab +from ...lemmatizer import Lemmatizer +from ..util import get_doc + +import pytest + +def test_issue1387(): + tag_map = {'VBG': {POS: VERB, VerbForm_part: True}} + index = {"verb": ("cope","cop")} + exc = {"verb": {"coping": ("cope",)}} + rules = {"verb": [["ing", ""]]} + lemmatizer = Lemmatizer(index, exc, rules) + vocab = Vocab(lemmatizer=lemmatizer, tag_map=tag_map) + doc = get_doc(vocab, ["coping"]) + doc[0].tag_ = 'VBG' + assert doc[0].text == "coping" + assert doc[0].lemma_ == "cope" + diff --git a/spacy/tests/tagger/test_lemmatizer.py b/spacy/tests/tagger/test_lemmatizer.py index 5db0d0b2c..91ed7d2f1 100644 --- a/spacy/tests/tagger/test_lemmatizer.py +++ b/spacy/tests/tagger/test_lemmatizer.py @@ -47,3 +47,20 @@ def test_tagger_lemmatizer_lemma_assignment(EN): assert all(t.lemma_ == '' for t in doc) EN.tagger(doc) assert all(t.lemma_ != '' for t in doc) + + +from ...symbols import POS, VERB, VerbForm_part +from ...vocab import Vocab +from ...lemmatizer import Lemmatizer +from ..util import get_doc +def test_tagger_lemmatizer_exceptions(): + index = {"verb": ("cope","cop")} + exc = {"verb": {"coping": ("cope",)}} + rules = {"verb": [["ing", ""]]} + tag_map = {'VBG': {POS: VERB, VerbForm_part: True}} + lemmatizer = Lemmatizer(index, exc, rules) + vocab = Vocab(lemmatizer=lemmatizer, tag_map=tag_map) + doc = get_doc(vocab, ["coping"]) + doc[0].tag_ = 'VBG' + assert doc[0].text == "coping" + assert doc[0].lemma_ == "cope"