From 25c29f072d32784290f51ac8a21490bc58405bb2 Mon Sep 17 00:00:00 2001 From: Juan Miguel Cejuela Date: Wed, 1 Mar 2017 21:44:17 +0100 Subject: [PATCH] lemmatizer: fall back to out-of-index rule forms instead of the input string --- spacy/lemmatizer.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/spacy/lemmatizer.py b/spacy/lemmatizer.py index 960467a0b..630334bf7 100644 --- a/spacy/lemmatizer.py +++ b/spacy/lemmatizer.py @@ -86,13 +86,16 @@ def lemmatize(string, index, exceptions, rules): #if string in index: # forms.append(string) forms.extend(exceptions.get(string, [])) + oov_forms = [] for old, new in rules: if string.endswith(old): form = string[:len(string) - len(old)] + new if form in index or not form.isalpha(): forms.append(form) + else: + oov_forms.append(form) if not forms: - forms.append(string) + forms.extend(oov_forms) return set(forms)