Fix french lemmatization (#3180)

This commit is contained in:
foufaster 2019-01-27 06:01:30 +01:00 committed by Matthew Honnibal
parent f9ca09caa0
commit 8bd85fd9d5
2 changed files with 11 additions and 1 deletion

View File

@@ -131,7 +131,7 @@ def lemmatize(string, index, exceptions, rules):
if not forms:
forms.extend(oov_forms)
if not forms and string in LOOKUP.keys():
- forms.append(LOOKUP[string])
+ forms.append(LOOKUP[string][0])
if not forms:
forms.append(string)
return list(set(forms))

View File

@@ -0,0 +1,10 @@
from __future__ import unicode_literals
import pytest
import spacy
@pytest.mark.models('fr')
def test_issue1959(FR):
    """Smoke test: ``FR`` must process every sample text without raising.

    The test makes no assertions; it passes as long as each call returns.
    ``FR`` is supplied by a fixture defined outside this file (presumably
    the loaded French pipeline -- confirm against the test suite's conftest).
    """
    # One French sentence and one mixed English/French phrase.
    samples = ('Je suis la mauvaise herbe', "Me, myself and moi")
    for sample in samples:
        FR(sample)