German lemmatizer additions (#2529)

* lemma of was -> was

* added new pairs from issue #2486

* added article tests
This commit is contained in:
Duygu Altinok 2018-07-09 11:10:15 +02:00 committed by Matthew Honnibal
parent c21efea9bb
commit 00b9a58558
2 changed files with 389 additions and 2 deletions

File diff suppressed because it is too large Load Diff

View File

@ -7,7 +7,9 @@ import pytest
@pytest.mark.parametrize('string,lemma', [
    ('Abgehängten', 'Abgehängte'),
    ('engagierte', 'engagieren'),
    ('schließt', 'schließen'),
    ('vorgebenden', 'vorgebend'),
    # Article cases: both the lowercase and the capitalised surface form
    # are expected to yield the same lowercase lemma.
    ('die', 'der'),
    ('Die', 'der'),
])
def test_lemmatizer_lookup_assigns(de_tokenizer, string, lemma):
    """Check that the first token of ``string`` carries the expected lemma.

    Args:
        de_tokenizer: Callable that turns a string into an indexable
            sequence of tokens exposing ``lemma_`` (presumably the German
            tokenizer fixture supplied by the test suite; confirm in
            conftest).
        string: Surface form to tokenize.
        lemma: Lemma expected on the first resulting token.
    """
    tokens = de_tokenizer(string)
    # Each input is a single word, so only the first token is inspected.
    assert tokens[0].lemma_ == lemma