mirror of
https://github.com/explosion/spaCy.git
synced 2025-04-25 03:13:41 +03:00
* Add tests for Unicode punctuation character lemmatization
This commit is contained in:
parent
c5b2c4ead8
commit
b71ba2eed5
|
@ -1,3 +1,4 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from spacy.lemmatizer import Lemmatizer, read_index, read_exc
|
from spacy.lemmatizer import Lemmatizer, read_index, read_exc
|
||||||
|
@ -34,3 +35,14 @@ def test_noun_lemmas(lemmatizer):
|
||||||
assert do('planets') == set(['planet'])
|
assert do('planets') == set(['planet'])
|
||||||
assert do('ring') == set(['ring'])
|
assert do('ring') == set(['ring'])
|
||||||
assert do('axes') == set(['axis', 'axe', 'ax'])
|
assert do('axes') == set(['axis', 'axe', 'ax'])
|
||||||
|
|
||||||
|
|
||||||
|
def test_smart_quotes(lemmatizer):
|
||||||
|
do = lemmatizer.punct
|
||||||
|
assert do('“') == set(['``'])
|
||||||
|
assert do('“') == set(['``'])
|
||||||
|
|
||||||
|
|
||||||
|
def test_smart_quotes(lemmatizer):
|
||||||
|
do = lemmatizer.punct
|
||||||
|
assert do('–') == set(["--"])
|
||||||
|
|
Loading…
Reference in New Issue
Block a user