* Add tests for Unicode punctuation character lemmatization

This commit is contained in:
Matthew Honnibal 2015-10-09 18:43:14 +11:00
parent c5b2c4ead8
commit b71ba2eed5

View File

@@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from spacy.lemmatizer import Lemmatizer, read_index, read_exc
@@ -34,3 +35,14 @@ def test_noun_lemmas(lemmatizer):
assert do('planets') == set(['planet'])
assert do('ring') == set(['ring'])
assert do('axes') == set(['axis', 'axe', 'ax'])
def test_smart_quotes(lemmatizer):
    """Check that ``lemmatizer.punct`` yields the PTB-style opening-quote set.

    Both calls are expected to return exactly ``set(['``'])``.
    """
    # NOTE(review): both argument literals are empty strings as seen here; the
    # test name suggests a non-ASCII smart-quote character was intended and
    # may have been lost in extraction -- TODO confirm against the repository.
    expected = set(['``'])
    punct_lemmas = lemmatizer.punct
    assert punct_lemmas('') == expected
    assert punct_lemmas('') == expected
def test_smart_dashes(lemmatizer):
    """Check that ``lemmatizer.punct`` maps the dash input to ``set(["--"])``.

    This test previously shared the name ``test_smart_quotes`` with the test
    defined just before it. A second ``def`` with the same name rebinds the
    module-level name, so the earlier test was silently dropped and never
    collected by pytest. Giving this test its own name lets both run.
    """
    do = lemmatizer.punct
    # NOTE(review): the argument literal is an empty string as seen here; the
    # expected "--" suggests a non-ASCII dash character was intended and may
    # have been lost in extraction -- TODO confirm against the repository.
    assert do('') == set(["--"])