mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
* Add tests for unicode punctuation character lemmatization
This commit is contained in:
parent
c5b2c4ead8
commit
b71ba2eed5
|
@ -1,3 +1,4 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from spacy.lemmatizer import Lemmatizer, read_index, read_exc
|
||||
|
@ -34,3 +35,14 @@ def test_noun_lemmas(lemmatizer):
|
|||
assert do('planets') == set(['planet'])
|
||||
assert do('ring') == set(['ring'])
|
||||
assert do('axes') == set(['axis', 'axe', 'ax'])
|
||||
|
||||
|
||||
def test_smart_quotes(lemmatizer):
    """Check the punctuation lemma of the curly opening double quote.

    `lemmatizer` is the fixture object whose `punct` method is exercised;
    the method is expected to return a set of lemma strings.
    """
    do = lemmatizer.punct
    # NOTE(review): this assertion used to appear twice with the very same
    # opening quote. The second copy was presumably meant to cover the
    # closing quote; confirm the expected lemma before adding that case.
    assert do('“') == set(['``'])


def test_en_dash(lemmatizer):
    """Check the punctuation lemma of the en dash.

    This test used to be a second `test_smart_quotes`. Because a later `def`
    rebinds the module-level name, the quote test above was silently
    replaced and never collected, so this one now has its own name.
    """
    do = lemmatizer.punct
    assert do('–') == set(["--"])
|
||||
|
|
Loading…
Reference in New Issue
Block a user