spaCy/tests/vocab/test_shape.py

34 lines
474 B
Python
Raw Normal View History

2014-08-30 21:00:10 +04:00
from __future__ import unicode_literals
import pytest
from spacy.orth import word_shape as ws
def test_capitalized():
    """A capitalized four-letter word is expected to have the shape 'Xxxx'."""
    shape = ws('Nasa')
    assert shape == 'Xxxx'
2015-04-19 22:39:18 +03:00
2014-08-30 21:00:10 +04:00
def test_truncate():
    """An 11-letter lowercase word is expected to yield the shape 'xxxx'.

    The input is much longer than the expected shape, so this pins that
    the shape of a long run of lowercase letters comes back as only four
    'x' characters.
    """
    shape = ws('capitalized')
    assert shape == 'xxxx'
2014-08-30 21:00:10 +04:00
2015-04-19 22:39:18 +03:00
2014-08-30 21:00:10 +04:00
def test_digits():
    """A nine-digit string is expected to yield the shape 'dddd'.

    As with the lowercase case, the expected shape is shorter than the
    input: nine digits in, four 'd' characters out.
    """
    shape = ws('999999999')
    assert shape == 'dddd'
2014-08-30 21:00:10 +04:00
2015-04-19 22:39:18 +03:00
2014-08-30 21:00:10 +04:00
def test_mix():
    """Alternating uppercase letters and digits are expected to give 'XdXd'."""
    shape = ws('C3P0')
    assert shape == 'XdXd'
2015-04-19 22:39:18 +03:00
2014-08-30 21:00:10 +04:00
def test_punct():
    """A single comma is expected to come back unchanged as its own shape."""
    token = ','
    assert ws(token) == ','
2015-04-19 22:39:18 +03:00
2014-08-30 21:00:10 +04:00
def test_space():
    """A lone newline is expected to come back unchanged as its own shape."""
    token = '\n'
    assert ws(token) == '\n'
2015-04-19 22:39:18 +03:00
2014-08-30 21:00:10 +04:00
def test_punct_seq():
    """A run of mixed punctuation is expected to come back unchanged."""
    token = '``,-'
    assert ws(token) == '``,-'