spaCy/tests/test_times.py

25 lines
831 B
Python
Raw Normal View History

2015-04-07 05:52:25 +03:00
from __future__ import unicode_literals
from spacy.en import English
import pytest
# Single English instance created at import time; test_am_pm calls it to
# process each sentence.
NLU = English()
2015-04-19 22:39:18 +03:00
2015-04-07 05:52:25 +03:00
def test_am_pm():
    """Check that clock times such as "5 p.m." come out as one TIME entity.

    Every hour from 1 to 12 is paired with each am/pm spelling, both with and
    without a separating space, and embedded in a fixed sentence. For each
    sentence the test requires that:

    * the token at index 4 is the complete time expression (also when it
      contains a space),
    * exactly one entity is found and its label is 'TIME',
    * if that entity has start 4 and end 5, its text is the time expression.
    """
    hours = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12']
    meridiems = ['a.m.', 'am', 'p.m.', 'pm']
    gaps = ['', ' ']
    # Flattened hour -> suffix -> gap nesting: hours vary slowest, gaps
    # fastest, so sentences are produced in a fixed, predictable order.
    cases = (
        (hour, gap, suffix)
        for hour in hours
        for suffix in meridiems
        for gap in gaps
    )
    for hour, gap, suffix in cases:
        expected = '%s%s%s' % (hour, gap, suffix)
        sentence = u"The meeting was at %s%s%s wasn't it?" % (hour, gap, suffix)
        doc = NLU(sentence, merge_mwes=True)
        assert doc[4].orth_ == expected
        entities = list(doc.ents)
        assert len(entities) == 1
        first = entities[0]
        # The sentence is attached so a wrong label shows which input failed.
        assert first.label_ == 'TIME', sentence
        # The text comparison only applies to an entity with start 4, end 5.
        if first.start != 4 or first.end != 5:
            continue
        assert first.orth_ == expected