# spaCy/tests/spans/test_times.py

from __future__ import unicode_literals

import pytest


@pytest.mark.models
def test_am_pm(en_nlp):
    """Check that 12-hour clock times with an am/pm suffix are labelled TIME.

    Each hour from 1 to 12 is paired with every suffix spelling, both with
    and without a separating space, and placed inside a fixed sentence that
    is run through the ``en_nlp`` fixture.
    """
    hours = ('1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12')
    suffixes = ('a.m.', 'am', 'p.m.', 'pm')
    separators = ('', ' ')
    # Same iteration order as nesting hour -> suffix -> separator loops.
    combos = [
        (hour, sep, suffix)
        for hour in hours
        for suffix in suffixes
        for sep in separators
    ]
    for parts in combos:
        expected = '%s%s%s' % parts
        sentence = u"The meeting was at %s%s%s wasn't it?" % parts
        # NOTE(review): merge_mwes presumably collapses multi-word
        # expressions into single tokens -- confirm against the pipeline.
        doc = en_nlp(sentence, merge_mwes=True)
        # The whole time expression must be the single token at index 4.
        assert doc[4].orth_ == expected
        entities = list(doc.ents)
        assert len(entities) == 1, entities
        entity = entities[0]
        assert entity.label_ == 'TIME', sentence
        # The entity text is only compared when it spans exactly token 4.
        if entity.start != 4 or entity.end != 5:
            continue
        assert entity.orth_ == expected