# coding: utf-8
"""Test that tokenizer exceptions are handled correctly."""


from __future__ import unicode_literals

import pytest


@pytest.mark.parametrize(
    'text',
    ["e.g.", "p.m.", "Jan.", "Dec.", "Inc."],
)
def test_tokenizer_handles_abbr(en_tokenizer, text):
    """Check that a lone abbreviation comes back as exactly one token.

    Each parametrized ``text`` is passed to ``en_tokenizer`` on its own; the
    result must have length 1, i.e. the trailing/internal periods must not be
    split off into separate tokens.
    """
    doc = en_tokenizer(text)
    token_count = len(doc)
    assert token_count == 1


def test_tokenizer_handles_exc_in_text(en_tokenizer):
    """Check that "i.e." stays one token when it appears mid-sentence.

    The sentence must tokenize into six tokens overall, and the token at
    index 3 must carry the text "i.e." unchanged.
    """
    sentence = "It's mediocre i.e. bad."
    doc = en_tokenizer(sentence)
    # Total count first, so a wrong split elsewhere is reported before the
    # positional check below.
    assert len(doc) == 6
    fourth_token = doc[3]
    assert fourth_token.text == "i.e."