2017-03-12 15:07:28 +03:00
|
|
|
|
# coding: utf8
|
2017-02-24 20:22:49 +03:00
|
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
|
|
|
|
|
import pytest
|
|
|
|
|
|
2018-07-24 21:09:06 +03:00
|
|
|
|
from ... import util
|
|
|
|
|
|
|
|
|
|
@pytest.fixture(scope='module')
def fr_tokenizer():
    """Provide a French tokenizer, built once per test module.

    Looks up the French language class via the shared test `util`
    helper and instantiates a tokenizer from its defaults.
    """
    french = util.get_lang_class('fr')
    return french.Defaults.create_tokenizer()
|
|
|
|
|
|
|
|
|
|
|
2017-02-24 20:22:49 +03:00
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize('text', ["au-delàs", "pair-programmâmes",
                                  "terra-formées", "σ-compacts"])
def test_issue852(fr_tokenizer, text):
    """Test that French tokenizer exceptions are imported correctly.

    Each hyphenated (or Greek-prefixed) word is a registered tokenizer
    exception and must come out as a single token, not be split on the
    hyphen.
    """
    assert len(fr_tokenizer(text)) == 1
|