diff --git a/spacy/tests/regression/test_issue852.py b/spacy/tests/regression/test_issue852.py
new file mode 100644
index 000000000..19a883ffd
--- /dev/null
+++ b/spacy/tests/regression/test_issue852.py
@@ -0,0 +1,25 @@
+# encoding: utf8
+from __future__ import unicode_literals
+
+import pytest
+
+
+@pytest.mark.parametrize(
+    'text',
+    [
+        "au-delàs",
+        "pair-programmâmes",
+        "terra-formées",
+        "σ-compacts",
+    ]
+)
+def test_issue852(fr_tokenizer, text):
+    """Regression test for issue #852.
+
+    Checks that French tokenizer exceptions are imported correctly:
+    each parametrized hyphenated word must be returned by the French
+    tokenizer as exactly one token.
+    """
+    doc = fr_tokenizer(text)
+    token_count = len(doc)
+    assert token_count == 1