spaCy/spacy/tests/lang/ca/test_prefix_suffix_infix.py

# coding: utf-8
from __future__ import unicode_literals

import pytest


@pytest.mark.parametrize(
    "text,expected_tokens", [("d'un", ["d'", "un"]), ("s'ha", ["s'", "ha"])]
)
def test_contractions(ca_tokenizer, text, expected_tokens):
    """Test that contractions are split into the expected two tokens.

    Each parametrized case pairs a contracted input string with the list of
    token texts the tokenizer is expected to produce for it.
    """
    tokens = ca_tokenizer(text)
    assert len(tokens) == 2
    # A count check alone would accept a split at the wrong position
    # (e.g. "d" + "'un"), so the token texts are compared as well.
    assert [token.text for token in tokens] == expected_tokens
Improve Catalan tokenization accuracy (#3225) * small hyphen clean up for French * catalan infix similar to french 2019-02-04 12:37:19 +03:00			`# coding: utf-8`
			`from __future__ import unicode_literals`

			`import pytest`


			`@pytest.mark.parametrize(`
			`"text,expected_tokens", [("d'un", ["d'", "un"]), ("s'ha", ["s'", "ha"])]`
			`)`
			`def test_contractions(ca_tokenizer, text, expected_tokens):`
			`""" Test that the contractions are split into two tokens"""`
			`tokens = ca_tokenizer(text)`
			`assert len(tokens) == 2`