spaCy/spacy/tests/lang/sv/test_prefix_suffix_infix.py
Ines Montani db55577c45
Drop Python 2.7 and 3.5 (#4828)
* Remove unicode declarations

* Remove Python 3.5 and 2.7 from CI

* Don't require pathlib

* Replace compat helpers

* Remove OrderedDict

* Use f-strings

* Set Cython compiler language level

* Fix typo

* Re-add OrderedDict for Table

* Update setup.cfg

* Revert CONTRIBUTING.md

* Revert lookups.md

* Revert top-level.md

* Small adjustments and docs [ci skip]
2019-12-22 01:53:56 +01:00

35 lines
1.0 KiB
Python

import pytest
@pytest.mark.parametrize("text", ["(under)"])
def test_tokenizer_splits_no_special(sv_tokenizer, text):
tokens = sv_tokenizer(text)
assert len(tokens) == 3
@pytest.mark.parametrize("text", ["gitta'r", "Björn's", "Lars'"])
def test_tokenizer_handles_no_punct(sv_tokenizer, text):
tokens = sv_tokenizer(text)
assert len(tokens) == 1
@pytest.mark.parametrize("text", ["svart.Gul", "Hej.Världen"])
def test_tokenizer_splits_period_infix(sv_tokenizer, text):
tokens = sv_tokenizer(text)
assert len(tokens) == 3
@pytest.mark.parametrize("text", ["Hej,Världen", "en,två"])
def test_tokenizer_splits_comma_infix(sv_tokenizer, text):
tokens = sv_tokenizer(text)
assert len(tokens) == 3
assert tokens[0].text == text.split(",")[0]
assert tokens[1].text == ","
assert tokens[2].text == text.split(",")[1]
@pytest.mark.parametrize("text", ["svart...Gul", "svart...gul"])
def test_tokenizer_splits_ellipsis_infix(sv_tokenizer, text):
tokens = sv_tokenizer(text)
assert len(tokens) == 3