From c4d8fb60bff07cefee41e693cdb8a885487a030d Mon Sep 17 00:00:00 2001
From: Lise Brinck
Date: Wed, 22 Feb 2023 14:01:42 +0100
Subject: [PATCH] add unittest for explosion#12311

---
Reviewer notes (free-form text in this section is not part of the diff):
  Adds test_sv_tokenizer_handles_colon to the Swedish prefix/suffix/infix
  tests. The test is parametrized over "99:e", "c:a", "EU:s" and "Maj:t" and
  asserts that sv_tokenizer returns exactly one token for each string.
  NOTE(review): line breaks and the 4-space Python indentation below were
  reconstructed from a whitespace-collapsed copy of this patch -- confirm
  against the original commit before applying.

 spacy/tests/lang/sv/test_prefix_suffix_infix.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/spacy/tests/lang/sv/test_prefix_suffix_infix.py b/spacy/tests/lang/sv/test_prefix_suffix_infix.py
index bbb0ff415..0aa495992 100644
--- a/spacy/tests/lang/sv/test_prefix_suffix_infix.py
+++ b/spacy/tests/lang/sv/test_prefix_suffix_infix.py
@@ -32,3 +32,10 @@ def test_tokenizer_splits_comma_infix(sv_tokenizer, text):
 def test_tokenizer_splits_ellipsis_infix(sv_tokenizer, text):
     tokens = sv_tokenizer(text)
     assert len(tokens) == 3
+
+
+@pytest.mark.issue(12311)
+@pytest.mark.parametrize("text", ["99:e", "c:a", "EU:s", "Maj:t"])
+def test_sv_tokenizer_handles_colon(sv_tokenizer, text):
+    tokens = sv_tokenizer(text)
+    assert len(tokens) == 1