From 82ce3cc3f0e21133263ac6a3972d7969afad9004 Mon Sep 17 00:00:00 2001
From: sani
Date: Mon, 8 May 2023 00:54:12 +0800
Subject: [PATCH] fix token len

---
 spacy/tests/lang/ms/test_prefix_suffix_infix.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/tests/lang/ms/test_prefix_suffix_infix.py b/spacy/tests/lang/ms/test_prefix_suffix_infix.py
index 3d2528254..eee561345 100644
--- a/spacy/tests/lang/ms/test_prefix_suffix_infix.py
+++ b/spacy/tests/lang/ms/test_prefix_suffix_infix.py
@@ -61,7 +61,7 @@ def test_ms_tokenizer_splits_uneven_wrap_interact(id_tokenizer, text):
 
 
 @pytest.mark.parametrize(
-    "text,length", [("kerana", 1), ("Mahathir-Anwar", 3), ("Tun Dr. Ismail-Abdul Rahman", 4)]
+    "text,length", [("kerana", 1), ("Mahathir-Anwar", 3), ("Tun Dr. Ismail-Abdul Rahman", 6)]
 )
 def test_my_tokenizer_splits_hyphens(ms_tokenizer, text, length):
     tokens = ms_tokenizer(text)
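
Note (not part of the patch): the change updates the expected token count for "Tun Dr. Ismail-Abdul Rahman" from 4 to 6. A minimal sketch for checking that count locally with a blank Malay pipeline follows; it assumes a spaCy build that includes the "ms" language data from this branch, and the exact token strings in the comment are an assumption -- the patch itself only asserts the count.

    # Sketch: verify the hyphen/infix splitting the updated test expects.
    import spacy

    nlp = spacy.blank("ms")                    # blank pipeline, tokenizer only
    doc = nlp("Tun Dr. Ismail-Abdul Rahman")
    print([t.text for t in doc])               # assumed split:
                                               # ['Tun', 'Dr.', 'Ismail', '-', 'Abdul', 'Rahman']
    assert len(doc) == 6                       # the count the patched test expects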