From 8dc265ac0c2bbea683d900f64c5080a23879c9da Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=B8ren=20Lind=20Kristiansen?=
Date: Fri, 24 Nov 2017 11:29:37 +0100
Subject: [PATCH] Add test for tokenization of 'i.' for Danish.

---
 spacy/tests/lang/da/test_exceptions.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/spacy/tests/lang/da/test_exceptions.py b/spacy/tests/lang/da/test_exceptions.py
index d89fafd2c..6d105d718 100644
--- a/spacy/tests/lang/da/test_exceptions.py
+++ b/spacy/tests/lang/da/test_exceptions.py
@@ -13,3 +13,10 @@ def test_da_tokenizer_handles_exc_in_text(da_tokenizer):
     tokens = da_tokenizer(text)
     assert len(tokens) == 5
     assert tokens[2].text == "bl.a."
+
+def test_da_tokenizer_handles_custom_base_exc(da_tokenizer):
+    text = "Her er noget du kan kigge i."
+    tokens = da_tokenizer(text)
+    assert len(tokens) == 8
+    assert tokens[6].text == "i"
+    assert tokens[7].text == "."