Add test for tokenization of 'i.' for Danish.

commit 8dc265ac0c
parent ac8116510d
Author: Søren Lind Kristiansen
Date: 2017-11-24 11:29:37 +01:00


@@ -13,3 +13,10 @@ def test_da_tokenizer_handles_exc_in_text(da_tokenizer):
     tokens = da_tokenizer(text)
     assert len(tokens) == 5
     assert tokens[2].text == "bl.a."
+
+def test_da_tokenizer_handles_custom_base_exc(da_tokenizer):
+    text = "Her er noget du kan kigge i."
+    tokens = da_tokenizer(text)
+    assert len(tokens) == 8
+    assert tokens[6].text == "i"
+    assert tokens[7].text == "."