Test #656, #624: special case rules for tokenizer with attributes.

2026-01-08 09:41:11 +03:00 · 2016-11-25 12:44:13 +01:00 · 2016-11-25 12:44:13 +01:00 · 6652f2a135
commit 6652f2a135
parent 1e0f566d95
1 changed file with 48 additions and 0 deletions
--- a/spacy/tests/unit/test_tokenizer.py
+++ b/spacy/tests/unit/test_tokenizer.py
@@ -0,0 +1,48 @@
+from __future__ import unicode_literals
+import pytest
+import re
+
+from ...vocab import Vocab
+from ...tokenizer import Tokenizer
+
+
+@pytest.fixture
+def vocab():  # fixture: the Vocab consumed by the tokenizer fixture
+    return Vocab(tag_map={'NN': {'pos': 'NOUN'}})  # tag_map has one entry: tag 'NN' -> pos 'NOUN'
+
+@pytest.fixture
+def rules():  # fixture: handed to Tokenizer as its second positional argument
+    return {}  # empty dict; the tests register their own entries via add_special_case
+
+@pytest.fixture
+def prefix_search():  # fixture: handed to Tokenizer as its third positional argument
+    return None  # presumably means "no prefix matching" -- TODO confirm against Tokenizer
+
+@pytest.fixture
+def suffix_search():  # fixture: handed to Tokenizer as its fourth positional argument
+    return None  # presumably means "no suffix matching" -- TODO confirm against Tokenizer
+
+@pytest.fixture
+def infix_finditer():  # fixture: handed to Tokenizer as its fifth positional argument
+    return None  # presumably means "no infix matching" -- TODO confirm against Tokenizer
+
+
+@pytest.fixture
+def tokenizer(vocab, rules, prefix_search, suffix_search, infix_finditer):  # each parameter is a fixture of the same name in this module
+    return Tokenizer(vocab, rules, prefix_search, suffix_search, infix_finditer)  # forwarded positionally, in the same order
+
+
+def test_add_special_case(tokenizer):  # a registered special case decides how the string 'dog' is split
+    tokenizer.add_special_case('dog', [{'orth': 'd'}, {'orth': 'og'}])  # one dict per expected output token
+    doc = tokenizer('dog')
+    assert doc[0].text == 'd'  # first piece of the special case
+    assert doc[1].text == 'og'  # second piece of the special case
+
+
+def test_special_case_tag(tokenizer):  # a 'tag' supplied in a special case must appear on the resulting token
+    tokenizer.add_special_case('dog', [{'orth': 'd', 'tag': 'NN'}, {'orth': 'og'}])  # only the first piece carries a tag
+    doc = tokenizer('dog')
+    assert doc[0].text == 'd'
+    assert doc[0].tag_ == 'NN'  # tag taken from the special-case entry
+    assert doc[0].pos_ == 'NOUN'  # presumably resolved through the vocab fixture's tag_map ('NN' -> 'NOUN') -- verify
+    assert doc[1].text == 'og'