Test #656, #624: special case rules for tokenizer with attributes.

This commit is contained in:
Matthew Honnibal 2016-11-25 12:44:13 +01:00
parent 1e0f566d95
commit 6652f2a135

View File

@ -0,0 +1,48 @@
from __future__ import unicode_literals
import pytest
import re
from ...vocab import Vocab
from ...tokenizer import Tokenizer
@pytest.fixture
def vocab():
return Vocab(tag_map={'NN': {'pos': 'NOUN'}})
@pytest.fixture
def rules():
return {}
@pytest.fixture
def prefix_search():
return None
@pytest.fixture
def suffix_search():
return None
@pytest.fixture
def infix_finditer():
return None
@pytest.fixture
def tokenizer(vocab, rules, prefix_search, suffix_search, infix_finditer):
return Tokenizer(vocab, rules, prefix_search, suffix_search, infix_finditer)
def test_add_special_case(tokenizer):
tokenizer.add_special_case('dog', [{'orth': 'd'}, {'orth': 'og'}])
doc = tokenizer('dog')
assert doc[0].text == 'd'
assert doc[1].text == 'og'
def test_special_case_tag(tokenizer):
tokenizer.add_special_case('dog', [{'orth': 'd', 'tag': 'NN'}, {'orth': 'og'}])
doc = tokenizer('dog')
assert doc[0].text == 'd'
assert doc[0].tag_ == 'NN'
assert doc[0].pos_ == 'NOUN'
assert doc[1].text == 'og'