Mirror of https://github.com/explosion/spaCy.git
parent 1e0f566d95
commit 6652f2a135
spacy/tests/unit/test_tokenizer.py | 48 (new file)
@@ -0,0 +1,48 @@
from __future__ import unicode_literals

import pytest
import re

from ...vocab import Vocab
from ...tokenizer import Tokenizer


@pytest.fixture
def vocab():
    # Minimal vocab: a single tag, so the tag->POS mapping can be asserted below.
    return Vocab(tag_map={'NN': {'pos': 'NOUN'}})


@pytest.fixture
def rules():
    return {}


@pytest.fixture
def prefix_search():
    return None


@pytest.fixture
def suffix_search():
    return None


@pytest.fixture
def infix_finditer():
    return None


@pytest.fixture
def tokenizer(vocab, rules, prefix_search, suffix_search, infix_finditer):
    # No affix callbacks: tokenization is driven entirely by special cases.
    return Tokenizer(vocab, rules, prefix_search, suffix_search, infix_finditer)


def test_add_special_case(tokenizer):
    tokenizer.add_special_case('dog', [{'orth': 'd'}, {'orth': 'og'}])
    doc = tokenizer('dog')
    assert doc[0].text == 'd'
    assert doc[1].text == 'og'


def test_special_case_tag(tokenizer):
    tokenizer.add_special_case('dog', [{'orth': 'd', 'tag': 'NN'}, {'orth': 'og'}])
    doc = tokenizer('dog')
    assert doc[0].text == 'd'
    assert doc[0].tag_ == 'NN'
    assert doc[0].pos_ == 'NOUN'
    assert doc[1].text == 'og'
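
The fixtures above pass None for all three affix callbacks, so these tests exercise only the special-case path. For context, a minimal sketch of what non-None callbacks look like under the same constructor signature; the patterns below are illustrative assumptions, not taken from this commit:

import re
from spacy.vocab import Vocab
from spacy.tokenizer import Tokenizer

# Assumed example patterns: strip leading/trailing punctuation, split on
# hyphens/tildes. Bound regex methods supply the expected callables:
# .search for prefix/suffix, .finditer for infixes.
prefix_re = re.compile(r'''^[\("']''')
suffix_re = re.compile(r'''[\)"']$''')
infix_re = re.compile(r'''[-~]''')

tokenizer = Tokenizer(Vocab(), {},
                      prefix_re.search, suffix_re.search, infix_re.finditer)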