mirror of https://github.com/explosion/spaCy.git (synced 2025-11-04 09:57:26 +03:00)

from __future__ import unicode_literals

import pytest

from ...vocab import Vocab
from ...tokenizer import Tokenizer


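# The fixtures below assemble a minimal Tokenizer: an empty rules dict and no
# prefix/suffix/infix patterns, so only explicitly added special cases can
# change how a string is split. The vocab's tag map defines a single tag,
# 'NN', mapped to the coarse-grained POS 'NOUN'.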
@pytest.fixture
def vocab():
    return Vocab(tag_map={'NN': {'pos': 'NOUN'}})


@pytest.fixture
def rules():
    return {}


@pytest.fixture
def prefix_search():
    return None


@pytest.fixture
def suffix_search():
    return None


@pytest.fixture
def infix_finditer():
    return None


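# Build the Tokenizer under test directly from the pieces above.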
@pytest.fixture
def tokenizer(vocab, rules, prefix_search, suffix_search, infix_finditer):
    return Tokenizer(vocab, rules, prefix_search, suffix_search, infix_finditer)


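# add_special_case should make the tokenizer emit the given token sequence
# verbatim whenever the chunk 'dog' is encountered.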
def test_add_special_case(tokenizer):
    tokenizer.add_special_case('dog', [{'orth': 'd'}, {'orth': 'og'}])
    doc = tokenizer('dog')
    assert doc[0].text == 'd'
    assert doc[1].text == 'og'


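# Attributes set on the special-case tokens (here 'tag') should be applied to
# the output, and the vocab's tag map should project the fine-grained tag 'NN'
# onto the coarse-grained POS 'NOUN'.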
def test_special_case_tag(tokenizer):
    tokenizer.add_special_case('dog', [{'orth': 'd', 'tag': 'NN'}, {'orth': 'og'}])
    doc = tokenizer('dog')
    assert doc[0].text == 'd'
    assert doc[0].tag_ == 'NN'
    assert doc[0].pos_ == 'NOUN'
    assert doc[1].text == 'og'