mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			60 lines
		
	
	
		
			1.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			60 lines
		
	
	
		
			1.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
from __future__ import unicode_literals
 | 
						|
import spacy
 | 
						|
from spacy.vocab import Vocab
 | 
						|
from spacy.matcher import Matcher
 | 
						|
from spacy.tokens.doc import Doc
 | 
						|
from spacy.attrs import *
 | 
						|
 | 
						|
import pytest
 | 
						|
 | 
						|
 | 
						|
@pytest.fixture
def en_vocab():
    """Build a vocabulary from the English language class defaults."""
    english_cls = spacy.get_lang_class('en')
    return english_cls.Defaults.create_vocab()
 | 
						|
 | 
						|
 | 
						|
def test_init_matcher(en_vocab):
    """A freshly constructed Matcher holds no patterns and yields no matches."""
    fresh_matcher = Matcher(en_vocab)
    assert fresh_matcher.n_patterns == 0
    sample_doc = Doc(en_vocab, words=[u'Some', u'words'])
    found = fresh_matcher(sample_doc)
    assert found == []
 | 
						|
 | 
						|
 | 
						|
def test_add_empty_entity(en_vocab):
    """Registering an entity without patterns adds no patterns or matches."""
    entity_matcher = Matcher(en_vocab)
    entity_matcher.add_entity('TestEntity')
    assert entity_matcher.n_patterns == 0
    sample_doc = Doc(en_vocab, words=[u'Test', u'Entity'])
    found = entity_matcher(sample_doc)
    assert found == []
 | 
						|
 | 
						|
 | 
						|
def test_get_entity_attrs(en_vocab):
    """get_entity returns the attrs an entity was registered with."""
    entity_matcher = Matcher(en_vocab)

    # An entity added without attrs reports an empty dict.
    entity_matcher.add_entity('TestEntity')
    assert entity_matcher.get_entity('TestEntity') == {}

    # An entity added with attrs reports exactly those attrs.
    entity_matcher.add_entity('TestEntity2', attrs={'Hello': 'World'})
    second_attrs = entity_matcher.get_entity('TestEntity2')
    assert second_attrs == {'Hello': 'World'}

    # Adding the second entity leaves the first entity's attrs unchanged.
    first_attrs = entity_matcher.get_entity('TestEntity')
    assert first_attrs == {}
 | 
						|
 | 
						|
 | 
						|
def test_get_entity_via_match(en_vocab):
    """The entity ID returned in a match can be used to look up its attrs."""
    entity_matcher = Matcher(en_vocab)
    entity_matcher.add_entity('TestEntity', attrs={u'Hello': u'World'})

    # With the entity registered but no pattern added, nothing matches.
    assert entity_matcher.n_patterns == 0
    assert entity_matcher(Doc(en_vocab, words=[u'Test', u'Entity'])) == []

    # After adding a two-token pattern, the same words give one match.
    two_token_pattern = [{ORTH: u'Test'}, {ORTH: u'Entity'}]
    entity_matcher.add_pattern(u'TestEntity', two_token_pattern)
    assert entity_matcher.n_patterns == 1
    sample_doc = Doc(en_vocab, words=[u'Test', u'Entity'])
    found = entity_matcher(sample_doc)
    assert len(found) == 1

    # Each match is a 4-tuple: (entity ID, label, start, end).
    first_match = found[0]
    assert len(first_match) == 4
    matched_id, matched_label, span_start, span_end = first_match
    assert matched_id == entity_matcher.vocab.strings[u'TestEntity']
    assert matched_label == 0
    assert span_start == 0
    assert span_end == 2

    # The ID from the match retrieves the attrs the entity was added with.
    looked_up = entity_matcher.get_entity(matched_id)
    assert looked_up == {u'Hello': u'World'}
 | 
						|
 | 
						|
 | 
						|
 |