mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			54 lines
		
	
	
		
			1.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			54 lines
		
	
	
		
			1.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
# coding: utf-8
 | 
						|
from __future__ import unicode_literals
 | 
						|
 | 
						|
from ...matcher import Matcher
 | 
						|
from ...attrs import ORTH
 | 
						|
from ..util import get_doc
 | 
						|
 | 
						|
import pytest
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize(
    'words,entity',
    [(["Test", "Entity"], "TestEntity")])
def test_matcher_add_empty_entity(en_vocab, words, entity):
    """Check a matcher holding only a pattern-less entity.

    The entity is registered without any patterns, so the matcher is
    expected to report zero patterns and to return an empty list when
    called on a doc built from `words`.
    """
    entity_matcher = Matcher(en_vocab)
    entity_matcher.add_entity(entity)

    sample_doc = get_doc(en_vocab, words)

    # No add_pattern() call was made, so nothing should be counted or matched.
    pattern_count = entity_matcher.n_patterns
    assert pattern_count == 0
    found = entity_matcher(sample_doc)
    assert found == []
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize(
    'entity1,entity2,attrs',
    [("TestEntity", "TestEntity2", {"Hello": "World"})])
def test_matcher_get_entity_attrs(en_vocab, entity1, entity2, attrs):
    """Check that get_entity() returns the attrs given to add_entity().

    `entity1` is added without attrs and is expected to come back as an
    empty dict; `entity2` is added with `attrs` and is expected to come
    back equal to them. Adding `entity2` must leave `entity1` unchanged.
    """
    entity_matcher = Matcher(en_vocab)

    # Entity registered without attributes.
    entity_matcher.add_entity(entity1)
    plain_before = entity_matcher.get_entity(entity1)
    assert plain_before == {}

    # Entity registered with attributes.
    entity_matcher.add_entity(entity2, attrs=attrs)
    with_attrs = entity_matcher.get_entity(entity2)
    assert with_attrs == attrs

    # Re-check the first entity after the second one was added.
    plain_after = entity_matcher.get_entity(entity1)
    assert plain_after == {}
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize(
    'words,entity,attrs',
    [(["Test", "Entity"], "TestEntity", {"Hello": "World"})])
def test_matcher_get_entity_via_match(en_vocab, words, entity, attrs):
    """Check that a match's entity ID can be used to look up its attrs.

    Before a pattern is added the matcher is expected to have no patterns
    and no matches. After adding a two-token ORTH pattern built from
    `words`, the doc is expected to produce exactly one 4-item match whose
    first item resolves, through get_entity(), to `attrs`.
    """
    entity_matcher = Matcher(en_vocab)
    entity_matcher.add_entity(entity, attrs=attrs)
    sample_doc = get_doc(en_vocab, words)

    # Entity exists but has no patterns yet.
    assert entity_matcher.n_patterns == 0
    assert entity_matcher(sample_doc) == []

    # One pattern matching the first two words verbatim.
    token_specs = [{ORTH: words[0]}, {ORTH: words[1]}]
    entity_matcher.add_pattern(entity, token_specs)
    assert entity_matcher.n_patterns == 1

    found = entity_matcher(sample_doc)
    assert len(found) == 1
    only_match = found[0]
    assert len(only_match) == 4

    match_ent_id, match_label, span_start, span_end = only_match
    assert match_ent_id == entity_matcher.vocab.strings[entity]
    assert (match_label, span_start, span_end) == (0, 0, 2)
    assert entity_matcher.get_entity(match_ent_id) == attrs
 |