mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			38 lines
		
	
	
		
			1.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			38 lines
		
	
	
		
			1.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
# coding: utf-8
 | 
						|
from __future__ import unicode_literals
 | 
						|
import pytest
 | 
						|
 | 
						|
from ...matcher import Matcher
 | 
						|
from ...vocab import Vocab
 | 
						|
from ...attrs import LOWER
 | 
						|
from ...tokens import Doc
 | 
						|
 | 
						|
 | 
						|
def test_basic_case():
    """Check a '*' pattern on LOWER between two fixed tokens.

    The pattern is 'bob', zero-or-more 'and', then 'frank'. Run over the
    words ['bob', 'and', 'and', 'frank'], the matcher is expected to return
    exactly one match, with start 0 and end 4.
    """
    lowercase_getters = {LOWER: lambda string: string.lower()}
    matcher = Matcher(Vocab(lex_attr_getters=lowercase_getters))
    # The flag is registered on the vocab, but the pattern below does not
    # reference it, so the returned flag id is not kept.
    matcher.vocab.add_flag(lambda x: True)

    pattern = [
        {'LOWER': "bob"},
        {'OP': '*', 'LOWER': 'and'},
        {'LOWER': 'frank'},
    ]
    matcher.add('FarAway', None, pattern)

    words = ['bob', 'and', 'and', 'frank']
    matches = matcher(Doc(matcher.vocab, words=words))

    assert len(matches) == 1
    # Each match unpacks to (id, start, end); only the offsets are checked.
    _, start, end = matches[0]
    assert (start, end) == (0, 4)
 | 
						|
 | 
						|
 | 
						|
def test_issue850():
    """Check an ambiguous variable-length pattern is resolved correctly.

    The '*' token spec in the middle of the pattern can also match the
    token that comes after it ('frank'). The matcher is still expected to
    return exactly one match, with start 0 and end 4.
    """
    lowercase_getters = {LOWER: lambda string: string.lower()}
    matcher = Matcher(Vocab(lex_attr_getters=lowercase_getters))
    # Register a flag whose getter returns True for every input.
    # NOTE(review): the pattern below names the flag by the string key
    # 'IS_ANY_TOKEN' rather than by the id returned here -- confirm this is
    # how the Matcher is meant to resolve it.
    matcher.vocab.add_flag(lambda x: True)

    pattern = [
        {'LOWER': "bob"},
        {'OP': '*', 'IS_ANY_TOKEN': True},
        {'LOWER': 'frank'},
    ]
    matcher.add('FarAway', None, pattern)

    words = ['bob', 'and', 'and', 'frank']
    matches = matcher(Doc(matcher.vocab, words=words))

    assert len(matches) == 1
    # Each match unpacks to (id, start, end); only the offsets are checked.
    _, start, end = matches[0]
    assert (start, end) == (0, 4)
 |