mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-26 13:41:21 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			59 lines
		
	
	
		
			1.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			59 lines
		
	
	
		
			1.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| from __future__ import unicode_literals
 | |
| import pytest
 | |
| 
 | |
| from ...matcher import Matcher
 | |
| from ...tokens import Doc
 | |
| from ...vocab import Vocab
 | |
| 
 | |
| 
 | |
| @pytest.mark.parametrize(
 | |
|     'string,start,end',
 | |
|     [
 | |
|         ('a', 0, 1),
 | |
|         ('a b', 0, 2),
 | |
|         ('a c', 0, 1),
 | |
|         ('a b c', 0, 2),
 | |
|         ('a b b c', 0, 2),
 | |
|         ('a b b', 0, 2),
 | |
|     ]
 | |
| )
 | |
| def test_issue1450_matcher_end_zero_plus(string, start, end):
 | |
|     '''Test matcher works when patterns end with * operator.
 | |
|     
 | |
|     Original example (rewritten to avoid model usage)
 | |
| 
 | |
|     nlp = spacy.load('en_core_web_sm')
 | |
|     matcher = Matcher(nlp.vocab)
 | |
|     matcher.add(
 | |
|         "TSTEND",
 | |
|         on_match_1,
 | |
|         [
 | |
|             {TAG: "JJ", LOWER: "new"},
 | |
|             {TAG: "NN", 'OP': "*"}
 | |
|         ]
 | |
|     )
 | |
|     doc = nlp(u'Could you create a new ticket for me?')
 | |
|     print([(w.tag_, w.text, w.lower_) for w in doc])
 | |
|     matches = matcher(doc)
 | |
|     print(matches)
 | |
|     assert len(matches) == 1
 | |
|     assert matches[0][1] == 4
 | |
|     assert matches[0][2] == 5
 | |
|     '''
 | |
|     matcher = Matcher(Vocab())
 | |
|     matcher.add(
 | |
|         "TSTEND",
 | |
|         None,
 | |
|         [
 | |
|             {'ORTH': "a"},
 | |
|             {'ORTH': "b", 'OP': "*"}
 | |
|         ]
 | |
|     )
 | |
|     doc = Doc(Vocab(), words=string.split())
 | |
|     matches = matcher(doc)
 | |
|     if start is None or end is None:
 | |
|         assert matches == []
 | |
|     
 | |
|     assert matches[0][1] == start
 | |
|     assert matches[0][2] == end
 |