mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			23 lines
		
	
	
		
			720 B
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			23 lines
		
	
	
		
			720 B
		
	
	
	
		
			Python
		
	
	
	
	
	
# coding: utf8
 | 
						|
from __future__ import unicode_literals
 | 
						|
 | 
						|
from spacy.matcher import PhraseMatcher
 | 
						|
from spacy.tokens import Doc
 | 
						|
 | 
						|
 | 
						|
def test_issue3972(en_vocab):
 | 
						|
    """Test that the PhraseMatcher returns duplicates for duplicate match IDs.
 | 
						|
    """
 | 
						|
    matcher = PhraseMatcher(en_vocab)
 | 
						|
    matcher.add("A", None, Doc(en_vocab, words=["New", "York"]))
 | 
						|
    matcher.add("B", None, Doc(en_vocab, words=["New", "York"]))
 | 
						|
    doc = Doc(en_vocab, words=["I", "live", "in", "New", "York"])
 | 
						|
    matches = matcher(doc)
 | 
						|
 | 
						|
    assert len(matches) == 2
 | 
						|
 | 
						|
    # We should have a match for each of the two rules
 | 
						|
    found_ids = [en_vocab.strings[ent_id] for (ent_id, _, _) in matches]
 | 
						|
    assert "A" in found_ids
 | 
						|
    assert "B" in found_ids
 |