mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-25 21:21:10 +03:00 
			
		
		
		
	* restore load_nlp.VECTORS in the child process * add unit test * fix test * remove unnecessary import * add utf8 encoding * import unicode_literals
		
			
				
	
	
		
			37 lines
		
	
	
		
			1.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			37 lines
		
	
	
		
			1.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| # coding: utf8
 | |
| from __future__ import unicode_literals
 | |
| 
 | |
| from spacy.lang.en import English
 | |
| from spacy.pipeline import EntityRuler
 | |
| 
 | |
| 
 | |
def test_issue4849():
    """Entity ids set by an EntityRuler survive single- and multi-process piping.

    Two PERSON phrase patterns, each with an "id", are matched on the LOWER
    attribute. The sample text mentions both names once, so every run of
    ``nlp.pipe`` must yield exactly two entities with a non-zero ``ent_id``,
    regardless of the number of worker processes.
    """
    person_patterns = [
        {"label": "PERSON", "pattern": 'joe biden', "id": 'joe-biden'},
        {"label": "PERSON", "pattern": 'bernie sanders', "id": 'bernie-sanders'},
    ]

    nlp = English()
    nlp.add_pipe(
        EntityRuler(nlp, patterns=person_patterns, phrase_matcher_attr="LOWER")
    )

    text = """
    The left is starting to take aim at Democratic front-runner Joe Biden.
    Sen. Bernie Sanders joined in her criticism: "There is no 'middle ground' when it comes to climate policy."
    """

    # Run the same check first with 1 process, then with 2 processes.
    for worker_count in (1, 2):
        ents_with_id = sum(
            1
            for doc in nlp.pipe([text], n_process=worker_count)
            for ent in doc.ents
            if ent.ent_id > 0
        )
        assert ents_with_id == 2
 |